From 04581221f502ff0a0d1048e30e3ac63de9020beb Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:15:22 +0100 Subject: [PATCH 01/75] config structure --- manga_translator/args.py | 124 ++++++++++++--------------------- manga_translator/config.py | 137 +++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 82 deletions(-) create mode 100644 manga_translator/config.py diff --git a/manga_translator/args.py b/manga_translator/args.py index 3933b04ba..0ac6a65fa 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -87,98 +87,58 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) -parser.add_argument('-m', '--mode', default='batch', type=str, choices=['demo', 'batch', 'web', 'web_client', 'ws', 'api'], help='Run demo in single image demo mode (demo), batch translation mode (batch), web service mode (web)') -parser.add_argument('-i', '--input', default=None, type=path, nargs='+', help='Path to an image file if using demo mode, or path to an image folder if using batch mode') -parser.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images in batch mode') -parser.add_argument('-l', '--target-lang', default='CHS', type=str, choices=VALID_LANGUAGES, help='Destination language') parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') -parser.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') -parser.add_argument('--overwrite', action='store_true', help='Overwrite already translated images in batch mode.') -parser.add_argument('--skip-no-text', action='store_true', help='Skip image without text (Will not be saved).') parser.add_argument('--model-dir', default=None, type=dir_path, help='Model directory (by default ./models in project root)') -parser.add_argument('--skip-lang', default=None, type=str, help='Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG') - g = parser.add_mutually_exclusive_group() g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') - -parser.add_argument('--detector', default='default', type=str, choices=DETECTORS, help='Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it') -parser.add_argument('--ocr', default='48px', type=str, choices=OCRS, help='Optical character recognition (OCR) model to use') -parser.add_argument('--use-mocr-merge', action='store_true', help='Use bbox merge when Manga OCR inference.') -parser.add_argument('--inpainter', default='lama_large', type=str, choices=INPAINTERS, help='Inpainting model to use') -parser.add_argument('--upscaler', default='esrgan', type=str, choices=UPSCALERS, help='Upscaler to use. --upscale-ratio has to be set for it to take effect') -parser.add_argument('--upscale-ratio', default=None, type=float, help='Image upscale ratio applied before detection. 
Can improve text detection.') -parser.add_argument('--colorizer', default=None, type=str, choices=COLORIZERS, help='Colorization model to use.') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--translator', default='google', type=str, choices=TRANSLATORS, help='Language translator to use') -g.add_argument('--translator-chain', default=None, type=translator_chain, help='Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".') -g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. Example: --translator-chain "google:JPN;sugoi:ENG".') - -parser.add_argument('--revert-upscaling', action='store_true', help='Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).') -parser.add_argument('--detection-size', default=1536, type=int, help='Size of image used for detection') -parser.add_argument('--det-rotate', action='store_true', help='Rotate the image for detection. Might improve detection.') -parser.add_argument('--det-auto-rotate', action='store_true', help='Rotate the image for detection to prefer vertical textlines. Might improve detection.') -parser.add_argument('--det-invert', action='store_true', help='Invert the image colors for detection. Might improve detection.') -parser.add_argument('--det-gamma-correct', action='store_true', help='Applies gamma correction for detection. 
Might improve detection.') -parser.add_argument('--unclip-ratio', default=2.3, type=float, help='How much to extend text skeleton to form bounding box') -parser.add_argument('--box-threshold', default=0.7, type=float, help='Threshold for bbox generation') -parser.add_argument('--text-threshold', default=0.5, type=float, help='Threshold for text detection') -parser.add_argument('--min-text-length', default=0, type=int, help='Minimum text length of a text region') -parser.add_argument('--no-text-lang-skip', action='store_true', help='Dont skip text that is seemingly already in the target language.') -parser.add_argument('--inpainting-size', default=2048, type=int, help='Size of image used for inpainting (too large will result in OOM)') -parser.add_argument('--inpainting-precision', default='fp32', type=str, help='Inpainting precision for lama, use bf16 while you can.', choices=['fp32', 'fp16', 'bf16']) -parser.add_argument('--colorization-size', default=576, type=int, help='Size of image used for colorization. Set to -1 to use full image size') -parser.add_argument('--denoise-sigma', default=30, type=int, help='Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.') -parser.add_argument('--mask-dilation-offset', default=0, type=int, help='By how much to extend the text mask to remove left-over text pixels of the original image.') - -parser.add_argument('--disable-font-border', action='store_true', help='Disable font border') -parser.add_argument('--font-size', default=None, type=int, help='Use fixed font size for rendering') -parser.add_argument('--font-size-offset', default=0, type=int, help='Offset font size by a given amount, positive number increase font size and vice versa') -parser.add_argument('--font-size-minimum', default=-1, type=int, help='Minimum output font size. 
Default is image_sides_sum/200') -parser.add_argument('--font-color', default=None, type=str, help='Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.') -parser.add_argument('--line-spacing', default=None, type=float, help='Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--force-horizontal', action='store_true', help='Force text to be rendered horizontally') -g.add_argument('--force-vertical', action='store_true', help='Force text to be rendered vertically') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--align-left', action='store_true', help='Align rendered text left') -g.add_argument('--align-center', action='store_true', help='Align rendered text centered') -g.add_argument('--align-right', action='store_true', help='Align rendered text right') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--uppercase', action='store_true', help='Change text to uppercase') -g.add_argument('--lowercase', action='store_true', help='Change text to lowercase') - -parser.add_argument('--no-hyphenation', action='store_true', help='If renderer should be splitting up words using a hyphen character (-)') -parser.add_argument('--manga2eng', action='store_true', help='Render english text translated from manga with some additional typesetting. 
Ignores some other argument options') -parser.add_argument('--gpt-config', type=file_path, help='Path to GPT config file, more info in README') -parser.add_argument('--use-mtpe', action='store_true', help='Turn on/off machine translation post editing (MTPE) on the command line (works only on linux right now)') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--save-text', action='store_true', help='Save extracted text and translations into a text file.') -g.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') - -parser.add_argument('--filter-text', default=None, type=str, help='Filter regions by their text with a regex. Example usage: --text-filter ".*badtext.*"') -parser.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by outputting blank, inpainted images, plus copies of the original for reference') parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') -parser.add_argument('--gimp-font', default='Sans-serif', type=str, help='Font family to use for gimp rendering.') -parser.add_argument('--host', default='127.0.0.1', type=str, help='Used by web module to decide which host to attach to') -parser.add_argument('--port', default=5003, type=int, help='Used by web module to decide which port to attach to') -parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Used by web module as secret for securing internal web server communication') -# parser.add_argument('--log-web', action='store_true', help='Used by web module to decide if web logs should be surfaced') -parser.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') -parser.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') -parser.add_argument('--ignore-bubble', default=0, type=int, help='The threshold for 
ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles') - -parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') - parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') +subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') + +# Batch mode +parser_batch = subparsers.add_parser('batch', help='Run in batch translation mode') +parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') +parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') +parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') +parser_batch.add_argument('--overwrite', action='store_true', help='Overwrite already translated images') +parser_batch.add_argument('--skip-no-text', action='store_true', help='Skip image without text (Will not be saved).') +parser_batch.add_argument('--use-mtpe', action='store_true', help='Turn on/off machine translation post editing (MTPE) on the command line (works only on linux right now)') +g_batch = parser_batch.add_mutually_exclusive_group() +g_batch.add_argument('--save-text', action='store_true', help='Save extracted text and translations into a text file.') +g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') +parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting 
by outputting blank, inpainted images, plus copies of the original for reference') +parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') + +# Web mode +parser_web = subparsers.add_parser('web', help='Run in web service mode') +parser_web.add_argument('--host', default='127.0.0.1', type=str, help='Host for web service') +parser_web.add_argument('--port', default=5003, type=int, help='Port for web service') +parser_web.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + +# Web client mode +parser_web_client = subparsers.add_parser('web_client', help='Run in web client mode') +parser_web_client.add_argument('--host', default='127.0.0.1', type=str, help='Host for web client') +parser_web_client.add_argument('--port', default=5003, type=int, help='Port for web client') +parser_web_client.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web client communication') + +# WebSocket mode +parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') +parser_ws.add_argument('--host', default='127.0.0.1', type=str, help='Host for WebSocket service') +parser_ws.add_argument('--port', default=5003, type=int, help='Port for WebSocket service') +parser_ws.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal WebSocket communication') +parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') + +# API mode +parser_api = subparsers.add_parser('api', help='Run in API mode') +parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') +parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') +parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, 
help='Nonce for securing internal API server communication') + + # Generares dict with a default value for each argument DEFAULT_ARGS = vars(parser.parse_args([])) diff --git a/manga_translator/config.py b/manga_translator/config.py new file mode 100644 index 000000000..abb7b9268 --- /dev/null +++ b/manga_translator/config.py @@ -0,0 +1,137 @@ +from pydantic import BaseModel +from typing import Optional + +class RenderConfig(BaseModel): + """""" + """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" + renderer: str = 'default' #todo: validate {"default", "manga2eng"} #todo: convert to enum + """Align rendered text""" + alignment: str = 'auto' # todo: validate {'left','center','right'} #todo: convert to enum + """Disable font border""" + disable_font_border: bool = False + """Offset font size by a given amount, positive number increase font size and vice versa""" + font_size_offset: int = 0 + """Minimum output font size. Default is image_sides_sum/200""" + font_size_minimum: int = -1 + """Force text to be rendered horizontally/vertically/none""" + direction: str = 'auto' # todo: validate {'auto', 'h', 'v'} #todo: convert to enum + """Change text to uppercase""" + uppercase: bool = False + """Change text to lowercase""" + lowercase: bool = False + """Font family to use for gimp rendering.""" + gimp_font: str = 'Sans-serif' + """If renderer should be splitting up words using a hyphen character (-)""" + no_hyphenation: bool = False + """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" + font_color: Optional[str] = None + """Line spacing is font_size * this value. 
Default is 0.01 for horizontal text and 0.2 for vertical.""" + line_spacing: Optional[float] = None + """Use fixed font size for rendering""" + font_size: Optional[int] = None + +class UpscaleConfig(BaseModel): + """""" + """Upscaler to use. --upscale-ratio has to be set for it to take effect""" + upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum + """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" + revert_upscaling: bool = False + """Image upscale ratio applied before detection. Can improve text detection.""" + upscale_ratio: Optional[float] = None + + +class TranslatorConfig(BaseModel): + """""" + """Language translator to use""" + translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum + """Destination language""" + target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum + """Dont skip text that is seemingly already in the target language.""" + no_text_lang_skip: bool = False + """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" + skip_lang: Optional[str] = None + """Path to GPT config file, more info in README""" + gpt_config: Optional[str] = None # todo: no more path + """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" + translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + +class DetectorConfig(BaseModel): + """""" + """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" + detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum + """Size of image used for detection""" + detection_size: int = 1536 + """Threshold for text detection""" + text_threshold: float = 0.5 + """Rotate the image for detection. Might improve detection.""" + det_rotate: bool = False + """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" + det_auto_rotate: bool = False + """Invert the image colors for detection. Might improve detection.""" + det_invert: bool = False + """Applies gamma correction for detection. Might improve detection.""" + det_gamma_correct: bool = False + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + ignore_bubble: int = 0 + +class InpainterConfig(BaseModel): + """""" + """Inpainting model to use""" + inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum + """Size of image used for inpainting (too large will result in OOM)""" + inpainting_size: int = 2048 + """Inpainting precision for lama, use bf16 while you can.""" + inpainting_precision: str = 'fp32' #todo: validate ['fp32', 'fp16', 'bf16'] #todo: convert to enum + +class ColorizerConfig(BaseModel): + """""" + """Size of image used for colorization. Set to -1 to use full image size""" + colorization_size: int = 576 + """Used by colorizer and affects color strength, range from 0 to 255 (default 30). 
-1 turns it off.""" + denoise_sigma: int = 30 + """Colorization model to use.""" + colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + + +class OcrConfig(BaseModel): + """""" + """Use bbox merge when Manga OCR inference.""" + use_mocr_merge: bool = False + """Optical character recognition (OCR) model to use""" + ocr: str = '48px' #todo: validate OCRS #todo: convert to enum + """Minimum text length of a text region""" + min_text_length: int = 0 + +class Config(BaseModel): + # unclear + pre_dict: Optional[str] = None + post_dict: Optional[str] = None + + # json + """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" + filter_text: Optional[str] = None + """render configs""" + render: RenderConfig + """upscaler configs""" + upscale: UpscaleConfig + """tanslator configs""" + translator: TranslatorConfig + """detector configs""" + detector: DetectorConfig + """colorizer configs""" + colorizer: ColorizerConfig + """inpainter configs""" + inpainter: InpainterConfig + """Ocr configs""" + ocr: OcrConfig + # ? + """How much to extend text skeleton to form bounding box""" + unclip_ratio: float = 2.3 + """Set the convolution kernel size of the text erasure area to completely clean up text residues""" + kernel_size: int = 3 + """By how much to extend the text mask to remove left-over text pixels of the original image.""" + mask_dilation_offset: int = 0 + """Threshold for bbox generation""" + box_threshold: float = 0.7 \ No newline at end of file From 5711acc08a2d073bf4b9d66c4128c60e00ca5dc3 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:37:17 +0100 Subject: [PATCH 02/75] i have python. why bellow? 
--- manga_translator/config.py | 136 +++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index abb7b9268..5340b2be9 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,108 +1,126 @@ +from enum import Enum, IntEnum + from pydantic import BaseModel from typing import Optional +class Renderer(IntEnum): + default = 0 + manga2Eng = 1 + +class Alignment(IntEnum): + auto = 0 + left = 1 + center = 2 + right = 3 + +class Direction(IntEnum): + auto = 0 + h = 1 + v = 2 + +class InpaintPrecision(IntEnum): + fp32 = 0 + fp16 = 1 + bf16 = 2 + class RenderConfig(BaseModel): - """""" + renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" - renderer: str = 'default' #todo: validate {"default", "manga2eng"} #todo: convert to enum + alignment: Alignment = Alignment.auto """Align rendered text""" - alignment: str = 'auto' # todo: validate {'left','center','right'} #todo: convert to enum - """Disable font border""" disable_font_border: bool = False - """Offset font size by a given amount, positive number increase font size and vice versa""" + """Disable font border""" font_size_offset: int = 0 - """Minimum output font size. Default is image_sides_sum/200""" + """Offset font size by a given amount, positive number increase font size and vice versa""" font_size_minimum: int = -1 + """Minimum output font size. 
Default is image_sides_sum/200""" + direction: Direction = Direction.auto """Force text to be rendered horizontally/vertically/none""" - direction: str = 'auto' # todo: validate {'auto', 'h', 'v'} #todo: convert to enum - """Change text to uppercase""" uppercase: bool = False - """Change text to lowercase""" + """Change text to uppercase""" lowercase: bool = False - """Font family to use for gimp rendering.""" + """Change text to lowercase""" gimp_font: str = 'Sans-serif' - """If renderer should be splitting up words using a hyphen character (-)""" + """Font family to use for gimp rendering.""" no_hyphenation: bool = False - """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" + """If renderer should be splitting up words using a hyphen character (-)""" font_color: Optional[str] = None - """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" + """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" line_spacing: Optional[float] = None - """Use fixed font size for rendering""" + """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None + """Use fixed font size for rendering""" + class UpscaleConfig(BaseModel): - """""" - """Upscaler to use. --upscale-ratio has to be set for it to take effect""" upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum - """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" + """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False - """Image upscale ratio applied before detection. 
Can improve text detection.""" + """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" upscale_ratio: Optional[float] = None - + """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): - """""" - """Language translator to use""" translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum - """Destination language""" + """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum - """Dont skip text that is seemingly already in the target language.""" + """Destination language""" no_text_lang_skip: bool = False - """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" + """Dont skip text that is seemingly already in the target language.""" skip_lang: Optional[str] = None - """Path to GPT config file, more info in README""" + """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" gpt_config: Optional[str] = None # todo: no more path - """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" + """Path to GPT config file, more info in README""" translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one - """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. Example: --translator-chain "google:JPN;sugoi:ENG".'""" + """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" class DetectorConfig(BaseModel): """""" - """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum - """Size of image used for detection""" + """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detection_size: int = 1536 - """Threshold for text detection""" + """Size of image used for detection""" text_threshold: float = 0.5 - """Rotate the image for detection. Might improve detection.""" + """Threshold for text detection""" det_rotate: bool = False - """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" + """Rotate the image for detection. Might improve detection.""" det_auto_rotate: bool = False - """Invert the image colors for detection. Might improve detection.""" + """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" det_invert: bool = False - """Applies gamma correction for detection. Might improve detection.""" + """Invert the image colors for detection. Might improve detection.""" det_gamma_correct: bool = False - """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + """Applies gamma correction for detection. Might improve detection.""" ignore_bubble: int = 0 + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. 
If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class InpainterConfig(BaseModel): - """""" - """Inpainting model to use""" inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum - """Size of image used for inpainting (too large will result in OOM)""" + """Inpainting model to use""" inpainting_size: int = 2048 + """Size of image used for inpainting (too large will result in OOM)""" + inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" - inpainting_precision: str = 'fp32' #todo: validate ['fp32', 'fp16', 'bf16'] #todo: convert to enum + class ColorizerConfig(BaseModel): - """""" - """Size of image used for colorization. Set to -1 to use full image size""" colorization_size: int = 576 - """Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.""" + """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 - """Colorization model to use.""" + """Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.""" colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + """Colorization model to use.""" + class OcrConfig(BaseModel): - """""" - """Use bbox merge when Manga OCR inference.""" use_mocr_merge: bool = False - """Optical character recognition (OCR) model to use""" + """Use bbox merge when Manga OCR inference.""" ocr: str = '48px' #todo: validate OCRS #todo: convert to enum - """Minimum text length of a text region""" + """Optical character recognition (OCR) model to use""" min_text_length: int = 0 + """Minimum text length of a text region""" class Config(BaseModel): # unclear @@ -110,28 +128,28 @@ class Config(BaseModel): post_dict: Optional[str] = None # json - """Filter regions by their text with a regex. 
Example usage: '.*badtext.*'""" filter_text: Optional[str] = None - """render configs""" + """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" render: RenderConfig - """upscaler configs""" + """render configs""" upscale: UpscaleConfig - """tanslator configs""" + """upscaler configs""" translator: TranslatorConfig - """detector configs""" + """tanslator configs""" detector: DetectorConfig - """colorizer configs""" + """detector configs""" colorizer: ColorizerConfig - """inpainter configs""" + """colorizer configs""" inpainter: InpainterConfig - """Ocr configs""" + """inpainter configs""" ocr: OcrConfig + """Ocr configs""" # ? - """How much to extend text skeleton to form bounding box""" unclip_ratio: float = 2.3 - """Set the convolution kernel size of the text erasure area to completely clean up text residues""" + """How much to extend text skeleton to form bounding box""" kernel_size: int = 3 - """By how much to extend the text mask to remove left-over text pixels of the original image.""" + """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 + """By how much to extend the text mask to remove left-over text pixels of the original image.""" + box_threshold: float = 0.7 """Threshold for bbox generation""" - box_threshold: float = 0.7 \ No newline at end of file From c5feafea5ead4b2426e801b87a70909351b0d650 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:50:18 +0100 Subject: [PATCH 03/75] update to enum --- manga_translator/colorization/__init__.py | 3 +- manga_translator/config.py | 66 ++++++++++++++++++++--- manga_translator/detection/__init__.py | 11 ++-- manga_translator/inpainting/__init__.py | 13 ++--- manga_translator/ocr/__init__.py | 9 ++-- manga_translator/translators/__init__.py | 49 ++++++++--------- manga_translator/upscaling/__init__.py | 7 +-- 7 files changed, 109 insertions(+), 
49 deletions(-) diff --git a/manga_translator/colorization/__init__.py b/manga_translator/colorization/__init__.py index aef0dcd0e..d7423286c 100644 --- a/manga_translator/colorization/__init__.py +++ b/manga_translator/colorization/__init__.py @@ -2,9 +2,10 @@ from .common import CommonColorizer, OfflineColorizer from .manga_colorization_v2 import MangaColorizationV2 +from ..config import Colorizer COLORIZERS = { - 'mc2': MangaColorizationV2, + Colorizer.mc2: MangaColorizationV2, } colorizer_cache = {} diff --git a/manga_translator/config.py b/manga_translator/config.py index 5340b2be9..30c44193c 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -23,6 +23,60 @@ class InpaintPrecision(IntEnum): fp16 = 1 bf16 = 2 +class Detector(IntEnum): + default = 0 + dbconvnext = 1 + ctd = 2 + craft = 3 + none = 4 + +class Inpainter(IntEnum): + default = 0 + lama_large = 1 + lama_mpe = 2 + sd = 3 + none = 4 + original = 5 + +class Colorizer(IntEnum): + none = 0 + mc2 = 1 + +class Ocr(IntEnum): + ocr32px = 0 + ocr48px = 1 + ocr48px_ctc = 2 + mocr = 3 +class Translator(IntEnum): + youdao = 0 + baidu = 1 + deepl = 2 + papago = 3 + caiyun = 4 + gpt3 = 5 + gpt3_5 = 6 + gpt4 = 7 + none = 8 + original = 9 + sakura = 10 + deepseek = 11 + groq = 12 + offline = 13 + nllb = 14 + nllb_big = 15 + sugoi = 16 + jparacrawl = 17 + jparacrawl_big = 18 + m2m100 = 19 + m2m100_big = 20 + mbart50 = 21 + qwen2 = 22 + qwen2_big = 23 +class Upscaler(IntEnum): + waifu2x = 0 + esrgan = 1 + upscler4xultrasharp = 2 + class RenderConfig(BaseModel): renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" @@ -53,7 +107,7 @@ class RenderConfig(BaseModel): class UpscaleConfig(BaseModel): - upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum + upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. 
--upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" @@ -61,7 +115,7 @@ class UpscaleConfig(BaseModel): """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): - translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum + translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum """Destination language""" @@ -78,7 +132,7 @@ class TranslatorConfig(BaseModel): class DetectorConfig(BaseModel): """""" - detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum + detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detection_size: int = 1536 """Size of image used for detection""" @@ -96,7 +150,7 @@ class DetectorConfig(BaseModel): """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class InpainterConfig(BaseModel): - inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum + inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 """Size of image used for inpainting (too large will result in OOM)""" @@ -109,7 +163,7 @@ class ColorizerConfig(BaseModel): """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 """Used by colorizer and affects color strength, range from 0 to 255 (default 30). 
-1 turns it off.""" - colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + colorizer: Colorizer = Colorizer.none """Colorization model to use.""" @@ -117,7 +171,7 @@ class ColorizerConfig(BaseModel): class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" - ocr: str = '48px' #todo: validate OCRS #todo: convert to enum + ocr: Ocr = Ocr.ocr48px """Optical character recognition (OCR) model to use""" min_text_length: int = 0 """Minimum text length of a text region""" diff --git a/manga_translator/detection/__init__.py b/manga_translator/detection/__init__.py index 96e930f54..2f8bd24e4 100644 --- a/manga_translator/detection/__init__.py +++ b/manga_translator/detection/__init__.py @@ -6,13 +6,14 @@ from .craft import CRAFTDetector from .none import NoneDetector from .common import CommonDetector, OfflineDetector +from ..config import Detector DETECTORS = { - 'default': DefaultDetector, - 'dbconvnext': DBConvNextDetector, - 'ctd': ComicTextDetector, - 'craft': CRAFTDetector, - 'none': NoneDetector, + Detector.default: DefaultDetector, + Detector.dbconvnext: DBConvNextDetector, + Detector.ctd: ComicTextDetector, + Detector.craft: CRAFTDetector, + Detector.none: NoneDetector, } detector_cache = {} diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 5b7a9b161..16f242126 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -6,14 +6,15 @@ from .inpainting_sd import StableDiffusionInpainter from .none import NoneInpainter from .original import OriginalInpainter +from ..config import Inpainter INPAINTERS = { - 'default': AotInpainter, - 'lama_large': LamaLargeInpainter, - 'lama_mpe': LamaMPEInpainter, - 'sd': StableDiffusionInpainter, - 'none': NoneInpainter, - 'original': OriginalInpainter, + Inpainter.default: AotInpainter, + Inpainter.lama_large: LamaLargeInpainter, + Inpainter.lama_mpe: 
LamaMPEInpainter, + Inpainter.sd: StableDiffusionInpainter, + Inpainter.none: NoneInpainter, + Inpainter.original: OriginalInpainter, } inpainter_cache = {} diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 15d7a0187..75390d764 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -6,13 +6,14 @@ from .model_48px import Model48pxOCR from .model_48px_ctc import Model48pxCTCOCR from .model_manga_ocr import ModelMangaOCR +from ..config import Ocr from ..utils import Quadrilateral OCRS = { - '32px': Model32pxOCR, - '48px': Model48pxOCR, - '48px_ctc': Model48pxCTCOCR, - 'mocr': ModelMangaOCR, + Ocr.ocr32px: Model32pxOCR, + Ocr.ocr48px: Model48pxOCR, + Ocr.ocr48px_ctc: Model48pxCTCOCR, + Ocr.mocr: ModelMangaOCR, } ocr_cache = {} diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 2ff45ea9c..30bff773e 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -19,36 +19,37 @@ from .sakura import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator +from ..config import Translator OFFLINE_TRANSLATORS = { - 'offline': SelectiveOfflineTranslator, - 'nllb': NLLBTranslator, - 'nllb_big': NLLBBigTranslator, - 'sugoi': SugoiTranslator, - 'jparacrawl': JparacrawlTranslator, - 'jparacrawl_big': JparacrawlBigTranslator, - 'm2m100': M2M100Translator, - 'm2m100_big': M2M100BigTranslator, - 'mbart50': MBart50Translator, - 'qwen2': Qwen2Translator, - 'qwen2_big': Qwen2BigTranslator, + Translator.offline: SelectiveOfflineTranslator, + Translator.nllb: NLLBTranslator, + Translator.nllb_big: NLLBBigTranslator, + Translator.sugoi: SugoiTranslator, + Translator.jparacrawl: JparacrawlTranslator, + Translator.jparacrawl_big: JparacrawlBigTranslator, + Translator.m2m100: M2M100Translator, + Translator.m2m100_big: M2M100BigTranslator, + Translator.mbart50: MBart50Translator, + 
Translator.qwen2: Qwen2Translator, + Translator.qwen2_big: Qwen2BigTranslator, } TRANSLATORS = { # 'google': GoogleTranslator, - 'youdao': YoudaoTranslator, - 'baidu': BaiduTranslator, - 'deepl': DeeplTranslator, - 'papago': PapagoTranslator, - 'caiyun': CaiyunTranslator, - 'gpt3': GPT3Translator, - 'gpt3.5': GPT35TurboTranslator, - 'gpt4': GPT4Translator, - 'none': NoneTranslator, - 'original': OriginalTranslator, - 'sakura': SakuraTranslator, - 'deepseek': DeepseekTranslator, - 'groq':GroqTranslator, + Translator.youdao: YoudaoTranslator, + Translator.baidu: BaiduTranslator, + Translator.deepl: DeeplTranslator, + Translator.papago: PapagoTranslator, + Translator.caiyun: CaiyunTranslator, + Translator.gpt3: GPT3Translator, + Translator.gpt3_5: GPT35TurboTranslator, + Translator.gpt4: GPT4Translator, + Translator.none: NoneTranslator, + Translator.original: OriginalTranslator, + Translator.sakura: SakuraTranslator, + Translator.deepseek: DeepseekTranslator, + Translator.groq:GroqTranslator, **OFFLINE_TRANSLATORS, } translator_cache = {} diff --git a/manga_translator/upscaling/__init__.py b/manga_translator/upscaling/__init__.py index b5a972dd6..768527890 100644 --- a/manga_translator/upscaling/__init__.py +++ b/manga_translator/upscaling/__init__.py @@ -5,11 +5,12 @@ from .waifu2x import Waifu2xUpscaler from .esrgan import ESRGANUpscaler from .esrgan_pytorch import ESRGANUpscalerPytorch +from ..config import Upscaler UPSCALERS = { - 'waifu2x': Waifu2xUpscaler, - 'esrgan': ESRGANUpscaler, - '4xultrasharp': ESRGANUpscalerPytorch, + Upscaler.waifu2x: Waifu2xUpscaler, + Upscaler.esrgan: ESRGANUpscaler, + Upscaler.upscler4xultrasharp: ESRGANUpscalerPytorch, } upscaler_cache = {} From 2ccd8023aa5b96b5ad8d3ae655f2dae497417c39 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 00:40:57 +0100 Subject: [PATCH 04/75] split manga_translator.py into multiple files --- manga_translator/__main__.py | 6 +- 
manga_translator/manga_translator.py | 891 +-------------------------- manga_translator/mode/api.py | 290 +++++++++ manga_translator/mode/local.py | 201 ++++++ manga_translator/mode/web.py | 151 +++++ manga_translator/mode/ws.py | 264 ++++++++ 6 files changed, 916 insertions(+), 887 deletions(-) create mode 100644 manga_translator/mode/api.py create mode 100644 manga_translator/mode/local.py create mode 100644 manga_translator/mode/web.py create mode 100644 manga_translator/mode/ws.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index c4f580031..c0e01e508 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -5,9 +5,6 @@ from .manga_translator import ( MangaTranslator, - MangaTranslatorWeb, - MangaTranslatorWS, - MangaTranslatorAPI, set_main_logger, ) from .args import parser @@ -71,14 +68,17 @@ async def dispatch(args: Namespace): await dispatch(args.host, args.port, translation_params=args_dict) elif args.mode == 'web_client': + from manga_translator.mode.web import MangaTranslatorWeb translator = MangaTranslatorWeb(args_dict) await translator.listen(args_dict) elif args.mode == 'ws': + from manga_translator.mode.ws import MangaTranslatorWS translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) elif args.mode == 'api': + from manga_translator.mode.api import MangaTranslatorAPI translator = MangaTranslatorAPI(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 85dbe430b..cb17850e2 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -1,65 +1,44 @@ -import asyncio -import base64 -import io - import cv2 -from aiohttp.web_middlewares import middleware from omegaconf import OmegaConf import langcodes import langdetect -import requests import os import re import torch -import time import logging import numpy as np from PIL import Image -from typing import List, 
Tuple, Union -from aiohttp import web -from marshmallow import Schema, fields, ValidationError +from typing import Union -from manga_translator.utils.threading import Throttler -from .args import DEFAULT_ARGS, translator_chain +from .args import DEFAULT_ARGS from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, ModelWrapper, Context, - PriorityLock, load_image, dump_image, - replace_prefix, visualize_textblocks, - add_file_logger, - remove_file_logger, is_valuable_text, - rgb2hex, hex2rgb, - get_color_name, - natural_sort, sort_regions, ) -from .detection import DETECTORS, dispatch as dispatch_detection, prepare as prepare_detection +from .detection import dispatch as dispatch_detection, prepare as prepare_detection from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling, UPSCALERS -from .ocr import OCRS, dispatch as dispatch_ocr, prepare as prepare_ocr +from .ocr import dispatch as dispatch_ocr, prepare as prepare_ocr from .textline_merge import dispatch as dispatch_textline_merge from .mask_refinement import dispatch as dispatch_mask_refinement -from .inpainting import INPAINTERS, dispatch as dispatch_inpainting, prepare as prepare_inpainting +from .inpainting import dispatch as dispatch_inpainting, prepare as prepare_inpainting from .translators import ( - TRANSLATORS, - VALID_LANGUAGES, LANGDETECT_MAP, - LanguageUnsupportedException, TranslatorChain, dispatch as dispatch_translation, prepare as prepare_translation, ) from .colorization import dispatch as dispatch_colorization, prepare as prepare_colorization from .rendering import dispatch as dispatch_rendering, dispatch_eng_render -from .save import save_result # Will be overwritten by __main__.py if module is being run directly (with python -m) logger = logging.getLogger('manga_translator') @@ -78,7 +57,7 @@ class TranslationInterrupt(Exception): pass -class MangaTranslator(): +class MangaTranslator: def __init__(self, params: dict = None): self._progress_hooks = [] @@ 
-117,158 +96,6 @@ def parse_init_params(self, params: dict): def using_gpu(self): return self.device.startswith('cuda') or self.device == 'mps' - async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): - """ - Translates an image or folder (recursively) specified through the path. - """ - if not os.path.exists(path): - raise FileNotFoundError(path) - path = os.path.abspath(os.path.expanduser(path)) - dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' - params = params or {} - - # Handle format - file_ext = params.get('format') - if params.get('save_quality', 100) < 100: - if not params.get('format'): - file_ext = 'jpg' - elif params.get('format') != 'jpg': - raise ValueError('--save-quality of lower than 100 is only supported for .jpg files') - - if os.path.isfile(path): - # Determine destination file path - if not dest: - # Use the same folder as the source - p, ext = os.path.splitext(path) - _dest = f'{p}-translated.{file_ext or ext[1:]}' - elif not os.path.basename(dest): - p, ext = os.path.splitext(os.path.basename(path)) - # If the folders differ use the original filename from the source - if os.path.dirname(path) != dest: - _dest = os.path.join(dest, f'{p}.{file_ext or ext[1:]}') - else: - _dest = os.path.join(dest, f'{p}-translated.{file_ext or ext[1:]}') - else: - p, ext = os.path.splitext(dest) - _dest = f'{p}.{file_ext or ext[1:]}' - await self.translate_file(path, _dest, params) - - elif os.path.isdir(path): - # Determine destination folder path - if path[-1] == '\\' or path[-1] == '/': - path = path[:-1] - _dest = dest or path + '-translated' - if os.path.exists(_dest) and not os.path.isdir(_dest): - raise FileExistsError(_dest) - - translated_count = 0 - for root, subdirs, files in os.walk(path): - files = natural_sort(files) - dest_root = replace_prefix(root, path, _dest) - os.makedirs(dest_root, exist_ok=True) - for f in files: - if f.lower() == '.thumb': - continue - - file_path = 
os.path.join(root, f) - output_dest = replace_prefix(file_path, path, _dest) - p, ext = os.path.splitext(output_dest) - output_dest = f'{p}.{file_ext or ext[1:]}' - - if await self.translate_file(file_path, output_dest, params): - translated_count += 1 - if translated_count == 0: - logger.info('No further untranslated files found. Use --overwrite to write over existing translations.') - else: - logger.info(f'Done. Translated {translated_count} image{"" if translated_count == 1 else "s"}') - - async def translate_file(self, path: str, dest: str, params: dict): - if not params.get('overwrite') and os.path.exists(dest): - logger.info( - f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') - await self._report_progress('saved', True) - return True - - logger.info(f'Translating: "{path}"') - - # Turn dict to context to make values also accessible through params. - params = params or {} - ctx = Context(**params) - self._preprocess_params(ctx) - - attempts = 0 - while ctx.attempts == -1 or attempts < ctx.attempts + 1: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' - + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - return await self._translate_file(path, dest, ctx) - - except TranslationInterrupt: - break - except Exception as e: - if isinstance(e, LanguageUnsupportedException): - await self._report_progress('error-lang', True) - else: - await self._report_progress('error', True) - if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): - raise - else: - logger.error(f'{e.__class__.__name__}: {e}', - exc_info=e if self.verbose else None) - attempts += 1 - return False - - async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: - if path.endswith('.txt'): - with open(path, 'r') as f: - queries = f.read().split('\n') - translated_sentences = \ - await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, - 'cpu' if self._gpu_limited_memory else self.device) - p, ext = os.path.splitext(dest) - if ext != '.txt': - dest = p + '.txt' - logger.info(f'Saving "{dest}"') - with open(dest, 'w') as f: - f.write('\n'.join(translated_sentences)) - return True - - # TODO: Add .gif handler - - else: # Treat as image - try: - img = Image.open(path) - img.verify() - img = Image.open(path) - except Exception: - logger.warn(f'Failed to open image: {path}') - return False - - ctx = await self.translate(img, ctx) - result = ctx.result - - # Save result - if ctx.skip_no_text and not ctx.text_regions: - logger.debug('Not saving due to --skip-no-text') - return True - if result: - logger.info(f'Saving "{dest}"') - save_result(result, dest, ctx) - await self._report_progress('saved', True) - - if ctx.save_text or ctx.save_text_file or ctx.prep_manual: - if ctx.prep_manual: - # Save original image next to translated - p, ext = os.path.splitext(dest) - img_filename = p + '-orig' + ext - img_path = os.path.join(os.path.dirname(dest), img_filename) - img.save(img_path, quality=ctx.save_quality) - if ctx.text_regions: - self._save_text_to_file(path, ctx) - 
return True - return False - async def translate(self, image: Image.Image, params: Union[dict, Context] = None) -> Context: """ Translates a PIL image from a manga. Returns dict with result and intermediates of translation. @@ -771,708 +598,4 @@ async def ph(state, finished): elif state in LOG_MESSAGES_ERROR: logger.error(LOG_MESSAGES_ERROR[state]) - self.add_progress_hook(ph) - - def _save_text_to_file(self, image_path: str, ctx: Context): - cached_colors = [] - - def identify_colors(fg_rgb: List[int]): - idx = 0 - for rgb, _ in cached_colors: - # If similar color already saved - if abs(rgb[0] - fg_rgb[0]) + abs(rgb[1] - fg_rgb[1]) + abs(rgb[2] - fg_rgb[2]) < 50: - break - else: - idx += 1 - else: - cached_colors.append((fg_rgb, get_color_name(fg_rgb))) - return idx + 1, cached_colors[idx][1] - - s = f'\n[{image_path}]\n' - for i, region in enumerate(ctx.text_regions): - fore, back = region.get_font_colors() - color_id, color_name = identify_colors(fore) - - s += f'\n-- {i + 1} --\n' - s += f'color: #{color_id}: {color_name} (fg, bg: {rgb2hex(*fore)} {rgb2hex(*back)})\n' - s += f'text: {region.text}\n' - s += f'trans: {region.translation}\n' - for line in region.lines: - s += f'coords: {list(line.ravel())}\n' - s += '\n' - - text_output_file = ctx.text_output_file - if not text_output_file: - text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' - - with open(text_output_file, 'a', encoding='utf-8') as f: - f.write(s) - - -class MangaTranslatorWeb(MangaTranslator): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. 
- """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from .translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) - finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. 
- """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, ctx: Context): - # Run machine translation as reference for manual translation (if `--translator=none` is not set) - text_regions = await super()._run_text_translation(ctx) - - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations 
= ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = ctx.translator.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions - - -class MangaTranslatorWS(MangaTranslator): - def __init__(self, params: dict = None): - super().__init__(params) - self.url = params.get('ws_url') - self.secret = params.get('ws_secret', os.getenv('WS_SECRET', '')) - self.ignore_errors = params.get('ignore_errors', True) - - self._task_id = None - self._websocket = None - - async def listen(self, translation_params: dict = None): - from threading import Thread - import io - import aioshutil - from aiofiles import os - import websockets - from .server import ws_pb2 - - self._server_loop = asyncio.new_event_loop() - self.task_lock = PriorityLock() - self.counter = 0 - - async def _send_and_yield(websocket, msg): - # send message and yield control to the event loop (to actually send the message) - await websocket.send(msg) - await asyncio.sleep(0) - - send_throttler = Throttler(0.2) - send_and_yield = send_throttler.wrap(_send_and_yield) - - async def sync_state(state, finished): - if self._websocket is None: - return - msg = ws_pb2.WebSocketMessage() - msg.status.id = self._task_id - msg.status.status = state - self._server_loop.call_soon_threadsafe( - asyncio.create_task, - send_and_yield(self._websocket, msg.SerializeToString()) - ) - - self.add_progress_hook(sync_state) - - async def translate(task_id, websocket, image, params): - async with self.task_lock((1 << 31) - params['ws_count']): - self._task_id = task_id - self._websocket = websocket - result = await self.translate(image, params) - self._task_id = None - self._websocket = None - return result - - async def 
server_send_status(websocket, task_id, status): - msg = ws_pb2.WebSocketMessage() - msg.status.id = task_id - msg.status.status = status - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - - async def server_process_inner(main_loop, logger_task, session, websocket, task) -> Tuple[bool, bool]: - logger_task.info(f'-- Processing task {task.id}') - await server_send_status(websocket, task.id, 'pending') - - if self.verbose: - await aioshutil.rmtree(f'result/{task.id}', ignore_errors=True) - await os.makedirs(f'result/{task.id}', exist_ok=True) - - params = { - 'target_lang': task.target_language, - 'skip_lang': task.skip_language, - 'detector': task.detector, - 'direction': task.direction, - 'translator': task.translator, - 'size': task.size, - 'ws_event_loop': asyncio.get_event_loop(), - 'ws_count': self.counter, - } - self.counter += 1 - - logger_task.info(f'-- Downloading image from {task.source_image}') - await server_send_status(websocket, task.id, 'downloading') - async with session.get(task.source_image) as resp: - if resp.status == 200: - source_image = await resp.read() - else: - msg = ws_pb2.WebSocketMessage() - msg.status.id = task.id - msg.status.status = 'error-download' - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - return False, False - - logger_task.info(f'-- Translating image') - if translation_params: - for p, default_value in translation_params.items(): - current_value = params.get(p) - params[p] = current_value if current_value is not None else default_value - - image = Image.open(io.BytesIO(source_image)) - - (ori_w, ori_h) = image.size - if max(ori_h, ori_w) > 1200: - params['upscale_ratio'] = 1 - - await server_send_status(websocket, task.id, 'preparing') - # translation_dict = await self.translate(image, params) - translation_dict = await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe( - translate(task.id, websocket, image, params), - main_loop - ) - ) - await send_throttler.flush() - 
- output: Image.Image = translation_dict.result - if output is not None: - await server_send_status(websocket, task.id, 'saving') - - output = output.resize((ori_w, ori_h), resample=Image.LANCZOS) - - img = io.BytesIO() - output.save(img, format='PNG') - if self.verbose: - output.save(self._result_path('ws_final.png')) - - img_bytes = img.getvalue() - logger_task.info(f'-- Uploading result to {task.translation_mask}') - await server_send_status(websocket, task.id, 'uploading') - async with session.put(task.translation_mask, data=img_bytes) as resp: - if resp.status != 200: - logger_task.error(f'-- Failed to upload result:') - logger_task.error(f'{resp.status}: {resp.reason}') - msg = ws_pb2.WebSocketMessage() - msg.status.id = task.id - msg.status.status = 'error-upload' - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - return False, False - - return True, output is not None - - async def server_process(main_loop, session, websocket, task) -> bool: - logger_task = logger.getChild(f'{task.id}') - try: - (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, - task) - except Exception as e: - logger_task.error(f'-- Task failed with exception:') - logger_task.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) - (success, has_translation_mask) = False, False - finally: - result = ws_pb2.WebSocketMessage() - result.finish_task.id = task.id - result.finish_task.success = success - result.finish_task.has_translation_mask = has_translation_mask - await websocket.send(result.SerializeToString()) - await asyncio.sleep(0) - logger_task.info(f'-- Task finished') - - async def async_server_thread(main_loop): - from aiohttp import ClientSession, ClientTimeout - timeout = ClientTimeout(total=30) - async with ClientSession(timeout=timeout) as session: - logger_conn = logger.getChild('connection') - if self.verbose: - logger_conn.setLevel(logging.DEBUG) - async for websocket in 
websockets.connect( - self.url, - extra_headers={ - 'x-secret': self.secret, - }, - max_size=1_000_000, - logger=logger_conn - ): - bg_tasks = set() - try: - logger.info('-- Connected to websocket server') - - async for raw in websocket: - # logger.info(f'Got message: {raw}') - msg = ws_pb2.WebSocketMessage() - msg.ParseFromString(raw) - if msg.WhichOneof('message') == 'new_task': - task = msg.new_task - bg_task = asyncio.create_task(server_process(main_loop, session, websocket, task)) - bg_tasks.add(bg_task) - bg_task.add_done_callback(bg_tasks.discard) - - except Exception as e: - logger.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) - - finally: - logger.info('-- Disconnected from websocket server') - for bg_task in bg_tasks: - bg_task.cancel() - - def server_thread(future, main_loop, server_loop): - asyncio.set_event_loop(server_loop) - try: - server_loop.run_until_complete(async_server_thread(main_loop)) - finally: - future.set_result(None) - - future = asyncio.Future() - Thread( - target=server_thread, - args=(future, asyncio.get_running_loop(), self._server_loop), - daemon=True - ).start() - - # create a future that is never done - await future - - async def _run_text_translation(self, ctx: Context): - coroutine = super()._run_text_translation(ctx) - if ctx.translator.has_offline(): - return await coroutine - else: - task_id = self._task_id - websocket = self._websocket - await self.task_lock.release() - result = await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe( - coroutine, - ctx.ws_event_loop - ) - ) - await self.task_lock.acquire((1 << 30) - ctx.ws_count) - self._task_id = task_id - self._websocket = websocket - return result - - async def _run_text_rendering(self, ctx: Context): - render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] - - output = await super()._run_text_rendering(ctx) - render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 - ctx.render_mask = render_mask - if self.verbose: - 
cv2.imwrite(self._result_path('ws_render_in.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGR)) - cv2.imwrite(self._result_path('ws_render_out.png'), cv2.cvtColor(output, cv2.COLOR_RGB2BGR)) - cv2.imwrite(self._result_path('ws_mask.png'), render_mask * 255) - - # only keep sections in mask - if self.verbose: - cv2.imwrite(self._result_path('ws_inmask.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGRA) * render_mask) - output = cv2.cvtColor(output, cv2.COLOR_RGB2RGBA) * render_mask - if self.verbose: - cv2.imwrite(self._result_path('ws_output.png'), cv2.cvtColor(output, cv2.COLOR_RGBA2BGRA) * render_mask) - - return output - - -# Experimental. May be replaced by a refactored server/web_main.py in the future. -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - 
- async def get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return 
await self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - self._preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) + self.add_progress_hook(ph) \ No newline at end of file diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py new file mode 100644 index 000000000..bfe712567 --- /dev/null +++ b/manga_translator/mode/api.py @@ -0,0 +1,290 @@ +# Experimental. May be replaced by a refactored server/web_main.py in the future. 
+import asyncio +import base64 +import io + +import cv2 +import numpy as np +from PIL import Image +from aiohttp import web +from aiohttp.web_middlewares import middleware +from marshmallow import fields, Schema, ValidationError + +from manga_translator import MangaTranslator, Context, UPSCALERS, TranslationInterrupt, logger +from manga_translator.args import translator_chain +from manga_translator.detection import DETECTORS +from manga_translator.inpainting import INPAINTERS +from manga_translator.ocr import OCRS +from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS + + +class MangaTranslatorAPI(MangaTranslator): + def __init__(self, params: dict = None): + import nest_asyncio + nest_asyncio.apply() + super().__init__(params) + self.host = params.get('host', '127.0.0.1') + self.port = params.get('port', '5003') + self.log_web = params.get('log_web', False) + self.ignore_errors = params.get('ignore_errors', True) + self._task_id = None + self._params = None + self.params = params + self.queue = [] + + async def wait_queue(self, id: int): + while self.queue[0] != id: + await asyncio.sleep(0.05) + + def remove_from_queue(self, id: int): + self.queue.remove(id) + + def generate_id(self): + try: + x = max(self.queue) + except: + x = 0 + return x + 1 + + def middleware_factory(self): + @middleware + async def sample_middleware(request, handler): + id = self.generate_id() + self.queue.append(id) + try: + await self.wait_queue(id) + except Exception as e: + print(e) + try: + # todo make cancellable + response = await handler(request) + except: + response = web.json_response({'error': "Internal Server Error", 'status': 500}, + status=500) + # Handle cases where a user leaves the queue, request fails, or is completed + try: + self.remove_from_queue(id) + except Exception as e: + print(e) + return response + + return sample_middleware + + async def get_file(self, image, base64Images, url) -> Image: + if image is not None: + content = image.file.read() + elif 
base64Images is not None: + base64Images = base64Images + if base64Images.__contains__('base64,'): + base64Images = base64Images.split('base64,')[1] + content = base64.b64decode(base64Images) + elif url is not None: + from aiohttp import ClientSession + async with ClientSession() as session: + async with session.get(url) as resp: + if resp.status == 200: + content = await resp.read() + else: + return web.json_response({'status': 'error'}) + else: + raise ValidationError("donest exist") + img = Image.open(io.BytesIO(content)) + + img.verify() + img = Image.open(io.BytesIO(content)) + if img.width * img.height > 8000 ** 2: + raise ValidationError("to large") + return img + + async def listen(self, translation_params: dict = None): + self.params = translation_params + app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) + + routes = web.RouteTableDef() + run_until_state = '' + + async def hook(state, finished): + if run_until_state and run_until_state == state and not finished: + raise TranslationInterrupt() + + self.add_progress_hook(hook) + + @routes.post("/get_text") + async def text_api(req): + nonlocal run_until_state + run_until_state = 'translating' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/translate") + async def translate_api(req): + nonlocal run_until_state + run_until_state = 'after-translating' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/inpaint_translate") + async def inpaint_translate_api(req): + nonlocal run_until_state + run_until_state = 'rendering' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/colorize_translate") + async def colorize_translate_api(req): + nonlocal run_until_state + run_until_state = 'rendering' + return await self.err_handling(self.run_translate, req, self.format_translate, True) + + # #@routes.post("/file") + # async def 
file_api(req): + # #TODO: return file + # return await self.err_handling(self.file_exec, req, None) + + app.add_routes(routes) + web.run_app(app, host=self.host, port=self.port) + + async def run_translate(self, translation_params, img): + return await self.translate(img, translation_params) + + async def err_handling(self, func, req, format, ri=False): + try: + if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': + if req.content_type == 'application/json': + d = await req.json() + else: + d = await req.post() + schema = self.PostSchema() + data = schema.load(d) + if 'translator_chain' in data: + data['translator_chain'] = translator_chain(data['translator_chain']) + if 'selective_translation' in data: + data['selective_translation'] = translator_chain(data['selective_translation']) + ctx = Context(**dict(self.params, **data)) + self._preprocess_params(ctx) + if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: + return web.json_response({'error': "Missing input", 'status': 422}) + fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) + if 'image' in data: + del data['image'] + if 'base64Images' in data: + del data['base64Images'] + if 'url' in data: + del data['url'] + attempts = 0 + while ctx.attempts == -1 or attempts <= ctx.attempts: + if attempts > 0: + logger.info(f'Retrying translation! 
Attempt {attempts}' + ( + f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + try: + await func(ctx, fil) + break + except TranslationInterrupt: + break + except Exception as e: + print(e) + attempts += 1 + if ctx.attempts != -1 and attempts > ctx.attempts: + return web.json_response({'error': "Internal Server Error", 'status': 500}, + status=500) + try: + return format(ctx, ri) + except Exception as e: + print(e) + return web.json_response({'error': "Failed to format", 'status': 500}, + status=500) + else: + return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, + status=415) + except ValueError as e: + print(e) + return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) + + except ValidationError as e: + print(e) + return web.json_response({'error': "Input invalid", 'status': 422}, status=422) + + def format_translate(self, ctx: Context, return_image: bool): + text_regions = ctx.text_regions + inpaint = ctx.img_inpainted + results = [] + if 'overlay_ext' in ctx: + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in ctx['translations'].items()} + else: + trans = {} + trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + if return_image and ctx.img_colorized is not None: + retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) + jpg_as_text = base64.b64encode(buffer) + img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + img = None + return web.json_response({'details': results, 'img': img}) + + class PostSchema(Schema): + target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) + detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) + ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) + inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) + upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) + translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) + direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) + skip_language = fields.Str(required=False) + upscale_ratio = fields.Integer(required=False) + translator_chain = fields.Str(required=False) + selective_translation = fields.Str(required=False) + attempts = fields.Integer(required=False) + detection_size = fields.Integer(required=False) + text_threshold = fields.Float(required=False) + box_threshold = fields.Float(required=False) + unclip_ratio = fields.Float(required=False) + inpainting_size = 
fields.Integer(required=False) + det_rotate = fields.Bool(required=False) + det_auto_rotate = fields.Bool(required=False) + det_invert = fields.Bool(required=False) + det_gamma_correct = fields.Bool(required=False) + min_text_length = fields.Integer(required=False) + colorization_size = fields.Integer(required=False) + denoise_sigma = fields.Integer(required=False) + mask_dilation_offset = fields.Integer(required=False) + ignore_bubble = fields.Integer(required=False) + gpt_config = fields.String(required=False) + filter_text = fields.String(required=False) + + # api specific + overlay_ext = fields.Str(required=False) + base64Images = fields.Raw(required=False) + image = fields.Raw(required=False) + url = fields.Raw(required=False) + + # no functionality except preventing errors when given + fingerprint = fields.Raw(required=False) + clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py new file mode 100644 index 000000000..05e87b41b --- /dev/null +++ b/manga_translator/mode/local.py @@ -0,0 +1,201 @@ +import os +from typing import Union, List + +from PIL import Image + +from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt +from ..save import save_result +from ..translators import ( + LanguageUnsupportedException, + dispatch as dispatch_translation, +) +from ..utils import natural_sort, replace_prefix, get_color_name, rgb2hex + + +class MangaTranslatorLocal(MangaTranslator): + async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): + """ + Translates an image or folder (recursively) specified through the path. 
+ """ + if not os.path.exists(path): + raise FileNotFoundError(path) + path = os.path.abspath(os.path.expanduser(path)) + dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' + params = params or {} + + # Handle format + file_ext = params.get('format') + if params.get('save_quality', 100) < 100: + if not params.get('format'): + file_ext = 'jpg' + elif params.get('format') != 'jpg': + raise ValueError('--save-quality of lower than 100 is only supported for .jpg files') + + if os.path.isfile(path): + # Determine destination file path + if not dest: + # Use the same folder as the source + p, ext = os.path.splitext(path) + _dest = f'{p}-translated.{file_ext or ext[1:]}' + elif not os.path.basename(dest): + p, ext = os.path.splitext(os.path.basename(path)) + # If the folders differ use the original filename from the source + if os.path.dirname(path) != dest: + _dest = os.path.join(dest, f'{p}.{file_ext or ext[1:]}') + else: + _dest = os.path.join(dest, f'{p}-translated.{file_ext or ext[1:]}') + else: + p, ext = os.path.splitext(dest) + _dest = f'{p}.{file_ext or ext[1:]}' + await self.translate_file(path, _dest, params) + + elif os.path.isdir(path): + # Determine destination folder path + if path[-1] == '\\' or path[-1] == '/': + path = path[:-1] + _dest = dest or path + '-translated' + if os.path.exists(_dest) and not os.path.isdir(_dest): + raise FileExistsError(_dest) + + translated_count = 0 + for root, subdirs, files in os.walk(path): + files = natural_sort(files) + dest_root = replace_prefix(root, path, _dest) + os.makedirs(dest_root, exist_ok=True) + for f in files: + if f.lower() == '.thumb': + continue + + file_path = os.path.join(root, f) + output_dest = replace_prefix(file_path, path, _dest) + p, ext = os.path.splitext(output_dest) + output_dest = f'{p}.{file_ext or ext[1:]}' + + if await self.translate_file(file_path, output_dest, params): + translated_count += 1 + if translated_count == 0: + logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') + else: + logger.info(f'Done. Translated {translated_count} image{"" if translated_count == 1 else "s"}') + + async def translate_file(self, path: str, dest: str, params: dict): + if not params.get('overwrite') and os.path.exists(dest): + logger.info( + f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') + await self._report_progress('saved', True) + return True + + logger.info(f'Translating: "{path}"') + + # Turn dict to context to make values also accessible through params. + params = params or {} + ctx = Context(**params) + self._preprocess_params(ctx) + + attempts = 0 + while ctx.attempts == -1 or attempts < ctx.attempts + 1: + if attempts > 0: + logger.info(f'Retrying translation! Attempt {attempts}' + + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + try: + return await self._translate_file(path, dest, ctx) + + except TranslationInterrupt: + break + except Exception as e: + if isinstance(e, LanguageUnsupportedException): + await self._report_progress('error-lang', True) + else: + await self._report_progress('error', True) + if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): + raise + else: + logger.error(f'{e.__class__.__name__}: {e}', + exc_info=e if self.verbose else None) + attempts += 1 + return False + + async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: + if path.endswith('.txt'): + with open(path, 'r') as f: + queries = f.read().split('\n') + translated_sentences = \ + await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, + 'cpu' if self._gpu_limited_memory else self.device) + p, ext = os.path.splitext(dest) + if ext != '.txt': + dest = p + '.txt' + logger.info(f'Saving "{dest}"') + with open(dest, 'w') as f: + f.write('\n'.join(translated_sentences)) + return True + + # TODO: Add .gif handler + + else: # Treat as image + try: + img = Image.open(path) + img.verify() + 
img = Image.open(path) + except Exception: + logger.warn(f'Failed to open image: {path}') + return False + + ctx = await self.translate(img, ctx) + result = ctx.result + + # Save result + if ctx.skip_no_text and not ctx.text_regions: + logger.debug('Not saving due to --skip-no-text') + return True + if result: + logger.info(f'Saving "{dest}"') + save_result(result, dest, ctx) + await self._report_progress('saved', True) + + if ctx.save_text or ctx.save_text_file or ctx.prep_manual: + if ctx.prep_manual: + # Save original image next to translated + p, ext = os.path.splitext(dest) + img_filename = p + '-orig' + ext + img_path = os.path.join(os.path.dirname(dest), img_filename) + img.save(img_path, quality=ctx.save_quality) + if ctx.text_regions: + self._save_text_to_file(path, ctx) + return True + return False + + def _save_text_to_file(self, image_path: str, ctx: Context): + cached_colors = [] + + def identify_colors(fg_rgb: List[int]): + idx = 0 + for rgb, _ in cached_colors: + # If similar color already saved + if abs(rgb[0] - fg_rgb[0]) + abs(rgb[1] - fg_rgb[1]) + abs(rgb[2] - fg_rgb[2]) < 50: + break + else: + idx += 1 + else: + cached_colors.append((fg_rgb, get_color_name(fg_rgb))) + return idx + 1, cached_colors[idx][1] + + s = f'\n[{image_path}]\n' + for i, region in enumerate(ctx.text_regions): + fore, back = region.get_font_colors() + color_id, color_name = identify_colors(fore) + + s += f'\n-- {i + 1} --\n' + s += f'color: #{color_id}: {color_name} (fg, bg: {rgb2hex(*fore)} {rgb2hex(*back)})\n' + s += f'text: {region.text}\n' + s += f'trans: {region.translation}\n' + for line in region.lines: + s += f'coords: {list(line.ravel())}\n' + s += '\n' + + text_output_file = ctx.text_output_file + if not text_output_file: + text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' + + with open(text_output_file, 'a', encoding='utf-8') as f: + f.write(s) \ No newline at end of file diff --git a/manga_translator/mode/web.py 
b/manga_translator/mode/web.py new file mode 100644 index 000000000..2acc9faf7 --- /dev/null +++ b/manga_translator/mode/web.py @@ -0,0 +1,151 @@ +import asyncio +import time + +import requests + +from manga_translator import MangaTranslator, logger, Context +from manga_translator.translators import TRANSLATORS +from manga_translator.utils import add_file_logger, remove_file_logger + + +class MangaTranslatorWeb(MangaTranslator): + """ + Translator client that executes tasks on behalf of the webserver in web_main.py. + """ + + def __init__(self, params: dict = None): + super().__init__(params) + self.host = params.get('host', '127.0.0.1') + if self.host == '0.0.0.0': + self.host = '127.0.0.1' + self.port = params.get('port', 5003) + self.nonce = params.get('nonce', '') + self.ignore_errors = params.get('ignore_errors', True) + self._task_id = None + self._params = None + + async def _init_connection(self): + available_translators = [] + from ..translators import MissingAPIKeyException, get_translator + for key in TRANSLATORS: + try: + get_translator(key) + available_translators.append(key) + except MissingAPIKeyException: + pass + + data = { + 'nonce': self.nonce, + 'capabilities': { + 'translators': available_translators, + }, + } + requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) + + async def _send_state(self, state: str, finished: bool): + # wait for translation to be saved first (bad solution?) 
+ finished = finished and not state == 'finished' + while True: + try: + data = { + 'task_id': self._task_id, + 'nonce': self.nonce, + 'state': state, + 'finished': finished, + } + requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) + break + except Exception: + # if translation is finished server has to know + if finished: + continue + else: + break + + def _get_task(self): + try: + rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', + timeout=3600).json() + return rjson.get('task_id'), rjson.get('data') + except Exception: + return None, None + + async def listen(self, translation_params: dict = None): + """ + Listens for translation tasks from web server. + """ + logger.info('Waiting for translation tasks') + + await self._init_connection() + self.add_progress_hook(self._send_state) + + while True: + self._task_id, self._params = self._get_task() + if self._params and 'exit' in self._params: + break + if not (self._task_id and self._params): + await asyncio.sleep(0.1) + continue + + self.result_sub_folder = self._task_id + logger.info(f'Processing task {self._task_id}') + if translation_params is not None: + # Combine default params with params chosen by webserver + for p, default_value in translation_params.items(): + current_value = self._params.get(p) + self._params[p] = current_value if current_value is not None else default_value + if self.verbose: + # Write log file + log_file = self._result_path('log.txt') + add_file_logger(log_file) + + # final.png will be renamed if format param is set + await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), + params=self._params) + print() + + if self.verbose: + remove_file_logger(log_file) + self._task_id = None + self._params = None + self.result_sub_folder = '' + + async def _run_text_translation(self, ctx: Context): + # Run machine translation as reference for manual translation (if `--translator=none` 
is not set) + text_regions = await super()._run_text_translation(ctx) + + if ctx.get('manual', False): + logger.info('Waiting for user input from manual translation') + requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ + 'task_id': self._task_id, + 'nonce': self.nonce, + 'texts': [r.text for r in text_regions], + 'translations': [r.translation for r in text_regions], + }, timeout=20) + + # wait for at most 1 hour for manual translation + wait_until = time.time() + 3600 + while time.time() < wait_until: + ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ + 'task_id': self._task_id, + 'nonce': self.nonce + }, timeout=20).json() + if 'result' in ret: + manual_translations = ret['result'] + if isinstance(manual_translations, str): + if manual_translations == 'error': + return [] + i = 0 + for translation in manual_translations: + if not translation.strip(): + text_regions.pop(i) + i = i - 1 + else: + text_regions[i].translation = translation + text_regions[i].target_lang = ctx.translator.langs[-1] + i = i + 1 + break + elif 'cancel' in ret: + return 'cancel' + await asyncio.sleep(0.1) + return text_regions diff --git a/manga_translator/mode/ws.py b/manga_translator/mode/ws.py new file mode 100644 index 000000000..d696058f8 --- /dev/null +++ b/manga_translator/mode/ws.py @@ -0,0 +1,264 @@ +import asyncio +import logging +import os +from typing import Tuple + +import cv2 +import numpy as np +from PIL import Image + +from manga_translator import logger, Context, MangaTranslator +from manga_translator.utils import PriorityLock, Throttler + + +class MangaTranslatorWS(MangaTranslator): + def __init__(self, params: dict = None): + super().__init__(params) + self.url = params.get('ws_url') + self.secret = params.get('ws_secret', os.getenv('WS_SECRET', '')) + self.ignore_errors = params.get('ignore_errors', True) + + self._task_id = None + self._websocket = None + + async def listen(self, translation_params: 
dict = None): + from threading import Thread + import io + import aioshutil + from aiofiles import os + import websockets + from ..server import ws_pb2 + + self._server_loop = asyncio.new_event_loop() + self.task_lock = PriorityLock() + self.counter = 0 + + async def _send_and_yield(websocket, msg): + # send message and yield control to the event loop (to actually send the message) + await websocket.send(msg) + await asyncio.sleep(0) + + send_throttler = Throttler(0.2) + send_and_yield = send_throttler.wrap(_send_and_yield) + + async def sync_state(state, finished): + if self._websocket is None: + return + msg = ws_pb2.WebSocketMessage() + msg.status.id = self._task_id + msg.status.status = state + self._server_loop.call_soon_threadsafe( + asyncio.create_task, + send_and_yield(self._websocket, msg.SerializeToString()) + ) + + self.add_progress_hook(sync_state) + + async def translate(task_id, websocket, image, params): + async with self.task_lock((1 << 31) - params['ws_count']): + self._task_id = task_id + self._websocket = websocket + result = await self.translate(image, params) + self._task_id = None + self._websocket = None + return result + + async def server_send_status(websocket, task_id, status): + msg = ws_pb2.WebSocketMessage() + msg.status.id = task_id + msg.status.status = status + await websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + + async def server_process_inner(main_loop, logger_task, session, websocket, task) -> Tuple[bool, bool]: + logger_task.info(f'-- Processing task {task.id}') + await server_send_status(websocket, task.id, 'pending') + + if self.verbose: + await aioshutil.rmtree(f'result/{task.id}', ignore_errors=True) + await os.makedirs(f'result/{task.id}', exist_ok=True) + + params = { + 'target_lang': task.target_language, + 'skip_lang': task.skip_language, + 'detector': task.detector, + 'direction': task.direction, + 'translator': task.translator, + 'size': task.size, + 'ws_event_loop': asyncio.get_event_loop(), + 
'ws_count': self.counter, + } + self.counter += 1 + + logger_task.info(f'-- Downloading image from {task.source_image}') + await server_send_status(websocket, task.id, 'downloading') + async with session.get(task.source_image) as resp: + if resp.status == 200: + source_image = await resp.read() + else: + msg = ws_pb2.WebSocketMessage() + msg.status.id = task.id + msg.status.status = 'error-download' + await websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + return False, False + + logger_task.info(f'-- Translating image') + if translation_params: + for p, default_value in translation_params.items(): + current_value = params.get(p) + params[p] = current_value if current_value is not None else default_value + + image = Image.open(io.BytesIO(source_image)) + + (ori_w, ori_h) = image.size + if max(ori_h, ori_w) > 1200: + params['upscale_ratio'] = 1 + + await server_send_status(websocket, task.id, 'preparing') + # translation_dict = await self.translate(image, params) + translation_dict = await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe( + translate(task.id, websocket, image, params), + main_loop + ) + ) + await send_throttler.flush() + + output: Image.Image = translation_dict.result + if output is not None: + await server_send_status(websocket, task.id, 'saving') + + output = output.resize((ori_w, ori_h), resample=Image.LANCZOS) + + img = io.BytesIO() + output.save(img, format='PNG') + if self.verbose: + output.save(self._result_path('ws_final.png')) + + img_bytes = img.getvalue() + logger_task.info(f'-- Uploading result to {task.translation_mask}') + await server_send_status(websocket, task.id, 'uploading') + async with session.put(task.translation_mask, data=img_bytes) as resp: + if resp.status != 200: + logger_task.error(f'-- Failed to upload result:') + logger_task.error(f'{resp.status}: {resp.reason}') + msg = ws_pb2.WebSocketMessage() + msg.status.id = task.id + msg.status.status = 'error-upload' + await 
websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + return False, False + + return True, output is not None + + async def server_process(main_loop, session, websocket, task) -> bool: + logger_task = logger.getChild(f'{task.id}') + try: + (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, + task) + except Exception as e: + logger_task.error(f'-- Task failed with exception:') + logger_task.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) + (success, has_translation_mask) = False, False + finally: + result = ws_pb2.WebSocketMessage() + result.finish_task.id = task.id + result.finish_task.success = success + result.finish_task.has_translation_mask = has_translation_mask + await websocket.send(result.SerializeToString()) + await asyncio.sleep(0) + logger_task.info(f'-- Task finished') + + async def async_server_thread(main_loop): + from aiohttp import ClientSession, ClientTimeout + timeout = ClientTimeout(total=30) + async with ClientSession(timeout=timeout) as session: + logger_conn = logger.getChild('connection') + if self.verbose: + logger_conn.setLevel(logging.DEBUG) + async for websocket in websockets.connect( + self.url, + extra_headers={ + 'x-secret': self.secret, + }, + max_size=1_000_000, + logger=logger_conn + ): + bg_tasks = set() + try: + logger.info('-- Connected to websocket server') + + async for raw in websocket: + # logger.info(f'Got message: {raw}') + msg = ws_pb2.WebSocketMessage() + msg.ParseFromString(raw) + if msg.WhichOneof('message') == 'new_task': + task = msg.new_task + bg_task = asyncio.create_task(server_process(main_loop, session, websocket, task)) + bg_tasks.add(bg_task) + bg_task.add_done_callback(bg_tasks.discard) + + except Exception as e: + logger.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) + + finally: + logger.info('-- Disconnected from websocket server') + for bg_task in bg_tasks: + bg_task.cancel() + + def 
server_thread(future, main_loop, server_loop): + asyncio.set_event_loop(server_loop) + try: + server_loop.run_until_complete(async_server_thread(main_loop)) + finally: + future.set_result(None) + + future = asyncio.Future() + Thread( + target=server_thread, + args=(future, asyncio.get_running_loop(), self._server_loop), + daemon=True + ).start() + + # create a future that is never done + await future + + async def _run_text_translation(self, ctx: Context): + coroutine = super()._run_text_translation(ctx) + if ctx.translator.has_offline(): + return await coroutine + else: + task_id = self._task_id + websocket = self._websocket + await self.task_lock.release() + result = await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe( + coroutine, + ctx.ws_event_loop + ) + ) + await self.task_lock.acquire((1 << 30) - ctx.ws_count) + self._task_id = task_id + self._websocket = websocket + return result + + async def _run_text_rendering(self, ctx: Context): + render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] + + output = await super()._run_text_rendering(ctx) + render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 + ctx.render_mask = render_mask + if self.verbose: + cv2.imwrite(self._result_path('ws_render_in.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGR)) + cv2.imwrite(self._result_path('ws_render_out.png'), cv2.cvtColor(output, cv2.COLOR_RGB2BGR)) + cv2.imwrite(self._result_path('ws_mask.png'), render_mask * 255) + + # only keep sections in mask + if self.verbose: + cv2.imwrite(self._result_path('ws_inmask.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGRA) * render_mask) + output = cv2.cvtColor(output, cv2.COLOR_RGB2RGBA) * render_mask + if self.verbose: + cv2.imwrite(self._result_path('ws_output.png'), cv2.cvtColor(output, cv2.COLOR_RGBA2BGRA) * render_mask) + + return output From ed2352adfa2ad56fb1935902a3bb0d65d0e79da8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 00:45:42 
+0100 Subject: [PATCH 05/75] rename batch, demo to local --- manga_translator/__main__.py | 9 ++++----- manga_translator/args.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index c0e01e508..8d06b9fc6 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -23,18 +23,17 @@ async def dispatch(args: Namespace): logger.info(f'Running in {args.mode} mode') - if args.mode in ('demo', 'batch'): + if args.mode == 'local': if not args.input: raise Exception('No input image was supplied. Use -i ') - translator = MangaTranslator(args_dict) + from manga_translator.mode.local import MangaTranslatorLocal + translator = MangaTranslatorLocal(args_dict) # Load pre-translation and post-translation dictionaries pre_dict = translator.load_dictionary(args.pre_dict) post_dict = translator.load_dictionary(args.post_dict) - if args.mode == 'demo': - if len(args.input) != 1 or not os.path.isfile(args.input[0]): - raise FileNotFoundError(f'Invalid single image file path for demo mode: "{" ".join(args.input)}". 
Use `-m batch`.') + if len(args.input) == 1 and os.path.isfile(args.input[0]): dest = os.path.join(BASE_PATH, 'result/final.png') args.overwrite = True # Do overwrite result/final.png file diff --git a/manga_translator/args.py b/manga_translator/args.py index 0ac6a65fa..2eb72c91d 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -101,7 +101,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') # Batch mode -parser_batch = subparsers.add_parser('batch', help='Run in batch translation mode') +parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') From b7c8ace4363141ac8c5769cfd2e37180c8541d52 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:28:57 +0100 Subject: [PATCH 06/75] use config in manga_translator --- manga_translator/config.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 30c44193c..ee78ddefa 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -6,6 +6,7 @@ class Renderer(IntEnum): default = 0 manga2Eng = 1 + none = 2 class Alignment(IntEnum): auto = 0 @@ -100,7 +101,7 @@ class RenderConfig(BaseModel): """If renderer should be splitting up words using a hyphen character (-)""" font_color: Optional[str] = None """Overwrite the text fg/bg color detected by the OCR model. 
Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" - line_spacing: Optional[float] = None + line_spacing: Optional[int] = None """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None """Use fixed font size for rendering""" @@ -111,7 +112,7 @@ class UpscaleConfig(BaseModel): """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" - upscale_ratio: Optional[float] = None + upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): @@ -148,6 +149,10 @@ class DetectorConfig(BaseModel): """Applies gamma correction for detection. Might improve detection.""" ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + box_threshold: float = 0.7 + """Threshold for bbox generation""" + unclip_ratio: float = 2.3 + """How much to extend text skeleton to form bounding box""" class InpainterConfig(BaseModel): inpainter: Inpainter = Inpainter.lama_large @@ -199,11 +204,7 @@ class Config(BaseModel): ocr: OcrConfig """Ocr configs""" # ? 
- unclip_ratio: float = 2.3 - """How much to extend text skeleton to form bounding box""" kernel_size: int = 3 """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" - box_threshold: float = 0.7 - """Threshold for bbox generation""" From 36a1a14d85551f66d3eebcdb9e57bc6032e7cf80 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:29:10 +0100 Subject: [PATCH 07/75] use config in manga_translator --- manga_translator/manga_translator.py | 228 +++++++++++++-------------- 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index cb17850e2..0f85e7a4d 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -8,10 +8,10 @@ import logging import numpy as np from PIL import Image -from typing import Union - +from typing import Optional, Any from .args import DEFAULT_ARGS +from .config import Config, Colorizer, Detector, Translator, Renderer from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, @@ -58,8 +58,23 @@ class TranslationInterrupt(Exception): class MangaTranslator: + verbose: bool + ignore_errors: bool + _gpu_limited_memory: bool + device: Optional[str] + kernel_size: Optional[int] + _progress_hooks: list[Any] + result_sub_folder: str def __init__(self, params: dict = None): + self.font_path = None + self.use_mtpe = False + self.kernel_size = None + self.device = None + self._gpu_limited_memory = False + self.ignore_errors = False + self.verbose = False + self._progress_hooks = [] self._add_logger_hook() @@ -76,6 +91,9 @@ def __init__(self, params: dict = None): def parse_init_params(self, params: dict): self.verbose = params.get('verbose', False) + self.use_mtpe = params.get('use_mtpe', False) + 
self.font_path = params.get('font_path', None) + self.ignore_errors = params.get('ignore_errors', False) # check mps for apple silicon or cuda for nvidia device = 'mps' if torch.backends.mps.is_available() else 'cuda' @@ -85,7 +103,7 @@ def parse_init_params(self, params: dict): self.device = device if self.using_gpu and ( not torch.cuda.is_available() and not torch.backends.mps.is_available()): raise Exception( - 'CUDA or Metal compatible device could not be found in torch whilst --use-gpu args was set.\n' \ + 'CUDA or Metal compatible device could not be found in torch whilst --use-gpu args was set.\n' 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') @@ -96,7 +114,7 @@ def parse_init_params(self, params: dict): def using_gpu(self): return self.device.startswith('cuda') or self.device == 'mps' - async def translate(self, image: Image.Image, params: Union[dict, Context] = None) -> Context: + async def translate(self, image: Image.Image, config: Config) -> Context: """ Translates a PIL image from a manga. Returns dict with result and intermediates of translation. Default params are taken from args.py. 
@@ -108,28 +126,29 @@ async def translate(self, image: Image.Image, params: Union[dict, Context] = Non """ # TODO: Take list of images to speed up batch processing - if not isinstance(params, Context): - params = params or {} - ctx = Context(**params) - self._preprocess_params(ctx) - else: - ctx = params + ctx = Context() ctx.input = image ctx.result = None # preload and download models (not strictly necessary, remove to lazy load) logger.info('Loading models') - if ctx.upscale_ratio: - await prepare_upscaling(ctx.upscaler) - await prepare_detection(ctx.detector) - await prepare_ocr(ctx.ocr, self.device) - await prepare_inpainting(ctx.inpainter, self.device) - await prepare_translation(ctx.translator) - if ctx.colorizer: - await prepare_colorization(ctx.colorizer) + if config.upscale.upscale_ratio: + # todo: fix + await prepare_upscaling(config.upscale.upscaler) + # todo: fix + await prepare_detection(config.detector.detector) + # todo: fix + await prepare_ocr(config.ocr.ocr, self.device) + # todo: fix + await prepare_inpainting(config.inpainter.inpainter, self.device) + # todo: fix + await prepare_translation(config.translator.translator) + if config.colorizer.colorizer != Colorizer.none: + #todo: fix + await prepare_colorization(config.colorizer.colorizer) # translate - return await self._translate(ctx) + return await self._translate(config, ctx) def load_dictionary(self, file_path): dictionary = [] @@ -161,31 +180,12 @@ def apply_dictionary(self, text, dictionary): return text def _preprocess_params(self, ctx: Context): + # todo: fix # params auto completion # TODO: Move args into ctx.args and only calculate once, or just copy into ctx for arg in DEFAULT_ARGS: ctx.setdefault(arg, DEFAULT_ARGS[arg]) - if 'direction' not in ctx: - if ctx.force_horizontal: - ctx.direction = 'h' - elif ctx.force_vertical: - ctx.direction = 'v' - else: - ctx.direction = 'auto' - if 'alignment' not in ctx: - if ctx.align_left: - ctx.alignment = 'left' - elif ctx.align_center: - 
ctx.alignment = 'center' - elif ctx.align_right: - ctx.alignment = 'right' - else: - ctx.alignment = 'auto' - if ctx.prep_manual: - ctx.renderer = 'none' - ctx.setdefault('renderer', 'manga2eng' if ctx.manga2eng else 'default') - if ctx.selective_translation is not None: ctx.selective_translation.target_lang = ctx.target_lang ctx.translator = ctx.selective_translation @@ -208,21 +208,21 @@ def _preprocess_params(self, ctx: Context): except: raise Exception(f'Invalid --font-color value: {ctx.font_color}. Use a hex value such as FF0000') - async def _translate(self, ctx: Context) -> Context: + async def _translate(self, config: Config, ctx: Context) -> Context: # -- Colorization - if ctx.colorizer: + if config.colorizer.colorizer != Colorizer.none: await self._report_progress('colorizing') - ctx.img_colorized = await self._run_colorizer(ctx) + ctx.img_colorized = await self._run_colorizer(config, ctx) else: ctx.img_colorized = ctx.input # -- Upscaling # The default text detector doesn't work very well on smaller images, might want to # consider adding automatic upscaling on certain kinds of small images. 
- if ctx.upscale_ratio: + if config.upscale.upscale_ratio: await self._report_progress('upscaling') - ctx.upscaled = await self._run_upscaling(ctx) + ctx.upscaled = await self._run_upscaling(config, ctx) else: ctx.upscaled = ctx.img_colorized @@ -230,7 +230,7 @@ async def _translate(self, ctx: Context) -> Context: # -- Detection await self._report_progress('detection') - ctx.textlines, ctx.mask_raw, ctx.mask = await self._run_detection(ctx) + ctx.textlines, ctx.mask_raw, ctx.mask = await self._run_detection(config, ctx) if self.verbose: cv2.imwrite(self._result_path('mask_raw.png'), ctx.mask_raw) @@ -238,7 +238,7 @@ async def _translate(self, ctx: Context) -> Context: await self._report_progress('skip-no-regions', True) # If no text was found result is intermediate image product ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) if self.verbose: img_bbox_raw = np.copy(ctx.img_rgb) @@ -248,11 +248,11 @@ async def _translate(self, ctx: Context) -> Context: # -- OCR await self._report_progress('ocr') - ctx.textlines = await self._run_ocr(ctx) + ctx.textlines = await self._run_ocr(config, ctx) - if ctx.skip_lang is not None : + if config.translator.skip_lang is not None : filtered_textlines = [] - skip_langs = ctx.skip_lang.split(',') + skip_langs = config.translator.skip_lang.split(',') for txtln in ctx.textlines : try : source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN') @@ -266,10 +266,10 @@ async def _translate(self, ctx: Context) -> Context: await self._report_progress('skip-no-text', True) # If no text was found result is intermediate image product ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = self.load_dictionary(ctx.pre_dict) + pre_dict = self.load_dictionary(config.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text @@ 
-286,7 +286,7 @@ async def _translate(self, ctx: Context) -> Context: # -- Textline merge await self._report_progress('textline_merge') - ctx.text_regions = await self._run_textline_merge(ctx) + ctx.text_regions = await self._run_textline_merge(config, ctx) if self.verbose: bboxes = visualize_textblocks(cv2.cvtColor(ctx.img_rgb, cv2.COLOR_BGR2RGB), ctx.text_regions) @@ -294,34 +294,34 @@ async def _translate(self, ctx: Context) -> Context: # -- Translation await self._report_progress('translating') - ctx.text_regions = await self._run_text_translation(ctx) + ctx.text_regions = await self._run_text_translation(config, ctx) await self._report_progress('after-translating') if not ctx.text_regions: await self._report_progress('error-translating', True) ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) elif ctx.text_regions == 'cancel': await self._report_progress('cancelled', True) ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # -- Mask refinement # (Delayed to take advantage of the region filtering done after ocr and translation) if ctx.mask is None: await self._report_progress('mask-generation') - ctx.mask = await self._run_mask_refinement(ctx) + ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, ctx.inpainting_size, + inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) # -- Inpainting await self._report_progress('inpainting') - ctx.img_inpainted = await self._run_inpainting(ctx) + ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = 
np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ -330,37 +330,37 @@ async def _translate(self, ctx: Context) -> Context: # -- Rendering await self._report_progress('rendering') - ctx.img_rendered = await self._run_text_rendering(ctx) + ctx.img_rendered = await self._run_text_rendering(config, ctx) await self._report_progress('finished', True) ctx.result = dump_image(ctx.input, ctx.img_rendered, ctx.img_alpha) - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # If `revert_upscaling` is True, revert to input size # Else leave `ctx` as-is - async def _revert_upscale(self, ctx: Context): - if ctx.revert_upscaling: + async def _revert_upscale(self, config: Config, ctx: Context): + if config.upscale.revert_upscaling: await self._report_progress('downscaling') ctx.result = ctx.result.resize(ctx.input.size) return ctx - async def _run_colorizer(self, ctx: Context): - return await dispatch_colorization(ctx.colorizer, device=self.device, image=ctx.input, **ctx) + async def _run_colorizer(self, config: Config, ctx: Context): + return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) - async def _run_upscaling(self, ctx: Context): - return (await dispatch_upscaling(ctx.upscaler, [ctx.img_colorized], ctx.upscale_ratio, self.device))[0] + async def _run_upscaling(self, config: Config, ctx: Context): + return (await dispatch_upscaling(config.upscale.upscaler, [ctx.img_colorized], config.upscale.upscale_ratio, self.device))[0] - async def _run_detection(self, ctx: Context): - return await dispatch_detection(ctx.detector, ctx.img_rgb, ctx.detection_size, ctx.text_threshold, - ctx.box_threshold, - ctx.unclip_ratio, ctx.det_invert, ctx.det_gamma_correct, ctx.det_rotate, - ctx.det_auto_rotate, + async def _run_detection(self, config: Config, ctx: Context): + return await dispatch_detection(config.detector.detector, ctx.img_rgb, config.detector.detection_size, 
config.detector.text_threshold, + config.detector.box_threshold, + config.detector.unclip_ratio, config.detector.det_invert, config.detector.det_gamma_correct, config.detector.det_rotate, + config.detector.det_auto_rotate, self.device, self.verbose) - async def _run_ocr(self, ctx: Context): - textlines = await dispatch_ocr(ctx.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) + async def _run_ocr(self, config: Config, ctx: Context): + textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) new_textlines = [] for textline in textlines: @@ -372,21 +372,21 @@ async def _run_ocr(self, ctx: Context): new_textlines.append(textline) return new_textlines - async def _run_textline_merge(self, ctx: Context): + async def _run_textline_merge(self, config: Config, ctx: Context): text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0], verbose=self.verbose) new_text_regions = [] for region in text_regions: - if len(region.text) >= ctx.min_text_length \ + if len(region.text) >= config.ocr.min_text_length \ and not is_valuable_text(region.text) \ - or (not ctx.no_text_lang_skip and langcodes.tag_distance(region.source_lang, ctx.target_lang) == 0): + or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0): if region.text.strip(): logger.info(f'Filtered out: {region.text}') - if len(region.text) < ctx.min_text_length: + if len(region.text) < config.ocr.min_text_length: logger.info('Reason: Text length is less than the minimum required length.') elif not is_valuable_text(region.text): logger.info('Reason: Text is not considered valuable.') - elif langcodes.tag_distance(region.source_lang, ctx.target_lang) == 0: + elif langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0: logger.info('Reason: Text language matches the target language and no_text_lang_skip is False.') else: if 
ctx.font_color_fg or ctx.font_color_bg: @@ -398,28 +398,28 @@ async def _run_textline_merge(self, ctx: Context): # Sort ctd (comic text detector) regions left to right. Otherwise right to left. # Sorting will improve text translation quality. - text_regions = sort_regions(text_regions, right_to_left=True if ctx.detector != 'ctd' else False) + text_regions = sort_regions(text_regions, right_to_left=True if config.detector.detector != Detector.ctd else False) return text_regions - async def _run_text_translation(self, ctx: Context): + async def _run_text_translation(self, config: Config, ctx: Context): translated_sentences = \ - await dispatch_translation(ctx.translator, + await dispatch_translation(config.translator.translator, [region.text for region in ctx.text_regions], - ctx.use_mtpe, + self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) for region, translation in zip(ctx.text_regions, translated_sentences): - if ctx.uppercase: + if config.render.uppercase: translation = translation.upper() - elif ctx.lowercase: + elif config.render.lowercase: translation = translation.upper() region.translation = translation - region.target_lang = ctx.target_lang - region._alignment = ctx.alignment - region._direction = ctx.direction + region.target_lang = config.translator.target_lang + region._alignment = config.render.alignment + region._direction = config.render.direction # Apply post dictionary after translating - post_dict = self.load_dictionary(ctx.post_dict) + post_dict = self.load_dictionary(config.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation @@ -441,7 +441,7 @@ async def _run_text_translation(self, ctx: Context): special_langs = ['CHS', 'CHT', 'JPN', 'KOR', 'IND', 'UKR', 'RUS', 'THA', 'ARA'] # Process special language scenarios - if ctx.target_lang in special_langs: + if config.translator.target_lang in special_langs: # Categorize regions same_target_regions = [] # Target language regions with 
identical translation diff_target_regions = [] # Target language regions with different translation @@ -453,21 +453,21 @@ async def _run_text_translation(self, ctx: Context): has_target_lang = False # Target language detection - if ctx.target_lang in ['CHS', 'CHT']: # Chinese + if config.translator.target_lang in ['CHS', 'CHT']: # Chinese has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) - elif ctx.target_lang == 'JPN': # Japanese + elif config.translator.target_lang == 'JPN': # Japanese has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text)) - elif ctx.target_lang == 'KOR': # Korean + elif config.translator.target_lang == 'KOR': # Korean has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text)) - elif ctx.target_lang == 'ARA': # Arabic + elif config.translator.target_lang == 'ARA': # Arabic has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text)) - elif ctx.target_lang == 'THA': # Thai + elif config.translator.target_lang == 'THA': # Thai has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text)) - elif ctx.target_lang == 'RUS': # Russian + elif config.translator.target_lang == 'RUS': # Russian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) - elif ctx.target_lang == 'UKR': # Ukrainian + elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) - elif ctx.target_lang == 'IND': # Indonesian + elif config.translator.target_lang == 'IND': # Indonesian has_target_lang = bool(re.search('[A-Za-z]', region.text)) # Skip numeric translations and filtered text @@ -476,9 +476,9 @@ async def _run_text_translation(self, ctx: Context): logger.info('Reason: Numeric translation') continue - if ctx.filter_text and re.search(ctx.filter_text, region.translation): + if config.filter_text and re.search(config.filter_text, region.translation): logger.info(f'Filtered out: {region.translation}') - 
logger.info(f'Reason: Matched filter text: {ctx.filter_text}') + logger.info(f'Reason: Matched filter text: {config.filter_text}') continue if has_target_lang: @@ -510,14 +510,14 @@ async def _run_text_translation(self, ctx: Context): should_filter = False filter_reason = "" - if not ctx.translator == 'none': + if not config.translator.translator == Translator.none: if region.translation.isnumeric(): should_filter = True filter_reason = "Numeric translation" - elif ctx.filter_text and re.search(ctx.filter_text, region.translation): + elif config.filter_text and re.search(config.filter_text, region.translation): should_filter = True - filter_reason = f"Matched filter text: {ctx.filter_text}" - elif not ctx.translator == 'original': + filter_reason = f"Matched filter text: {config.filter_text}" + elif not config.translator.translator == Translator.original: text_equal = region.text.lower().strip() == region.translation.lower().strip() if text_equal: should_filter = True @@ -533,25 +533,25 @@ async def _run_text_translation(self, ctx: Context): return new_text_regions - async def _run_mask_refinement(self, ctx: Context): + async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', - ctx.mask_dilation_offset, ctx.ignore_bubble, self.verbose,self.kernel_size) + config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) - async def _run_inpainting(self, ctx: Context): - return await dispatch_inpainting(ctx.inpainter, ctx.img_rgb, ctx.mask, ctx.inpainting_size, self.device, + async def _run_inpainting(self, config: Config,ctx: Context): + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, self.verbose) - async def _run_text_rendering(self, ctx: Context): - if ctx.renderer == 'none': + async def _run_text_rendering(self, config: Config, ctx: Context): + if 
config.render.renderer == Renderer.none: output = ctx.img_inpainted # manga2eng currently only supports horizontal left to right rendering - elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get( + elif config.render.renderer == Renderer.manga2Eng and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get( ctx.text_regions[0].target_lang) == 'h': - output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, ctx.font_path, ctx.line_spacing) + output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, self.font_path, config.render.line_spacing) else: - output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, ctx.font_path, ctx.font_size, - ctx.font_size_offset, - ctx.font_size_minimum, not ctx.no_hyphenation, ctx.render_mask, ctx.line_spacing) + output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, self.font_path, config.render.font_size, + config.render.font_size_offset, + config.render.font_size_minimum, not config.render.no_hyphenation, config.render.render_mask, config.render.line_spacing) return output def _result_path(self, path: str) -> str: From 341b5454b3d4585b70594fb21c8c6f65551d5a3f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:35:52 +0100 Subject: [PATCH 08/75] more ctx fixes --- manga_translator/__main__.py | 15 ++- manga_translator/colorization/__init__.py | 6 +- manga_translator/detection/__init__.py | 6 +- manga_translator/inpainting/__init__.py | 6 +- manga_translator/manga_translator.py | 138 +++++++++++----------- manga_translator/mode/api.py | 6 +- manga_translator/mode/local.py | 3 +- manga_translator/ocr/__init__.py | 6 +- manga_translator/upscaling/__init__.py | 6 +- 9 files changed, 96 insertions(+), 96 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 8d06b9fc6..104d41fb4 100644 --- a/manga_translator/__main__.py 
+++ b/manga_translator/__main__.py @@ -4,8 +4,7 @@ from argparse import Namespace from .manga_translator import ( - MangaTranslator, - set_main_logger, + set_main_logger, load_dictionary, apply_dictionary, ) from .args import parser from .utils import ( @@ -30,8 +29,8 @@ async def dispatch(args: Namespace): translator = MangaTranslatorLocal(args_dict) # Load pre-translation and post-translation dictionaries - pre_dict = translator.load_dictionary(args.pre_dict) - post_dict = translator.load_dictionary(args.post_dict) + pre_dict = load_dictionary(args.pre_dict) + post_dict = load_dictionary(args.post_dict) if len(args.input) == 1 and os.path.isfile(args.input[0]): dest = os.path.join(BASE_PATH, 'result/final.png') @@ -40,12 +39,12 @@ async def dispatch(args: Namespace): # Apply pre-translation dictionaries await translator.translate_path(args.input[0], dest, args_dict) for textline in translator.textlines: - textline.text = translator.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) logger.info(f'Pre-translation dictionary applied: {textline.text}') # Apply post-translation dictionaries for textline in translator.textlines: - textline.translation = translator.apply_dictionary(textline.translation, post_dict) + textline.translation = apply_dictionary(textline.translation, post_dict) logger.info(f'Post-translation dictionary applied: {textline.translation}') else: # batch @@ -54,12 +53,12 @@ async def dispatch(args: Namespace): # Apply pre-translation dictionaries await translator.translate_path(path, dest, args_dict) for textline in translator.textlines: - textline.text = translator.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) logger.info(f'Pre-translation dictionary applied: {textline.text}') # Apply post-translation dictionaries for textline in translator.textlines: - textline.translation = translator.apply_dictionary(textline.translation, post_dict) + 
textline.translation = apply_dictionary(textline.translation, post_dict) logger.info(f'Post-translation dictionary applied: {textline.translation}') elif args.mode == 'web': diff --git a/manga_translator/colorization/__init__.py b/manga_translator/colorization/__init__.py index d7423286c..61a0c0997 100644 --- a/manga_translator/colorization/__init__.py +++ b/manga_translator/colorization/__init__.py @@ -9,7 +9,7 @@ } colorizer_cache = {} -def get_colorizer(key: str, *args, **kwargs) -> CommonColorizer: +def get_colorizer(key: Colorizer, *args, **kwargs) -> CommonColorizer: if key not in COLORIZERS: raise ValueError(f'Could not find colorizer for: "{key}". Choose from the following: %s' % ','.join(COLORIZERS)) if not colorizer_cache.get(key): @@ -17,12 +17,12 @@ def get_colorizer(key: str, *args, **kwargs) -> CommonColorizer: colorizer_cache[key] = upscaler(*args, **kwargs) return colorizer_cache[key] -async def prepare(key: str): +async def prepare(key: Colorizer): upscaler = get_colorizer(key) if isinstance(upscaler, OfflineColorizer): await upscaler.download() -async def dispatch(key: str, device: str = 'cpu', **kwargs) -> Image.Image: +async def dispatch(key: Colorizer, device: str = 'cpu', **kwargs) -> Image.Image: colorizer = get_colorizer(key) if isinstance(colorizer, OfflineColorizer): await colorizer.load(device) diff --git a/manga_translator/detection/__init__.py b/manga_translator/detection/__init__.py index 2f8bd24e4..c2655a83f 100644 --- a/manga_translator/detection/__init__.py +++ b/manga_translator/detection/__init__.py @@ -17,7 +17,7 @@ } detector_cache = {} -def get_detector(key: str, *args, **kwargs) -> CommonDetector: +def get_detector(key: Detector, *args, **kwargs) -> CommonDetector: if key not in DETECTORS: raise ValueError(f'Could not find detector for: "{key}". 
Choose from the following: %s' % ','.join(DETECTORS)) if not detector_cache.get(key): @@ -25,12 +25,12 @@ def get_detector(key: str, *args, **kwargs) -> CommonDetector: detector_cache[key] = detector(*args, **kwargs) return detector_cache[key] -async def prepare(detector_key: str): +async def prepare(detector_key: Detector): detector = get_detector(detector_key) if isinstance(detector, OfflineDetector): await detector.download() -async def dispatch(detector_key: str, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float, +async def dispatch(detector_key: Detector, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float, invert: bool, gamma_correct: bool, rotate: bool, auto_rotate: bool = False, device: str = 'cpu', verbose: bool = False): detector = get_detector(detector_key) if isinstance(detector, OfflineDetector): diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 16f242126..c4ca1ca13 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -18,7 +18,7 @@ } inpainter_cache = {} -def get_inpainter(key: str, *args, **kwargs) -> CommonInpainter: +def get_inpainter(key: Inpainter, *args, **kwargs) -> CommonInpainter: if key not in INPAINTERS: raise ValueError(f'Could not find inpainter for: "{key}". 
Choose from the following: %s' % ','.join(INPAINTERS)) if not inpainter_cache.get(key): @@ -26,13 +26,13 @@ def get_inpainter(key: str, *args, **kwargs) -> CommonInpainter: inpainter_cache[key] = inpainter(*args, **kwargs) return inpainter_cache[key] -async def prepare(inpainter_key: str, device: str = 'cpu'): +async def prepare(inpainter_key: Inpainter, device: str = 'cpu'): inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.download() await inpainter.load(device) -async def dispatch(inpainter_key: str, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: +async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.load(device) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 0f85e7a4d..682216d74 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -11,7 +11,7 @@ from typing import Optional, Any from .args import DEFAULT_ARGS -from .config import Config, Colorizer, Detector, Translator, Renderer +from .config import Config, Colorizer, Detector, Translator, Renderer, Inpainter from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, @@ -26,7 +26,7 @@ ) from .detection import dispatch as dispatch_detection, prepare as prepare_detection -from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling, UPSCALERS +from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling from .ocr import dispatch as dispatch_ocr, prepare as prepare_ocr from .textline_merge import dispatch as dispatch_textline_merge from .mask_refinement import dispatch as dispatch_mask_refinement @@ -57,6 +57,67 @@ class 
TranslationInterrupt(Exception): pass +def load_dictionary(file_path): + dictionary = [] + if file_path and os.path.exists(file_path): + with open(file_path, 'r', encoding='utf-8') as file: + for line_number, line in enumerate(file, start=1): + # Ignore empty lines and lines starting with '#' or '//' + if not line.strip() or line.strip().startswith('#') or line.strip().startswith('//'): + continue + # Remove comment parts + line = line.split('#')[0].strip() + line = line.split('//')[0].strip() + parts = line.split() + if len(parts) == 1: + # If there is only the left part, the right part defaults to an empty string, meaning delete the left part + pattern = re.compile(parts[0]) + dictionary.append((pattern, '')) + elif len(parts) == 2: + # If both left and right parts are present, perform the replacement + pattern = re.compile(parts[0]) + dictionary.append((pattern, parts[1])) + else: + logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') + return dictionary + + +def _preprocess_params(ctx: Context): + # todo: fix + # params auto completion + # TODO: Move args into ctx.args and only calculate once, or just copy into ctx + for arg in DEFAULT_ARGS: + ctx.setdefault(arg, DEFAULT_ARGS[arg]) + + if ctx.selective_translation is not None: + ctx.selective_translation.target_lang = ctx.target_lang + ctx.translator = ctx.selective_translation + elif ctx.translator_chain is not None: + ctx.target_lang = ctx.translator_chain.langs[-1] + ctx.translator = ctx.translator_chain + else: + ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') + if ctx.gpt_config: + ctx.gpt_config = OmegaConf.load(ctx.gpt_config) + + if ctx.filter_text: + ctx.filter_text = re.compile(ctx.filter_text) + + if ctx.font_color: + colors = ctx.font_color.split(':') + try: + ctx.font_color_fg = hex2rgb(colors[0]) + ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception(f'Invalid --font-color value: {ctx.font_color}. 
Use a hex value such as FF0000') + + +def apply_dictionary(text, dictionary): + for pattern, value in dictionary: + text = pattern.sub(value, text) + return text + + class MangaTranslator: verbose: bool ignore_errors: bool @@ -134,80 +195,17 @@ async def translate(self, image: Image.Image, config: Config) -> Context: # preload and download models (not strictly necessary, remove to lazy load) logger.info('Loading models') if config.upscale.upscale_ratio: - # todo: fix await prepare_upscaling(config.upscale.upscaler) - # todo: fix await prepare_detection(config.detector.detector) - # todo: fix await prepare_ocr(config.ocr.ocr, self.device) - # todo: fix await prepare_inpainting(config.inpainter.inpainter, self.device) # todo: fix await prepare_translation(config.translator.translator) if config.colorizer.colorizer != Colorizer.none: - #todo: fix await prepare_colorization(config.colorizer.colorizer) # translate return await self._translate(config, ctx) - def load_dictionary(self, file_path): - dictionary = [] - if file_path and os.path.exists(file_path): - with open(file_path, 'r', encoding='utf-8') as file: - for line_number, line in enumerate(file, start=1): - # Ignore empty lines and lines starting with '#' or '//' - if not line.strip() or line.strip().startswith('#') or line.strip().startswith('//'): - continue - # Remove comment parts - line = line.split('#')[0].strip() - line = line.split('//')[0].strip() - parts = line.split() - if len(parts) == 1: - # If there is only the left part, the right part defaults to an empty string, meaning delete the left part - pattern = re.compile(parts[0]) - dictionary.append((pattern, '')) - elif len(parts) == 2: - # If both left and right parts are present, perform the replacement - pattern = re.compile(parts[0]) - dictionary.append((pattern, parts[1])) - else: - logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') - return dictionary - - def apply_dictionary(self, text, dictionary): - for pattern, 
value in dictionary: - text = pattern.sub(value, text) - return text - - def _preprocess_params(self, ctx: Context): - # todo: fix - # params auto completion - # TODO: Move args into ctx.args and only calculate once, or just copy into ctx - for arg in DEFAULT_ARGS: - ctx.setdefault(arg, DEFAULT_ARGS[arg]) - - if ctx.selective_translation is not None: - ctx.selective_translation.target_lang = ctx.target_lang - ctx.translator = ctx.selective_translation - elif ctx.translator_chain is not None: - ctx.target_lang = ctx.translator_chain.langs[-1] - ctx.translator = ctx.translator_chain - else: - ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') - if ctx.gpt_config: - ctx.gpt_config = OmegaConf.load(ctx.gpt_config) - - if ctx.filter_text: - ctx.filter_text = re.compile(ctx.filter_text) - - if ctx.font_color: - colors = ctx.font_color.split(':') - try: - ctx.font_color_fg = hex2rgb(colors[0]) - ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None - except: - raise Exception(f'Invalid --font-color value: {ctx.font_color}. 
Use a hex value such as FF0000') - async def _translate(self, config: Config, ctx: Context) -> Context: # -- Colorization @@ -269,11 +267,11 @@ async def _translate(self, config: Config, ctx: Context) -> Context: return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = self.load_dictionary(config.pre_dict) + pre_dict = load_dictionary(config.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text - textline.text = self.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) if original != textline.text: pre_replacements.append(f"{original} => {textline.text}") @@ -314,7 +312,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -419,11 +417,11 @@ async def _run_text_translation(self, config: Config, ctx: Context): region._direction = config.render.direction # Apply post dictionary after translating - post_dict = self.load_dictionary(config.post_dict) + post_dict = load_dictionary(config.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation - region.translation = self.apply_dictionary(region.translation, post_dict) + region.translation = apply_dictionary(region.translation, post_dict) if original != region.translation: post_replacements.append(f"{original} => {region.translation}") diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py index bfe712567..fc71d32e3 100644 --- 
a/manga_translator/mode/api.py +++ b/manga_translator/mode/api.py @@ -10,12 +10,14 @@ from aiohttp.web_middlewares import middleware from marshmallow import fields, Schema, ValidationError -from manga_translator import MangaTranslator, Context, UPSCALERS, TranslationInterrupt, logger +from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger from manga_translator.args import translator_chain from manga_translator.detection import DETECTORS from manga_translator.inpainting import INPAINTERS +from manga_translator.manga_translator import _preprocess_params from manga_translator.ocr import OCRS from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS +from manga_translator.upscaling import UPSCALERS class MangaTranslatorAPI(MangaTranslator): @@ -158,7 +160,7 @@ async def err_handling(self, func, req, format, ri=False): if 'selective_translation' in data: data['selective_translation'] = translator_chain(data['selective_translation']) ctx = Context(**dict(self.params, **data)) - self._preprocess_params(ctx) + _preprocess_params(ctx) if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: return web.json_response({'error': "Missing input", 'status': 422}) fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 05e87b41b..67e67ebb5 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -4,6 +4,7 @@ from PIL import Image from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt +from ..manga_translator import _preprocess_params from ..save import save_result from ..translators import ( LanguageUnsupportedException, @@ -90,7 +91,7 @@ async def translate_file(self, path: str, dest: str, params: dict): # Turn dict to context to make values also accessible through params. 
params = params or {} ctx = Context(**params) - self._preprocess_params(ctx) + _preprocess_params(ctx) attempts = 0 while ctx.attempts == -1 or attempts < ctx.attempts + 1: diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 75390d764..14d580f69 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -17,7 +17,7 @@ } ocr_cache = {} -def get_ocr(key: str, *args, **kwargs) -> CommonOCR: +def get_ocr(key: Ocr, *args, **kwargs) -> CommonOCR: if key not in OCRS: raise ValueError(f'Could not find OCR for: "{key}". Choose from the following: %s' % ','.join(OCRS)) if not ocr_cache.get(key): @@ -25,13 +25,13 @@ def get_ocr(key: str, *args, **kwargs) -> CommonOCR: ocr_cache[key] = ocr(*args, **kwargs) return ocr_cache[key] -async def prepare(ocr_key: str, device: str = 'cpu'): +async def prepare(ocr_key: Ocr, device: str = 'cpu'): ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.download() await ocr.load(device) -async def dispatch(ocr_key: str, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: +async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.load(device) diff --git a/manga_translator/upscaling/__init__.py b/manga_translator/upscaling/__init__.py index 768527890..df88d93e2 100644 --- a/manga_translator/upscaling/__init__.py +++ b/manga_translator/upscaling/__init__.py @@ -14,7 +14,7 @@ } upscaler_cache = {} -def get_upscaler(key: str, *args, **kwargs) -> CommonUpscaler: +def get_upscaler(key: Upscaler, *args, **kwargs) -> CommonUpscaler: if key not in UPSCALERS: raise ValueError(f'Could not find upscaler for: "{key}". 
Choose from the following: %s' % ','.join(UPSCALERS)) if not upscaler_cache.get(key): @@ -22,12 +22,12 @@ def get_upscaler(key: str, *args, **kwargs) -> CommonUpscaler: upscaler_cache[key] = upscaler(*args, **kwargs) return upscaler_cache[key] -async def prepare(upscaler_key: str): +async def prepare(upscaler_key: Upscaler): upscaler = get_upscaler(upscaler_key) if isinstance(upscaler, OfflineUpscaler): await upscaler.download() -async def dispatch(upscaler_key: str, image_batch: List[Image.Image], upscale_ratio: int, device: str = 'cpu') -> List[Image.Image]: +async def dispatch(upscaler_key: Upscaler, image_batch: List[Image.Image], upscale_ratio: int, device: str = 'cpu') -> List[Image.Image]: if upscale_ratio == 1: return image_batch upscaler = get_upscaler(upscaler_key) From 14bc8ca1fe0cc66031ef3839f7e5b59e25ad8de9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 11:42:52 +0100 Subject: [PATCH 09/75] move preprocessing into config --- manga_translator/config.py | 57 ++++++++++++++++++++++++ manga_translator/manga_translator.py | 53 ++++++---------------- manga_translator/translators/__init__.py | 2 +- 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index ee78ddefa..863268522 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,8 +1,14 @@ +import re from enum import Enum, IntEnum +from omegaconf import OmegaConf from pydantic import BaseModel from typing import Optional +from manga_translator import TranslatorChain, hex2rgb +from manga_translator.args import translator_chain + + class Renderer(IntEnum): default = 0 manga2Eng = 1 @@ -106,6 +112,29 @@ class RenderConfig(BaseModel): font_size: Optional[int] = None """Use fixed font size for rendering""" + @property + def font_color_fg(self): + if self.font_color and not self._font_color_fg: + colors = self.font_color.split(':') + try: + 
self._font_color_fg = hex2rgb(colors[0]) + self._font_color_fg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception( + f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') + return self._font_color_fg + + @property + def font_color_bg(self): + if self.font_color and not self._font_color_bg: + colors = self.font_color.split(':') + try: + self._font_color_fg = hex2rgb(colors[0]) + self._font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception( + f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') + return self._font_color_bg class UpscaleConfig(BaseModel): upscaler: Upscaler = Upscaler.esrgan @@ -131,6 +160,28 @@ class TranslatorConfig(BaseModel): selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + @property + def translator_gen(self): + if self._translator_gen is None: + if self.selective_translation is not None: + trans = translator_chain(self.selective_translation) + trans.target_lang = self.target_lang + self._translator_gen = trans + elif self.translator_chain is not None: + trans = translator_chain(self.translator_chain) + trans.target_lang = trans.langs[-1] + self._translator_gen = trans + else: + self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') + return self._translator_gen + + def chatgpt_config(self): + if self.gpt_config is not None and self._gpt_config is None: + #todo: load from already loaded file + self._gpt_config = OmegaConf.load(self.gpt_config) + return self._gpt_config + + class DetectorConfig(BaseModel): """""" detector: Detector =Detector.default @@ -208,3 +259,9 @@ class Config(BaseModel): """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" + + @property + def re_filter_text(self): + if self._filter_text is None: + self._filter_text = re.compile(self.filter_text) + return self._filter_text diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 682216d74..62d4d154a 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -81,37 +81,6 @@ def load_dictionary(file_path): logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') return dictionary - -def _preprocess_params(ctx: Context): - # todo: fix - # params auto completion - # TODO: Move args into ctx.args and only calculate once, or just copy into ctx - for arg in DEFAULT_ARGS: - ctx.setdefault(arg, DEFAULT_ARGS[arg]) - - if ctx.selective_translation is not None: - ctx.selective_translation.target_lang = ctx.target_lang - 
ctx.translator = ctx.selective_translation - elif ctx.translator_chain is not None: - ctx.target_lang = ctx.translator_chain.langs[-1] - ctx.translator = ctx.translator_chain - else: - ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') - if ctx.gpt_config: - ctx.gpt_config = OmegaConf.load(ctx.gpt_config) - - if ctx.filter_text: - ctx.filter_text = re.compile(ctx.filter_text) - - if ctx.font_color: - colors = ctx.font_color.split(':') - try: - ctx.font_color_fg = hex2rgb(colors[0]) - ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None - except: - raise Exception(f'Invalid --font-color value: {ctx.font_color}. Use a hex value such as FF0000') - - def apply_dictionary(text, dictionary): for pattern, value in dictionary: text = pattern.sub(value, text) @@ -168,6 +137,7 @@ def parse_init_params(self, params: dict): 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') + #todo: fix self.kernel_size=int(params.get('kernel_size')) os.environ['INPAINTING_PRECISION'] = params.get('inpainting_precision', 'fp32') @@ -199,8 +169,7 @@ async def translate(self, image: Image.Image, config: Config) -> Context: await prepare_detection(config.detector.detector) await prepare_ocr(config.ocr.ocr, self.device) await prepare_inpainting(config.inpainter.inpainter, self.device) - # todo: fix - await prepare_translation(config.translator.translator) + await prepare_translation(config.translator.translator_gen) if config.colorizer.colorizer != Colorizer.none: await prepare_colorization(config.colorizer.colorizer) # translate @@ -319,6 +288,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- Inpainting await self._report_progress('inpainting') + #todo: fix _run_inpainting takes ctx ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ 
-345,6 +315,7 @@ async def _revert_upscale(self, config: Config, ctx: Context): return ctx async def _run_colorizer(self, config: Config, ctx: Context): + #todo: fix dispatch_colorization takes ctx return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) async def _run_upscaling(self, config: Config, ctx: Context): @@ -358,15 +329,16 @@ async def _run_detection(self, config: Config, ctx: Context): self.device, self.verbose) async def _run_ocr(self, config: Config, ctx: Context): + #todo: fix dispatch_ocr takes ctx textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) new_textlines = [] for textline in textlines: if textline.text.strip(): - if ctx.font_color_fg: - textline.fg_r, textline.fg_g, textline.fg_b = ctx.font_color_fg - if ctx.font_color_bg: - textline.bg_r, textline.bg_g, textline.bg_b = ctx.font_color_bg + if config.render.font_color_fg: + textline.fg_r, textline.fg_g, textline.fg_b = config.render.font_color_fg + if config.render.font_color_bg: + textline.bg_r, textline.bg_g, textline.bg_b = config.render.font_color_bg new_textlines.append(textline) return new_textlines @@ -400,8 +372,9 @@ async def _run_textline_merge(self, config: Config, ctx: Context): return text_regions async def _run_text_translation(self, config: Config, ctx: Context): + #todo: fix dispatch_translation takes ctx translated_sentences = \ - await dispatch_translation(config.translator.translator, + await dispatch_translation(config.translator.translator_gen, [region.text for region in ctx.text_regions], self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) @@ -474,7 +447,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): logger.info('Reason: Numeric translation') continue - if config.filter_text and re.search(config.filter_text, region.translation): + if config.filter_text and re.search(config.re_filter_text, region.translation): 
logger.info(f'Filtered out: {region.translation}') logger.info(f'Reason: Matched filter text: {config.filter_text}') continue @@ -512,7 +485,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): if region.translation.isnumeric(): should_filter = True filter_reason = "Numeric translation" - elif config.filter_text and re.search(config.filter_text, region.translation): + elif config.filter_text and re.search(config.re_filter_text, region.translation): should_filter = True filter_reason = f"Matched filter text: {config.filter_text}" elif not config.translator.translator == Translator.original: diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 30bff773e..f17b5f685 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -65,7 +65,7 @@ def get_translator(key: str, *args, **kwargs) -> CommonTranslator: prepare_selective_translator(get_translator) # TODO: Refactor -class TranslatorChain(): +class TranslatorChain: def __init__(self, string: str): """ Parses string in form 'trans1:lang1;trans2:lang2' into chains, From 82f6ea4de9e4f1eaff7cdbf4282aa0377ce1d29b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:19:26 +0100 Subject: [PATCH 10/75] replace ctx in components --- manga_translator/config.py | 18 ++++++++++-------- manga_translator/inpainting/__init__.py | 8 +++++--- manga_translator/inpainting/common.py | 10 +++++----- .../inpainting/inpainting_lama_mpe.py | 6 ++++-- manga_translator/manga_translator.py | 18 ++++++------------ manga_translator/ocr/__init__.py | 10 +++++----- manga_translator/ocr/common.py | 9 +++++---- manga_translator/ocr/model_32px.py | 5 +++-- manga_translator/ocr/model_48px.py | 3 ++- manga_translator/ocr/model_48px_ctc.py | 5 +++-- manga_translator/ocr/model_manga_ocr.py | 10 +++------- manga_translator/translators/__init__.py | 13 +++++++++---- 
manga_translator/translators/chatgpt.py | 7 +++++-- manga_translator/translators/deepseek.py | 7 +++++-- manga_translator/translators/groq.py | 10 ++++------ manga_translator/utils/inference.py | 3 ++- 16 files changed, 76 insertions(+), 66 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 863268522..aea9f0141 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -30,6 +30,9 @@ class InpaintPrecision(IntEnum): fp16 = 1 bf16 = 2 + def __str__(self): + return self.name + class Detector(IntEnum): default = 0 dbconvnext = 1 @@ -118,7 +121,7 @@ def font_color_fg(self): colors = self.font_color.split(':') try: self._font_color_fg = hex2rgb(colors[0]) - self._font_color_fg = hex2rgb(colors[1]) if len(colors) > 1 else None + self._font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None except: raise Exception( f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') @@ -155,15 +158,16 @@ class TranslatorConfig(BaseModel): """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" gpt_config: Optional[str] = None # todo: no more path """Path to GPT config file, more info in README""" - translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + translator_chain: Optional[str] = None """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" - selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + selective_translation: Optional[str] = None """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" @property def translator_gen(self): if self._translator_gen is None: if self.selective_translation is not None: + #todo: refactor TranslatorChain trans = translator_chain(self.selective_translation) trans.target_lang = self.target_lang self._translator_gen = trans @@ -175,6 +179,7 @@ def translator_gen(self): self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') return self._translator_gen + @property def chatgpt_config(self): if self.gpt_config is not None and self._gpt_config is None: #todo: load from already loaded file @@ -198,8 +203,6 @@ class DetectorConfig(BaseModel): """Invert the image colors for detection. Might improve detection.""" det_gamma_correct: bool = False """Applies gamma correction for detection. Might improve detection.""" - ignore_bubble: int = 0 - """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" box_threshold: float = 0.7 """Threshold for bbox generation""" unclip_ratio: float = 2.3 @@ -213,7 +216,6 @@ class InpainterConfig(BaseModel): inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" - class ColorizerConfig(BaseModel): colorization_size: int = 576 """Size of image used for colorization. 
Set to -1 to use full image size""" @@ -222,8 +224,6 @@ class ColorizerConfig(BaseModel): colorizer: Colorizer = Colorizer.none """Colorization model to use.""" - - class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" @@ -231,6 +231,8 @@ class OcrConfig(BaseModel): """Optical character recognition (OCR) model to use""" min_text_length: int = 0 """Minimum text length of a text region""" + ignore_bubble: int = 0 + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class Config(BaseModel): # unclear diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index c4ca1ca13..52635c39f 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from .common import CommonInpainter, OfflineInpainter @@ -6,7 +8,7 @@ from .inpainting_sd import StableDiffusionInpainter from .none import NoneInpainter from .original import OriginalInpainter -from ..config import Inpainter +from ..config import Inpainter, InpainterConfig INPAINTERS = { Inpainter.default: AotInpainter, @@ -32,8 +34,8 @@ async def prepare(inpainter_key: Inpainter, device: str = 'cpu'): await inpainter.download() await inpainter.load(device) -async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: +async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, config: Optional[InpainterConfig], inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await 
inpainter.load(device) - return await inpainter.inpaint(image, mask, inpainting_size, verbose) + return await inpainter.inpaint(image, mask, config or InpainterConfig(), inpainting_size, verbose) diff --git a/manga_translator/inpainting/common.py b/manga_translator/inpainting/common.py index 208700144..6aa25da19 100644 --- a/manga_translator/inpainting/common.py +++ b/manga_translator/inpainting/common.py @@ -1,16 +1,16 @@ -import os import numpy as np from abc import abstractmethod +from ..config import InpainterConfig from ..utils import InfererModule, ModelWrapper class CommonInpainter(InfererModule): - async def inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: - return await self._inpaint(image, mask, inpainting_size, verbose) + async def inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + return await self._inpaint(image, mask, config, inpainting_size, verbose) @abstractmethod - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: pass class OfflineInpainter(CommonInpainter, ModelWrapper): @@ -20,5 +20,5 @@ async def _inpaint(self, *args, **kwargs): return await self.infer(*args, **kwargs) @abstractmethod - async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _infer(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: pass diff --git a/manga_translator/inpainting/inpainting_lama_mpe.py b/manga_translator/inpainting/inpainting_lama_mpe.py index ae7f0ae65..ddfa0cfc8 100644 --- 
a/manga_translator/inpainting/inpainting_lama_mpe.py +++ b/manga_translator/inpainting/inpainting_lama_mpe.py @@ -12,6 +12,7 @@ from torch import Tensor from .common import OfflineInpainter +from ..config import InpainterConfig from ..utils import resize_keep_aspect @@ -52,7 +53,7 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _infer(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: img_original = np.copy(image) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 @@ -95,7 +96,8 @@ async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int else: # Note: lama's weight shouldn't be convert to fp16 or bf16 otherwise it produces darkened results. # but it can inference under torch.autocast - precision = TORCH_DTYPE_MAP[os.environ.get("INPAINTING_PRECISION", "fp32")] + + precision = TORCH_DTYPE_MAP[str(config.inpainting_precision)] if precision == torch.float16: precision = torch.bfloat16 diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 62d4d154a..57ab5eac7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -10,7 +10,6 @@ from PIL import Image from typing import Optional, Any -from .args import DEFAULT_ARGS from .config import Config, Colorizer, Detector, Translator, Renderer, Inpainter from .utils import ( BASE_PATH, @@ -21,7 +20,6 @@ dump_image, visualize_textblocks, is_valuable_text, - hex2rgb, sort_regions, ) @@ -33,7 +31,6 @@ from .inpainting import dispatch as dispatch_inpainting, prepare as prepare_inpainting from .translators import ( LANGDETECT_MAP, - TranslatorChain, dispatch as dispatch_translation, prepare as prepare_translation, ) @@ -137,9 +134,8 @@ def parse_init_params(self, 
params: dict): 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') - #todo: fix + #todo: fix why is kernel size loaded in the constructor self.kernel_size=int(params.get('kernel_size')) - os.environ['INPAINTING_PRECISION'] = params.get('inpainting_precision', 'fp32') @property def using_gpu(self): @@ -281,14 +277,13 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) # -- Inpainting await self._report_progress('inpainting') - #todo: fix _run_inpainting takes ctx ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ -315,7 +310,7 @@ async def _revert_upscale(self, config: Config, ctx: Context): return ctx async def _run_colorizer(self, config: Config, ctx: Context): - #todo: fix dispatch_colorization takes ctx + #todo: im pretty sure the ctx is never used. does it need to be passed in? 
return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) async def _run_upscaling(self, config: Config, ctx: Context): @@ -329,8 +324,7 @@ async def _run_detection(self, config: Config, ctx: Context): self.device, self.verbose) async def _run_ocr(self, config: Config, ctx: Context): - #todo: fix dispatch_ocr takes ctx - textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) + textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, config.ocr, self.device, self.verbose) new_textlines = [] for textline in textlines: @@ -372,10 +366,10 @@ async def _run_textline_merge(self, config: Config, ctx: Context): return text_regions async def _run_text_translation(self, config: Config, ctx: Context): - #todo: fix dispatch_translation takes ctx translated_sentences = \ await dispatch_translation(config.translator.translator_gen, [region.text for region in ctx.text_regions], + config.translator, self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) @@ -508,7 +502,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) - async def _run_inpainting(self, config: Config,ctx: Context): + async def _run_inpainting(self, config: Config, ctx: Context): return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, self.verbose) diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 14d580f69..c7bd9b8c5 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -1,12 +1,12 @@ import numpy as np -from typing import List +from typing import List, Optional from .common import CommonOCR, OfflineOCR from .model_32px import 
Model32pxOCR from .model_48px import Model48pxOCR from .model_48px_ctc import Model48pxCTCOCR from .model_manga_ocr import ModelMangaOCR -from ..config import Ocr +from ..config import Ocr, OcrConfig from ..utils import Quadrilateral OCRS = { @@ -31,9 +31,9 @@ async def prepare(ocr_key: Ocr, device: str = 'cpu'): await ocr.download() await ocr.load(device) -async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: +async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], config:Optional[OcrConfig] = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.load(device) - args = args or {} - return await ocr.recognize(image, regions, args, verbose) + config = config or OcrConfig() + return await ocr.recognize(image, regions, config, verbose) diff --git a/manga_translator/ocr/common.py b/manga_translator/ocr/common.py index 1c056195f..ff3b088f4 100644 --- a/manga_translator/ocr/common.py +++ b/manga_translator/ocr/common.py @@ -5,6 +5,7 @@ import networkx as nx import itertools +from ..config import OcrConfig from ..utils import InfererModule, TextBlock, ModelWrapper, Quadrilateral class CommonOCR(InfererModule): @@ -37,15 +38,15 @@ def _generate_text_direction(self, bboxes: List[Union[Quadrilateral, TextBlock]] for node in nodes: yield bboxes[node], majority_dir - async def recognize(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def recognize(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: ''' Performs the optical character recognition, using the `textlines` as areas of interests. Returns a `textlines` list with the `textline.text` property set to the detected text string. 
''' - return await self._recognize(image, textlines, args, verbose) + return await self._recognize(image, textlines, config, verbose) @abstractmethod - async def _recognize(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def _recognize(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: pass @@ -56,5 +57,5 @@ async def _recognize(self, *args, **kwargs): return await self.infer(*args, **kwargs) @abstractmethod - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: pass diff --git a/manga_translator/ocr/model_32px.py b/manga_translator/ocr/model_32px.py index 6c1b1193f..829fb31ac 100644 --- a/manga_translator/ocr/model_32px.py +++ b/manga_translator/ocr/model_32px.py @@ -11,6 +11,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .common import OfflineOCR from ..utils import TextBlock, Quadrilateral, chunks from ..utils.bubble import is_ignore @@ -55,10 +56,10 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[TextBlock]: text_height = 32 max_chunk_size = 16 - ignore_bubble = args.get('ignore_bubble', 0) + ignore_bubble = config.ignore_bubble quadrilaterals = list(self._generate_text_direction(textlines)) region_imgs = [q.get_transformed_region(image, d, text_height) for q, d in quadrilaterals] diff --git a/manga_translator/ocr/model_48px.py 
b/manga_translator/ocr/model_48px.py index fcd45eb03..029788af4 100644 --- a/manga_translator/ocr/model_48px.py +++ b/manga_translator/ocr/model_48px.py @@ -12,6 +12,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .xpos_relative_position import XPOS # Roformer with Xpos and Local Attention ViT @@ -63,7 +64,7 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 diff --git a/manga_translator/ocr/model_48px_ctc.py b/manga_translator/ocr/model_48px_ctc.py index d42866e93..062b93e18 100644 --- a/manga_translator/ocr/model_48px_ctc.py +++ b/manga_translator/ocr/model_48px_ctc.py @@ -10,6 +10,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .common import OfflineOCR from ..utils import TextBlock, Quadrilateral, AvgMeter, chunks from ..utils.bubble import is_ignore @@ -58,10 +59,10 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 - ignore_bubble = args.get('ignore_bubble', 0) + ignore_bubble = config.ignore_bubble quadrilaterals = list(self._generate_text_direction(textlines)) region_imgs = [q.get_transformed_region(image, d, text_height) for q, d in quadrilaterals] diff --git a/manga_translator/ocr/model_manga_ocr.py 
b/manga_translator/ocr/model_manga_ocr.py index ee8ba65ab..34f6146fb 100644 --- a/manga_translator/ocr/model_manga_ocr.py +++ b/manga_translator/ocr/model_manga_ocr.py @@ -12,19 +12,15 @@ from shapely.geometry import Polygon import torch -import torch.nn as nn -import torch.nn.functional as F from manga_ocr import MangaOcr -from .xpos_relative_position import XPOS - from .common import OfflineOCR from .model_48px import OCR +from ..config import OcrConfig from ..textline_merge import split_text_region from ..utils import TextBlock, Quadrilateral, quadrilateral_can_merge_region, chunks from ..utils.generic import AvgMeter -from ..utils.bubble import is_ignore async def merge_bboxes(bboxes: List[Quadrilateral], width: int, height: int) -> Tuple[List[Quadrilateral], int]: # step 1: divide into multiple text region candidates @@ -133,7 +129,7 @@ async def _unload(self): del self.model del self.mocr - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 @@ -147,7 +143,7 @@ async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: is_quadrilaterals = True texts = {} - if args.get('use_mocr_merge', False): + if config.use_mocr_merge: merged_textlines, merged_idx = await merge_bboxes(textlines, image.shape[1], image.shape[0]) merged_quadrilaterals = list(self._generate_text_direction(merged_textlines)) else: diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index f17b5f685..ab2445f3e 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -1,3 +1,5 @@ +from typing import Optional + import py3langid as langid from .common import * @@ -19,7 +21,8 @@ from .sakura 
import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator -from ..config import Translator +from .. import Context +from ..config import Translator, TranslatorConfig OFFLINE_TRANSLATORS = { Translator.offline: SelectiveOfflineTranslator, @@ -103,7 +106,7 @@ async def prepare(chain: TranslatorChain): await translator.download() # TODO: Optionally take in strings instead of TranslatorChain for simplicity -async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = False, args = None, device: str = 'cpu') -> List[str]: +async def dispatch(chain: TranslatorChain, queries: List[str], translator_config: Optional[TranslatorConfig] = None, use_mtpe: bool = False, args:Optional[Context] = None, device: str = 'cpu') -> List[str]: if not queries: return queries @@ -118,7 +121,8 @@ async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = translator = get_translator(chain.langs[0]) if isinstance(translator, OfflineTranslator): await translator.load('auto', chain.target_lang, device) - translator.parse_args(args) + if translator_config: + translator.parse_args(translator_config) queries = await translator.translate('auto', chain.target_lang, queries, use_mtpe) return queries if args is not None: @@ -127,7 +131,8 @@ async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = translator = get_translator(key) if isinstance(translator, OfflineTranslator): await translator.load('auto', tgt_lang, device) - translator.parse_args(args) + if translator_config: + translator.parse_args(translator_config) queries = await translator.translate('auto', tgt_lang, queries, use_mtpe) if args is not None: args['translations'][tgt_lang] = queries diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index b65e6461b..7763e0b41 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -1,4 +1,7 @@ 
import re + +from ..config import TranslatorConfig + try: import openai except ImportError: @@ -71,8 +74,8 @@ def __init__(self, check_openai_key = True): self.token_count_last = 0 self.config = None - def parse_args(self, args): - self.config = args.gpt_config + def parse_args(self, args: TranslatorConfig): + self.config = args.chatgpt_config def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py index 7a108d4d8..7c712d7e3 100644 --- a/manga_translator/translators/deepseek.py +++ b/manga_translator/translators/deepseek.py @@ -1,4 +1,7 @@ import re + +from ..config import TranslatorConfig + try: import openai except ImportError: @@ -94,8 +97,8 @@ def __init__(self, check_openai_key=True): self.token_count_last = 0 self.config = None - def parse_args(self, args): - self.config = args.gpt_config + def parse_args(self, args: TranslatorConfig): + self.config = args.chatgpt_config def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/translators/groq.py b/manga_translator/translators/groq.py index f9218b065..2de7becae 100644 --- a/manga_translator/translators/groq.py +++ b/manga_translator/translators/groq.py @@ -1,11 +1,8 @@ import groq -import asyncio -import time -import json -import re import os from typing import List +from . 
import TranslatorChain from .common import CommonTranslator, MissingAPIKeyException from .keys import GROQ_API_KEY @@ -64,8 +61,9 @@ def __init__(self, check_groq_key=True): - def parse_args(self, args): - self.config = args.groq_config + def parse_args(self, args: TranslatorChain): + #todo: is nver set + self.config = None def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/utils/inference.py b/manga_translator/utils/inference.py index e18ae3b04..47fef6b01 100644 --- a/manga_translator/utils/inference.py +++ b/manga_translator/utils/inference.py @@ -19,6 +19,7 @@ get_filename_from_url, ) from .log import get_logger +from ..config import TranslatorConfig class InfererModule(ABC): @@ -26,7 +27,7 @@ def __init__(self): self.logger = get_logger(self.__class__.__name__) super().__init__() - def parse_args(self, args: Context): + def parse_args(self, args: TranslatorConfig): """May be overwritten by super classes to parse commandline arguments""" pass From f8996c5426ec718731a91aeb2dd550b2ce6c6419 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:21:26 +0100 Subject: [PATCH 11/75] fix imports --- manga_translator/config.py | 5 +++-- manga_translator/manga_translator.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index aea9f0141..b8e4c81cb 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,12 +1,13 @@ import re -from enum import Enum, IntEnum +from enum import IntEnum from omegaconf import OmegaConf from pydantic import BaseModel from typing import Optional -from manga_translator import TranslatorChain, hex2rgb from manga_translator.args import translator_chain +from manga_translator.translators import TranslatorChain +from manga_translator.utils import hex2rgb class Renderer(IntEnum): diff --git a/manga_translator/manga_translator.py 
b/manga_translator/manga_translator.py index 57ab5eac7..13ad6860a 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -1,5 +1,4 @@ import cv2 -from omegaconf import OmegaConf import langcodes import langdetect import os From e23510cc935092ad9f6e0e905962a6cde1fe5ef9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:24:28 +0100 Subject: [PATCH 12/75] fix stuff i forgot --- manga_translator/manga_translator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 13ad6860a..746a59be7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -352,8 +352,8 @@ async def _run_textline_merge(self, config: Config, ctx: Context): elif langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0: logger.info('Reason: Text language matches the target language and no_text_lang_skip is False.') else: - if ctx.font_color_fg or ctx.font_color_bg: - if ctx.font_color_bg: + if config.render.font_color_fg or config.render.font_color_bg: + if config.render.font_color_bg: region.adjust_bg_color = False new_text_regions.append(region) text_regions = new_text_regions @@ -502,7 +502,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context): From fa6cd8e90c4fe5714e1e8057e1687dfe1fbc5e10 Mon Sep 17 00:00:00 2001 From: frederik-uni 
<147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:40:16 +0100 Subject: [PATCH 13/75] convert str to enum in translatorchain --- manga_translator/config.py | 7 ++++++- manga_translator/translators/__init__.py | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index b8e4c81cb..fef1ee371 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -58,6 +58,7 @@ class Ocr(IntEnum): ocr48px = 1 ocr48px_ctc = 2 mocr = 3 + class Translator(IntEnum): youdao = 0 baidu = 1 @@ -83,6 +84,10 @@ class Translator(IntEnum): mbart50 = 21 qwen2 = 22 qwen2_big = 23 + + def __str__(self): + return self.name + class Upscaler: waifu2x = 0 esrgan = 1 @@ -177,7 +182,7 @@ def translator_gen(self): trans.target_lang = trans.langs[-1] self._translator_gen = trans else: - self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') + self._translator_gen = TranslatorChain(f'{str(self.translator)}:{self.target_lang}') return self._translator_gen @property diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index ab2445f3e..0ac0c00c6 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -57,7 +57,7 @@ } translator_cache = {} -def get_translator(key: str, *args, **kwargs) -> CommonTranslator: +def get_translator(key: Translator, *args, **kwargs) -> CommonTranslator: if key not in TRANSLATORS: raise ValueError(f'Could not find translator for: "{key}". 
Choose from the following: %s' % ','.join(TRANSLATORS)) if not translator_cache.get(key): @@ -80,11 +80,12 @@ def __init__(self, string: str): self.target_lang = None for g in string.split(';'): trans, lang = g.split(':') - if trans not in TRANSLATORS: + translator = Translator[trans] + if translator not in TRANSLATORS: raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) if lang not in VALID_LANGUAGES: raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) - self.chain.append((trans, lang)) + self.chain.append((translator, lang)) self.translators, self.langs = list(zip(*self.chain)) def has_offline(self) -> bool: From 85d6739b98199afa95b581341bfc67d57fe67fa2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:44:19 +0100 Subject: [PATCH 14/75] circular import fix --- manga_translator/args.py | 9 --- manga_translator/config.py | 70 +++++++++++++++++++----- manga_translator/mode/local.py | 44 +++++++++------ manga_translator/translators/__init__.py | 36 +----------- manga_translator/translators/groq.py | 3 +- manga_translator/utils/inference.py | 1 - 6 files changed, 87 insertions(+), 76 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 2eb72c91d..c35e02f8a 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -51,15 +51,6 @@ def dir_path(string): # return string # return _func -def translator_chain(string): - try: - return TranslatorChain(string) - except ValueError as e: - raise argparse.ArgumentTypeError(e) - except Exception: - raise argparse.ArgumentTypeError(f'Invalid translator_chain value: "{string}". 
Example usage: --translator "google:sugoi" -l "JPN:ENG"') - - class HelpFormatter(argparse.HelpFormatter): INDENT_INCREMENT = 2 MAX_HELP_POSITION = 24 diff --git a/manga_translator/config.py b/manga_translator/config.py index fef1ee371..428685b82 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,14 +1,58 @@ +import argparse import re from enum import IntEnum -from omegaconf import OmegaConf -from pydantic import BaseModel from typing import Optional -from manga_translator.args import translator_chain -from manga_translator.translators import TranslatorChain -from manga_translator.utils import hex2rgb +from omegaconf import OmegaConf + +# TODO: Refactor +class TranslatorChain: + def __init__(self, string: str): + """ + Parses string in form 'trans1:lang1;trans2:lang2' into chains, + which will be executed one after another when passed to the dispatch function. + """ + from manga_translator.translators import TRANSLATORS, VALID_LANGUAGES + if not string: + raise Exception('Invalid translator chain') + self.chain = [] + self.target_lang = None + for g in string.split(';'): + trans, lang = g.split(':') + translator = Translator[trans] + if translator not in TRANSLATORS: + raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) + if lang not in VALID_LANGUAGES: + raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) + self.chain.append((translator, lang)) + self.translators, self.langs = list(zip(*self.chain)) + + def has_offline(self) -> bool: + """ + Returns True if the chain contains offline translators. 
+ """ + from manga_translator.translators import OFFLINE_TRANSLATORS + return any(translator in OFFLINE_TRANSLATORS for translator in self.translators) + + def __eq__(self, __o: object) -> bool: + if type(__o) is str: + return __o == self.translators[0] + return super.__eq__(self, __o) + + +def translator_chain(string): + try: + return TranslatorChain(string) + except ValueError as e: + raise argparse.ArgumentTypeError(e) + except Exception: + raise argparse.ArgumentTypeError(f'Invalid translator_chain value: "{string}". Example usage: --translator "google:sugoi" -l "JPN:ENG"') + +def hex2rgb(h): + h = h.lstrip('#') + return tuple(int(h[i:i+2], 16) for i in (0, 2, 4)) class Renderer(IntEnum): default = 0 @@ -93,7 +137,7 @@ class Upscaler: esrgan = 1 upscler4xultrasharp = 2 -class RenderConfig(BaseModel): +class RenderConfig: renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" alignment: Alignment = Alignment.auto @@ -145,7 +189,7 @@ def font_color_bg(self): f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') return self._font_color_bg -class UpscaleConfig(BaseModel): +class UpscaleConfig: upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False @@ -153,7 +197,7 @@ class UpscaleConfig(BaseModel): upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. 
Can improve text detection.""" -class TranslatorConfig(BaseModel): +class TranslatorConfig: translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum @@ -193,7 +237,7 @@ def chatgpt_config(self): return self._gpt_config -class DetectorConfig(BaseModel): +class DetectorConfig: """""" detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" @@ -214,7 +258,7 @@ class DetectorConfig(BaseModel): unclip_ratio: float = 2.3 """How much to extend text skeleton to form bounding box""" -class InpainterConfig(BaseModel): +class InpainterConfig: inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 @@ -222,7 +266,7 @@ class InpainterConfig(BaseModel): inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" -class ColorizerConfig(BaseModel): +class ColorizerConfig: colorization_size: int = 576 """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 @@ -230,7 +274,7 @@ class ColorizerConfig(BaseModel): colorizer: Colorizer = Colorizer.none """Colorization model to use.""" -class OcrConfig(BaseModel): +class OcrConfig: use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" ocr: Ocr = Ocr.ocr48px @@ -240,7 +284,7 @@ class OcrConfig(BaseModel): ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. 
If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" -class Config(BaseModel): +class Config: # unclear pre_dict: Optional[str] = None post_dict: Optional[str] = None diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 67e67ebb5..dc395cf80 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -3,8 +3,7 @@ from PIL import Image -from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt -from ..manga_translator import _preprocess_params +from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt, Config from ..save import save_result from ..translators import ( LanguageUnsupportedException, @@ -14,6 +13,17 @@ class MangaTranslatorLocal(MangaTranslator): + def __init__(self, params: dict = None): + super().__init__(params) + self.attempts = params.get('attempts', None) + self.skip_no_text = params.get('skip_no_text', False) + self.text_output_file = params.get('text_output_file', None) + self.save_quality = params.get('save_quality', None) + self.text_regions = params.get('text_regions', None) + self.save_text_file = params.get('save_text_file', None) + self.save_text = params.get('save_text', None) + self.prep_manual = params.get('prep_manual', None) + async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): """ Translates an image or folder (recursively) specified through the path. @@ -79,7 +89,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un else: logger.info(f'Done. 
Translated {translated_count} image{"" if translated_count == 1 else "s"}') - async def translate_file(self, path: str, dest: str, params: dict): + async def translate_file(self, path: str, dest: str, params: dict, config: Config): if not params.get('overwrite') and os.path.exists(dest): logger.info( f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') @@ -91,15 +101,14 @@ async def translate_file(self, path: str, dest: str, params: dict): # Turn dict to context to make values also accessible through params. params = params or {} ctx = Context(**params) - _preprocess_params(ctx) attempts = 0 - while ctx.attempts == -1 or attempts < ctx.attempts + 1: + while self.attempts == -1 or attempts < self.attempts + 1: if attempts > 0: logger.info(f'Retrying translation! Attempt {attempts}' - + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + + (f' of {self.attempts}' if self.attempts != -1 else '')) try: - return await self._translate_file(path, dest, ctx) + return await self._translate_file(path, dest, config, ctx) except TranslationInterrupt: break @@ -108,7 +117,7 @@ async def translate_file(self, path: str, dest: str, params: dict): await self._report_progress('error-lang', True) else: await self._report_progress('error', True) - if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): + if not self.ignore_errors and not (self.attempts == -1 or attempts < self.attempts): raise else: logger.error(f'{e.__class__.__name__}: {e}', @@ -116,12 +125,12 @@ async def translate_file(self, path: str, dest: str, params: dict): attempts += 1 return False - async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: + async def _translate_file(self, path: str, dest: str, config: Config, ctx: Context) -> bool: if path.endswith('.txt'): with open(path, 'r') as f: queries = f.read().split('\n') translated_sentences = \ - await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, + 
await dispatch_translation(config.translator.translator_gen, queries, self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) p, ext = os.path.splitext(dest) if ext != '.txt': @@ -142,26 +151,27 @@ async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: logger.warn(f'Failed to open image: {path}') return False - ctx = await self.translate(img, ctx) + ctx = await self.translate(img, config) result = ctx.result # Save result - if ctx.skip_no_text and not ctx.text_regions: + if self.skip_no_text and not ctx.text_regions: logger.debug('Not saving due to --skip-no-text') return True if result: logger.info(f'Saving "{dest}"') + ctx.save_quality = self.save_quality save_result(result, dest, ctx) await self._report_progress('saved', True) - if ctx.save_text or ctx.save_text_file or ctx.prep_manual: - if ctx.prep_manual: + if self.save_text or self.save_text_file or self.prep_manual: + if self.prep_manual: # Save original image next to translated p, ext = os.path.splitext(dest) img_filename = p + '-orig' + ext img_path = os.path.join(os.path.dirname(dest), img_filename) - img.save(img_path, quality=ctx.save_quality) - if ctx.text_regions: + img.save(img_path, quality=self.save_quality) + if self.text_regions: self._save_text_to_file(path, ctx) return True return False @@ -194,7 +204,7 @@ def identify_colors(fg_rgb: List[int]): s += f'coords: {list(line.ravel())}\n' s += '\n' - text_output_file = ctx.text_output_file + text_output_file = self.text_output_file if not text_output_file: text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 0ac0c00c6..36005fea6 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -21,8 +21,8 @@ from .sakura import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator -from .. 
import Context -from ..config import Translator, TranslatorConfig +from ..config import Translator, TranslatorConfig, TranslatorChain +from ..utils import Context OFFLINE_TRANSLATORS = { Translator.offline: SelectiveOfflineTranslator, @@ -67,38 +67,6 @@ def get_translator(key: Translator, *args, **kwargs) -> CommonTranslator: prepare_selective_translator(get_translator) -# TODO: Refactor -class TranslatorChain: - def __init__(self, string: str): - """ - Parses string in form 'trans1:lang1;trans2:lang2' into chains, - which will be executed one after another when passed to the dispatch function. - """ - if not string: - raise Exception('Invalid translator chain') - self.chain = [] - self.target_lang = None - for g in string.split(';'): - trans, lang = g.split(':') - translator = Translator[trans] - if translator not in TRANSLATORS: - raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) - if lang not in VALID_LANGUAGES: - raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) - self.chain.append((translator, lang)) - self.translators, self.langs = list(zip(*self.chain)) - - def has_offline(self) -> bool: - """ - Returns True if the chain contains offline translators. - """ - return any(translator in OFFLINE_TRANSLATORS for translator in self.translators) - - def __eq__(self, __o: object) -> bool: - if type(__o) is str: - return __o == self.translators[0] - return super.__eq__(self, __o) - async def prepare(chain: TranslatorChain): for key, tgt_lang in chain.chain: translator = get_translator(key) diff --git a/manga_translator/translators/groq.py b/manga_translator/translators/groq.py index 2de7becae..7b016ed92 100644 --- a/manga_translator/translators/groq.py +++ b/manga_translator/translators/groq.py @@ -2,7 +2,6 @@ import os from typing import List -from . 
import TranslatorChain from .common import CommonTranslator, MissingAPIKeyException from .keys import GROQ_API_KEY @@ -61,7 +60,7 @@ def __init__(self, check_groq_key=True): - def parse_args(self, args: TranslatorChain): + def parse_args(self, args): #todo: is nver set self.config = None diff --git a/manga_translator/utils/inference.py b/manga_translator/utils/inference.py index 47fef6b01..86c8e8d74 100644 --- a/manga_translator/utils/inference.py +++ b/manga_translator/utils/inference.py @@ -11,7 +11,6 @@ from .generic import ( BASE_PATH, - Context, download_url_with_progressbar, prompt_yes_no, replace_prefix, From 2d87f49e6a23905e605e568bed71db3ef7f81990 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Mon, 18 Nov 2024 20:16:33 +0100 Subject: [PATCH 15/75] load empty config --- manga_translator/args.py | 8 ++++---- manga_translator/config.py | 34 ++++++++++++++++++---------------- manga_translator/mode/local.py | 6 ++++-- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index c35e02f8a..761c057bd 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -2,6 +2,8 @@ import os from urllib.parse import unquote +from torch.optim.optimizer import required + from .detection import DETECTORS from .ocr import OCRS from .inpainting import INPAINTERS @@ -88,12 +90,13 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') +parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') subparsers = 
parser.add_subparsers(dest='mode', required=True, help='Mode of operation') # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') -parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') +parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') parser_batch.add_argument('--overwrite', action='store_true', help='Overwrite already translated images') @@ -130,6 +133,3 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - -# Generares dict with a default value for each argument -DEFAULT_ARGS = vars(parser.parse_args([])) diff --git a/manga_translator/config.py b/manga_translator/config.py index 428685b82..65df9e5f7 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -5,6 +5,8 @@ from typing import Optional from omegaconf import OmegaConf +from pydantic import BaseModel + # TODO: Refactor class TranslatorChain: @@ -132,12 +134,12 @@ class Translator(IntEnum): def __str__(self): return self.name -class Upscaler: +class Upscaler(IntEnum): waifu2x = 0 esrgan = 1 upscler4xultrasharp = 2 -class RenderConfig: +class RenderConfig(BaseModel): renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. 
Ignores some other argument options""" alignment: Alignment = Alignment.auto @@ -189,7 +191,7 @@ def font_color_bg(self): f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') return self._font_color_bg -class UpscaleConfig: +class UpscaleConfig(BaseModel): upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False @@ -197,7 +199,7 @@ class UpscaleConfig: upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. Can improve text detection.""" -class TranslatorConfig: +class TranslatorConfig(BaseModel): translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum @@ -237,7 +239,7 @@ def chatgpt_config(self): return self._gpt_config -class DetectorConfig: +class DetectorConfig(BaseModel): """""" detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" @@ -258,7 +260,7 @@ class DetectorConfig: unclip_ratio: float = 2.3 """How much to extend text skeleton to form bounding box""" -class InpainterConfig: +class InpainterConfig(BaseModel): inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 @@ -266,7 +268,7 @@ class InpainterConfig: inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" -class ColorizerConfig: +class ColorizerConfig(BaseModel): colorization_size: int = 576 """Size of image used for colorization. 
Set to -1 to use full image size""" denoise_sigma: int = 30 @@ -274,7 +276,7 @@ class ColorizerConfig: colorizer: Colorizer = Colorizer.none """Colorization model to use.""" -class OcrConfig: +class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" ocr: Ocr = Ocr.ocr48px @@ -284,7 +286,7 @@ class OcrConfig: ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" -class Config: +class Config(BaseModel): # unclear pre_dict: Optional[str] = None post_dict: Optional[str] = None @@ -292,19 +294,19 @@ class Config: # json filter_text: Optional[str] = None """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" - render: RenderConfig + render: RenderConfig = RenderConfig() """render configs""" - upscale: UpscaleConfig + upscale: UpscaleConfig = UpscaleConfig() """upscaler configs""" - translator: TranslatorConfig + translator: TranslatorConfig = TranslatorConfig() """tanslator configs""" - detector: DetectorConfig + detector: DetectorConfig = DetectorConfig() """detector configs""" - colorizer: ColorizerConfig + colorizer: ColorizerConfig = ColorizerConfig() """colorizer configs""" - inpainter: InpainterConfig + inpainter: InpainterConfig = InpainterConfig() """inpainter configs""" - ocr: OcrConfig + ocr: OcrConfig = OcrConfig() """Ocr configs""" # ? 
kernel_size: int = 3 diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index dc395cf80..f81673f83 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -1,3 +1,4 @@ +import json import os from typing import Union, List @@ -28,6 +29,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un """ Translates an image or folder (recursively) specified through the path. """ + config = Config(**json.loads('{}')) if not os.path.exists(path): raise FileNotFoundError(path) path = os.path.abspath(os.path.expanduser(path)) @@ -58,7 +60,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un else: p, ext = os.path.splitext(dest) _dest = f'{p}.{file_ext or ext[1:]}' - await self.translate_file(path, _dest, params) + await self.translate_file(path, _dest, params,config) elif os.path.isdir(path): # Determine destination folder path @@ -82,7 +84,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un p, ext = os.path.splitext(output_dest) output_dest = f'{p}.{file_ext or ext[1:]}' - if await self.translate_file(file_path, output_dest, params): + if await self.translate_file(file_path, output_dest, params, config): translated_count += 1 if translated_count == 0: logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') From c6b39d3526172c39e2447d4e3acb2b4610fcb0ea Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:52:01 +0100 Subject: [PATCH 16/75] fix inpainting --- manga_translator/config.py | 13 ++++++------- manga_translator/inpainting/__init__.py | 3 ++- manga_translator/inpainting/none.py | 4 +++- manga_translator/inpainting/original.py | 4 +++- manga_translator/mode/local.py | 9 ++++++--- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 65df9e5f7..4c34c5d36 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -166,7 +166,8 @@ class RenderConfig(BaseModel): """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None """Use fixed font size for rendering""" - + _font_color_fg = None + _font_color_bg = None @property def font_color_fg(self): if self.font_color and not self._font_color_fg: @@ -214,6 +215,8 @@ class TranslatorConfig(BaseModel): """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" selective_translation: Optional[str] = None """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + _translator_gen = None + _gpt_config = None @property def translator_gen(self): @@ -261,7 +264,7 @@ class DetectorConfig(BaseModel): """How much to extend text skeleton to form bounding box""" class InpainterConfig(BaseModel): - inpainter: Inpainter = Inpainter.lama_large + inpainter: Inpainter = Inpainter.none """Inpainting model to use""" inpainting_size: int = 2048 """Size of image used for inpainting (too large will result in OOM)""" @@ -287,11 +290,6 @@ class OcrConfig(BaseModel): """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class Config(BaseModel): - # unclear - pre_dict: Optional[str] = None - post_dict: Optional[str] = None - - # json filter_text: Optional[str] = None """Filter regions by their text with a regex. 
Example usage: '.*badtext.*'""" render: RenderConfig = RenderConfig() @@ -313,6 +311,7 @@ class Config(BaseModel): """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" + _filter_text = None @property def re_filter_text(self): diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 52635c39f..9148f7640 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -38,4 +38,5 @@ async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.load(device) - return await inpainter.inpaint(image, mask, config or InpainterConfig(), inpainting_size, verbose) + config = config or InpainterConfig() + return await inpainter.inpaint(image, mask, config, inpainting_size, verbose) diff --git a/manga_translator/inpainting/none.py b/manga_translator/inpainting/none.py index 38d1c2a4c..39ef7090f 100644 --- a/manga_translator/inpainting/none.py +++ b/manga_translator/inpainting/none.py @@ -1,10 +1,12 @@ import numpy as np from .common import CommonInpainter +from ..config import InpainterConfig + class NoneInpainter(CommonInpainter): - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: img_inpainted = np.copy(image) img_inpainted[mask > 0] = np.array([255, 255, 255], np.uint8) return img_inpainted diff --git a/manga_translator/inpainting/original.py b/manga_translator/inpainting/original.py index 464a1f766..c4639a2c1 100644 --- a/manga_translator/inpainting/original.py +++ 
b/manga_translator/inpainting/original.py @@ -1,8 +1,10 @@ import numpy as np from .common import CommonInpainter +from ..config import InpainterConfig + class OriginalInpainter(CommonInpainter): - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: return np.copy(image) diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index f81673f83..55fe76e8c 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -83,9 +83,12 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un output_dest = replace_prefix(file_path, path, _dest) p, ext = os.path.splitext(output_dest) output_dest = f'{p}.{file_ext or ext[1:]}' - - if await self.translate_file(file_path, output_dest, params, config): - translated_count += 1 + try: + if await self.translate_file(file_path, output_dest, params, config): + translated_count += 1 + except Exception as e: + logger.error(e) + raise e if translated_count == 0: logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') else: From 43625c6e5c2f90e2d3e1fe5e07ec890a3927fce6 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:52:13 +0100 Subject: [PATCH 17/75] fix inpainting --- manga_translator/manga_translator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 746a59be7..9bc471454 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -276,7 +276,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter,config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -284,12 +284,10 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- Inpainting await self._report_progress('inpainting') ctx.img_inpainted = await self._run_inpainting(config, ctx) - ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) if self.verbose: cv2.imwrite(self._result_path('inpainted.png'), cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR)) - # -- Rendering await self._report_progress('rendering') ctx.img_rendered = await self._run_text_rendering(config, ctx) @@ -499,10 +497,10 @@ async def _run_text_translation(self, config: Config, ctx: Context): async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', - 
config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) + config.mask_dilation_offset, config.ocr.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context): @@ -515,7 +513,7 @@ async def _run_text_rendering(self, config: Config, ctx: Context): else: output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, self.font_path, config.render.font_size, config.render.font_size_offset, - config.render.font_size_minimum, not config.render.no_hyphenation, config.render.render_mask, config.render.line_spacing) + config.render.font_size_minimum, not config.render.no_hyphenation, ctx.render_mask, config.render.line_spacing) return output def _result_path(self, path: str) -> str: From b73d1dd410b88e23cb48f5dfc1808c73b315a460 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:05:24 +0100 Subject: [PATCH 18/75] load from config --- manga_translator/args.py | 1 + manga_translator/mode/local.py | 12 +++++++++++- manga_translator/mode/web.py | 12 +++++++----- manga_translator/mode/ws.py | 14 +++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 761c057bd..e59f514a9 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -107,6 +107,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by 
outputting blank, inpainted images, plus copies of the original for reference')
 parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best')
+parser.add_argument('--config-file', default=None, type=str, help='Path to the config file')
 
 # Web mode
 parser_web = subparsers.add_parser('web', help='Run in web service mode')
diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py
index 55fe76e8c..564efbc0f 100644
--- a/manga_translator/mode/local.py
+++ b/manga_translator/mode/local.py
@@ -29,12 +29,22 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un
         """
         Translates an image or folder (recursively) specified through the path.
         """
-        config = Config(**json.loads('{}'))
         if not os.path.exists(path):
             raise FileNotFoundError(path)
         path = os.path.abspath(os.path.expanduser(path))
         dest = os.path.abspath(os.path.expanduser(dest)) if dest else ''
         params = params or {}
+        config_file_path = params.get("config_file", None)
+        config_content = "{}"
+
+        if config_file_path:
+            try:
+                with open(config_file_path, 'r') as file:
+                    config_content = file.read()
+            except Exception as e:
+                print("Couldn't read file")
+                pass
+        config = Config(**json.loads(config_content))
 
         # Handle format
         file_ext = params.get('format')
diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py
index 2acc9faf7..3f0ccb889 100644
--- a/manga_translator/mode/web.py
+++ b/manga_translator/mode/web.py
@@ -3,12 +3,13 @@
 
 import requests
 
-from manga_translator import MangaTranslator, logger, Context
+from manga_translator import logger, Context, Config
+from manga_translator.mode.local import MangaTranslatorLocal
 from manga_translator.translators import TRANSLATORS
 from manga_translator.utils import add_file_logger, remove_file_logger
 
 
-class MangaTranslatorWeb(MangaTranslator):
+class MangaTranslatorWeb(MangaTranslatorLocal):
     """
     Translator client that executes tasks on 
behalf of the webserver in web_main.py. """ @@ -110,10 +111,11 @@ async def listen(self, translation_params: dict = None): self._params = None self.result_sub_folder = '' - async def _run_text_translation(self, ctx: Context): + async def _run_text_translation(self, config: Config, ctx: Context): # Run machine translation as reference for manual translation (if `--translator=none` is not set) - text_regions = await super()._run_text_translation(ctx) + text_regions = await super()._run_text_translation(config, ctx) + #todo: manual never set if ctx.get('manual', False): logger.info('Waiting for user input from manual translation') requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ @@ -142,7 +144,7 @@ async def _run_text_translation(self, ctx: Context): i = i - 1 else: text_regions[i].translation = translation - text_regions[i].target_lang = ctx.translator.langs[-1] + text_regions[i].target_lang = config.translator.translator_gen.langs[-1] i = i + 1 break elif 'cancel' in ret: diff --git a/manga_translator/mode/ws.py b/manga_translator/mode/ws.py index d696058f8..9c6067eb6 100644 --- a/manga_translator/mode/ws.py +++ b/manga_translator/mode/ws.py @@ -7,7 +7,7 @@ import numpy as np from PIL import Image -from manga_translator import logger, Context, MangaTranslator +from manga_translator import logger, Context, MangaTranslator, Config from manga_translator.utils import PriorityLock, Throttler @@ -152,7 +152,7 @@ async def server_process_inner(main_loop, logger_task, session, websocket, task) return True, output is not None - async def server_process(main_loop, session, websocket, task) -> bool: + async def server_process(main_loop, session, websocket, task): logger_task = logger.getChild(f'{task.id}') try: (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, @@ -224,9 +224,9 @@ def server_thread(future, main_loop, server_loop): # create a future that is never done await future - async def 
_run_text_translation(self, ctx: Context): - coroutine = super()._run_text_translation(ctx) - if ctx.translator.has_offline(): + async def _run_text_translation(self, config: Config, ctx: Context): + coroutine = super()._run_text_translation(config, ctx) + if config.translator.translator_gen.has_offline(): return await coroutine else: task_id = self._task_id @@ -243,10 +243,10 @@ async def _run_text_translation(self, ctx: Context): self._websocket = websocket return result - async def _run_text_rendering(self, ctx: Context): + async def _run_text_rendering(self, config: Config, ctx: Context): render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] - output = await super()._run_text_rendering(ctx) + output = await super()._run_text_rendering(config, ctx) render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 ctx.render_mask = render_mask if self.verbose: From f473ed7a197f01aeefe70e603b33a924b42cef0f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:38:34 +0100 Subject: [PATCH 19/75] server --- manga_translator/args.py | 2 +- server/args.py | 9 +++++ server/instance.py | 66 ++++++++++++++++++++++++++++++++++++ server/main.py | 73 ++++++++++++++++++++++++++++++++++++++++ server/myqueue.py | 25 ++++++++++++++ server/sent_data.py | 50 +++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 server/args.py create mode 100644 server/instance.py create mode 100644 server/main.py create mode 100644 server/myqueue.py create mode 100644 server/sent_data.py diff --git a/manga_translator/args.py b/manga_translator/args.py index e59f514a9..0516c075b 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -129,7 +129,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') # API mode -parser_api = 
subparsers.add_parser('api', help='Run in API mode') +parser_api = subparsers.add_parser('shared', help='Run in API mode') parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') diff --git a/server/args.py b/server/args.py new file mode 100644 index 000000000..ac3d92b33 --- /dev/null +++ b/server/args.py @@ -0,0 +1,9 @@ +import argparse + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Specify host and port for the server.") + parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + + return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py new file mode 100644 index 000000000..68260c962 --- /dev/null +++ b/server/instance.py @@ -0,0 +1,66 @@ +from asyncio import Event, Lock +from typing import List, Optional + +from PIL import Image +from pydantic import BaseModel + +from manga_translator import Config +from server.sent_data import fetch_data_stream, NotifyType + + +class ExecutorInstance(BaseModel): + ip: str + port: int + busy: bool = False + + def free_executor(self): + self.busy = False + + async def sent(self, image: Image, config: Config, sender: NotifyType): + await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) + +class Executors: + def __init__(self): + self.list: List[ExecutorInstance] = [] + self.lock: Lock = Lock() + self.event = Event() + + def register(self, instance: ExecutorInstance): + self.list.append(instance) + + def free_executors(self) -> int: + return len([item for item in self.list if not item.busy]) + 
+ async def _find_instance(self): + while True: + instance = next((x for x in self.list if x.busy == False), None) + if instance is not None: + return instance + #todo: cricial error: warn should never happen + await self.event.wait() + + async def find_executor(self) -> ExecutorInstance: + async with self.lock: # Using async with for lock management + instance = await self._find_instance() + instance.busy = True + return instance + + def free_executor(self, instance: ExecutorInstance): + instance.free_executor() + self.event.set() + self.event.clear() + +def example_notify(a: int, b: Optional[int] = None) -> None: + print(f"Notify called with a={a} and b={b}") + +async def main(): + executor = ExecutorInstance(ip="127.0.0.1", port=5003) + + image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") + config = Config() + + await executor.sent(image, config, example_notify) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) \ No newline at end of file diff --git a/server/main.py b/server/main.py new file mode 100644 index 000000000..8da8c2677 --- /dev/null +++ b/server/main.py @@ -0,0 +1,73 @@ +import asyncio +from typing import List, Dict, Optional, Callable + +from fastapi import FastAPI, Request, HTTPException + +from server.instance import ExecutorInstance, Executors +from server.myqueue import TaskQueue +from server.sent_data import NotifyType + +app = FastAPI() +executor_instances: Executors = Executors() +task_queue = TaskQueue() + +@app.post("/register") +async def register_instance(instance: ExecutorInstance, request: Request): + instance.ip = request.client.host + executor_instances.register(instance) + return {"code": 0} + + + +async def wait(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + result = "" #todo: implement logic + instance.busy = False + instance.event.set() + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() +@app.post("/json") +async def json(req: TranslateRequest): + pass + +@app.post("/bytes") +async def bytes(req: TranslateRequest): + pass + +@app.post("/image") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_json") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_bytes") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_image") +async def image(req: TranslateRequest): + pass + +if __name__ == '__main__': + import uvicorn + from args import parse_arguments + args = parse_arguments() + uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file diff --git a/server/myqueue.py b/server/myqueue.py new file mode 100644 index 000000000..61c1abac3 --- /dev/null +++ b/server/myqueue.py @@ -0,0 +1,25 @@ +import asyncio +from typing import List, Dict + + +class TaskQueue: + def __init__(self): + self.queue: List[Dict] = [] + self.queue_event: asyncio.Event = asyncio.Event() + + def add_task(self, task): + self.queue.append(task) + + def get_pos(self, task): + return self.queue.index(task) + + def update_event(self): + self.queue_event.set() + self.queue_event.clear() + + def remove(self, task): + self.queue.remove(task) + self.update_event() + + async def wait_for_event(self): + await self.queue_event.wait() \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data.py new file mode 100644 index 
000000000..619dca71b --- /dev/null +++ b/server/sent_data.py @@ -0,0 +1,50 @@ +import pickle +from typing import Mapping, Optional, Callable + +import aiohttp +from PIL.Image import Image +from fastapi import HTTPException + +from manga_translator import Config + +NotifyType = Optional[Callable[[int, Optional[int]], None]] + +async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + await process_stream(response, sender) + else: + raise HTTPException(response.status, detail=response.text()) + + +async def process_stream(response, sender: NotifyType): + buffer = b'' + + async for chunk in response.content.iter_any(): + if chunk: + buffer += chunk + await handle_buffer(buffer, sender) + + +async def handle_buffer(buffer, sender: NotifyType): + while len(buffer) >= 5: + status, expected_size = extract_header(buffer) + + if len(buffer) >= 5 + expected_size: + data = buffer[5:5 + expected_size] + sender(status, data) + buffer = buffer[5 + expected_size:] + else: + break + + +def extract_header(buffer): + """Extract the status and expected size from the buffer.""" + status = int.from_bytes(buffer[0:1], byteorder='big') + expected_size = int.from_bytes(buffer[1:5], byteorder='big') + return status, expected_size + From f8095685d44a1d5f133b0f3ffbfd14f858a93449 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:40:02 +0100 Subject: [PATCH 20/75] fix pre & post dict --- manga_translator/manga_translator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 9bc471454..ffd750c82 100644 --- 
a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -93,6 +93,8 @@ class MangaTranslator: result_sub_folder: str def __init__(self, params: dict = None): + self.pre_dict = params.get('pre_dict', None) + self.post_dict = params.get('post_dict', None) self.font_path = None self.use_mtpe = False self.kernel_size = None @@ -231,7 +233,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = load_dictionary(config.pre_dict) + pre_dict = load_dictionary(self.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text @@ -381,7 +383,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): region._direction = config.render.direction # Apply post dictionary after translating - post_dict = load_dictionary(config.post_dict) + post_dict = load_dictionary(self.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation From bc7f2fcf528834d9b80e6f21f41bd988828b6503 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:04:12 +0100 Subject: [PATCH 21/75] python sucks --- execute_test.py | 5 ++++- server/instance.py | 12 ++++++++---- server/sent_data.py | 7 +++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/execute_test.py b/execute_test.py index 3a1d0e8af..8705fb2b1 100644 --- a/execute_test.py +++ b/execute_test.py @@ -4,6 +4,9 @@ import requests from PIL import Image +from manga_translator import Config + + async def execute_method(method_name, attributes): url = f"http://127.0.0.1:5003/execute/{method_name}" headers = {'Content-Type': 'application/octet-stream'} @@ -39,5 +42,5 @@ async def execute_method(method_name, attributes): if __name__ == '__main__': image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, 
"params": {"translator": "none", "inpainter": "none"}} + attributes = {"image": image, "config": Config()} asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 68260c962..da550613c 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,3 +1,4 @@ +import pickle from asyncio import Event, Lock from typing import List, Optional @@ -16,7 +17,7 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self, image: Image, config: Config, sender: NotifyType): + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: @@ -50,8 +51,11 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b: Optional[int] = None) -> None: - print(f"Notify called with a={a} and b={b}") +def example_notify(a: int, b) -> None: + if a == 0: + print(pickle.loads(b)) + else: + print(f"Notify called with a={a} and b={b}") async def main(): executor = ExecutorInstance(ip="127.0.0.1", port=5003) @@ -59,7 +63,7 @@ async def main(): image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") config = Config() - await executor.sent(image, config, example_notify) + await executor.sent_stream(image, config, example_notify) if __name__ == "__main__": import asyncio diff --git a/server/sent_data.py b/server/sent_data.py index 619dca71b..02053bf80 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -1,3 +1,4 @@ +import asyncio import pickle from typing import Mapping, Optional, Callable @@ -27,10 +28,11 @@ async def process_stream(response, sender: NotifyType): async for chunk in response.content.iter_any(): if chunk: buffer += chunk - await handle_buffer(buffer, sender) + buffer = handle_buffer(buffer, sender) -async def 
handle_buffer(buffer, sender: NotifyType): + +def handle_buffer(buffer, sender: NotifyType): while len(buffer) >= 5: status, expected_size = extract_header(buffer) @@ -40,6 +42,7 @@ async def handle_buffer(buffer, sender: NotifyType): buffer = buffer[5 + expected_size:] else: break + return buffer def extract_header(buffer): From 30d9b3559d74a233f99f9e89d22f3fba522e9fa8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:02:42 +0100 Subject: [PATCH 22/75] api logic --- requirements.txt | 3 +- server/args.py | 2 +- server/instance.py | 4 ++ server/main.py | 151 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 143 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 25c2e953b..e843bf354 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,4 +51,5 @@ accelerate bitsandbytes uvicorn fastapi -pydantic \ No newline at end of file +pydantic +python-multipart \ No newline at end of file diff --git a/server/args.py b/server/args.py index ac3d92b33..a00e24a22 100644 --- a/server/args.py +++ b/server/args.py @@ -4,6 +4,6 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') - parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index da550613c..0b426b606 100644 --- a/server/instance.py +++ b/server/instance.py @@ -7,6 +7,7 @@ from manga_translator import Config from server.sent_data import fetch_data_stream, NotifyType +from fastapi import Response class ExecutorInstance(BaseModel): @@ -17,6 +18,9 @@ class ExecutorInstance(BaseModel): def 
free_executor(self): self.busy = False + async def sent(self) -> Response: + pass + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) diff --git a/server/main.py b/server/main.py index 8da8c2677..3c3507312 100644 --- a/server/main.py +++ b/server/main.py @@ -1,16 +1,34 @@ import asyncio -from typing import List, Dict, Optional, Callable +import builtins +import io +import re +from base64 import b64decode +from typing import Union +import requests +from PIL import Image from fastapi import FastAPI, Request, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from starlette.responses import StreamingResponse +from manga_translator import Config from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) executor_instances: Executors = Executors() task_queue = TaskQueue() + @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): instance.ip = request.client.host @@ -18,6 +36,44 @@ async def register_instance(instance: ExecutorInstance, request: Request): return {"code": 0} +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + 
content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + async def wait(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" @@ -30,7 +86,11 @@ async def wait(task, notify: NotifyType): task_queue.remove(task) if notify: notify(4, 0) - result = "" #todo: implement logic + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + instance.busy = False instance.event.set() @@ -42,32 +102,93 @@ async def wait(task, notify: NotifyType): if queue_pos == 0: raise HTTPException(500, detail="No translator registered") await task_queue.wait_for_event() + + +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + +def transform_to_image(data): + return b"" + @app.post("/json") 
-async def json(req: TranslateRequest): - pass +async def json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) @app.post("/bytes") -async def bytes(req: TranslateRequest): - pass +async def bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/image") -async def image(req: TranslateRequest): - pass +async def image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/stream_json") -async def image(req: TranslateRequest): - pass +async def stream_json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_bytes") -async def image(req: TranslateRequest): - pass +async def stream_bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_image") -async def image(req: TranslateRequest): - pass +async def stream_image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = 
StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response if __name__ == '__main__': import uvicorn from args import parse_arguments + args = parse_arguments() - uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file + uvicorn.run(app, host=args.host, port=args.port) From b66237ef6892458bea88a85f5c47f943e0991bfa Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:52:22 +0100 Subject: [PATCH 23/75] replace api --- manga_translator/__main__.py | 4 - manga_translator/mode/api.py | 292 ----------------------------------- server/instance.py | 9 +- server/main.py | 36 ++++- server/sent_data.py | 10 ++ server/to_json.py | 66 ++++++++ 6 files changed, 108 insertions(+), 309 deletions(-) delete mode 100644 manga_translator/mode/api.py create mode 100644 server/to_json.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index db711b2c8..eb348798f 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -79,10 +79,6 @@ async def dispatch(args: Namespace): translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) - elif args.mode == 'api': - from manga_translator.mode.api import MangaTranslatorAPI - translator = MangaTranslatorAPI(args_dict) - await translator.listen(args_dict) elif args.mode == 'shared': translator = MangaShare(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py deleted file mode 100644 index fc71d32e3..000000000 --- a/manga_translator/mode/api.py +++ /dev/null @@ -1,292 +0,0 @@ -# Experimental. May be replaced by a refactored server/web_main.py in the future. 
-import asyncio -import base64 -import io - -import cv2 -import numpy as np -from PIL import Image -from aiohttp import web -from aiohttp.web_middlewares import middleware -from marshmallow import fields, Schema, ValidationError - -from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger -from manga_translator.args import translator_chain -from manga_translator.detection import DETECTORS -from manga_translator.inpainting import INPAINTERS -from manga_translator.manga_translator import _preprocess_params -from manga_translator.ocr import OCRS -from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS -from manga_translator.upscaling import UPSCALERS - - -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - - async def 
get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await 
self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - _preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 0b426b606..c27a0c415 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,8 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType -from fastapi import Response +from server.sent_data import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -18,15 +17,15 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self) -> Response: - pass + async def sent(self, image: Image, config: Config): + return await fetch_data("http://"+self.ip+":"+str(self.port)+"/simple_execute/translate", image, config) async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [] + self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] self.lock: Lock = 
Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 3c3507312..cda01c0a9 100644 --- a/server/main.py +++ b/server/main.py @@ -9,12 +9,13 @@ from PIL import Image from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse +from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config +from manga_translator import Config, Context from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType +from server.to_json import to_json app = FastAPI() @@ -91,8 +92,7 @@ async def wait(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - instance.busy = False - instance.event.set() + executor_instances.free_executor(instance) if notify: return @@ -127,22 +127,42 @@ def transform_to_image(data): @app.post("/json") async def json(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) - data = await wait((data, img), None) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait(ctx, None) + json = to_json(data) + return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) data = await wait((data, img), None) @app.post("/image") async def image(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + data = await wait((data, img), None) + img_byte_arr = io.BytesIO() + data.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + return StreamingResponse(img_byte_arr, 
media_type="image/png") @app.post("/stream_json") async def stream_json(req: Request): diff --git a/server/sent_data.py b/server/sent_data.py index 02053bf80..2945cc9f7 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -21,6 +21,16 @@ async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyTyp else: raise HTTPException(response.status, detail=response.text()) +async def fetch_data(url, image: Image, config: Config, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + return pickle.loads(await response.read()) + else: + raise HTTPException(response.status, detail=response.text()) async def process_stream(response, sender: NotifyType): buffer = b'' diff --git a/server/to_json.py b/server/to_json.py new file mode 100644 index 000000000..356e74104 --- /dev/null +++ b/server/to_json.py @@ -0,0 +1,66 @@ +import base64 +from typing import Dict, List + +import cv2 + +from manga_translator import Context +from manga_translator.utils import TextBlock + + +#input:PIL, +#result:PIL +#img_colorized: PIL +#upscaled:PIL +#img_rgb:array +#img_alpha:None +#textlines:list[Quadrilateral] +#text_regions:list[TextBlock] +#translations: map[str, arr[str]] +#img_inpainted: array +#gimp_mask:array +#img_rendered: array +#mask_raw: array +#mask:array +def to_json(ctx: Context): + text_regions:list[TextBlock] = ctx.text_regions + inpaint = ctx.img_inpainted + translations:Dict[str, List[str]] = ctx.translations + results = [] + if 'overlay_ext' in ctx: + #todo: unreachable + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in translations.items()} + else: + trans = {} + 
trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' + overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + return results \ No newline at end of file From 897070cd691ee2e906faae87f4b528fa2620fe6e Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 23:03:20 +0100 Subject: [PATCH 24/75] server refactor --- server/instance.py | 19 +-- server/main.py | 140 ++---------------- server/myqueue.py | 35 ++++- server/request_extraction.py | 61 ++++++++ .../{sent_data.py => sent_data_internal.py} | 0 server/streaming.py | 16 ++ 6 files changed, 126 insertions(+), 145 deletions(-) create mode 100644 server/request_extraction.py rename server/{sent_data.py => sent_data_internal.py} (100%) create mode 100644 server/streaming.py diff --git a/server/instance.py b/server/instance.py index c27a0c415..7c02e695f 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType, fetch_data +from server.sent_data_internal import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -54,20 +54,5 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b) -> None: - if a == 0: - print(pickle.loads(b)) - 
else: - print(f"Notify called with a={a} and b={b}") -async def main(): - executor = ExecutorInstance(ip="127.0.0.1", port=5003) - - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - config = Config() - - await executor.sent_stream(image, config, example_notify) - -if __name__ == "__main__": - import asyncio - asyncio.run(main()) \ No newline at end of file +executor_instances: Executors = Executors() diff --git a/server/main.py b/server/main.py index cda01c0a9..257be7cf5 100644 --- a/server/main.py +++ b/server/main.py @@ -1,20 +1,14 @@ import asyncio -import builtins import io -import re -from base64 import b64decode -from typing import Union -import requests -from PIL import Image -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config, Context -from server.instance import ExecutorInstance, Executors -from server.myqueue import TaskQueue -from server.sent_data import NotifyType +from server.instance import ExecutorInstance, executor_instances +from server.myqueue import wait_in_queue +from server.request_extraction import multi_content_type, to_pil_image, get_ctx +from server.streaming import notify, stream from server.to_json import to_json app = FastAPI() @@ -26,9 +20,6 @@ allow_methods=["*"], allow_headers=["*"], ) -executor_instances: Executors = Executors() -task_queue = TaskQueue() - @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): @@ -36,130 +27,25 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} - -async def to_pil_image(image: Union[str, bytes]) -> Image.Image: - try: - if isinstance(image, builtins.bytes): - image = Image.open(io.BytesIO(image)) - return image - else: - if 
re.match(r'^data:image/.+;base64,', image): - value = image.split(',', 1)[1] - image_data = b64decode(value) - image = Image.open(io.BytesIO(image_data)) - return image - else: - response = requests.get(image) - image = Image.open(io.BytesIO(response.content)) - return image - except Exception as e: - raise HTTPException(status_code=422, detail=str(e)) - - -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - raise HTTPException(status_code=400, detail="Unsupported Content-Type") - - -async def wait(task, notify: NotifyType): - """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" - while True: - queue_pos = task_queue.get_pos(task) - if notify: - notify(3, queue_pos) - if queue_pos < executor_instances.free_executors(): - instance = await executor_instances.find_executor() - task_queue.remove(task) - if notify: - notify(4, 0) - if notify: - await instance.sent_stream(task.image, task.config, notify) - else: - result = await instance.sent(task.image, task.config) - - executor_instances.free_executor(instance) - - if notify: - return - else: - return result - else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - await task_queue.wait_for_event() - - -async def stream(messages): - while True: - message = await messages.get() - yield message - if message[0] == 0 or message[0] == 2: - break - -def notify(code, data, transform_to_bytes, messages): - if code == 0: - result_bytes = transform_to_bytes(data) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - def transform_to_image(data): return b"" @app.post("/json") async def json(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait(ctx, None) - json = to_json(data) + ctx = await get_ctx(req) + json = to_json(ctx) return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - data = await wait((data, img), None) + ctx = await get_ctx(req) @app.post("/image") async def image(req: Request): - data, img = await multi_content_type(req) - ctx 
= Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait((data, img), None) + ctx = await get_ctx(req) img_byte_arr = io.BytesIO() - data.result.save(img_byte_arr, format="PNG") + ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") @@ -175,7 +61,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_bytes") @@ -189,7 +75,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_image") @@ -203,7 +89,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 61c1abac3..cb11db50d 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,6 +1,10 @@ import asyncio from typing import List, Dict +from fastapi import HTTPException + +from server.instance import executor_instances +from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): @@ -22,4 +26,33 @@ def remove(self, task): self.update_event() async def wait_for_event(self): - await 
self.queue_event.wait() \ No newline at end of file + await self.queue_event.wait() + +task_queue = TaskQueue() + +async def wait_in_queue(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + + executor_instances.free_executor(instance) + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py new file mode 100644 index 000000000..559127760 --- /dev/null +++ b/server/request_extraction.py @@ -0,0 +1,61 @@ +import builtins +import io +import re +from base64 import b64decode +from typing import Union + +import requests +from PIL import Image +from fastapi import Request, HTTPException + +from manga_translator import Config, Context +from server.myqueue import task_queue, wait_in_queue + + +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise 
HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + +async def get_ctx(req: Request): + data, img = await multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait_in_queue(ctx, None) \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data_internal.py similarity index 100% rename from server/sent_data.py rename to server/sent_data_internal.py diff --git a/server/streaming.py b/server/streaming.py new file mode 100644 index 000000000..809d58e05 --- /dev/null +++ b/server/streaming.py @@ -0,0 +1,16 @@ +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) \ No newline at end of file From d0e48b9f086371f8a9be2d48a88aab65a88b4873 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> 
Date: Tue, 19 Nov 2024 23:11:07 +0100 Subject: [PATCH 25/75] stream update --- server/main.py | 48 ++++++------------------------------ server/request_extraction.py | 22 ++++++++++++++++- server/streaming.py | 4 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/server/main.py b/server/main.py index 257be7cf5..665b61af6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,4 +1,3 @@ -import asyncio import io from fastapi import FastAPI, Request @@ -6,9 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances -from server.myqueue import wait_in_queue -from server.request_extraction import multi_content_type, to_pil_image, get_ctx -from server.streaming import notify, stream +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json app = FastAPI() @@ -27,8 +24,10 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} -def transform_to_image(data): - return b"" +def transform_to_image(ctx): + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + return img_byte_arr.getvalue() @app.post("/json") async def json(req: Request): @@ -40,7 +39,6 @@ async def json(req: Request): async def bytes(req: Request): ctx = await get_ctx(req) - @app.post("/image") async def image(req: Request): ctx = await get_ctx(req) @@ -52,45 +50,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return 
while_streaming(req, transform_to_image) @app.post("/stream_bytes") async def stream_bytes(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/request_extraction.py b/server/request_extraction.py index 559127760..62b2f9fd8 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -1,3 +1,4 @@ +import asyncio import builtins import io import re @@ -7,9 +8,11 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue +from server.streaming import notify, stream async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -58,4 +61,21 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) \ No newline at end of file + data = await wait_in_queue(ctx, None) + +async def while_streaming(req: Request, transform): + data, img = await 
multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + messages = asyncio.Queue() + + def notify_internal(code: int, data) -> None: + notify(code, data, transform, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait_in_queue((data, img), notify_internal)) + return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 809d58e05..907fed3d7 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,5 @@ +import pickle + async def stream(messages): while True: message = await messages.get() @@ -7,7 +9,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: - result_bytes = transform_to_bytes(data) + result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: From 3be082d89ee1cdd8ba07459856b504577695cf1a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:47:52 +0100 Subject: [PATCH 26/75] stream_image,stream_json now working json&image most likely too --- api-test.html | 153 +++++++++++++++++++++++++++++++++++ server/main.py | 9 ++- server/myqueue.py | 4 +- server/request_extraction.py | 4 +- server/streaming.py | 2 +- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 api-test.html diff --git a/api-test.html b/api-test.html new file mode 100644 index 000000000..f07b865fb --- /dev/null +++ b/api-test.html @@ -0,0 +1,153 @@ + + + + + + Image Upload and Translation + + + +

Upload Image and Translate

+ + +

+

+ + + + diff --git a/server/main.py b/server/main.py index 665b61af6..7a8cf48f2 100644 --- a/server/main.py +++ b/server/main.py @@ -29,6 +29,9 @@ def transform_to_image(ctx): ctx.result.save(img_byte_arr, format="PNG") return img_byte_arr.getvalue() +def transform_to_json(ctx): + return str(to_json(ctx)).encode("utf-8") + @app.post("/json") async def json(req: Request): ctx = await get_ctx(req) @@ -50,15 +53,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_json) @app.post("/stream_bytes") async def stream_bytes(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/myqueue.py b/server/myqueue.py index cb11db50d..ca21698ed 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -35,12 +35,12 @@ async def wait_in_queue(task, notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, queue_pos) + notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, 0) + notify(4, "0") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 62b2f9fd8..9290ecc07 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -61,7 +61,7 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) + return await wait_in_queue(ctx, None) async def while_streaming(req: Request, transform): data, img = await 
multi_content_type(req) @@ -77,5 +77,5 @@ def notify_internal(code: int, data) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), notify_internal)) + asyncio.create_task(wait_in_queue(ctx, notify_internal)) return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 907fed3d7..c65a897d2 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -10,7 +10,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: result_bytes = str(data).encode("utf-8") From 8aa1f2b6931a64e87ad1ed949b848d09cd952693 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:00:15 +0100 Subject: [PATCH 27/75] remove old tasks on interrupt connection --- server/myqueue.py | 15 +++++++++++++++ server/request_extraction.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/server/myqueue.py b/server/myqueue.py index ca21698ed..9ae865063 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -2,6 +2,7 @@ from typing import List, Dict from fastapi import HTTPException +from starlette.requests import Request from server.instance import executor_instances from server.sent_data_internal import NotifyType @@ -30,6 +31,11 @@ async def wait_for_event(self): task_queue = TaskQueue() +async def is_client_disconnected(request: Request) -> bool: + if await request.is_disconnected(): + return True + return False + async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. 
If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: @@ -37,6 +43,10 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -47,6 +57,7 @@ async def wait_in_queue(task, notify: NotifyType): result = await instance.sent(task.image, task.config) executor_instances.free_executor(instance) + task_queue.update_event() if notify: return @@ -55,4 +66,8 @@ async def wait_in_queue(task, notify: NotifyType): else: if queue_pos == 0: raise HTTPException(500, detail="No translator registered") + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 9290ecc07..02dc85e70 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -59,6 +59,7 @@ async def get_ctx(req: Request): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) return await wait_in_queue(ctx, None) @@ -69,6 +70,7 @@ async def while_streaming(req: Request, transform): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) messages = asyncio.Queue() From 0160a3e2e5ec2178565082a4126bc0032b850cd2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:32:11 +0100 Subject: [PATCH 28/75] change routes --- 
execute_test.py | 46 ----------------------------- server/main.py | 28 ++++++++++++++---- api-test.html => server/manual.html | 3 +- 3 files changed, 23 insertions(+), 54 deletions(-) delete mode 100644 execute_test.py rename api-test.html => server/manual.html (97%) diff --git a/execute_test.py b/execute_test.py deleted file mode 100644 index 8705fb2b1..000000000 --- a/execute_test.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio -import json -import pickle -import requests -from PIL import Image - -from manga_translator import Config - - -async def execute_method(method_name, attributes): - url = f"http://127.0.0.1:5003/execute/{method_name}" - headers = {'Content-Type': 'application/octet-stream'} - - response = requests.post(url, data=pickle.dumps(attributes), headers=headers, stream=True) - - if response.status_code == 200: - buffer = b'' - for chunk in response.iter_content(chunk_size=None): - if chunk: - buffer += chunk - while True: - if len(buffer) >= 5: - status = int.from_bytes(buffer[0:1], byteorder='big') - expected_size = int.from_bytes(buffer[1:5], byteorder='big') - if len(buffer) >= 5 + expected_size: - data = buffer[5:5 + expected_size] - if status == 0: - print("data", pickle.loads(data)) - elif status == 1: - print("log", data) - elif status == 2: - print("error", data) - buffer = buffer[5 + expected_size:] - else: - break - else: - break - else: - print(json.loads(response.content)) - - - -if __name__ == '__main__': - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "config": Config()} - asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/main.py b/server/main.py index 7a8cf48f2..49ef9f5a9 100644 --- a/server/main.py +++ b/server/main.py @@ -5,6 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances +from server.myqueue import task_queue from 
server.request_extraction import get_ctx, while_streaming from server.to_json import to_json @@ -32,17 +33,17 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/json") +@app.post("/translate/json") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/bytes") +@app.post("/translate/bytes") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/image") +@app.post("/translate/image") async def image(req: Request): ctx = await get_ctx(req) img_byte_arr = io.BytesIO() @@ -51,18 +52,33 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/stream_json") +@app.post("/translate/json/stream") async def stream_json(req: Request): return await while_streaming(req, transform_to_json) -@app.post("/stream_bytes") +@app.post("/translate/bytes/stream") async def stream_bytes(req: Request): return await while_streaming(req, transform_to_image) -@app.post("/stream_image") +@app.post("/translate/image/stream") async def stream_image(req: Request): return await while_streaming(req, transform_to_image) +@app.post("/queue-size") +async def queue_size() -> int: + return len(task_queue.queue) + +@app.post("/") +async def index(): + # ui.html + pass + +@app.post("/manual") +async def manual(): + # manual.html + pass + + if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/api-test.html b/server/manual.html similarity index 97% rename from api-test.html rename to server/manual.html index f07b865fb..1335b0c51 100644 --- a/api-test.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? 'http://127.0.0.1:8000/stream_image' : 'http://127.0.0.1:8000/stream_json', { + const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { method: 'POST', body: formData, /*headers: { @@ -142,7 +142,6 @@

buffer = buffer.slice(totalSize); } } - console.log(buffer.length) } else { errorField.innerHTML = response.statusText; From aa9492724c7225fbc76385d45012e77977fad9bd Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:10:35 +0100 Subject: [PATCH 29/75] api goals --- server/args.py | 7 ++++++- server/instance.py | 2 +- server/main.py | 51 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/server/args.py b/server/args.py index a00e24a22..a27ba5ffd 100644 --- a/server/args.py +++ b/server/args.py @@ -5,5 +5,10 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - + #use_gpu + #use_gpu_limited + #ignore_errors + #verbose + #nonce + #start_instance return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 7c02e695f..e844289dd 100644 --- a/server/instance.py +++ b/server/instance.py @@ -25,7 +25,7 @@ async def sent_stream(self, image: Image, config: Config, sender: NotifyType): class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] + self.list: List[ExecutorInstance] = [] self.lock: Lock = Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 49ef9f5a9..dd6f6ebd6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,6 +1,10 @@ import io +import os +import secrets +import subprocess +import sys -from fastapi import FastAPI, Request +from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse @@ -10,6 +14,7 @@ from server.to_json import 
to_json app = FastAPI() +nonce = None app.add_middleware( CORSMiddleware, @@ -21,6 +26,9 @@ @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): + req_nonce = request.headers.get('X-Nonce') + if req_nonce != nonce: + raise HTTPException(401, detail="Invalid nonce") instance.ip = request.client.host executor_instances.register(instance) return {"code": 0} @@ -78,10 +86,51 @@ async def manual(): # manual.html pass +def generate_nonce(): + return secrets.token_hex(16) + +def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): + cmds = [ + sys.executable, + '-m', 'manga_translator', + '--mode', 'shared', + '--host', host, + '--port', str(port), + '--nonce', nonce, + '--no-report' + ] + if params.get('use_gpu', False): + cmds.append('--use-gpu') + if params.get('use_gpu_limited', False): + cmds.append('--use-gpu-limited') + if params.get('ignore_errors', False): + cmds.append('--ignore-errors') + if params.get('verbose', False): + cmds.append('--verbose') + + proc = subprocess.Popen(cmds, cwd=BASE_PATH) + executor_instances.register(ExecutorInstance(ip=host, port=port)) + return proc + +def prepare(args): + global nonce + if args.get("nonce", None) is None: + nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) + else: + nonce = args.get("nonce", None) + if args.get("start_instance", None): + start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + +#todo: restart if crash +#todo: cache results +#todo: cleanup cache +#todo: store images while in queue if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() + prepare(args) + print("Nonce: "+nonce) uvicorn.run(app, host=args.host, port=args.port) From 4dfe04c58f06625940a8e9062d8dba8441a7529c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:14:31 +0100 Subject: [PATCH 30/75] more todos & args --- 
server/args.py | 16 ++++++++++------ server/main.py | 7 ++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/server/args.py b/server/args.py index a27ba5ffd..5c7294ca9 100644 --- a/server/args.py +++ b/server/args.py @@ -1,14 +1,18 @@ import argparse +import os def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - #use_gpu - #use_gpu_limited - #ignore_errors - #verbose - #nonce - #start_instance + parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + parser.add_argument('--start-instance', action='store_true', + help='If a translator should be launched automatically') + parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + g = parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') return parser.parse_args() \ No newline at end of file diff --git a/server/main.py b/server/main.py index dd6f6ebd6..f5b5d105a 100644 --- a/server/main.py +++ b/server/main.py @@ -78,12 +78,12 @@ async def queue_size() -> int: @app.post("/") async def index(): - # ui.html + # todo:ui.html pass @app.post("/manual") async def manual(): - # manual.html + # todo:manual.html pass def generate_nonce(): @@ -107,7 +107,7 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) cmds.append('--ignore-errors') if 
params.get('verbose', False): cmds.append('--verbose') - + #todo: cwd proc = subprocess.Popen(cmds, cwd=BASE_PATH) executor_instances.register(ExecutorInstance(ip=host, port=port)) return proc @@ -125,6 +125,7 @@ def prepare(args): #todo: cache results #todo: cleanup cache #todo: store images while in queue +#todo: add docs if __name__ == '__main__': import uvicorn From 29a7625123fd23e6074b946f5268c1adc827d15f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:03:29 +0100 Subject: [PATCH 31/75] some docs & queue fix --- manga_translator/utils/generic.py | 2 +- server/main.py | 83 ++++++++++++++++++++----------- server/myqueue.py | 20 +++++--- server/request_extraction.py | 8 ++- server/to_json.py | 17 ++++++- 5 files changed, 91 insertions(+), 39 deletions(-) diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py index 67163b69b..966e6a370 100644 --- a/manga_translator/utils/generic.py +++ b/manga_translator/utils/generic.py @@ -37,7 +37,7 @@ def __init__(self, **kwargs): def __eq__(self, other): if not isinstance(other, Context): return NotImplemented - return vars(self) == vars(other) + return dict(self) == dict(other) def __contains__(self, key): return key in self.keys() diff --git a/server/main.py b/server/main.py index f5b5d105a..185e59f6a 100644 --- a/server/main.py +++ b/server/main.py @@ -3,15 +3,21 @@ import secrets import subprocess import sys +from builtins import bytes +from typing import Union -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse, JSONResponse +from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from pathlib import Path +from pydantic import BaseModel + +from manga_translator import Config from server.instance import ExecutorInstance, 
executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json +from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.to_json import to_json, Translation app = FastAPI() nonce = None @@ -24,14 +30,19 @@ allow_headers=["*"], ) -@app.post("/register") -async def register_instance(instance: ExecutorInstance, request: Request): - req_nonce = request.headers.get('X-Nonce') +class TranslateRequestForm(BaseModel): + """This request can be a multipart or a json request""" + image: UploadFile + """can be a url, base64 encoded image or a multipart image""" + config: str + """in case it is a multipart this needs to be a string(json.stringify)""" + +@app.post("/register", response_description="no response") +async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") - instance.ip = request.client.host + instance.ip = req.client.host executor_instances.register(instance) - return {"code": 0} def transform_to_image(ctx): img_byte_arr = io.BytesIO() @@ -41,18 +52,29 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/translate/json") +async def parse_request( + req: Request, + image: Union[str, bytes] = Form(...), + config: str = Form(...), +): + if req.headers.get('content-type').startswith('multipart'): + config = json.loads(config) + return TranslateRequest(image=image, config=Config(**config)) + else: + return None + +@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/translate/bytes") +@app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/translate/image") -async def image(req: Request): +@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) +async def image(req: Request) -> StreamingResponse: ctx = await get_ctx(req) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") @@ -60,31 +82,32 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream") -async def stream_json(req: Request): +@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_json) -@app.post("/translate/bytes/stream") -async def stream_bytes(req: Request): +@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request)-> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/translate/image/stream") -async def stream_image(req: Request): +@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is 
waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/queue-size") +@app.post("/queue-size", response_model=int) async def queue_size() -> int: return len(task_queue.queue) -@app.post("/") -async def index(): +@app.get("/", response_class=HTMLResponse) +async def index() -> HTMLResponse: # todo:ui.html pass -@app.post("/manual") +@app.get("/manual", response_class=HTMLResponse) async def manual(): - # todo:manual.html - pass + html_file = Path("manual.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) def generate_nonce(): return secrets.token_hex(16) @@ -114,18 +137,19 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) def prepare(args): global nonce - if args.get("nonce", None) is None: + if args.nonce is None: nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) else: - nonce = args.get("nonce", None) - if args.get("start_instance", None): - start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + nonce = args.nonce + if args.start_instance: + start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results #todo: cleanup cache #todo: store images while in queue #todo: add docs +#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) if __name__ == '__main__': import uvicorn @@ -134,4 +158,5 @@ def prepare(args): args = parse_arguments() prepare(args) print("Nonce: "+nonce) + executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) uvicorn.run(app, host=args.host, port=args.port) diff --git a/server/myqueue.py b/server/myqueue.py index 9ae865063..446541fc2 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -3,13 +3,15 @@ from fastapi import HTTPException from starlette.requests 
import Request +from starlette.responses import StreamingResponse +from manga_translator import Context from server.instance import executor_instances from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): - self.queue: List[Dict] = [] + self.queue: List[Context] = [] self.queue_event: asyncio.Event = asyncio.Event() def add_task(self, task): @@ -43,10 +45,13 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -64,10 +69,11 @@ async def wait_in_queue(task, notify: NotifyType): else: return result else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 02dc85e70..442bc0e68 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -8,12 +8,19 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from pydantic import BaseModel from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue from 
server.streaming import notify, stream +class TranslateRequest(BaseModel): + """This request can be a multipart or a json request""" + image: bytes|str + """can be a url, base64 encoded image or a multipart image""" + config: Config + """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: try: @@ -36,7 +43,6 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: async def multi_content_type(request: Request): content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): form = await request.form() config = form.get("config", "{}") diff --git a/server/to_json.py b/server/to_json.py index 356e74104..911718d8d 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -2,6 +2,7 @@ from typing import Dict, List import cv2 +from pydantic import BaseModel from manga_translator import Context from manga_translator.utils import TextBlock @@ -63,4 +64,18 @@ def to_json(ctx: Context): 'language': text_regions[i].source_lang, 'background': background }) - return results \ No newline at end of file + return results + +class TextColor(BaseModel): + fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + text: dict[str, str] + minX: int + minY: int + maxX: int + maxY: int + textColor:TextColor + language: str + background: str From 5e1638779e2e883229677369410d63c98e46405c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:25 +0100 Subject: [PATCH 32/75] stream type fix --- server/myqueue.py | 4 ++-- server/request_extraction.py | 2 +- server/sent_data_internal.py | 2 +- server/streaming.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/myqueue.py b/server/myqueue.py index 446541fc2..b9174d936 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -43,7 +43,7 @@ async def wait_in_queue(task, 
notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, str(queue_pos)) + notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): task_queue.remove(task) @@ -55,7 +55,7 @@ async def wait_in_queue(task, notify: NotifyType): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, "0") + notify(4, b"") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 442bc0e68..080d84cc4 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -81,7 +81,7 @@ async def while_streaming(req: Request, transform): messages = asyncio.Queue() - def notify_internal(code: int, data) -> None: + def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 2945cc9f7..5720cb6c5 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -8,7 +8,7 @@ from manga_translator import Config -NotifyType = Optional[Callable[[int, Optional[int]], None]] +NotifyType = Optional[Callable[[int, Optional[bytes]], None]] async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): attributes = {"image": image, "config": config} diff --git a/server/streaming.py b/server/streaming.py index c65a897d2..611771853 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,4 @@ +import asyncio import pickle async def stream(messages): @@ -7,12 +8,11 @@ async def stream(messages): if message[0] == 0 or message[0] == 2: break -def notify(code, data, transform_to_bytes, messages): +def notify(code: int, data: bytes, transform_to_bytes, messages: 
asyncio.Queue): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result =code.to_bytes(1, 'big') + len(data).to_bytes(4, 'big') + data messages.put_nowait(encoded_result) \ No newline at end of file From 0c3908e0cd8b62c8faa8a5196a5cd23120bdd7d8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:49 +0100 Subject: [PATCH 33/75] add server ui --- manga_translator/args.py | 2 +- server/index.html | 393 +++++++++++++++++++++++++++++++++++++++ server/main.py | 46 +++-- 3 files changed, 425 insertions(+), 16 deletions(-) create mode 100644 server/index.html diff --git a/manga_translator/args.py b/manga_translator/args.py index 0516c075b..8795b4e2f 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -133,4 +133,4 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - +parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') diff --git a/server/index.html b/server/index.html new file mode 100644 index 000000000..edf617609 --- /dev/null +++ b/server/index.html @@ -0,0 +1,393 @@ + + + + + Image/Manga Translator + + + + + + + +
+
+

Image/Manga Translator

+
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+
+ + +
+
+
+
{{ statusText }}
+ +
+
+ +
{{ statusText }}
+
+
+ +
+
+ Please consider supporting us by + Ko-fi + or + Patreon! +
+ Source Code +
+
+
+ + + diff --git a/server/main.py b/server/main.py index 185e59f6a..773908f41 100644 --- a/server/main.py +++ b/server/main.py @@ -1,8 +1,10 @@ import io import os import secrets +import signal import subprocess import sys +from argparse import Namespace from builtins import bytes from typing import Union @@ -100,8 +102,9 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse) async def index() -> HTMLResponse: - # todo:ui.html - pass + html_file = Path("index.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse) async def manual(): @@ -112,27 +115,35 @@ async def manual(): def generate_nonce(): return secrets.token_hex(16) -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): +def start_translator_client_proc(host: str, port: int, nonce: str, params: Namespace): cmds = [ sys.executable, '-m', 'manga_translator', - '--mode', 'shared', + 'shared', '--host', host, '--port', str(port), '--nonce', nonce, - '--no-report' ] - if params.get('use_gpu', False): + if params.use_gpu: cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): + if params.use_gpu_limited: cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): + if params.ignore_errors: cmds.append('--ignore-errors') - if params.get('verbose', False): + if params.verbose: cmds.append('--verbose') - #todo: cwd - proc = subprocess.Popen(cmds, cwd=BASE_PATH) + base_path = os.path.dirname(os.path.abspath(__file__)) + parent = os.path.dirname(base_path) + proc = subprocess.Popen(cmds, cwd=parent) executor_instances.register(ExecutorInstance(ip=host, port=port)) + + def handle_exit_signals(signal, frame): + proc.terminate() + sys.exit(0) + + signal.signal(signal.SIGINT, handle_exit_signals) + signal.signal(signal.SIGTERM, handle_exit_signals) + return proc def prepare(args): @@ -142,7 +153,7 @@ def prepare(args): else: nonce = args.nonce if 
args.start_instance: - start_translator_client_proc(args.host, args.port + 1, nonce, args) + return start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results @@ -150,13 +161,18 @@ def prepare(args): #todo: store images while in queue #todo: add docs #todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) +#todo: enable config in html pages if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() - prepare(args) + args.start_instance = True + proc = prepare(args) print("Nonce: "+nonce) - executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) - uvicorn.run(app, host=args.host, port=args.port) + try: + uvicorn.run(app, host=args.host, port=args.port) + except Exception: + if proc: + proc.terminate() From b462d12d5d891d8197f8d3e93f5be5609d8f4f64 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:36:36 +0100 Subject: [PATCH 34/75] delete old code --- manga_translator/mode/web.py | 153 ------- manga_translator/server/manual.html | 352 ---------------- manga_translator/server/ui.html | 377 ----------------- manga_translator/server/web_main.py | 629 ---------------------------- manga_translator/server/ws.proto | 36 -- manga_translator/server/ws_pb2.py | 32 -- 6 files changed, 1579 deletions(-) delete mode 100644 manga_translator/mode/web.py delete mode 100644 manga_translator/server/manual.html delete mode 100644 manga_translator/server/ui.html delete mode 100644 manga_translator/server/web_main.py delete mode 100644 manga_translator/server/ws.proto delete mode 100644 manga_translator/server/ws_pb2.py diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py deleted file mode 100644 index 3f0ccb889..000000000 --- a/manga_translator/mode/web.py +++ /dev/null @@ -1,153 +0,0 @@ -import asyncio -import time - 
-import requests - -from manga_translator import logger, Context, Config -from manga_translator.mode.local import MangaTranslatorLocal -from manga_translator.translators import TRANSLATORS -from manga_translator.utils import add_file_logger, remove_file_logger - - -class MangaTranslatorWeb(MangaTranslatorLocal): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. - """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from ..translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) 
- finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. - """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, config: Config, ctx: Context): - # Run machine translation as reference for manual translation (if 
`--translator=none` is not set) - text_regions = await super()._run_text_translation(config, ctx) - - #todo: manual never set - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations = ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = config.translator.translator_gen.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions diff --git a/manga_translator/server/manual.html b/manga_translator/server/manual.html deleted file mode 100644 index 9e4a2befb..000000000 --- a/manga_translator/server/manual.html +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - Manga Image Translator - - - - -
- - -
-
-
-

Raw Text

-

Translation

-
-
- - -
-
- -
-
-
-

Upload image by dragging and dropping images onto the dashed region

- - -
- - -
-
- -
-
- - - - - - \ No newline at end of file diff --git a/manga_translator/server/ui.html b/manga_translator/server/ui.html deleted file mode 100644 index 88dcb9fc3..000000000 --- a/manga_translator/server/ui.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - - Image/Manga Translator - - - - - - - -
-
-

Image/Manga Translator

-
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
-
-
- - -
-
-
-
{{ statusText }}
- -
-
- -
{{ statusText }}
-
-
- -
-
- Please consider supporting us by - Ko-fi - or - Patreon! -
- Source Code -
-
-
- - - diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py deleted file mode 100644 index 9cc61c0f4..000000000 --- a/manga_translator/server/web_main.py +++ /dev/null @@ -1,629 +0,0 @@ -import io -import os -import sys -import re -import shutil -import mimetypes -import time -import asyncio -import subprocess -import secrets -from io import BytesIO -from PIL import Image -from aiohttp import web -from collections import deque -from imagehash import phash - -SERVER_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -BASE_PATH = os.path.dirname(os.path.dirname(SERVER_DIR_PATH)) - -# TODO: Get capabilities through api -VALID_LANGUAGES = { - 'CHS': 'Chinese (Simplified)', - 'CHT': 'Chinese (Traditional)', - 'CSY': 'Czech', - 'NLD': 'Dutch', - 'ENG': 'English', - 'FRA': 'French', - 'DEU': 'German', - 'HUN': 'Hungarian', - 'ITA': 'Italian', - 'JPN': 'Japanese', - 'KOR': 'Korean', - 'PLK': 'Polish', - 'PTB': 'Portuguese (Brazil)', - 'ROM': 'Romanian', - 'RUS': 'Russian', - 'ESP': 'Spanish', - 'TRK': 'Turkish', - 'UKR': 'Ukrainian', - 'VIN': 'Vietnamese', - 'ARA': 'Arabic', -} -# Whitelists -VALID_DETECTORS = set(['default', 'ctd']) -VALID_DIRECTIONS = set(['auto', 'h', 'v']) -VALID_TRANSLATORS = [ - 'youdao', - 'baidu', - 'google', - 'deepl', - 'deepseek', - 'papago', - 'caiyun', - 'gpt3.5', - 'gpt4', - 'nllb', - 'nllb_big', - 'sugoi', - 'jparacrawl', - 'jparacrawl_big', - 'm2m100', - 'm2m100_big', - 'qwen2', - 'qwen2_big', - 'sakura', - 'none', - 'original', -] - -MAX_ONGOING_TASKS = 1 -MAX_IMAGE_SIZE_PX = 8000**2 - -# Time to wait for web client to send a request to /task-state request -# before that web clients task gets removed from the queue -WEB_CLIENT_TIMEOUT = -1 - -# Time before finished tasks get removed from memory -FINISHED_TASK_REMOVE_TIMEOUT = 1800 - -# Auto deletes old task folders upon reaching this disk space limit -DISK_SPACE_LIMIT = 5e7 # 50mb - -# TODO: Turn into dict with translator client id as key for support of 
multiple translator clients -ONGOING_TASKS = [] -FINISHED_TASKS = [] -NONCE = '' -QUEUE = deque() -TASK_DATA = {} -TASK_STATES = {} -DEFAULT_TRANSLATION_PARAMS = {} -AVAILABLE_TRANSLATORS = [] -FORMAT = '' - -app = web.Application(client_max_size = 1024 * 1024 * 50) -routes = web.RouteTableDef() - - -def constant_compare(a, b): - if isinstance(a, str): - a = a.encode('utf-8') - if isinstance(b, str): - b = b.encode('utf-8') - if not isinstance(a, bytes) or not isinstance(b, bytes): - return False - if len(a) != len(b): - return False - - result = 0 - for x, y in zip(a, b): - result |= x ^ y - return result == 0 - -@routes.get("/") -async def index_async(request): - global AVAILABLE_TRANSLATORS - with open(os.path.join(SERVER_DIR_PATH, 'ui.html'), 'r', encoding='utf8') as fp: - content = fp.read() - if AVAILABLE_TRANSLATORS: - content = re.sub(r'(?<=translator: )(.*)(?=,)', repr(AVAILABLE_TRANSLATORS[0]), content) - content = re.sub(r'(?<=validTranslators: )(\[.*\])(?=,)', repr(AVAILABLE_TRANSLATORS), content) - return web.Response(text=content, content_type='text/html') - -@routes.get("/manual") -async def index_async(request): - with open(os.path.join(SERVER_DIR_PATH, 'manual.html'), 'r', encoding='utf8') as fp: - return web.Response(text=fp.read(), content_type='text/html') - -@routes.get("/result/{taskid}") -async def result_async(request): - global FORMAT - filepath = os.path.join('result', request.match_info.get('taskid'), f'final.{FORMAT}') - if not os.path.exists(filepath): - return web.Response(status=404, text='Not Found') - stream = BytesIO() - with open(filepath, 'rb') as f: - stream.write(f.read()) - mime = mimetypes.guess_type(filepath)[0] or 'application/octet-stream' - return web.Response(body=stream.getvalue(), content_type=mime) - -@routes.get("/result-type") -async def file_type_async(request): - global FORMAT - return web.Response(text=f'{FORMAT}') - -@routes.get("/queue-size") -async def queue_size_async(request): - return 
web.json_response({'size' : len(QUEUE)}) - -async def handle_post(request): - data = await request.post() - detection_size = None - selected_translator = 'youdao' - target_language = 'CHS' - detector = 'default' - direction = 'auto' - if 'target_lang' in data: - target_language = data['target_lang'].upper() - # TODO: move dicts to their own files to reduce load time - if target_language not in VALID_LANGUAGES: - target_language = 'CHS' - if 'detector' in data: - detector = data['detector'].lower() - if detector not in VALID_DETECTORS: - detector = 'default' - if 'direction' in data: - direction = data['direction'].lower() - if direction not in VALID_DIRECTIONS: - direction = 'auto' - if 'translator' in data: - selected_translator = data['translator'].lower() - if selected_translator not in AVAILABLE_TRANSLATORS: - selected_translator = AVAILABLE_TRANSLATORS[0] - if 'size' in data: - size_text = data['size'].upper() - if size_text == 'S': - detection_size = 1024 - elif size_text == 'M': - detection_size = 1536 - elif size_text == 'L': - detection_size = 2048 - elif size_text == 'X': - detection_size = 2560 - if 'file' in data: - file_field = data['file'] - content = file_field.file.read() - elif 'url' in data: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(data['url']) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - return web.json_response({'status': 'error'}) - try: - img = Image.open(io.BytesIO(content)) - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > MAX_IMAGE_SIZE_PX: - return web.json_response({'status': 'error-too-large'}) - except Exception: - return web.json_response({'status': 'error-img-corrupt'}) - return img, detection_size, selected_translator, target_language, detector, direction - -@routes.post("/run") -async def run_async(request): - global FORMAT - x = await 
handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - print(f'New `run` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - # Add a console output prompt to avoid the console from appearing to be stuck without execution when the translated image is hit consecutively. - print(f'Using cached result for {task_id}') - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - # elif os.path.exists(f'result/{task_id}'): - # # either image is being processed or error occurred - # if task_id not in TASK_STATES: - # # error occurred - # return web.json_response({'state': 'error'}) - else: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - now = time.time() - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(0.1) - if task_id not in TASK_STATES: - break - state = TASK_STATES[task_id] - if state['finished']: - break - return web.json_response({'task_id': task_id, 'status': 'successful' if state['finished'] else state['info']}) - - -@routes.post("/connect-internal") -async def index_async(request): - global NONCE, VALID_TRANSLATORS, AVAILABLE_TRANSLATORS - # Can be extended to allow support for multiple translators - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - capabilities = rqjson.get('capabilities') - if capabilities: - translators = capabilities.get('translators') - AVAILABLE_TRANSLATORS.clear() - for key in VALID_TRANSLATORS: - if key in translators: - 
AVAILABLE_TRANSLATORS.append(key) - return web.json_response({}) - -@routes.get("/task-internal") -async def get_task_async(request): - """ - Called by the translator to get a translation task. - """ - global NONCE, ONGOING_TASKS, DEFAULT_TRANSLATION_PARAMS - if constant_compare(request.rel_url.query.get('nonce'), NONCE): - if len(QUEUE) > 0 and len(ONGOING_TASKS) < MAX_ONGOING_TASKS: - task_id = QUEUE.popleft() - if task_id in TASK_DATA: - data = TASK_DATA[task_id] - for p, default_value in DEFAULT_TRANSLATION_PARAMS.items(): - current_value = data.get(p) - data[p] = current_value if current_value is not None else default_value - if not TASK_DATA[task_id].get('manual', False): - ONGOING_TASKS.append(task_id) - return web.json_response({'task_id': task_id, 'data': data}) - else: - return web.json_response({}) - else: - return web.json_response({}) - return web.json_response({}) - -async def manual_trans_task(task_id, texts, translations): - if task_id not in TASK_DATA: - TASK_DATA[task_id] = {} - if texts and translations: - TASK_DATA[task_id]['trans_request'] = [{'s': txt, 't': trans} for txt, trans in zip(texts, translations)] - else: - TASK_DATA[task_id]['trans_result'] = [] - print('Manual translation complete') - -@routes.post("/cancel-manual-request") -async def cancel_manual_translation(request): - rqjson = (await request.json()) - if 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - TASK_DATA[task_id]['cancel'] = ' ' - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'cancelled'}) - break - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/post-manual-result") -async def post_translation_result(request): - rqjson = (await request.json()) - if 
'trans_result' in rqjson and 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - trans_result = [r['t'] for r in rqjson['trans_result']] - TASK_DATA[task_id]['trans_result'] = trans_result - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'successful'}) - break - # remove old tasks - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/request-manual-internal") -async def request_translation_internal(request): - global NONCE - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if TASK_DATA[task_id].get('manual', False): - # manual translation - asyncio.gather(manual_trans_task(task_id, rqjson['texts'], rqjson['translations'])) - return web.json_response({}) - -@routes.post("/get-manual-result-internal") -async def get_translation_internal(request): - global NONCE - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if 'trans_result' in TASK_DATA[task_id]: - return web.json_response({'result': TASK_DATA[task_id]['trans_result']}) - elif 'cancel' in TASK_DATA[task_id]: - return web.json_response({'cancel':''}) - return web.json_response({}) - -@routes.get("/task-state") -async def get_task_state_async(request): - """ - Web API for getting the state of an on-going translation task from the website. - - Is periodically called from ui.html. 
Once it returns a finished state, - the web client will try to fetch the corresponding image through /result/ - """ - task_id = request.query.get('taskid') - if task_id and task_id in TASK_STATES and task_id in TASK_DATA: - state = TASK_STATES[task_id] - data = TASK_DATA[task_id] - res_dict = { - 'state': state['info'], - 'finished': state['finished'], - } - data['requested_at'] = time.time() - try: - res_dict['waiting'] = QUEUE.index(task_id) + 1 - except Exception: - res_dict['waiting'] = 0 - res = web.json_response(res_dict) - - return res - return web.json_response({'state': 'error'}) - -@routes.post("/task-update-internal") -async def post_task_update_async(request): - """ - Lets the translator update the task state it is working on. - """ - global NONCE, ONGOING_TASKS, FINISHED_TASKS - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_STATES and task_id in TASK_DATA: - TASK_STATES[task_id] = { - 'info': rqjson['state'], - 'finished': rqjson['finished'], - } - if rqjson['finished'] and not TASK_DATA[task_id].get('manual', False): - try: - i = ONGOING_TASKS.index(task_id) - FINISHED_TASKS.append(ONGOING_TASKS.pop(i)) - except ValueError: - pass - print(f'Task state {task_id} to {TASK_STATES[task_id]}') - return web.json_response({}) - -@routes.post("/submit") -async def submit_async(request): - """Adds new task to the queue. 
Called by web client in ui.html when submitting an image.""" - global FORMAT - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - now = time.time() - print(f'New `submit` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - TASK_STATES[task_id] = { - 'info': 'saved', - 'finished': True, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - elif task_id not in TASK_DATA or task_id not in TASK_STATES: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - return web.json_response({'task_id': task_id, 'status': 'successful'}) - -@routes.post("/manual-translate") -async def manual_translate_async(request): - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = secrets.token_hex(16) - print(f'New `manual-translate` task {task_id}') - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - now = time.time() - QUEUE.append(task_id) - # TODO: Add form fields to manual translate website - TASK_DATA[task_id] = { - # 'detection_size': size, - 'manual': True, - # 'detector': detector, - # 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - 
print(TASK_DATA[task_id]) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(1) - if 'trans_request' in TASK_DATA[task_id]: - return web.json_response({'task_id' : task_id, 'status': 'pending', 'trans_result': TASK_DATA[task_id]['trans_request']}) - if TASK_STATES[task_id]['info'].startswith('error'): - break - if TASK_STATES[task_id]['finished']: - # no texts detected - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - return web.json_response({'task_id' : task_id, 'status': 'error'}) - -app.add_routes(routes) - - -def generate_nonce(): - return secrets.token_hex(16) - -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): - os.environ['MT_WEB_NONCE'] = nonce - cmds = [ - sys.executable, - '-m', 'manga_translator', - '--mode', 'web_client', - '--host', host, - '--port', str(port), - ] - if params.get('use_gpu', False): - cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): - cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): - cmds.append('--ignore-errors') - if params.get('verbose', False): - cmds.append('--verbose') - - proc = subprocess.Popen(cmds, cwd=BASE_PATH) - return proc - -async def start_async_app(host: str, port: int, nonce: str, translation_params: dict = None): - global NONCE, DEFAULT_TRANSLATION_PARAMS, FORMAT - # Secret to secure communication between webserver and translator clients - NONCE = nonce - DEFAULT_TRANSLATION_PARAMS = translation_params or {} - FORMAT = DEFAULT_TRANSLATION_PARAMS.get('format') or 'jpg' - DEFAULT_TRANSLATION_PARAMS['format'] = FORMAT - - # Schedule web server to run - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, host, port) - await site.start() - print(f'Serving up app on http://{host}:{port}') - - return runner, site - -async def dispatch(host: str, port: int, nonce: str = None, translation_params: dict = None): - global ONGOING_TASKS, 
FINISHED_TASKS - - if nonce is None: - nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) - - # Start web service - runner, site = await start_async_app(host, port, nonce, translation_params) - - # Create client process that will execute translation tasks - print() - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Get all prior finished tasks - os.makedirs('result/', exist_ok=True) - for f in os.listdir('result/'): - if os.path.isdir(f'result/{f}') and re.search(r'^\w+-\d+-\w+-\w+-\w+-\w+$', f): - FINISHED_TASKS.append(f) - FINISHED_TASKS = list(sorted(FINISHED_TASKS, key=lambda task_id: os.path.getmtime(f'result/{task_id}'))) - - try: - while True: - await asyncio.sleep(1) - - # Restart client if OOM or similar errors occurred - if client_process.poll() is not None: - # if client_process.poll() == 0: - # break - print('Restarting translator process') - if len(ONGOING_TASKS) > 0: - tid = ONGOING_TASKS.pop(0) - state = TASK_STATES[tid] - state['info'] = 'error' - state['finished'] = True - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Filter queued and finished tasks - now = time.time() - to_del_task_ids = set() - for tid, s in TASK_STATES.items(): - d = TASK_DATA[tid] - # Remove finished tasks after 30 minutes - if s['finished'] and now - d['created_at'] > FINISHED_TASK_REMOVE_TIMEOUT: - to_del_task_ids.add(tid) - - # Remove queued tasks without web client - elif WEB_CLIENT_TIMEOUT >= 0: - if tid not in ONGOING_TASKS and not s['finished'] and now - d['requested_at'] > WEB_CLIENT_TIMEOUT: - print('REMOVING TASK', tid) - to_del_task_ids.add(tid) - try: - QUEUE.remove(tid) - except Exception: - pass - - for tid in to_del_task_ids: - del TASK_STATES[tid] - del TASK_DATA[tid] - - # Delete oldest folder if disk space is becoming sparse - if DISK_SPACE_LIMIT >= 0 and len(FINISHED_TASKS) > 0 and shutil.disk_usage('result/')[2] < DISK_SPACE_LIMIT: - tid = FINISHED_TASKS.pop(0) - try: - 
p = f'result/{tid}' - print(f'REMOVING OLD TASK RESULT: {p}') - shutil.rmtree(p) - except FileNotFoundError: - pass - except: - if client_process.poll() is None: - # client_process.terminate() - client_process.kill() - await runner.cleanup() - raise - -if __name__ == '__main__': - from ..args import parser - - args = parser.parse_args() - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - runner, site = loop.run_until_complete(dispatch(args.host, args.port, translation_params=vars(args))) - except KeyboardInterrupt: - pass diff --git a/manga_translator/server/ws.proto b/manga_translator/server/ws.proto deleted file mode 100644 index 61367b988..000000000 --- a/manga_translator/server/ws.proto +++ /dev/null @@ -1,36 +0,0 @@ -syntax = "proto3"; - -package ws; - -message NewTask { - string id = 1; - - string source_image = 2; - - string target_language = 3; - string detector = 4; - string direction = 5; - string translator = 6; - string size = 7; - - string translation_mask = 8; -} - -message Status { - string id = 1; - string status = 2; -} - -message FinishTask { - string id = 1; - bool success = 2; - bool has_translation_mask = 3; -} - -message WebSocketMessage { - oneof message { - NewTask new_task = 1; - Status status = 2; - FinishTask finish_task = 3; - } -} diff --git a/manga_translator/server/ws_pb2.py b/manga_translator/server/ws_pb2.py deleted file mode 100644 index 7fcfddfd8..000000000 --- a/manga_translator/server/ws_pb2.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: ws.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x08ws.proto\x12\x02ws\"\xe3\x01\n\x07NewTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x0csource_image\x18\x02 \x01(\t\x12\x1b\n\x13source_image_bearer\x18\t \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x10\n\x08\x64\x65tector\x18\x04 \x01(\t\x12\x11\n\tdirection\x18\x05 \x01(\t\x12\x12\n\ntranslator\x18\x06 \x01(\t\x12\x0c\n\x04size\x18\x07 \x01(\t\x12\x18\n\x10translation_mask\x18\x08 \x01(\t\x12\x1f\n\x17translation_mask_bearer\x18\n \x01(\t\"$\n\x06Status\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\t\"G\n\nFinishTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12\x1c\n\x14has_translation_mask\x18\x03 \x01(\x08\"\x83\x01\n\x10WebSocketMessage\x12\x1f\n\x08new_task\x18\x01 \x01(\x0b\x32\x0b.ws.NewTaskH\x00\x12\x1c\n\x06status\x18\x02 \x01(\x0b\x32\n.ws.StatusH\x00\x12%\n\x0b\x66inish_task\x18\x03 \x01(\x0b\x32\x0e.ws.FinishTaskH\x00\x42\t\n\x07messageb\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ws_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _globals['_NEWTASK']._serialized_start=17 - _globals['_NEWTASK']._serialized_end=244 - _globals['_STATUS']._serialized_start=246 - _globals['_STATUS']._serialized_end=282 - _globals['_FINISHTASK']._serialized_start=284 - _globals['_FINISHTASK']._serialized_end=355 - _globals['_WEBSOCKETMESSAGE']._serialized_start=358 - _globals['_WEBSOCKETMESSAGE']._serialized_end=489 -# 
@@protoc_insertion_point(module_scope) From 75f07f4a8a2d44f45816294f9902025374b443fc Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:55:54 +0100 Subject: [PATCH 35/75] changed queue a bit --- server/instance.py | 8 ++++---- server/myqueue.py | 39 +++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/server/instance.py b/server/instance.py index e844289dd..90d6c4a1a 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,6 +1,5 @@ -import pickle from asyncio import Event, Lock -from typing import List, Optional +from typing import List from PIL import Image from pydantic import BaseModel @@ -49,10 +48,11 @@ async def find_executor(self) -> ExecutorInstance: instance.busy = True return instance - def free_executor(self, instance: ExecutorInstance): + async def free_executor(self, instance: ExecutorInstance): + from server.myqueue import task_queue instance.free_executor() self.event.set() self.event.clear() - + await task_queue.update_event() executor_instances: Executors = Executors() diff --git a/server/myqueue.py b/server/myqueue.py index b9174d936..267a97302 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,9 +1,8 @@ import asyncio -from typing import List, Dict +from typing import List, Dict, Optional from fastapi import HTTPException from starlette.requests import Request -from starlette.responses import StreamingResponse from manga_translator import Context from server.instance import executor_instances @@ -17,16 +16,19 @@ def __init__(self): def add_task(self, task): self.queue.append(task) - def get_pos(self, task): - return self.queue.index(task) - - def update_event(self): + def get_pos(self, task) -> Optional[int]: + try: + return self.queue.index(task) + except ValueError: + return None + async def update_event(self): + self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] 
self.queue_event.set() self.queue_event.clear() - def remove(self, task): + async def remove(self, task): self.queue.remove(task) - self.update_event() + await self.update_event() async def wait_for_event(self): await self.queue_event.wait() @@ -42,18 +44,23 @@ async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) + if not queue_pos: + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") # just for the logs if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() + await task_queue.update_event() if notify: return else: raise HTTPException(500, detail="User is no longer connected") #just for the logs + instance = await executor_instances.find_executor() - task_queue.remove(task) + await task_queue.remove(task) if notify: notify(4, b"") if notify: @@ -61,19 +68,11 @@ async def wait_in_queue(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - executor_instances.free_executor(instance) - task_queue.update_event() + await executor_instances.free_executor(instance) if notify: return else: return result else: - if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() - if notify: - return - else: - raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file From bba5fc64a1b895080b9cdd39cfd765b9fb27b85f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:17:49 +0100 Subject: [PATCH 36/75] custom queue 
element with deconstructor --- server/main.py | 28 ++++++----------- server/myqueue.py | 59 ++++++++++++++++++++++++++---------- server/request_extraction.py | 24 ++++++--------- 3 files changed, 62 insertions(+), 49 deletions(-) diff --git a/server/main.py b/server/main.py index 773908f41..5797a610e 100644 --- a/server/main.py +++ b/server/main.py @@ -1,24 +1,22 @@ import io import os import secrets +import shutil import signal import subprocess import sys from argparse import Namespace -from builtins import bytes -from typing import Union -from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from pathlib import Path from pydantic import BaseModel -from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json, Translation app = FastAPI() @@ -54,22 +52,11 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -async def parse_request( - req: Request, - image: Union[str, bytes] = Form(...), - config: str = Form(...), -): - if req.headers.get('content-type').startswith('multipart'): - config = json.loads(config) - return TranslateRequest(image=image, config=Config(**config)) - else: - return None @app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - json = to_json(ctx) - return JSONResponse(content=json) + return JSONResponse(content=to_json(ctx)) @app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): @@ -154,13 +141,16 @@ def prepare(args): nonce = args.nonce if args.start_instance: return start_translator_client_proc(args.host, args.port + 1, nonce, args) + folder_name= "upload-cache" + if os.path.exists(folder_name): + shutil.rmtree(folder_name) + os.makedirs(folder_name) #todo: restart if crash #todo: cache results #todo: cleanup cache -#todo: store images while in queue + #todo: add docs -#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 267a97302..77f681d60 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,32 +1,64 @@ import asyncio -from typing import List, Dict, Optional +import os +from typing import List, Optional +from PIL import Image from fastapi import HTTPException -from starlette.requests import Request +from fastapi.requests import Request -from manga_translator import Context +from manga_translator import Config from server.instance import executor_instances from server.sent_data_internal import NotifyType +class QueueElement: + req: Request + image: Image.Image | str + config: Config + + def __init__(self, req: Request, image: Image.Image, config: Config, length): + self.req = req + if length > 10: + #todo: store image in "upload-cache" folder + self.image = image + else: + self.image = image + self.config = config + + def get_image(self)-> Image: + if isinstance(self.image, str): + return Image.open(self.image) + else: + return self.image + + def __del__(self): + if isinstance(self.image, str): + os.remove(self.image) + + async def is_client_disconnected(self) -> bool: + if await self.req.is_disconnected(): + return True + return False + + class 
TaskQueue: def __init__(self): - self.queue: List[Context] = [] + self.queue: List[QueueElement] = [] self.queue_event: asyncio.Event = asyncio.Event() - def add_task(self, task): + def add_task(self, task: QueueElement): self.queue.append(task) - def get_pos(self, task) -> Optional[int]: + def get_pos(self, task: QueueElement) -> Optional[int]: try: return self.queue.index(task) except ValueError: return None async def update_event(self): - self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] + self.queue = [task for task in self.queue if not await task.is_client_disconnected()] self.queue_event.set() self.queue_event.clear() - async def remove(self, task): + async def remove(self, task: QueueElement): self.queue.remove(task) await self.update_event() @@ -35,16 +67,11 @@ async def wait_for_event(self): task_queue = TaskQueue() -async def is_client_disconnected(request: Request) -> bool: - if await request.is_disconnected(): - return True - return False - -async def wait_in_queue(task, notify: NotifyType): +async def wait_in_queue(task: QueueElement, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) - if not queue_pos: + if queue_pos is None: if notify: return else: @@ -52,7 +79,7 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): - if await is_client_disconnected(task.req): + if await task.is_client_disconnected(): await task_queue.update_event() if notify: return diff --git a/server/request_extraction.py b/server/request_extraction.py index 080d84cc4..decb64bff 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -12,7 +12,7 @@ from starlette.responses import StreamingResponse from manga_translator import Config, Context -from server.myqueue import task_queue, wait_in_queue +from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream class TranslateRequest(BaseModel): @@ -61,29 +61,25 @@ async def multi_content_type(request: Request): async def get_ctx(req: Request): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) - return await wait_in_queue(ctx, None) + task_queue.add_task(task) + + return await wait_in_queue(task, None) async def while_streaming(req: Request, transform): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) + task_queue.add_task(task) messages = asyncio.Queue() def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - 
asyncio.create_task(wait_in_queue(ctx, notify_internal)) + asyncio.create_task(wait_in_queue(task, notify_internal)) return streaming_response \ No newline at end of file From 259b807266406edf65139b886ecd93f174f2222d Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:14:05 +0100 Subject: [PATCH 37/75] byte response with cpp&rs examples --- manga_translator/utils/textblock.py | 2 +- server/main.py | 13 ++-- server/response.cpp | 116 +++++++++++++++++++++++++++ server/response.rs | 101 ++++++++++++++++++++++++ server/to_json.py | 117 +++++++++++++++++----------- 5 files changed, 297 insertions(+), 52 deletions(-) create mode 100644 server/response.cpp create mode 100644 server/response.rs diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py index de3c8a4cd..358ce4da5 100644 --- a/manga_translator/utils/textblock.py +++ b/manga_translator/utils/textblock.py @@ -46,7 +46,7 @@ def __init__(self, lines: List[Tuple[int, int, int, int]], texts: List[str] = None, language: str = 'unknown', font_size: float = -1, - angle: int = 0, + angle: float = 0, translation: str = "", fg_color: Tuple[float] = (0, 0, 0), bg_color: Tuple[float] = (0, 0, 0), diff --git a/server/main.py b/server/main.py index 5797a610e..44f7b77a3 100644 --- a/server/main.py +++ b/server/main.py @@ -17,7 +17,7 @@ from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json, Translation +from server.to_json import to_translation, TranslationResponse app = FastAPI() nonce = None @@ -50,17 +50,20 @@ def transform_to_image(ctx): return img_byte_arr.getvalue() def transform_to_json(ctx): - return str(to_json(ctx)).encode("utf-8") + return to_translation(ctx).model_dump_json().encode("utf-8") +def transform_to_bytes(ctx): + return to_translation(ctx).to_bytes() 
-@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") +@app.post("/translate/json", response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - return JSONResponse(content=to_json(ctx)) + return to_translation(ctx) @app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) + return StreamingResponse(content=to_translation(ctx).to_bytes()) @app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) async def image(req: Request) -> StreamingResponse: @@ -77,7 +80,7 @@ async def stream_json(req: Request) -> StreamingResponse: @app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_bytes) @app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_image(req: Request) -> StreamingResponse: diff --git a/server/response.cpp b/server/response.cpp new file mode 100644 index 000000000..3595856a3 --- /dev/null +++ b/server/response.cpp @@ -0,0 +1,116 @@ 
+#include +#include +#include +#include +#include +#include +#include + +struct Color { + uint8_t fg[3]; + uint8_t bg[3]; +}; + +struct Translation { + uint32_t min_x; + uint32_t min_y; + uint32_t max_x; + uint32_t max_y; + bool is_bulleted_list; + uint32_t angle; + float prob; + Color text_color; + std::unordered_map text; + std::vector background; +}; + +struct TranslationResponse { + std::vector translations; +}; + +uint32_t read_u32(const std::vector& bytes, size_t& offset) { + uint32_t value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = value; + offset += 4; + return value; +} + +float read_f32(const std::vector& bytes, size_t& offset) { + float value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = *reinterpret_cast(&value); + offset += 4; + return value; +} + +uint8_t read_u8(const std::vector& bytes, size_t& offset) { + uint8_t value = bytes[offset]; + offset += 1; + return value; +} + +bool read_bool(const std::vector& bytes, size_t& offset) { + bool value = bytes[offset] != 0; + offset += 1; + return value; +} + +std::vector read_chunk(const std::vector& bytes, size_t& offset) { + uint32_t size = read_u32(bytes, offset); + std::vector value(bytes.begin() + offset, bytes.begin() + offset + size); + offset += size; + return value; +} + +std::string read_str(const std::vector& bytes, size_t& offset) { + std::vector chunk = read_chunk(bytes, offset); + std::string result(chunk.begin(), chunk.end()); + return result; +} + +std::unordered_map read_map(const std::vector& bytes, size_t& offset) { + uint32_t count = read_u32(bytes, offset); + std::unordered_map map; + for (uint32_t i = 0; i < count; ++i) { + std::string key = read_str(bytes, offset); + std::string value = read_str(bytes, offset); + map[key] = value; + } + return map; +} + +Translation from_bytes(const std::vector& bytes, size_t& offset) { + Translation translation; + translation.min_x = read_u32(bytes, offset); + translation.min_y = read_u32(bytes, offset); + 
translation.max_x = read_u32(bytes, offset); + translation.max_y = read_u32(bytes, offset); + translation.is_bulleted_list = read_bool(bytes, offset); + translation.angle = read_u32(bytes, offset); + translation.prob = read_f32(bytes, offset); + for (int i = 0; i < 3; ++i) { + translation.text_color.fg[i] = read_u8(bytes, offset); + translation.text_color.bg[i] = read_u8(bytes, offset); + } + translation.text = read_map(bytes, offset); + translation.background = read_chunk(bytes, offset); + return translation; +} + +TranslationResponse from_bytes_response(const std::vector& bytes) { + size_t offset = 0; + uint32_t count = read_u32(bytes, offset); + TranslationResponse response; + + for (uint32_t i = 0; i < count; ++i) { + response.translations.push_back(from_bytes(bytes, offset)); + } + return response; +} + +int main() { + std::vector bytes = {/* byte data here */}; + TranslationResponse data = from_bytes_response(bytes); + return 0; +} diff --git a/server/response.rs b/server/response.rs new file mode 100644 index 000000000..81fa07810 --- /dev/null +++ b/server/response.rs @@ -0,0 +1,101 @@ +#[derive(Debug)] +struct Color { + fg: [u8; 3], + bg: [u8; 3], +} + +#[derive(Debug)] +struct Translation { + min_x: u32, + min_y: u32, + max_x: u32, + max_y: u32, + is_bulleted_list: bool, + angle: u32, + prob: f32, + text_color: Color, + text: HashMap, + background: Vec, +} + +#[derive(Debug)] +struct TranslationResponse { + translations: Vec, +} + +impl TranslationResponse { + fn from_bytes(bytes: &[u8]) -> Self { + let mut offset = 0; + let v = (0..read_u32(bytes, &mut offset)) + .map(|_| Translation::from_bytes(bytes, &mut offset)) + .collect::>(); + Self { translations: v } + } +} + +fn read_u32(bytes: &[u8], offset: &mut usize) -> u32 { + let value = u32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value +} + +impl Translation { + fn from_bytes(bytes: &[u8], offset: &mut usize) -> Self { + let read_f32 = |bytes: &[u8], offset: &mut 
usize| -> f32 { + let value = f32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value + }; + + let read_u8 = |bytes: &[u8], offset: &mut usize| -> u8 { + let value = bytes[*offset]; + *offset += 1; + value + }; + + let read_bool = |bytes: &[u8], offset: &mut usize| -> bool { + let value = bytes[*offset] != 0; + *offset += 1; + value + }; + + let read_chunk = |bytes: &[u8], offset: &mut usize| -> Vec { + let size = read_u32(bytes, offset); + let value = &bytes[*offset..*offset + size as usize]; + *offset += size as usize; + value.to_vec() + }; + let read_str = |bytes: &[u8], offset: &mut usize| -> String { + String::from_utf8(read_chunk(bytes, offset)).expect("Invalid UTF-8") + }; + let read_map = |bytes: &[u8], offset: &mut usize| -> HashMap { + (0..read_u32(bytes, offset)) + .into_iter() + .map(|_| (read_str(bytes, offset), read_str(bytes, offset))) + .collect::>() + }; + Self { + min_x: read_u32(bytes, offset), + min_y: read_u32(bytes, offset), + max_x: read_u32(bytes, offset), + max_y: read_u32(bytes, offset), + is_bulleted_list: read_bool(bytes, offset), + angle: read_u32(bytes, offset), + prob: read_f32(bytes, offset), + text_color: Color { + fg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + bg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + }, + text: read_map(bytes, offset), + background: read_chunk(bytes, offset), + } + } +} diff --git a/server/to_json.py b/server/to_json.py index 911718d8d..f6bcdc6e2 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,8 +1,10 @@ import base64 +import struct from typing import Dict, List import cv2 -from pydantic import BaseModel +import numpy as np +from pydantic import BaseModel, Field from manga_translator import Context from manga_translator.utils import TextBlock @@ -22,60 +24,83 @@ #img_rendered: array #mask_raw: array #mask:array -def to_json(ctx: Context): + +class TextColor(BaseModel): + 
fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + minX: int + minY: int + maxX: int + maxY: int + is_bulleted_list: bool + angle: float | int + prob: float + text_color: TextColor + text: dict[str, str] + background: np.ndarray = Field( + ..., + description="Background image encoded as a base64 string", + examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] + ) + + class Config: + arbitrary_types_allowed = True + json_encoders = { + np.ndarray: lambda array: Translation.encode_background(array) + } + + @staticmethod + def encode_background(array: np.ndarray) -> str: + retval, buffer = cv2.imencode('.png', array) + jpg_as_text = base64.b64encode(buffer).decode("utf-8") + background = f"data:image/png;base64,{jpg_as_text}" + return background + + def to_bytes(self): + coords_bytes = struct.pack('4i', self.minX, self.minY, self.maxX, self.maxY) + is_bulleted_list_byte = struct.pack('?', self.is_bulleted_list) + angle_bytes = struct.pack('f', float(self.angle) if isinstance(self.angle, int) else self.angle) + prob_bytes = struct.pack('f', self.prob) + fg = struct.pack('3B', self.text_color.fg[0], self.text_color.fg[1], self.text_color.fg[2]) + bg = struct.pack('3B', self.text_color.bg[0], self.text_color.bg[1], self.text_color.bg[2]) + text_bytes = struct.pack('i', len(self.text.items())) + for key, value in self.text.items(): + text_bytes += struct.pack('I', len(key.encode('utf-8'))) + key.encode('utf-8') + text_bytes += struct.pack('I', len(value.encode('utf-8'))) + value.encode('utf-8') + background_bytes = struct.pack('I', len(self.background.tobytes())) + self.background.tobytes() + return coords_bytes +is_bulleted_list_byte+ angle_bytes+prob_bytes+fg + bg + text_bytes + background_bytes + +class TranslationResponse(BaseModel): + translations: List[Translation] + + def to_bytes(self): + items= [v.to_bytes() for v in self.translations] + return struct.pack('i', len(items)) + b''.join(items) + +def to_translation(ctx: 
Context) -> TranslationResponse: text_regions:list[TextBlock] = ctx.text_regions inpaint = ctx.img_inpainted translations:Dict[str, List[str]] = ctx.translations results = [] - if 'overlay_ext' in ctx: - #todo: unreachable - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' for i, blk in enumerate(text_regions): minX, minY, maxX, maxY = blk.xyxy + text_region = text_regions[i] if 'translations' in ctx: trans = {key: value[i] for key, value in translations.items()} else: trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' + overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] + trans[text_region.source_lang] = text_regions[i].text text_region.adjust_bg_color = False color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - return results - -class TextColor(BaseModel): - fg: tuple[int, int, int] - bg: tuple[int, int, int] - -class Translation(BaseModel): - text: dict[str, str] - minX: int - minY: int - maxX: int - maxY: int - textColor:TextColor - language: str - background: str + results.append(Translation(text=trans, + minX=int(minX),minY=int(minY),maxX=int(maxX),maxY=int(maxY), + background=inpaint[minY:maxY, minX:maxX], + is_bulleted_list=text_region.is_bulleted_list, + text_color=TextColor(fg=color1.tolist(), bg=color2.tolist()), + prob=text_region.prob, + angle=text_region.angle + )) + #todo: background angle + return TranslationResponse(translations=results) From cbf902d408db1fa8d805bf0258c9b9be01fc3b7f Mon Sep 17 00:00:00 2001 
From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:15:38 +0100 Subject: [PATCH 38/75] move files into examples --- {server => examples}/response.cpp | 0 {server => examples}/response.rs | 0 server/main.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename {server => examples}/response.cpp (100%) rename {server => examples}/response.rs (100%) diff --git a/server/response.cpp b/examples/response.cpp similarity index 100% rename from server/response.cpp rename to examples/response.cpp diff --git a/server/response.rs b/examples/response.rs similarity index 100% rename from server/response.rs rename to examples/response.rs diff --git a/server/main.py b/server/main.py index 44f7b77a3..3beef79c5 100644 --- a/server/main.py +++ b/server/main.py @@ -60,7 +60,7 @@ async def json(req: Request): ctx = await get_ctx(req) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") +@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") async def bytes(req: Request): ctx = await get_ctx(req) return StreamingResponse(content=to_translation(ctx).to_bytes()) From a2f98e6b171a35e37c3ac94fe3e4adbe573617a8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:44:01 +0100 Subject: [PATCH 39/75] fix docs --- server/index.html | 2 +- server/main.py | 98 +++++++++++++++++++++++------------- server/manual.html | 2 +- server/request_extraction.py | 40 ++++----------- server/sent_data_internal.py | 1 - server/to_json.py | 10 ++-- 6 files changed, 81 insertions(+), 72 deletions(-) diff --git a/server/index.html b/server/index.html index edf617609..fd1a49a11 100644 --- a/server/index.html +++ 
b/server/index.html @@ -357,7 +357,7 @@

Image/Manga Translator

} const uploadWithProgress = async (formData) => { try { - const response = await fetch(`${BASE_URI}translate/image/stream`, { + const response = await fetch(`${BASE_URI}translate/with-form/image/stream`, { method: 'POST', body: formData, }); diff --git a/server/main.py b/server/main.py index 3beef79c5..5563f0ce5 100644 --- a/server/main.py +++ b/server/main.py @@ -7,16 +7,15 @@ import sys from argparse import Namespace -from fastapi import FastAPI, Request, HTTPException, Header, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from fastapi.responses import StreamingResponse, HTMLResponse from pathlib import Path -from pydantic import BaseModel - +from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming +from server.request_extraction import get_ctx, while_streaming, TranslateRequest from server.to_json import to_translation, TranslationResponse app = FastAPI() @@ -30,14 +29,7 @@ allow_headers=["*"], ) -class TranslateRequestForm(BaseModel): - """This request can be a multipart or a json request""" - image: UploadFile - """can be a url, base64 encoded image or a multipart image""" - config: str - """in case it is a multipart this needs to be a string(json.stringify)""" - -@app.post("/register", response_description="no response") +@app.post("/register", response_description="no response", tags=["internal-api"]) async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") @@ -55,48 +47,85 @@ def transform_to_json(ctx): def transform_to_bytes(ctx): return to_translation(ctx).to_bytes() -@app.post("/translate/json", 
response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") -async def json(req: Request): - ctx = await get_ctx(req) +@app.post("/translate/json", response_model=TranslationResponse, tags=["api", "json"],response_description="json strucure inspired by the ichigo translator extension") +async def json(req: Request, data: TranslateRequest): + ctx = await get_ctx(req, data.config, data.image) + return to_translation(ctx) + +@app.post("/translate/bytes", response_class=StreamingResponse, tags=["api", "json"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'") +async def bytes(req: Request, data: TranslateRequest): + ctx = await get_ctx(req, data.config, data.image) + return StreamingResponse(content=to_translation(ctx).to_bytes()) + +@app.post("/translate/image", response_description="the result image", tags=["api", "json"],response_class=StreamingResponse) +async def image(req: Request, data: TranslateRequest) -> StreamingResponse: + ctx = await get_ctx(req, data.config, data.image) + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + + return StreamingResponse(img_byte_arr, media_type="image/png") + +@app.post("/translate/json/stream", response_class=StreamingResponse,tags=["api", "json"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request, data: TranslateRequest) -> StreamingResponse: + return await while_streaming(req, transform_to_json, data.config, data.image) + +@app.post("/translate/bytes/stream", response_class=StreamingResponse, tags=["api", "json"],response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is 
result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request, data: TranslateRequest)-> StreamingResponse: + return await while_streaming(req, transform_to_bytes,data.config, data.image) + +@app.post("/translate/image/stream", response_class=StreamingResponse, tags=["api", "json"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request, data: TranslateRequest) -> StreamingResponse: + return await while_streaming(req, transform_to_image, data.config, data.image) + +@app.post("/translate/with-form/json", response_model=TranslationResponse, tags=["api", "form"],response_description="json strucure inspired by the ichigo translator extension") +async def json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")): + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") -async def bytes(req: Request): - ctx = await get_ctx(req) +@app.post("/translate/with-form/bytes", response_class=StreamingResponse, tags=["api", "form"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'") +async def bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")): + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) return StreamingResponse(content=to_translation(ctx).to_bytes()) -@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) -async def image(req: Request) -> StreamingResponse: - 
ctx = await get_ctx(req) +@app.post("/translate/with-form/image", response_description="the result image", tags=["api", "form"],response_class=StreamingResponse) +async def image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_json(req: Request) -> StreamingResponse: - return await while_streaming(req, transform_to_json) +@app.post("/translate/with-form/json/stream", response_class=StreamingResponse, tags=["api", "form"],response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_json, Config.parse_raw(config), img) -@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_bytes) 
+@app.post("/translate/with-form/bytes/stream", response_class=StreamingResponse,tags=["api", "form"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}"))-> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_bytes, Config.parse_raw(config), img) -@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_image(req: Request) -> StreamingResponse: - return await while_streaming(req, transform_to_image) +@app.post("/translate/with-form/image/stream", response_class=StreamingResponse, tags=["api", "form"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_image, Config.parse_raw(config), img) -@app.post("/queue-size", response_model=int) +@app.post("/queue-size", response_model=int, tags=["api", "json"]) async def queue_size() -> int: return len(task_queue.queue) -@app.get("/", response_class=HTMLResponse) +@app.get("/", response_class=HTMLResponse,tags=["ui"]) async def index() -> HTMLResponse: html_file = Path("index.html") html_content = html_file.read_text() return 
HTMLResponse(content=html_content) -@app.get("/manual", response_class=HTMLResponse) +@app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): html_file = Path("manual.html") html_content = html_file.read_text() @@ -153,7 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: add docs #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/manual.html b/server/manual.html index 1335b0c51..2983feb64 100644 --- a/server/manual.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { + const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { method: 'POST', body: formData, /*headers: { diff --git a/server/request_extraction.py b/server/request_extraction.py index decb64bff..54516fa39 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -9,9 +9,9 @@ from PIL import Image from fastapi import Request, HTTPException from pydantic import BaseModel -from starlette.responses import StreamingResponse +from fastapi.responses import StreamingResponse -from manga_translator import Config, Context +from manga_translator import Config from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream @@ -19,7 +19,7 @@ class TranslateRequest(BaseModel): """This request can be a multipart or a json request""" image: bytes|str """can be a url, base64 encoded image or a multipart image""" - config: Config + config: Config = Config() """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -41,39 +41,17 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: raise HTTPException(status_code=422, detail=str(e)) -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - 
raise HTTPException(status_code=400, detail="Unsupported Content-Type") - -async def get_ctx(req: Request): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def get_ctx(req: Request, config: Config, image: str|bytes): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) return await wait_in_queue(task, None) -async def while_streaming(req: Request, transform): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def while_streaming(req: Request, transform, config: Config, image: bytes | str): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) messages = asyncio.Queue() diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 5720cb6c5..99f8c3730 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -1,4 +1,3 @@ -import asyncio import pickle from typing import Mapping, Optional, Callable diff --git a/server/to_json.py b/server/to_json.py index f6bcdc6e2..bdf1d4cab 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,10 +1,10 @@ import base64 import struct -from typing import Dict, List +from typing import Dict, List, Annotated import cv2 import numpy as np -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, WithJsonSchema from manga_translator import Context from manga_translator.utils import TextBlock @@ -24,6 +24,10 @@ #img_rendered: array #mask_raw: array #mask:array +NumpyNdarray = Annotated[ + np.ndarray, + WithJsonSchema({'type': 'string', "format": "base64","examples": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."]}), +] class TextColor(BaseModel): fg: tuple[int, int, int] @@ -39,7 +43,7 @@ class Translation(BaseModel): prob: float text_color: TextColor text: dict[str, str] - background: np.ndarray = Field( 
+ background: NumpyNdarray = Field( ..., description="Background image encoded as a base64 string", examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] From a0525ddfcf70d6460906ccfe52029fd0d8d1c372 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:50:44 +0100 Subject: [PATCH 40/75] convert to normal enum --- manga_translator/config.py | 144 ++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 4c34c5d36..adfe367bd 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,6 +1,6 @@ import argparse import re -from enum import IntEnum +from enum import Enum from typing import Optional @@ -56,88 +56,88 @@ def hex2rgb(h): h = h.lstrip('#') return tuple(int(h[i:i+2], 16) for i in (0, 2, 4)) -class Renderer(IntEnum): - default = 0 - manga2Eng = 1 - none = 2 +class Renderer(str, Enum): + default = "default" + manga2Eng = "manga2eng" + none = "none" -class Alignment(IntEnum): - auto = 0 - left = 1 - center = 2 - right = 3 +class Alignment(str, Enum): + auto = "auto" + left = "left" + center = "center" + right = "right" -class Direction(IntEnum): - auto = 0 - h = 1 - v = 2 +class Direction(str, Enum): + auto = "auto" + h = "horizontal" + v = "vertical" -class InpaintPrecision(IntEnum): - fp32 = 0 - fp16 = 1 - bf16 = 2 +class InpaintPrecision(str, Enum): + fp32 = "fp32" + fp16 = "fp16" + bf16 = "bf16" def __str__(self): return self.name -class Detector(IntEnum): - default = 0 - dbconvnext = 1 - ctd = 2 - craft = 3 - none = 4 - -class Inpainter(IntEnum): - default = 0 - lama_large = 1 - lama_mpe = 2 - sd = 3 - none = 4 - original = 5 - -class Colorizer(IntEnum): - none = 0 - mc2 = 1 - -class Ocr(IntEnum): - ocr32px = 0 - ocr48px = 1 - ocr48px_ctc = 2 - mocr = 3 - -class Translator(IntEnum): - youdao = 0 - baidu = 1 - deepl = 2 - papago = 3 - caiyun = 4 - gpt3 = 5 - 
gpt3_5 = 6 - gpt4 = 7 - none = 8 - original = 9 - sakura = 10 - deepseek = 11 - groq = 12 - offline = 13 - nllb = 14 - nllb_big = 15 - sugoi = 16 - jparacrawl = 17 - jparacrawl_big = 18 - m2m100 = 19 - m2m100_big = 20 - mbart50 = 21 - qwen2 = 22 - qwen2_big = 23 +class Detector(str, Enum): + default = "default" + dbconvnext = "dbconvnext" + ctd = "ctd" + craft = "craft" + none = "none" + +class Inpainter(str, Enum): + default = "default" + lama_large = "lama_large" + lama_mpe = "lama_mpe" + sd = "sd" + none = "none" + original = "original" + +class Colorizer(str, Enum): + none = "none" + mc2 = "mc2" + +class Ocr(str, Enum): + ocr32px = "32px" + ocr48px = "48px" + ocr48px_ctc = "48px_ctc" + mocr = "mocr" + +class Translator(str, Enum): + youdao = "youdao" + baidu = "baidu" + deepl = "deepl" + papago = "papago" + caiyun = "caiyun" + gpt3 = "gpt3" + gpt3_5 = "gpt3.5" + gpt4 = "gpt4" + none = "none" + original = "original" + sakura = "sakura" + deepseek = "deepseek" + groq = "groq" + offline = "offline" + nllb = "nllb" + nllb_big = "nllb_big" + sugoi = "sugoi" + jparacrawl = "jparacrawl" + jparacrawl_big = "jparacrawl_big" + m2m100 = "m2m100" + m2m100_big = "m2m100_big" + mbart50 = "mbart50" + qwen2 = "qwen2" + qwen2_big = "qwen2_big" def __str__(self): return self.name -class Upscaler(IntEnum): - waifu2x = 0 - esrgan = 1 - upscler4xultrasharp = 2 +class Upscaler(str, Enum): + waifu2x = "waifu2x" + esrgan = "esrgan" + upscler4xultrasharp = "4xultrasharp" class RenderConfig(BaseModel): renderer: Renderer = Renderer.default From 322ea7893484747878247c8b759e0bb2694a0150 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:08:02 +0100 Subject: [PATCH 41/75] configs --- server/index.html | 43 +++++++++++++++++++++++++++---------------- server/main.py | 2 -- server/manual.html | 9 +++++++++ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/server/index.html b/server/index.html index 
fd1a49a11..177885d79 100644 --- a/server/index.html +++ b/server/index.html @@ -33,10 +33,10 @@

Image/Manga Translator

@@ -47,7 +47,7 @@

Image/Manga Translator

Image/Manga Translator @@ -199,13 +199,13 @@

Image/Manga Translator

get fileUri() { return this.file ? URL.createObjectURL(this.file) : null }, - detectionResolution: 'M', - textDetector: 'auto', + detectionResolution: '1536', + textDetector: 'default', renderTextDirection: 'auto', translator: 'youdao', - validTranslators: ['youdao', 'baidu', 'google', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], + validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], getTranslatorName(key) { - if (key == 'none') + if (key === 'none') return "No Text" return key ? key[0].toUpperCase() + key.slice(1) : ""; }, @@ -307,11 +307,22 @@

Image/Manga Translator

const formData = new FormData() formData.append('image', this.file) - /*formData.append('size', this.detectionResolution) - formData.append('detector', this.textDetector) - formData.append('direction', this.renderTextDirection) - formData.append('translator', this.translator) - formData.append('target_lang', this.targetLanguage)*/ + const config = `{ + "detector": { + "detector": "${this.textDetector}", + "detection_size": ${this.detectionResolution} + }, + "render": { + "direction": "${this.renderTextDirection}" + }, + "translator": { + "translator": "${this.translator}", + "target_lang": "${this.targetLanguage}" + } + }`; + + formData.append('config', config) + const processChunk = (value) => { if (this.error) return; diff --git a/server/main.py b/server/main.py index 5563f0ce5..20d3d5833 100644 --- a/server/main.py +++ b/server/main.py @@ -182,8 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: enable config in html pages - if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/server/manual.html b/server/manual.html index 2983feb64..45c801070 100644 --- a/server/manual.html +++ b/server/manual.html @@ -15,6 +15,8 @@

Upload Image and Translate

+
+

@@ -55,10 +57,12 @@

let submitButton = document.getElementById("submit-button") const fileInput = document.getElementById('fileInput'); const generateImage = document.getElementById('generate-image'); + const configField = document.getElementById('config-json'); submitButton.classList.add("hidden"); fileInput.classList.add("hidden"); generateImage.classList.add("hidden"); + configField.classList.add("hidden"); statusField.innerHTML = ''; errorField.innerHTML = ''; const file = fileInput.files[0]; @@ -70,6 +74,10 @@

const formData = new FormData(); formData.append('image', file); + const text = configField.innerText; + if (text.length > 2) { + formData.append('config', text) + } try { const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { @@ -94,6 +102,7 @@

submitButton.classList.remove("hidden"); fileInput.classList.remove("hidden"); generateImage.classList.remove("hidden"); + configField.classList.remove("hidden"); } } From c9d8cbe7e46af15d7f5f7e3526a54f9f6e1dff13 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:01:42 +0100 Subject: [PATCH 42/75] fix ctx -> config after merge --- manga_translator/manga_translator.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 098b68563..7fa85c7b2 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -421,35 +421,32 @@ async def _run_text_translation(self, config: Config, ctx: Context): # Target language detection if config.translator.target_lang in ['CHS', 'CHT']: # Chinese has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) - elif config.translator.target_lang == 'JPN': # Japanese - if ctx.target_lang in ['CHS', 'CHT']: # Chinese - has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) has_target_lang_in_translation = bool(re.search('[\u4e00-\u9fff]', region.translation)) - elif ctx.target_lang == 'JPN': # Japanese + elif config.translator.target_lang == 'JPN': # Japanese has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text)) elif config.translator.target_lang == 'KOR': # Korean has_target_lang_in_translation = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.translation)) - elif ctx.target_lang == 'KOR': # Korean + elif config.translator.target_lang == 'KOR': # Korean has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text)) elif config.translator.target_lang == 'ARA': # Arabic has_target_lang_in_translation = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.translation)) - elif ctx.target_lang == 'ARA': # Arabic + elif 
config.translator.target_lang == 'ARA': # Arabic has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text)) elif config.translator.target_lang == 'THA': # Thai has_target_lang_in_translation = bool(re.search('[\u0600-\u06ff]', region.translation)) - elif ctx.target_lang == 'THA': # Thai + elif config.translator.target_lang == 'THA': # Thai has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text)) elif config.translator.target_lang == 'RUS': # Russian has_target_lang_in_translation = bool(re.search('[\u0e00-\u0e7f]', region.translation)) - elif ctx.target_lang == 'RUS': # Russian + elif config.translator.target_lang == 'RUS': # Russian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation)) - elif ctx.target_lang == 'UKR': # Ukrainian + elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) elif config.translator.target_lang == 'IND': # Indonesian has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation)) - elif ctx.target_lang == 'IND': # Indonesian + elif config.translator.target_lang == 'IND': # Indonesian has_target_lang = bool(re.search('[A-Za-z]', region.text)) has_target_lang_in_translation = bool(re.search('[A-Za-z]', region.translation)) From 510db7f418e3c8e5f651269513b7aaf5ee929a92 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:38:34 +0100 Subject: [PATCH 43/75] server --- manga_translator/args.py | 2 +- server/args.py | 9 +++++ server/instance.py | 66 ++++++++++++++++++++++++++++++++++++ server/main.py | 73 ++++++++++++++++++++++++++++++++++++++++ server/myqueue.py | 25 ++++++++++++++ server/sent_data.py | 50 +++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 server/args.py 
create mode 100644 server/instance.py create mode 100644 server/main.py create mode 100644 server/myqueue.py create mode 100644 server/sent_data.py diff --git a/manga_translator/args.py b/manga_translator/args.py index 92e70ff07..ef84073d6 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -127,7 +127,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') # API mode -parser_api = subparsers.add_parser('api', help='Run in API mode') +parser_api = subparsers.add_parser('shared', help='Run in API mode') parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') diff --git a/server/args.py b/server/args.py new file mode 100644 index 000000000..ac3d92b33 --- /dev/null +++ b/server/args.py @@ -0,0 +1,9 @@ +import argparse + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Specify host and port for the server.") + parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + + return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py new file mode 100644 index 000000000..68260c962 --- /dev/null +++ b/server/instance.py @@ -0,0 +1,66 @@ +from asyncio import Event, Lock +from typing import List, Optional + +from PIL import Image +from pydantic import BaseModel + +from manga_translator import Config +from server.sent_data import fetch_data_stream, NotifyType + + +class ExecutorInstance(BaseModel): + ip: str + port: int + busy: bool = False + + def 
free_executor(self): + self.busy = False + + async def sent(self, image: Image, config: Config, sender: NotifyType): + await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) + +class Executors: + def __init__(self): + self.list: List[ExecutorInstance] = [] + self.lock: Lock = Lock() + self.event = Event() + + def register(self, instance: ExecutorInstance): + self.list.append(instance) + + def free_executors(self) -> int: + return len([item for item in self.list if not item.busy]) + + async def _find_instance(self): + while True: + instance = next((x for x in self.list if x.busy == False), None) + if instance is not None: + return instance + #todo: cricial error: warn should never happen + await self.event.wait() + + async def find_executor(self) -> ExecutorInstance: + async with self.lock: # Using async with for lock management + instance = await self._find_instance() + instance.busy = True + return instance + + def free_executor(self, instance: ExecutorInstance): + instance.free_executor() + self.event.set() + self.event.clear() + +def example_notify(a: int, b: Optional[int] = None) -> None: + print(f"Notify called with a={a} and b={b}") + +async def main(): + executor = ExecutorInstance(ip="127.0.0.1", port=5003) + + image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") + config = Config() + + await executor.sent(image, config, example_notify) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) \ No newline at end of file diff --git a/server/main.py b/server/main.py new file mode 100644 index 000000000..8da8c2677 --- /dev/null +++ b/server/main.py @@ -0,0 +1,73 @@ +import asyncio +from typing import List, Dict, Optional, Callable + +from fastapi import FastAPI, Request, HTTPException + +from server.instance import ExecutorInstance, Executors +from server.myqueue import TaskQueue +from server.sent_data import NotifyType + +app = FastAPI() +executor_instances: Executors = 
Executors() +task_queue = TaskQueue() + +@app.post("/register") +async def register_instance(instance: ExecutorInstance, request: Request): + instance.ip = request.client.host + executor_instances.register(instance) + return {"code": 0} + + + +async def wait(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + result = "" #todo: implement logic + instance.busy = False + instance.event.set() + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() +@app.post("/json") +async def json(req: TranslateRequest): + pass + +@app.post("/bytes") +async def bytes(req: TranslateRequest): + pass + +@app.post("/image") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_json") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_bytes") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_image") +async def image(req: TranslateRequest): + pass + +if __name__ == '__main__': + import uvicorn + from args import parse_arguments + args = parse_arguments() + uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file diff --git a/server/myqueue.py b/server/myqueue.py new file mode 100644 index 000000000..61c1abac3 --- /dev/null +++ b/server/myqueue.py @@ -0,0 +1,25 @@ +import asyncio +from typing import List, Dict + + +class TaskQueue: + def __init__(self): + self.queue: List[Dict] = [] + self.queue_event: asyncio.Event = asyncio.Event() + + def 
add_task(self, task): + self.queue.append(task) + + def get_pos(self, task): + return self.queue.index(task) + + def update_event(self): + self.queue_event.set() + self.queue_event.clear() + + def remove(self, task): + self.queue.remove(task) + self.update_event() + + async def wait_for_event(self): + await self.queue_event.wait() \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data.py new file mode 100644 index 000000000..619dca71b --- /dev/null +++ b/server/sent_data.py @@ -0,0 +1,50 @@ +import pickle +from typing import Mapping, Optional, Callable + +import aiohttp +from PIL.Image import Image +from fastapi import HTTPException + +from manga_translator import Config + +NotifyType = Optional[Callable[[int, Optional[int]], None]] + +async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + await process_stream(response, sender) + else: + raise HTTPException(response.status, detail=response.text()) + + +async def process_stream(response, sender: NotifyType): + buffer = b'' + + async for chunk in response.content.iter_any(): + if chunk: + buffer += chunk + await handle_buffer(buffer, sender) + + +async def handle_buffer(buffer, sender: NotifyType): + while len(buffer) >= 5: + status, expected_size = extract_header(buffer) + + if len(buffer) >= 5 + expected_size: + data = buffer[5:5 + expected_size] + sender(status, data) + buffer = buffer[5 + expected_size:] + else: + break + + +def extract_header(buffer): + """Extract the status and expected size from the buffer.""" + status = int.from_bytes(buffer[0:1], byteorder='big') + expected_size = int.from_bytes(buffer[1:5], byteorder='big') + return status, expected_size + From 
55c59b6c8bccc5d002e335b005e207fb21c91489 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:04:12 +0100 Subject: [PATCH 44/75] python sucks --- execute_test.py | 5 ++++- server/instance.py | 12 ++++++++---- server/sent_data.py | 7 +++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/execute_test.py b/execute_test.py index 3a1d0e8af..8705fb2b1 100644 --- a/execute_test.py +++ b/execute_test.py @@ -4,6 +4,9 @@ import requests from PIL import Image +from manga_translator import Config + + async def execute_method(method_name, attributes): url = f"http://127.0.0.1:5003/execute/{method_name}" headers = {'Content-Type': 'application/octet-stream'} @@ -39,5 +42,5 @@ async def execute_method(method_name, attributes): if __name__ == '__main__': image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "params": {"translator": "none", "inpainter": "none"}} + attributes = {"image": image, "config": Config()} asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 68260c962..da550613c 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,3 +1,4 @@ +import pickle from asyncio import Event, Lock from typing import List, Optional @@ -16,7 +17,7 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self, image: Image, config: Config, sender: NotifyType): + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: @@ -50,8 +51,11 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b: Optional[int] = None) -> None: - print(f"Notify called with a={a} and b={b}") +def example_notify(a: int, b) -> None: 
+ if a == 0: + print(pickle.loads(b)) + else: + print(f"Notify called with a={a} and b={b}") async def main(): executor = ExecutorInstance(ip="127.0.0.1", port=5003) @@ -59,7 +63,7 @@ async def main(): image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") config = Config() - await executor.sent(image, config, example_notify) + await executor.sent_stream(image, config, example_notify) if __name__ == "__main__": import asyncio diff --git a/server/sent_data.py b/server/sent_data.py index 619dca71b..02053bf80 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -1,3 +1,4 @@ +import asyncio import pickle from typing import Mapping, Optional, Callable @@ -27,10 +28,11 @@ async def process_stream(response, sender: NotifyType): async for chunk in response.content.iter_any(): if chunk: buffer += chunk - await handle_buffer(buffer, sender) + buffer = handle_buffer(buffer, sender) -async def handle_buffer(buffer, sender: NotifyType): + +def handle_buffer(buffer, sender: NotifyType): while len(buffer) >= 5: status, expected_size = extract_header(buffer) @@ -40,6 +42,7 @@ async def handle_buffer(buffer, sender: NotifyType): buffer = buffer[5 + expected_size:] else: break + return buffer def extract_header(buffer): From 0d8d5825e8deed320ddc49daf27d4fb9fe1cc62c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:02:42 +0100 Subject: [PATCH 45/75] api logic --- requirements.txt | 3 +- server/args.py | 2 +- server/instance.py | 4 ++ server/main.py | 151 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 143 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 25c2e953b..e843bf354 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,4 +51,5 @@ accelerate bitsandbytes uvicorn fastapi -pydantic \ No newline at end of file +pydantic +python-multipart \ No newline at end of file diff --git a/server/args.py b/server/args.py index 
ac3d92b33..a00e24a22 100644 --- a/server/args.py +++ b/server/args.py @@ -4,6 +4,6 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') - parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index da550613c..0b426b606 100644 --- a/server/instance.py +++ b/server/instance.py @@ -7,6 +7,7 @@ from manga_translator import Config from server.sent_data import fetch_data_stream, NotifyType +from fastapi import Response class ExecutorInstance(BaseModel): @@ -17,6 +18,9 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False + async def sent(self) -> Response: + pass + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) diff --git a/server/main.py b/server/main.py index 8da8c2677..3c3507312 100644 --- a/server/main.py +++ b/server/main.py @@ -1,16 +1,34 @@ import asyncio -from typing import List, Dict, Optional, Callable +import builtins +import io +import re +from base64 import b64decode +from typing import Union +import requests +from PIL import Image from fastapi import FastAPI, Request, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from starlette.responses import StreamingResponse +from manga_translator import Config from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + 
allow_headers=["*"], +) executor_instances: Executors = Executors() task_queue = TaskQueue() + @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): instance.ip = request.client.host @@ -18,6 +36,44 @@ async def register_instance(instance: ExecutorInstance, request: Request): return {"code": 0} +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + async def wait(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" @@ -30,7 +86,11 @@ async def wait(task, notify: NotifyType): task_queue.remove(task) if notify: notify(4, 0) - result = "" #todo: implement logic + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + instance.busy = False instance.event.set() @@ -42,32 +102,93 @@ async def wait(task, notify: NotifyType): if queue_pos == 0: raise HTTPException(500, detail="No translator registered") await task_queue.wait_for_event() + + +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + +def transform_to_image(data): + return b"" + @app.post("/json") -async def json(req: TranslateRequest): - pass +async def json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) @app.post("/bytes") -async def bytes(req: TranslateRequest): - pass +async def bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/image") -async def image(req: TranslateRequest): - pass +async def image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/stream_json") -async def image(req: TranslateRequest): - pass +async def stream_json(req: Request): + data, img = await multi_content_type(req) + 
img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_bytes") -async def image(req: TranslateRequest): - pass +async def stream_bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_image") -async def image(req: TranslateRequest): - pass +async def stream_image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response if __name__ == '__main__': import uvicorn from args import parse_arguments + args = parse_arguments() - uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file + uvicorn.run(app, host=args.host, port=args.port) From bc3f02e367c76dc3612313839280fce9b75929f9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:52:22 +0100 Subject: [PATCH 46/75] replace api --- manga_translator/__main__.py | 4 - manga_translator/mode/api.py | 292 ----------------------------------- server/instance.py | 9 +- server/main.py | 36 ++++- server/sent_data.py | 10 ++ 
server/to_json.py | 66 ++++++++ 6 files changed, 108 insertions(+), 309 deletions(-) delete mode 100644 manga_translator/mode/api.py create mode 100644 server/to_json.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index db711b2c8..eb348798f 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -79,10 +79,6 @@ async def dispatch(args: Namespace): translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) - elif args.mode == 'api': - from manga_translator.mode.api import MangaTranslatorAPI - translator = MangaTranslatorAPI(args_dict) - await translator.listen(args_dict) elif args.mode == 'shared': translator = MangaShare(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py deleted file mode 100644 index fc71d32e3..000000000 --- a/manga_translator/mode/api.py +++ /dev/null @@ -1,292 +0,0 @@ -# Experimental. May be replaced by a refactored server/web_main.py in the future. 
-import asyncio -import base64 -import io - -import cv2 -import numpy as np -from PIL import Image -from aiohttp import web -from aiohttp.web_middlewares import middleware -from marshmallow import fields, Schema, ValidationError - -from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger -from manga_translator.args import translator_chain -from manga_translator.detection import DETECTORS -from manga_translator.inpainting import INPAINTERS -from manga_translator.manga_translator import _preprocess_params -from manga_translator.ocr import OCRS -from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS -from manga_translator.upscaling import UPSCALERS - - -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - - async def 
get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await 
self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - _preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 0b426b606..c27a0c415 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,8 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType -from fastapi import Response +from server.sent_data import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -18,15 +17,15 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self) -> Response: - pass + async def sent(self, image: Image, config: Config): + return await fetch_data("http://"+self.ip+":"+str(self.port)+"/simple_execute/translate", image, config) async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [] + self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] self.lock: Lock = 
Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 3c3507312..cda01c0a9 100644 --- a/server/main.py +++ b/server/main.py @@ -9,12 +9,13 @@ from PIL import Image from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse +from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config +from manga_translator import Config, Context from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType +from server.to_json import to_json app = FastAPI() @@ -91,8 +92,7 @@ async def wait(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - instance.busy = False - instance.event.set() + executor_instances.free_executor(instance) if notify: return @@ -127,22 +127,42 @@ def transform_to_image(data): @app.post("/json") async def json(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) - data = await wait((data, img), None) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait(ctx, None) + json = to_json(data) + return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) data = await wait((data, img), None) @app.post("/image") async def image(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + data = await wait((data, img), None) + img_byte_arr = io.BytesIO() + data.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + return StreamingResponse(img_byte_arr, 
media_type="image/png") @app.post("/stream_json") async def stream_json(req: Request): diff --git a/server/sent_data.py b/server/sent_data.py index 02053bf80..2945cc9f7 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -21,6 +21,16 @@ async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyTyp else: raise HTTPException(response.status, detail=response.text()) +async def fetch_data(url, image: Image, config: Config, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + return pickle.loads(await response.read()) + else: + raise HTTPException(response.status, detail=response.text()) async def process_stream(response, sender: NotifyType): buffer = b'' diff --git a/server/to_json.py b/server/to_json.py new file mode 100644 index 000000000..356e74104 --- /dev/null +++ b/server/to_json.py @@ -0,0 +1,66 @@ +import base64 +from typing import Dict, List + +import cv2 + +from manga_translator import Context +from manga_translator.utils import TextBlock + + +#input:PIL, +#result:PIL +#img_colorized: PIL +#upscaled:PIL +#img_rgb:array +#img_alpha:None +#textlines:list[Quadrilateral] +#text_regions:list[TextBlock] +#translations: map[str, arr[str]] +#img_inpainted: array +#gimp_mask:array +#img_rendered: array +#mask_raw: array +#mask:array +def to_json(ctx: Context): + text_regions:list[TextBlock] = ctx.text_regions + inpaint = ctx.img_inpainted + translations:Dict[str, List[str]] = ctx.translations + results = [] + if 'overlay_ext' in ctx: + #todo: unreachable + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in translations.items()} + else: + trans = {} + 
trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' + overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + return results \ No newline at end of file From 536859e1198b1aa2725780653f24123d0f2db991 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 23:03:20 +0100 Subject: [PATCH 47/75] server refactor --- server/instance.py | 19 +-- server/main.py | 140 ++---------------- server/myqueue.py | 35 ++++- server/request_extraction.py | 61 ++++++++ .../{sent_data.py => sent_data_internal.py} | 0 server/streaming.py | 16 ++ 6 files changed, 126 insertions(+), 145 deletions(-) create mode 100644 server/request_extraction.py rename server/{sent_data.py => sent_data_internal.py} (100%) create mode 100644 server/streaming.py diff --git a/server/instance.py b/server/instance.py index c27a0c415..7c02e695f 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType, fetch_data +from server.sent_data_internal import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -54,20 +54,5 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b) -> None: - if a == 0: - print(pickle.loads(b)) - 
else: - print(f"Notify called with a={a} and b={b}") -async def main(): - executor = ExecutorInstance(ip="127.0.0.1", port=5003) - - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - config = Config() - - await executor.sent_stream(image, config, example_notify) - -if __name__ == "__main__": - import asyncio - asyncio.run(main()) \ No newline at end of file +executor_instances: Executors = Executors() diff --git a/server/main.py b/server/main.py index cda01c0a9..257be7cf5 100644 --- a/server/main.py +++ b/server/main.py @@ -1,20 +1,14 @@ import asyncio -import builtins import io -import re -from base64 import b64decode -from typing import Union -import requests -from PIL import Image -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config, Context -from server.instance import ExecutorInstance, Executors -from server.myqueue import TaskQueue -from server.sent_data import NotifyType +from server.instance import ExecutorInstance, executor_instances +from server.myqueue import wait_in_queue +from server.request_extraction import multi_content_type, to_pil_image, get_ctx +from server.streaming import notify, stream from server.to_json import to_json app = FastAPI() @@ -26,9 +20,6 @@ allow_methods=["*"], allow_headers=["*"], ) -executor_instances: Executors = Executors() -task_queue = TaskQueue() - @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): @@ -36,130 +27,25 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} - -async def to_pil_image(image: Union[str, bytes]) -> Image.Image: - try: - if isinstance(image, builtins.bytes): - image = Image.open(io.BytesIO(image)) - return image - else: - if 
re.match(r'^data:image/.+;base64,', image): - value = image.split(',', 1)[1] - image_data = b64decode(value) - image = Image.open(io.BytesIO(image_data)) - return image - else: - response = requests.get(image) - image = Image.open(io.BytesIO(response.content)) - return image - except Exception as e: - raise HTTPException(status_code=422, detail=str(e)) - - -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - raise HTTPException(status_code=400, detail="Unsupported Content-Type") - - -async def wait(task, notify: NotifyType): - """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" - while True: - queue_pos = task_queue.get_pos(task) - if notify: - notify(3, queue_pos) - if queue_pos < executor_instances.free_executors(): - instance = await executor_instances.find_executor() - task_queue.remove(task) - if notify: - notify(4, 0) - if notify: - await instance.sent_stream(task.image, task.config, notify) - else: - result = await instance.sent(task.image, task.config) - - executor_instances.free_executor(instance) - - if notify: - return - else: - return result - else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - await task_queue.wait_for_event() - - -async def stream(messages): - while True: - message = await messages.get() - yield message - if message[0] == 0 or message[0] == 2: - break - -def notify(code, data, transform_to_bytes, messages): - if code == 0: - result_bytes = transform_to_bytes(data) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - def transform_to_image(data): return b"" @app.post("/json") async def json(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait(ctx, None) - json = to_json(data) + ctx = await get_ctx(req) + json = to_json(ctx) return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - data = await wait((data, img), None) + ctx = await get_ctx(req) @app.post("/image") async def image(req: Request): - data, img = await multi_content_type(req) - ctx 
= Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait((data, img), None) + ctx = await get_ctx(req) img_byte_arr = io.BytesIO() - data.result.save(img_byte_arr, format="PNG") + ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") @@ -175,7 +61,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_bytes") @@ -189,7 +75,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_image") @@ -203,7 +89,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 61c1abac3..cb11db50d 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,6 +1,10 @@ import asyncio from typing import List, Dict +from fastapi import HTTPException + +from server.instance import executor_instances +from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): @@ -22,4 +26,33 @@ def remove(self, task): self.update_event() async def wait_for_event(self): - await 
self.queue_event.wait() \ No newline at end of file + await self.queue_event.wait() + +task_queue = TaskQueue() + +async def wait_in_queue(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + + executor_instances.free_executor(instance) + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py new file mode 100644 index 000000000..559127760 --- /dev/null +++ b/server/request_extraction.py @@ -0,0 +1,61 @@ +import builtins +import io +import re +from base64 import b64decode +from typing import Union + +import requests +from PIL import Image +from fastapi import Request, HTTPException + +from manga_translator import Config, Context +from server.myqueue import task_queue, wait_in_queue + + +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise 
HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + +async def get_ctx(req: Request): + data, img = await multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait_in_queue(ctx, None) \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data_internal.py similarity index 100% rename from server/sent_data.py rename to server/sent_data_internal.py diff --git a/server/streaming.py b/server/streaming.py new file mode 100644 index 000000000..809d58e05 --- /dev/null +++ b/server/streaming.py @@ -0,0 +1,16 @@ +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) \ No newline at end of file From 4a8ec9915108c7623b428c8081ca40822cb2f694 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> 
Date: Tue, 19 Nov 2024 23:11:07 +0100 Subject: [PATCH 48/75] stream update --- server/main.py | 48 ++++++------------------------------ server/request_extraction.py | 22 ++++++++++++++++- server/streaming.py | 4 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/server/main.py b/server/main.py index 257be7cf5..665b61af6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,4 +1,3 @@ -import asyncio import io from fastapi import FastAPI, Request @@ -6,9 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances -from server.myqueue import wait_in_queue -from server.request_extraction import multi_content_type, to_pil_image, get_ctx -from server.streaming import notify, stream +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json app = FastAPI() @@ -27,8 +24,10 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} -def transform_to_image(data): - return b"" +def transform_to_image(ctx): + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + return img_byte_arr.getvalue() @app.post("/json") async def json(req: Request): @@ -40,7 +39,6 @@ async def json(req: Request): async def bytes(req: Request): ctx = await get_ctx(req) - @app.post("/image") async def image(req: Request): ctx = await get_ctx(req) @@ -52,45 +50,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return 
while_streaming(req, transform_to_image) @app.post("/stream_bytes") async def stream_bytes(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/request_extraction.py b/server/request_extraction.py index 559127760..62b2f9fd8 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -1,3 +1,4 @@ +import asyncio import builtins import io import re @@ -7,9 +8,11 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue +from server.streaming import notify, stream async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -58,4 +61,21 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) \ No newline at end of file + data = await wait_in_queue(ctx, None) + +async def while_streaming(req: Request, transform): + data, img = await 
multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + messages = asyncio.Queue() + + def notify_internal(code: int, data) -> None: + notify(code, data, transform, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait_in_queue((data, img), notify_internal)) + return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 809d58e05..907fed3d7 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,5 @@ +import pickle + async def stream(messages): while True: message = await messages.get() @@ -7,7 +9,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: - result_bytes = transform_to_bytes(data) + result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: From 85d7b1e4375e6fab749abff30ba1009d1669611f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:47:52 +0100 Subject: [PATCH 49/75] stream_image,stream_json now working json&image most likely too --- api-test.html | 153 +++++++++++++++++++++++++++++++++++ server/main.py | 9 ++- server/myqueue.py | 4 +- server/request_extraction.py | 4 +- server/streaming.py | 2 +- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 api-test.html diff --git a/api-test.html b/api-test.html new file mode 100644 index 000000000..f07b865fb --- /dev/null +++ b/api-test.html @@ -0,0 +1,153 @@ + + + + + + Image Upload and Translation + + + +

Upload Image and Translate

+ + +

+

+ + + + diff --git a/server/main.py b/server/main.py index 665b61af6..7a8cf48f2 100644 --- a/server/main.py +++ b/server/main.py @@ -29,6 +29,9 @@ def transform_to_image(ctx): ctx.result.save(img_byte_arr, format="PNG") return img_byte_arr.getvalue() +def transform_to_json(ctx): + return str(to_json(ctx)).encode("utf-8") + @app.post("/json") async def json(req: Request): ctx = await get_ctx(req) @@ -50,15 +53,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_json) @app.post("/stream_bytes") async def stream_bytes(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/myqueue.py b/server/myqueue.py index cb11db50d..ca21698ed 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -35,12 +35,12 @@ async def wait_in_queue(task, notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, queue_pos) + notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, 0) + notify(4, "0") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 62b2f9fd8..9290ecc07 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -61,7 +61,7 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) + return await wait_in_queue(ctx, None) async def while_streaming(req: Request, transform): data, img = await 
multi_content_type(req) @@ -77,5 +77,5 @@ def notify_internal(code: int, data) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), notify_internal)) + asyncio.create_task(wait_in_queue(ctx, notify_internal)) return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 907fed3d7..c65a897d2 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -10,7 +10,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: result_bytes = str(data).encode("utf-8") From d07e7dcf351b187058cdce0f6248b90c963a54f5 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:00:15 +0100 Subject: [PATCH 50/75] remove old tasks on interrupt connection --- server/myqueue.py | 15 +++++++++++++++ server/request_extraction.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/server/myqueue.py b/server/myqueue.py index ca21698ed..9ae865063 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -2,6 +2,7 @@ from typing import List, Dict from fastapi import HTTPException +from starlette.requests import Request from server.instance import executor_instances from server.sent_data_internal import NotifyType @@ -30,6 +31,11 @@ async def wait_for_event(self): task_queue = TaskQueue() +async def is_client_disconnected(request: Request) -> bool: + if await request.is_disconnected(): + return True + return False + async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. 
If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: @@ -37,6 +43,10 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -47,6 +57,7 @@ async def wait_in_queue(task, notify: NotifyType): result = await instance.sent(task.image, task.config) executor_instances.free_executor(instance) + task_queue.update_event() if notify: return @@ -55,4 +66,8 @@ async def wait_in_queue(task, notify: NotifyType): else: if queue_pos == 0: raise HTTPException(500, detail="No translator registered") + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 9290ecc07..02dc85e70 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -59,6 +59,7 @@ async def get_ctx(req: Request): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) return await wait_in_queue(ctx, None) @@ -69,6 +70,7 @@ async def while_streaming(req: Request, transform): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) messages = asyncio.Queue() From e9ef90123cb95793f422e599d1dce2628c419938 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:32:11 +0100 Subject: [PATCH 51/75] change routes --- 
execute_test.py | 46 ----------------------------- server/main.py | 28 ++++++++++++++---- api-test.html => server/manual.html | 3 +- 3 files changed, 23 insertions(+), 54 deletions(-) delete mode 100644 execute_test.py rename api-test.html => server/manual.html (97%) diff --git a/execute_test.py b/execute_test.py deleted file mode 100644 index 8705fb2b1..000000000 --- a/execute_test.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio -import json -import pickle -import requests -from PIL import Image - -from manga_translator import Config - - -async def execute_method(method_name, attributes): - url = f"http://127.0.0.1:5003/execute/{method_name}" - headers = {'Content-Type': 'application/octet-stream'} - - response = requests.post(url, data=pickle.dumps(attributes), headers=headers, stream=True) - - if response.status_code == 200: - buffer = b'' - for chunk in response.iter_content(chunk_size=None): - if chunk: - buffer += chunk - while True: - if len(buffer) >= 5: - status = int.from_bytes(buffer[0:1], byteorder='big') - expected_size = int.from_bytes(buffer[1:5], byteorder='big') - if len(buffer) >= 5 + expected_size: - data = buffer[5:5 + expected_size] - if status == 0: - print("data", pickle.loads(data)) - elif status == 1: - print("log", data) - elif status == 2: - print("error", data) - buffer = buffer[5 + expected_size:] - else: - break - else: - break - else: - print(json.loads(response.content)) - - - -if __name__ == '__main__': - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "config": Config()} - asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/main.py b/server/main.py index 7a8cf48f2..49ef9f5a9 100644 --- a/server/main.py +++ b/server/main.py @@ -5,6 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances +from server.myqueue import task_queue from 
server.request_extraction import get_ctx, while_streaming from server.to_json import to_json @@ -32,17 +33,17 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/json") +@app.post("/translate/json") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/bytes") +@app.post("/translate/bytes") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/image") +@app.post("/translate/image") async def image(req: Request): ctx = await get_ctx(req) img_byte_arr = io.BytesIO() @@ -51,18 +52,33 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/stream_json") +@app.post("/translate/json/stream") async def stream_json(req: Request): return await while_streaming(req, transform_to_json) -@app.post("/stream_bytes") +@app.post("/translate/bytes/stream") async def stream_bytes(req: Request): return await while_streaming(req, transform_to_image) -@app.post("/stream_image") +@app.post("/translate/image/stream") async def stream_image(req: Request): return await while_streaming(req, transform_to_image) +@app.post("/queue-size") +async def queue_size() -> int: + return len(task_queue.queue) + +@app.post("/") +async def index(): + # ui.html + pass + +@app.post("/manual") +async def manual(): + # manual.html + pass + + if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/api-test.html b/server/manual.html similarity index 97% rename from api-test.html rename to server/manual.html index f07b865fb..1335b0c51 100644 --- a/api-test.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? 'http://127.0.0.1:8000/stream_image' : 'http://127.0.0.1:8000/stream_json', { + const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { method: 'POST', body: formData, /*headers: { @@ -142,7 +142,6 @@

buffer = buffer.slice(totalSize); } } - console.log(buffer.length) } else { errorField.innerHTML = response.statusText; From c42c9f60eb0462b99b53d9c0e7c9bd3f210e2e04 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:10:35 +0100 Subject: [PATCH 52/75] api goals --- server/args.py | 7 ++++++- server/instance.py | 2 +- server/main.py | 51 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/server/args.py b/server/args.py index a00e24a22..a27ba5ffd 100644 --- a/server/args.py +++ b/server/args.py @@ -5,5 +5,10 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - + #use_gpu + #use_gpu_limited + #ignore_errors + #verbose + #nonce + #start_instance return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 7c02e695f..e844289dd 100644 --- a/server/instance.py +++ b/server/instance.py @@ -25,7 +25,7 @@ async def sent_stream(self, image: Image, config: Config, sender: NotifyType): class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] + self.list: List[ExecutorInstance] = [] self.lock: Lock = Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 49ef9f5a9..dd6f6ebd6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,6 +1,10 @@ import io +import os +import secrets +import subprocess +import sys -from fastapi import FastAPI, Request +from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse @@ -10,6 +14,7 @@ from server.to_json import 
to_json app = FastAPI() +nonce = None app.add_middleware( CORSMiddleware, @@ -21,6 +26,9 @@ @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): + req_nonce = request.headers.get('X-Nonce') + if req_nonce != nonce: + raise HTTPException(401, detail="Invalid nonce") instance.ip = request.client.host executor_instances.register(instance) return {"code": 0} @@ -78,10 +86,51 @@ async def manual(): # manual.html pass +def generate_nonce(): + return secrets.token_hex(16) + +def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): + cmds = [ + sys.executable, + '-m', 'manga_translator', + '--mode', 'shared', + '--host', host, + '--port', str(port), + '--nonce', nonce, + '--no-report' + ] + if params.get('use_gpu', False): + cmds.append('--use-gpu') + if params.get('use_gpu_limited', False): + cmds.append('--use-gpu-limited') + if params.get('ignore_errors', False): + cmds.append('--ignore-errors') + if params.get('verbose', False): + cmds.append('--verbose') + + proc = subprocess.Popen(cmds, cwd=BASE_PATH) + executor_instances.register(ExecutorInstance(ip=host, port=port)) + return proc + +def prepare(args): + global nonce + if args.get("nonce", None) is None: + nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) + else: + nonce = args.get("nonce", None) + if args.get("start_instance", None): + start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + +#todo: restart if crash +#todo: cache results +#todo: cleanup cache +#todo: store images while in queue if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() + prepare(args) + print("Nonce: "+nonce) uvicorn.run(app, host=args.host, port=args.port) From b53b79e904e62af638027cc13dc8b228aa8b7d72 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:14:31 +0100 Subject: [PATCH 53/75] more todos & args --- 
server/args.py | 16 ++++++++++------ server/main.py | 7 ++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/server/args.py b/server/args.py index a27ba5ffd..5c7294ca9 100644 --- a/server/args.py +++ b/server/args.py @@ -1,14 +1,18 @@ import argparse +import os def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - #use_gpu - #use_gpu_limited - #ignore_errors - #verbose - #nonce - #start_instance + parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + parser.add_argument('--start-instance', action='store_true', + help='If a translator should be launched automatically') + parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + g = parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') return parser.parse_args() \ No newline at end of file diff --git a/server/main.py b/server/main.py index dd6f6ebd6..f5b5d105a 100644 --- a/server/main.py +++ b/server/main.py @@ -78,12 +78,12 @@ async def queue_size() -> int: @app.post("/") async def index(): - # ui.html + # todo:ui.html pass @app.post("/manual") async def manual(): - # manual.html + # todo:manual.html pass def generate_nonce(): @@ -107,7 +107,7 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) cmds.append('--ignore-errors') if 
params.get('verbose', False): cmds.append('--verbose') - + #todo: cwd proc = subprocess.Popen(cmds, cwd=BASE_PATH) executor_instances.register(ExecutorInstance(ip=host, port=port)) return proc @@ -125,6 +125,7 @@ def prepare(args): #todo: cache results #todo: cleanup cache #todo: store images while in queue +#todo: add docs if __name__ == '__main__': import uvicorn From 06e04ca8241d1822be2d4dfc58927bbfd8a27b94 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:03:29 +0100 Subject: [PATCH 54/75] some docs & queue fix --- manga_translator/utils/generic.py | 2 +- server/main.py | 83 ++++++++++++++++++++----------- server/myqueue.py | 20 +++++--- server/request_extraction.py | 8 ++- server/to_json.py | 17 ++++++- 5 files changed, 91 insertions(+), 39 deletions(-) diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py index 92c55c6ac..785eb135d 100644 --- a/manga_translator/utils/generic.py +++ b/manga_translator/utils/generic.py @@ -37,7 +37,7 @@ def __init__(self, **kwargs): def __eq__(self, other): if not isinstance(other, Context): return NotImplemented - return vars(self) == vars(other) + return dict(self) == dict(other) def __contains__(self, key): return key in self.keys() diff --git a/server/main.py b/server/main.py index f5b5d105a..185e59f6a 100644 --- a/server/main.py +++ b/server/main.py @@ -3,15 +3,21 @@ import secrets import subprocess import sys +from builtins import bytes +from typing import Union -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse, JSONResponse +from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from pathlib import Path +from pydantic import BaseModel + +from manga_translator import Config from server.instance import ExecutorInstance, 
executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json +from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.to_json import to_json, Translation app = FastAPI() nonce = None @@ -24,14 +30,19 @@ allow_headers=["*"], ) -@app.post("/register") -async def register_instance(instance: ExecutorInstance, request: Request): - req_nonce = request.headers.get('X-Nonce') +class TranslateRequestForm(BaseModel): + """This request can be a multipart or a json request""" + image: UploadFile + """can be a url, base64 encoded image or a multipart image""" + config: str + """in case it is a multipart this needs to be a string(json.stringify)""" + +@app.post("/register", response_description="no response") +async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") - instance.ip = request.client.host + instance.ip = req.client.host executor_instances.register(instance) - return {"code": 0} def transform_to_image(ctx): img_byte_arr = io.BytesIO() @@ -41,18 +52,29 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/translate/json") +async def parse_request( + req: Request, + image: Union[str, bytes] = Form(...), + config: str = Form(...), +): + if req.headers.get('content-type').startswith('multipart'): + config = json.loads(config) + return TranslateRequest(image=image, config=Config(**config)) + else: + return None + +@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/translate/bytes") +@app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/translate/image") -async def image(req: Request): +@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) +async def image(req: Request) -> StreamingResponse: ctx = await get_ctx(req) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") @@ -60,31 +82,32 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream") -async def stream_json(req: Request): +@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_json) -@app.post("/translate/bytes/stream") -async def stream_bytes(req: Request): +@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request)-> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/translate/image/stream") -async def stream_image(req: Request): +@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is 
waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/queue-size") +@app.post("/queue-size", response_model=int) async def queue_size() -> int: return len(task_queue.queue) -@app.post("/") -async def index(): +@app.get("/", response_class=HTMLResponse) +async def index() -> HTMLResponse: # todo:ui.html pass -@app.post("/manual") +@app.get("/manual", response_class=HTMLResponse) async def manual(): - # todo:manual.html - pass + html_file = Path("manual.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) def generate_nonce(): return secrets.token_hex(16) @@ -114,18 +137,19 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) def prepare(args): global nonce - if args.get("nonce", None) is None: + if args.nonce is None: nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) else: - nonce = args.get("nonce", None) - if args.get("start_instance", None): - start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + nonce = args.nonce + if args.start_instance: + start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results #todo: cleanup cache #todo: store images while in queue #todo: add docs +#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) if __name__ == '__main__': import uvicorn @@ -134,4 +158,5 @@ def prepare(args): args = parse_arguments() prepare(args) print("Nonce: "+nonce) + executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) uvicorn.run(app, host=args.host, port=args.port) diff --git a/server/myqueue.py b/server/myqueue.py index 9ae865063..446541fc2 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -3,13 +3,15 @@ from fastapi import HTTPException from starlette.requests 
import Request +from starlette.responses import StreamingResponse +from manga_translator import Context from server.instance import executor_instances from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): - self.queue: List[Dict] = [] + self.queue: List[Context] = [] self.queue_event: asyncio.Event = asyncio.Event() def add_task(self, task): @@ -43,10 +45,13 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -64,10 +69,11 @@ async def wait_in_queue(task, notify: NotifyType): else: return result else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 02dc85e70..442bc0e68 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -8,12 +8,19 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from pydantic import BaseModel from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue from 
server.streaming import notify, stream +class TranslateRequest(BaseModel): + """This request can be a multipart or a json request""" + image: bytes|str + """can be a url, base64 encoded image or a multipart image""" + config: Config + """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: try: @@ -36,7 +43,6 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: async def multi_content_type(request: Request): content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): form = await request.form() config = form.get("config", "{}") diff --git a/server/to_json.py b/server/to_json.py index 356e74104..911718d8d 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -2,6 +2,7 @@ from typing import Dict, List import cv2 +from pydantic import BaseModel from manga_translator import Context from manga_translator.utils import TextBlock @@ -63,4 +64,18 @@ def to_json(ctx: Context): 'language': text_regions[i].source_lang, 'background': background }) - return results \ No newline at end of file + return results + +class TextColor(BaseModel): + fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + text: dict[str, str] + minX: int + minY: int + maxX: int + maxY: int + textColor:TextColor + language: str + background: str From cbd01adb924e7750f7019d579609f5774c7f465a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:25 +0100 Subject: [PATCH 55/75] stream type fix --- server/myqueue.py | 4 ++-- server/request_extraction.py | 2 +- server/sent_data_internal.py | 2 +- server/streaming.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/myqueue.py b/server/myqueue.py index 446541fc2..b9174d936 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -43,7 +43,7 @@ async def wait_in_queue(task, 
notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, str(queue_pos)) + notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): task_queue.remove(task) @@ -55,7 +55,7 @@ async def wait_in_queue(task, notify: NotifyType): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, "0") + notify(4, b"") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 442bc0e68..080d84cc4 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -81,7 +81,7 @@ async def while_streaming(req: Request, transform): messages = asyncio.Queue() - def notify_internal(code: int, data) -> None: + def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 2945cc9f7..5720cb6c5 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -8,7 +8,7 @@ from manga_translator import Config -NotifyType = Optional[Callable[[int, Optional[int]], None]] +NotifyType = Optional[Callable[[int, Optional[bytes]], None]] async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): attributes = {"image": image, "config": config} diff --git a/server/streaming.py b/server/streaming.py index c65a897d2..611771853 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,4 @@ +import asyncio import pickle async def stream(messages): @@ -7,12 +8,11 @@ async def stream(messages): if message[0] == 0 or message[0] == 2: break -def notify(code, data, transform_to_bytes, messages): +def notify(code: int, data: bytes, transform_to_bytes, messages: 
asyncio.Queue): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result =code.to_bytes(1, 'big') + len(data).to_bytes(4, 'big') + data messages.put_nowait(encoded_result) \ No newline at end of file From 858675a7a4bdacd86ee5727d8c28538384526a29 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:49 +0100 Subject: [PATCH 56/75] add server ui --- manga_translator/args.py | 2 +- server/index.html | 393 +++++++++++++++++++++++++++++++++++++++ server/main.py | 46 +++-- 3 files changed, 425 insertions(+), 16 deletions(-) create mode 100644 server/index.html diff --git a/manga_translator/args.py b/manga_translator/args.py index ef84073d6..562059ad6 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -131,4 +131,4 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - +parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') diff --git a/server/index.html b/server/index.html new file mode 100644 index 000000000..edf617609 --- /dev/null +++ b/server/index.html @@ -0,0 +1,393 @@ + + + + + Image/Manga Translator + + + + + + + +
+
+

Image/Manga Translator

+
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+
+ + +
+
+
+
{{ statusText }}
+ +
+
+ +
{{ statusText }}
+
+
+ +
+
+ Please consider supporting us by + Ko-fi + or + Patreon! +
+ Source Code +
+
+
+ + + diff --git a/server/main.py b/server/main.py index 185e59f6a..773908f41 100644 --- a/server/main.py +++ b/server/main.py @@ -1,8 +1,10 @@ import io import os import secrets +import signal import subprocess import sys +from argparse import Namespace from builtins import bytes from typing import Union @@ -100,8 +102,9 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse) async def index() -> HTMLResponse: - # todo:ui.html - pass + html_file = Path("index.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse) async def manual(): @@ -112,27 +115,35 @@ async def manual(): def generate_nonce(): return secrets.token_hex(16) -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): +def start_translator_client_proc(host: str, port: int, nonce: str, params: Namespace): cmds = [ sys.executable, '-m', 'manga_translator', - '--mode', 'shared', + 'shared', '--host', host, '--port', str(port), '--nonce', nonce, - '--no-report' ] - if params.get('use_gpu', False): + if params.use_gpu: cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): + if params.use_gpu_limited: cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): + if params.ignore_errors: cmds.append('--ignore-errors') - if params.get('verbose', False): + if params.verbose: cmds.append('--verbose') - #todo: cwd - proc = subprocess.Popen(cmds, cwd=BASE_PATH) + base_path = os.path.dirname(os.path.abspath(__file__)) + parent = os.path.dirname(base_path) + proc = subprocess.Popen(cmds, cwd=parent) executor_instances.register(ExecutorInstance(ip=host, port=port)) + + def handle_exit_signals(signal, frame): + proc.terminate() + sys.exit(0) + + signal.signal(signal.SIGINT, handle_exit_signals) + signal.signal(signal.SIGTERM, handle_exit_signals) + return proc def prepare(args): @@ -142,7 +153,7 @@ def prepare(args): else: nonce = args.nonce if 
args.start_instance: - start_translator_client_proc(args.host, args.port + 1, nonce, args) + return start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results @@ -150,13 +161,18 @@ def prepare(args): #todo: store images while in queue #todo: add docs #todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) +#todo: enable config in html pages if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() - prepare(args) + args.start_instance = True + proc = prepare(args) print("Nonce: "+nonce) - executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) - uvicorn.run(app, host=args.host, port=args.port) + try: + uvicorn.run(app, host=args.host, port=args.port) + except Exception: + if proc: + proc.terminate() From 8aeff8e21fd1c80540da00661f5166fe95624ad8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:36:36 +0100 Subject: [PATCH 57/75] delete old code --- manga_translator/mode/web.py | 153 ------- manga_translator/server/manual.html | 352 ---------------- manga_translator/server/ui.html | 377 ----------------- manga_translator/server/web_main.py | 629 ---------------------------- manga_translator/server/ws.proto | 36 -- manga_translator/server/ws_pb2.py | 32 -- 6 files changed, 1579 deletions(-) delete mode 100644 manga_translator/mode/web.py delete mode 100644 manga_translator/server/manual.html delete mode 100644 manga_translator/server/ui.html delete mode 100644 manga_translator/server/web_main.py delete mode 100644 manga_translator/server/ws.proto delete mode 100644 manga_translator/server/ws_pb2.py diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py deleted file mode 100644 index 3f0ccb889..000000000 --- a/manga_translator/mode/web.py +++ /dev/null @@ -1,153 +0,0 @@ -import asyncio -import time - 
-import requests - -from manga_translator import logger, Context, Config -from manga_translator.mode.local import MangaTranslatorLocal -from manga_translator.translators import TRANSLATORS -from manga_translator.utils import add_file_logger, remove_file_logger - - -class MangaTranslatorWeb(MangaTranslatorLocal): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. - """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from ..translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) 
- finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. - """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, config: Config, ctx: Context): - # Run machine translation as reference for manual translation (if 
`--translator=none` is not set) - text_regions = await super()._run_text_translation(config, ctx) - - #todo: manual never set - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations = ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = config.translator.translator_gen.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions diff --git a/manga_translator/server/manual.html b/manga_translator/server/manual.html deleted file mode 100644 index 9e4a2befb..000000000 --- a/manga_translator/server/manual.html +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - Manga Image Translator - - - - -
- - -
-
-
-

Raw Text

-

Translation

-
-
- - -
-
- -
-
-
-

Upload image by dragging and dropping images onto the dashed region

- - -
- - -
-
- -
-
- - - - - - \ No newline at end of file diff --git a/manga_translator/server/ui.html b/manga_translator/server/ui.html deleted file mode 100644 index 88dcb9fc3..000000000 --- a/manga_translator/server/ui.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - - Image/Manga Translator - - - - - - - -
-
-

Image/Manga Translator

-
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
-
-
- - -
-
-
-
{{ statusText }}
- -
-
- -
{{ statusText }}
-
-
- -
-
- Please consider supporting us by - Ko-fi - or - Patreon! -
- Source Code -
-
-
- - - diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py deleted file mode 100644 index 9cc61c0f4..000000000 --- a/manga_translator/server/web_main.py +++ /dev/null @@ -1,629 +0,0 @@ -import io -import os -import sys -import re -import shutil -import mimetypes -import time -import asyncio -import subprocess -import secrets -from io import BytesIO -from PIL import Image -from aiohttp import web -from collections import deque -from imagehash import phash - -SERVER_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -BASE_PATH = os.path.dirname(os.path.dirname(SERVER_DIR_PATH)) - -# TODO: Get capabilities through api -VALID_LANGUAGES = { - 'CHS': 'Chinese (Simplified)', - 'CHT': 'Chinese (Traditional)', - 'CSY': 'Czech', - 'NLD': 'Dutch', - 'ENG': 'English', - 'FRA': 'French', - 'DEU': 'German', - 'HUN': 'Hungarian', - 'ITA': 'Italian', - 'JPN': 'Japanese', - 'KOR': 'Korean', - 'PLK': 'Polish', - 'PTB': 'Portuguese (Brazil)', - 'ROM': 'Romanian', - 'RUS': 'Russian', - 'ESP': 'Spanish', - 'TRK': 'Turkish', - 'UKR': 'Ukrainian', - 'VIN': 'Vietnamese', - 'ARA': 'Arabic', -} -# Whitelists -VALID_DETECTORS = set(['default', 'ctd']) -VALID_DIRECTIONS = set(['auto', 'h', 'v']) -VALID_TRANSLATORS = [ - 'youdao', - 'baidu', - 'google', - 'deepl', - 'deepseek', - 'papago', - 'caiyun', - 'gpt3.5', - 'gpt4', - 'nllb', - 'nllb_big', - 'sugoi', - 'jparacrawl', - 'jparacrawl_big', - 'm2m100', - 'm2m100_big', - 'qwen2', - 'qwen2_big', - 'sakura', - 'none', - 'original', -] - -MAX_ONGOING_TASKS = 1 -MAX_IMAGE_SIZE_PX = 8000**2 - -# Time to wait for web client to send a request to /task-state request -# before that web clients task gets removed from the queue -WEB_CLIENT_TIMEOUT = -1 - -# Time before finished tasks get removed from memory -FINISHED_TASK_REMOVE_TIMEOUT = 1800 - -# Auto deletes old task folders upon reaching this disk space limit -DISK_SPACE_LIMIT = 5e7 # 50mb - -# TODO: Turn into dict with translator client id as key for support of 
multiple translator clients -ONGOING_TASKS = [] -FINISHED_TASKS = [] -NONCE = '' -QUEUE = deque() -TASK_DATA = {} -TASK_STATES = {} -DEFAULT_TRANSLATION_PARAMS = {} -AVAILABLE_TRANSLATORS = [] -FORMAT = '' - -app = web.Application(client_max_size = 1024 * 1024 * 50) -routes = web.RouteTableDef() - - -def constant_compare(a, b): - if isinstance(a, str): - a = a.encode('utf-8') - if isinstance(b, str): - b = b.encode('utf-8') - if not isinstance(a, bytes) or not isinstance(b, bytes): - return False - if len(a) != len(b): - return False - - result = 0 - for x, y in zip(a, b): - result |= x ^ y - return result == 0 - -@routes.get("/") -async def index_async(request): - global AVAILABLE_TRANSLATORS - with open(os.path.join(SERVER_DIR_PATH, 'ui.html'), 'r', encoding='utf8') as fp: - content = fp.read() - if AVAILABLE_TRANSLATORS: - content = re.sub(r'(?<=translator: )(.*)(?=,)', repr(AVAILABLE_TRANSLATORS[0]), content) - content = re.sub(r'(?<=validTranslators: )(\[.*\])(?=,)', repr(AVAILABLE_TRANSLATORS), content) - return web.Response(text=content, content_type='text/html') - -@routes.get("/manual") -async def index_async(request): - with open(os.path.join(SERVER_DIR_PATH, 'manual.html'), 'r', encoding='utf8') as fp: - return web.Response(text=fp.read(), content_type='text/html') - -@routes.get("/result/{taskid}") -async def result_async(request): - global FORMAT - filepath = os.path.join('result', request.match_info.get('taskid'), f'final.{FORMAT}') - if not os.path.exists(filepath): - return web.Response(status=404, text='Not Found') - stream = BytesIO() - with open(filepath, 'rb') as f: - stream.write(f.read()) - mime = mimetypes.guess_type(filepath)[0] or 'application/octet-stream' - return web.Response(body=stream.getvalue(), content_type=mime) - -@routes.get("/result-type") -async def file_type_async(request): - global FORMAT - return web.Response(text=f'{FORMAT}') - -@routes.get("/queue-size") -async def queue_size_async(request): - return 
web.json_response({'size' : len(QUEUE)}) - -async def handle_post(request): - data = await request.post() - detection_size = None - selected_translator = 'youdao' - target_language = 'CHS' - detector = 'default' - direction = 'auto' - if 'target_lang' in data: - target_language = data['target_lang'].upper() - # TODO: move dicts to their own files to reduce load time - if target_language not in VALID_LANGUAGES: - target_language = 'CHS' - if 'detector' in data: - detector = data['detector'].lower() - if detector not in VALID_DETECTORS: - detector = 'default' - if 'direction' in data: - direction = data['direction'].lower() - if direction not in VALID_DIRECTIONS: - direction = 'auto' - if 'translator' in data: - selected_translator = data['translator'].lower() - if selected_translator not in AVAILABLE_TRANSLATORS: - selected_translator = AVAILABLE_TRANSLATORS[0] - if 'size' in data: - size_text = data['size'].upper() - if size_text == 'S': - detection_size = 1024 - elif size_text == 'M': - detection_size = 1536 - elif size_text == 'L': - detection_size = 2048 - elif size_text == 'X': - detection_size = 2560 - if 'file' in data: - file_field = data['file'] - content = file_field.file.read() - elif 'url' in data: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(data['url']) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - return web.json_response({'status': 'error'}) - try: - img = Image.open(io.BytesIO(content)) - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > MAX_IMAGE_SIZE_PX: - return web.json_response({'status': 'error-too-large'}) - except Exception: - return web.json_response({'status': 'error-img-corrupt'}) - return img, detection_size, selected_translator, target_language, detector, direction - -@routes.post("/run") -async def run_async(request): - global FORMAT - x = await 
handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - print(f'New `run` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - # Add a console output prompt to avoid the console from appearing to be stuck without execution when the translated image is hit consecutively. - print(f'Using cached result for {task_id}') - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - # elif os.path.exists(f'result/{task_id}'): - # # either image is being processed or error occurred - # if task_id not in TASK_STATES: - # # error occurred - # return web.json_response({'state': 'error'}) - else: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - now = time.time() - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(0.1) - if task_id not in TASK_STATES: - break - state = TASK_STATES[task_id] - if state['finished']: - break - return web.json_response({'task_id': task_id, 'status': 'successful' if state['finished'] else state['info']}) - - -@routes.post("/connect-internal") -async def index_async(request): - global NONCE, VALID_TRANSLATORS, AVAILABLE_TRANSLATORS - # Can be extended to allow support for multiple translators - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - capabilities = rqjson.get('capabilities') - if capabilities: - translators = capabilities.get('translators') - AVAILABLE_TRANSLATORS.clear() - for key in VALID_TRANSLATORS: - if key in translators: - 
AVAILABLE_TRANSLATORS.append(key) - return web.json_response({}) - -@routes.get("/task-internal") -async def get_task_async(request): - """ - Called by the translator to get a translation task. - """ - global NONCE, ONGOING_TASKS, DEFAULT_TRANSLATION_PARAMS - if constant_compare(request.rel_url.query.get('nonce'), NONCE): - if len(QUEUE) > 0 and len(ONGOING_TASKS) < MAX_ONGOING_TASKS: - task_id = QUEUE.popleft() - if task_id in TASK_DATA: - data = TASK_DATA[task_id] - for p, default_value in DEFAULT_TRANSLATION_PARAMS.items(): - current_value = data.get(p) - data[p] = current_value if current_value is not None else default_value - if not TASK_DATA[task_id].get('manual', False): - ONGOING_TASKS.append(task_id) - return web.json_response({'task_id': task_id, 'data': data}) - else: - return web.json_response({}) - else: - return web.json_response({}) - return web.json_response({}) - -async def manual_trans_task(task_id, texts, translations): - if task_id not in TASK_DATA: - TASK_DATA[task_id] = {} - if texts and translations: - TASK_DATA[task_id]['trans_request'] = [{'s': txt, 't': trans} for txt, trans in zip(texts, translations)] - else: - TASK_DATA[task_id]['trans_result'] = [] - print('Manual translation complete') - -@routes.post("/cancel-manual-request") -async def cancel_manual_translation(request): - rqjson = (await request.json()) - if 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - TASK_DATA[task_id]['cancel'] = ' ' - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'cancelled'}) - break - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/post-manual-result") -async def post_translation_result(request): - rqjson = (await request.json()) - if 
'trans_result' in rqjson and 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - trans_result = [r['t'] for r in rqjson['trans_result']] - TASK_DATA[task_id]['trans_result'] = trans_result - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'successful'}) - break - # remove old tasks - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/request-manual-internal") -async def request_translation_internal(request): - global NONCE - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if TASK_DATA[task_id].get('manual', False): - # manual translation - asyncio.gather(manual_trans_task(task_id, rqjson['texts'], rqjson['translations'])) - return web.json_response({}) - -@routes.post("/get-manual-result-internal") -async def get_translation_internal(request): - global NONCE - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if 'trans_result' in TASK_DATA[task_id]: - return web.json_response({'result': TASK_DATA[task_id]['trans_result']}) - elif 'cancel' in TASK_DATA[task_id]: - return web.json_response({'cancel':''}) - return web.json_response({}) - -@routes.get("/task-state") -async def get_task_state_async(request): - """ - Web API for getting the state of an on-going translation task from the website. - - Is periodically called from ui.html. 
Once it returns a finished state, - the web client will try to fetch the corresponding image through /result/ - """ - task_id = request.query.get('taskid') - if task_id and task_id in TASK_STATES and task_id in TASK_DATA: - state = TASK_STATES[task_id] - data = TASK_DATA[task_id] - res_dict = { - 'state': state['info'], - 'finished': state['finished'], - } - data['requested_at'] = time.time() - try: - res_dict['waiting'] = QUEUE.index(task_id) + 1 - except Exception: - res_dict['waiting'] = 0 - res = web.json_response(res_dict) - - return res - return web.json_response({'state': 'error'}) - -@routes.post("/task-update-internal") -async def post_task_update_async(request): - """ - Lets the translator update the task state it is working on. - """ - global NONCE, ONGOING_TASKS, FINISHED_TASKS - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_STATES and task_id in TASK_DATA: - TASK_STATES[task_id] = { - 'info': rqjson['state'], - 'finished': rqjson['finished'], - } - if rqjson['finished'] and not TASK_DATA[task_id].get('manual', False): - try: - i = ONGOING_TASKS.index(task_id) - FINISHED_TASKS.append(ONGOING_TASKS.pop(i)) - except ValueError: - pass - print(f'Task state {task_id} to {TASK_STATES[task_id]}') - return web.json_response({}) - -@routes.post("/submit") -async def submit_async(request): - """Adds new task to the queue. 
Called by web client in ui.html when submitting an image.""" - global FORMAT - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - now = time.time() - print(f'New `submit` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - TASK_STATES[task_id] = { - 'info': 'saved', - 'finished': True, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - elif task_id not in TASK_DATA or task_id not in TASK_STATES: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - return web.json_response({'task_id': task_id, 'status': 'successful'}) - -@routes.post("/manual-translate") -async def manual_translate_async(request): - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = secrets.token_hex(16) - print(f'New `manual-translate` task {task_id}') - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - now = time.time() - QUEUE.append(task_id) - # TODO: Add form fields to manual translate website - TASK_DATA[task_id] = { - # 'detection_size': size, - 'manual': True, - # 'detector': detector, - # 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - 
print(TASK_DATA[task_id]) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(1) - if 'trans_request' in TASK_DATA[task_id]: - return web.json_response({'task_id' : task_id, 'status': 'pending', 'trans_result': TASK_DATA[task_id]['trans_request']}) - if TASK_STATES[task_id]['info'].startswith('error'): - break - if TASK_STATES[task_id]['finished']: - # no texts detected - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - return web.json_response({'task_id' : task_id, 'status': 'error'}) - -app.add_routes(routes) - - -def generate_nonce(): - return secrets.token_hex(16) - -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): - os.environ['MT_WEB_NONCE'] = nonce - cmds = [ - sys.executable, - '-m', 'manga_translator', - '--mode', 'web_client', - '--host', host, - '--port', str(port), - ] - if params.get('use_gpu', False): - cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): - cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): - cmds.append('--ignore-errors') - if params.get('verbose', False): - cmds.append('--verbose') - - proc = subprocess.Popen(cmds, cwd=BASE_PATH) - return proc - -async def start_async_app(host: str, port: int, nonce: str, translation_params: dict = None): - global NONCE, DEFAULT_TRANSLATION_PARAMS, FORMAT - # Secret to secure communication between webserver and translator clients - NONCE = nonce - DEFAULT_TRANSLATION_PARAMS = translation_params or {} - FORMAT = DEFAULT_TRANSLATION_PARAMS.get('format') or 'jpg' - DEFAULT_TRANSLATION_PARAMS['format'] = FORMAT - - # Schedule web server to run - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, host, port) - await site.start() - print(f'Serving up app on http://{host}:{port}') - - return runner, site - -async def dispatch(host: str, port: int, nonce: str = None, translation_params: dict = None): - global ONGOING_TASKS, 
FINISHED_TASKS - - if nonce is None: - nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) - - # Start web service - runner, site = await start_async_app(host, port, nonce, translation_params) - - # Create client process that will execute translation tasks - print() - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Get all prior finished tasks - os.makedirs('result/', exist_ok=True) - for f in os.listdir('result/'): - if os.path.isdir(f'result/{f}') and re.search(r'^\w+-\d+-\w+-\w+-\w+-\w+$', f): - FINISHED_TASKS.append(f) - FINISHED_TASKS = list(sorted(FINISHED_TASKS, key=lambda task_id: os.path.getmtime(f'result/{task_id}'))) - - try: - while True: - await asyncio.sleep(1) - - # Restart client if OOM or similar errors occurred - if client_process.poll() is not None: - # if client_process.poll() == 0: - # break - print('Restarting translator process') - if len(ONGOING_TASKS) > 0: - tid = ONGOING_TASKS.pop(0) - state = TASK_STATES[tid] - state['info'] = 'error' - state['finished'] = True - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Filter queued and finished tasks - now = time.time() - to_del_task_ids = set() - for tid, s in TASK_STATES.items(): - d = TASK_DATA[tid] - # Remove finished tasks after 30 minutes - if s['finished'] and now - d['created_at'] > FINISHED_TASK_REMOVE_TIMEOUT: - to_del_task_ids.add(tid) - - # Remove queued tasks without web client - elif WEB_CLIENT_TIMEOUT >= 0: - if tid not in ONGOING_TASKS and not s['finished'] and now - d['requested_at'] > WEB_CLIENT_TIMEOUT: - print('REMOVING TASK', tid) - to_del_task_ids.add(tid) - try: - QUEUE.remove(tid) - except Exception: - pass - - for tid in to_del_task_ids: - del TASK_STATES[tid] - del TASK_DATA[tid] - - # Delete oldest folder if disk space is becoming sparse - if DISK_SPACE_LIMIT >= 0 and len(FINISHED_TASKS) > 0 and shutil.disk_usage('result/')[2] < DISK_SPACE_LIMIT: - tid = FINISHED_TASKS.pop(0) - try: - 
p = f'result/{tid}' - print(f'REMOVING OLD TASK RESULT: {p}') - shutil.rmtree(p) - except FileNotFoundError: - pass - except: - if client_process.poll() is None: - # client_process.terminate() - client_process.kill() - await runner.cleanup() - raise - -if __name__ == '__main__': - from ..args import parser - - args = parser.parse_args() - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - runner, site = loop.run_until_complete(dispatch(args.host, args.port, translation_params=vars(args))) - except KeyboardInterrupt: - pass diff --git a/manga_translator/server/ws.proto b/manga_translator/server/ws.proto deleted file mode 100644 index 61367b988..000000000 --- a/manga_translator/server/ws.proto +++ /dev/null @@ -1,36 +0,0 @@ -syntax = "proto3"; - -package ws; - -message NewTask { - string id = 1; - - string source_image = 2; - - string target_language = 3; - string detector = 4; - string direction = 5; - string translator = 6; - string size = 7; - - string translation_mask = 8; -} - -message Status { - string id = 1; - string status = 2; -} - -message FinishTask { - string id = 1; - bool success = 2; - bool has_translation_mask = 3; -} - -message WebSocketMessage { - oneof message { - NewTask new_task = 1; - Status status = 2; - FinishTask finish_task = 3; - } -} diff --git a/manga_translator/server/ws_pb2.py b/manga_translator/server/ws_pb2.py deleted file mode 100644 index 7fcfddfd8..000000000 --- a/manga_translator/server/ws_pb2.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: ws.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x08ws.proto\x12\x02ws\"\xe3\x01\n\x07NewTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x0csource_image\x18\x02 \x01(\t\x12\x1b\n\x13source_image_bearer\x18\t \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x10\n\x08\x64\x65tector\x18\x04 \x01(\t\x12\x11\n\tdirection\x18\x05 \x01(\t\x12\x12\n\ntranslator\x18\x06 \x01(\t\x12\x0c\n\x04size\x18\x07 \x01(\t\x12\x18\n\x10translation_mask\x18\x08 \x01(\t\x12\x1f\n\x17translation_mask_bearer\x18\n \x01(\t\"$\n\x06Status\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\t\"G\n\nFinishTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12\x1c\n\x14has_translation_mask\x18\x03 \x01(\x08\"\x83\x01\n\x10WebSocketMessage\x12\x1f\n\x08new_task\x18\x01 \x01(\x0b\x32\x0b.ws.NewTaskH\x00\x12\x1c\n\x06status\x18\x02 \x01(\x0b\x32\n.ws.StatusH\x00\x12%\n\x0b\x66inish_task\x18\x03 \x01(\x0b\x32\x0e.ws.FinishTaskH\x00\x42\t\n\x07messageb\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ws_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _globals['_NEWTASK']._serialized_start=17 - _globals['_NEWTASK']._serialized_end=244 - _globals['_STATUS']._serialized_start=246 - _globals['_STATUS']._serialized_end=282 - _globals['_FINISHTASK']._serialized_start=284 - _globals['_FINISHTASK']._serialized_end=355 - _globals['_WEBSOCKETMESSAGE']._serialized_start=358 - _globals['_WEBSOCKETMESSAGE']._serialized_end=489 -# 
@@protoc_insertion_point(module_scope) From aef0a275d83177732f0e2f4e29db9278a18c435b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:55:54 +0100 Subject: [PATCH 58/75] changed queue a bit --- server/instance.py | 8 ++++---- server/myqueue.py | 39 +++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/server/instance.py b/server/instance.py index e844289dd..90d6c4a1a 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,6 +1,5 @@ -import pickle from asyncio import Event, Lock -from typing import List, Optional +from typing import List from PIL import Image from pydantic import BaseModel @@ -49,10 +48,11 @@ async def find_executor(self) -> ExecutorInstance: instance.busy = True return instance - def free_executor(self, instance: ExecutorInstance): + async def free_executor(self, instance: ExecutorInstance): + from server.myqueue import task_queue instance.free_executor() self.event.set() self.event.clear() - + await task_queue.update_event() executor_instances: Executors = Executors() diff --git a/server/myqueue.py b/server/myqueue.py index b9174d936..267a97302 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,9 +1,8 @@ import asyncio -from typing import List, Dict +from typing import List, Dict, Optional from fastapi import HTTPException from starlette.requests import Request -from starlette.responses import StreamingResponse from manga_translator import Context from server.instance import executor_instances @@ -17,16 +16,19 @@ def __init__(self): def add_task(self, task): self.queue.append(task) - def get_pos(self, task): - return self.queue.index(task) - - def update_event(self): + def get_pos(self, task) -> Optional[int]: + try: + return self.queue.index(task) + except ValueError: + return None + async def update_event(self): + self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] 
self.queue_event.set() self.queue_event.clear() - def remove(self, task): + async def remove(self, task): self.queue.remove(task) - self.update_event() + await self.update_event() async def wait_for_event(self): await self.queue_event.wait() @@ -42,18 +44,23 @@ async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) + if not queue_pos: + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") # just for the logs if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() + await task_queue.update_event() if notify: return else: raise HTTPException(500, detail="User is no longer connected") #just for the logs + instance = await executor_instances.find_executor() - task_queue.remove(task) + await task_queue.remove(task) if notify: notify(4, b"") if notify: @@ -61,19 +68,11 @@ async def wait_in_queue(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - executor_instances.free_executor(instance) - task_queue.update_event() + await executor_instances.free_executor(instance) if notify: return else: return result else: - if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() - if notify: - return - else: - raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file From cc37ca2371586e70c837d4e7ee0123f9dabd8a1a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:17:49 +0100 Subject: [PATCH 59/75] custom queue 
element with deconstructor --- server/main.py | 28 ++++++----------- server/myqueue.py | 59 ++++++++++++++++++++++++++---------- server/request_extraction.py | 24 ++++++--------- 3 files changed, 62 insertions(+), 49 deletions(-) diff --git a/server/main.py b/server/main.py index 773908f41..5797a610e 100644 --- a/server/main.py +++ b/server/main.py @@ -1,24 +1,22 @@ import io import os import secrets +import shutil import signal import subprocess import sys from argparse import Namespace -from builtins import bytes -from typing import Union -from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from pathlib import Path from pydantic import BaseModel -from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json, Translation app = FastAPI() @@ -54,22 +52,11 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -async def parse_request( - req: Request, - image: Union[str, bytes] = Form(...), - config: str = Form(...), -): - if req.headers.get('content-type').startswith('multipart'): - config = json.loads(config) - return TranslateRequest(image=image, config=Config(**config)) - else: - return None @app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - json = to_json(ctx) - return JSONResponse(content=json) + return JSONResponse(content=to_json(ctx)) @app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): @@ -154,13 +141,16 @@ def prepare(args): nonce = args.nonce if args.start_instance: return start_translator_client_proc(args.host, args.port + 1, nonce, args) + folder_name= "upload-cache" + if os.path.exists(folder_name): + shutil.rmtree(folder_name) + os.makedirs(folder_name) #todo: restart if crash #todo: cache results #todo: cleanup cache -#todo: store images while in queue + #todo: add docs -#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 267a97302..77f681d60 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,32 +1,64 @@ import asyncio -from typing import List, Dict, Optional +import os +from typing import List, Optional +from PIL import Image from fastapi import HTTPException -from starlette.requests import Request +from fastapi.requests import Request -from manga_translator import Context +from manga_translator import Config from server.instance import executor_instances from server.sent_data_internal import NotifyType +class QueueElement: + req: Request + image: Image.Image | str + config: Config + + def __init__(self, req: Request, image: Image.Image, config: Config, length): + self.req = req + if length > 10: + #todo: store image in "upload-cache" folder + self.image = image + else: + self.image = image + self.config = config + + def get_image(self)-> Image: + if isinstance(self.image, str): + return Image.open(self.image) + else: + return self.image + + def __del__(self): + if isinstance(self.image, str): + os.remove(self.image) + + async def is_client_disconnected(self) -> bool: + if await self.req.is_disconnected(): + return True + return False + + class 
TaskQueue: def __init__(self): - self.queue: List[Context] = [] + self.queue: List[QueueElement] = [] self.queue_event: asyncio.Event = asyncio.Event() - def add_task(self, task): + def add_task(self, task: QueueElement): self.queue.append(task) - def get_pos(self, task) -> Optional[int]: + def get_pos(self, task: QueueElement) -> Optional[int]: try: return self.queue.index(task) except ValueError: return None async def update_event(self): - self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] + self.queue = [task for task in self.queue if not await task.is_client_disconnected()] self.queue_event.set() self.queue_event.clear() - async def remove(self, task): + async def remove(self, task: QueueElement): self.queue.remove(task) await self.update_event() @@ -35,16 +67,11 @@ async def wait_for_event(self): task_queue = TaskQueue() -async def is_client_disconnected(request: Request) -> bool: - if await request.is_disconnected(): - return True - return False - -async def wait_in_queue(task, notify: NotifyType): +async def wait_in_queue(task: QueueElement, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) - if not queue_pos: + if queue_pos is None: if notify: return else: @@ -52,7 +79,7 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): - if await is_client_disconnected(task.req): + if await task.is_client_disconnected(): await task_queue.update_event() if notify: return diff --git a/server/request_extraction.py b/server/request_extraction.py index 080d84cc4..decb64bff 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -12,7 +12,7 @@ from starlette.responses import StreamingResponse from manga_translator import Config, Context -from server.myqueue import task_queue, wait_in_queue +from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream class TranslateRequest(BaseModel): @@ -61,29 +61,25 @@ async def multi_content_type(request: Request): async def get_ctx(req: Request): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) - return await wait_in_queue(ctx, None) + task_queue.add_task(task) + + return await wait_in_queue(task, None) async def while_streaming(req: Request, transform): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) + task_queue.add_task(task) messages = asyncio.Queue() def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - 
asyncio.create_task(wait_in_queue(ctx, notify_internal)) + asyncio.create_task(wait_in_queue(task, notify_internal)) return streaming_response \ No newline at end of file From 6cd1819582dec208ac8f96b3b53f22ee5eb37f05 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:14:05 +0100 Subject: [PATCH 60/75] byte response with cpp&rs examples --- manga_translator/utils/textblock.py | 2 +- server/main.py | 13 ++-- server/response.cpp | 116 +++++++++++++++++++++++++++ server/response.rs | 101 ++++++++++++++++++++++++ server/to_json.py | 117 +++++++++++++++++----------- 5 files changed, 297 insertions(+), 52 deletions(-) create mode 100644 server/response.cpp create mode 100644 server/response.rs diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py index 8c2f390f5..5549c30a1 100644 --- a/manga_translator/utils/textblock.py +++ b/manga_translator/utils/textblock.py @@ -46,7 +46,7 @@ def __init__(self, lines: List[Tuple[int, int, int, int]], texts: List[str] = None, language: str = 'unknown', font_size: float = -1, - angle: int = 0, + angle: float = 0, translation: str = "", fg_color: Tuple[float] = (0, 0, 0), bg_color: Tuple[float] = (0, 0, 0), diff --git a/server/main.py b/server/main.py index 5797a610e..44f7b77a3 100644 --- a/server/main.py +++ b/server/main.py @@ -17,7 +17,7 @@ from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json, Translation +from server.to_json import to_translation, TranslationResponse app = FastAPI() nonce = None @@ -50,17 +50,20 @@ def transform_to_image(ctx): return img_byte_arr.getvalue() def transform_to_json(ctx): - return str(to_json(ctx)).encode("utf-8") + return to_translation(ctx).model_dump_json().encode("utf-8") +def transform_to_bytes(ctx): + return to_translation(ctx).to_bytes() 
-@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") +@app.post("/translate/json", response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - return JSONResponse(content=to_json(ctx)) + return to_translation(ctx) @app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) + return StreamingResponse(content=to_translation(ctx).to_bytes()) @app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) async def image(req: Request) -> StreamingResponse: @@ -77,7 +80,7 @@ async def stream_json(req: Request) -> StreamingResponse: @app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_bytes) @app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_image(req: Request) -> StreamingResponse: diff --git a/server/response.cpp b/server/response.cpp new file mode 100644 index 000000000..3595856a3 --- /dev/null +++ b/server/response.cpp @@ -0,0 +1,116 @@ 
+#include +#include +#include +#include +#include +#include +#include + +struct Color { + uint8_t fg[3]; + uint8_t bg[3]; +}; + +struct Translation { + uint32_t min_x; + uint32_t min_y; + uint32_t max_x; + uint32_t max_y; + bool is_bulleted_list; + uint32_t angle; + float prob; + Color text_color; + std::unordered_map text; + std::vector background; +}; + +struct TranslationResponse { + std::vector translations; +}; + +uint32_t read_u32(const std::vector& bytes, size_t& offset) { + uint32_t value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = value; + offset += 4; + return value; +} + +float read_f32(const std::vector& bytes, size_t& offset) { + float value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = *reinterpret_cast(&value); + offset += 4; + return value; +} + +uint8_t read_u8(const std::vector& bytes, size_t& offset) { + uint8_t value = bytes[offset]; + offset += 1; + return value; +} + +bool read_bool(const std::vector& bytes, size_t& offset) { + bool value = bytes[offset] != 0; + offset += 1; + return value; +} + +std::vector read_chunk(const std::vector& bytes, size_t& offset) { + uint32_t size = read_u32(bytes, offset); + std::vector value(bytes.begin() + offset, bytes.begin() + offset + size); + offset += size; + return value; +} + +std::string read_str(const std::vector& bytes, size_t& offset) { + std::vector chunk = read_chunk(bytes, offset); + std::string result(chunk.begin(), chunk.end()); + return result; +} + +std::unordered_map read_map(const std::vector& bytes, size_t& offset) { + uint32_t count = read_u32(bytes, offset); + std::unordered_map map; + for (uint32_t i = 0; i < count; ++i) { + std::string key = read_str(bytes, offset); + std::string value = read_str(bytes, offset); + map[key] = value; + } + return map; +} + +Translation from_bytes(const std::vector& bytes, size_t& offset) { + Translation translation; + translation.min_x = read_u32(bytes, offset); + translation.min_y = read_u32(bytes, offset); + 
translation.max_x = read_u32(bytes, offset); + translation.max_y = read_u32(bytes, offset); + translation.is_bulleted_list = read_bool(bytes, offset); + translation.angle = read_u32(bytes, offset); + translation.prob = read_f32(bytes, offset); + for (int i = 0; i < 3; ++i) { + translation.text_color.fg[i] = read_u8(bytes, offset); + translation.text_color.bg[i] = read_u8(bytes, offset); + } + translation.text = read_map(bytes, offset); + translation.background = read_chunk(bytes, offset); + return translation; +} + +TranslationResponse from_bytes_response(const std::vector& bytes) { + size_t offset = 0; + uint32_t count = read_u32(bytes, offset); + TranslationResponse response; + + for (uint32_t i = 0; i < count; ++i) { + response.translations.push_back(from_bytes(bytes, offset)); + } + return response; +} + +int main() { + std::vector bytes = {/* byte data here */}; + TranslationResponse data = from_bytes_response(bytes); + return 0; +} diff --git a/server/response.rs b/server/response.rs new file mode 100644 index 000000000..81fa07810 --- /dev/null +++ b/server/response.rs @@ -0,0 +1,101 @@ +#[derive(Debug)] +struct Color { + fg: [u8; 3], + bg: [u8; 3], +} + +#[derive(Debug)] +struct Translation { + min_x: u32, + min_y: u32, + max_x: u32, + max_y: u32, + is_bulleted_list: bool, + angle: u32, + prob: f32, + text_color: Color, + text: HashMap, + background: Vec, +} + +#[derive(Debug)] +struct TranslationResponse { + translations: Vec, +} + +impl TranslationResponse { + fn from_bytes(bytes: &[u8]) -> Self { + let mut offset = 0; + let v = (0..read_u32(bytes, &mut offset)) + .map(|_| Translation::from_bytes(bytes, &mut offset)) + .collect::>(); + Self { translations: v } + } +} + +fn read_u32(bytes: &[u8], offset: &mut usize) -> u32 { + let value = u32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value +} + +impl Translation { + fn from_bytes(bytes: &[u8], offset: &mut usize) -> Self { + let read_f32 = |bytes: &[u8], offset: &mut 
usize| -> f32 { + let value = f32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value + }; + + let read_u8 = |bytes: &[u8], offset: &mut usize| -> u8 { + let value = bytes[*offset]; + *offset += 1; + value + }; + + let read_bool = |bytes: &[u8], offset: &mut usize| -> bool { + let value = bytes[*offset] != 0; + *offset += 1; + value + }; + + let read_chunk = |bytes: &[u8], offset: &mut usize| -> Vec { + let size = read_u32(bytes, offset); + let value = &bytes[*offset..*offset + size as usize]; + *offset += size as usize; + value.to_vec() + }; + let read_str = |bytes: &[u8], offset: &mut usize| -> String { + String::from_utf8(read_chunk(bytes, offset)).expect("Invalid UTF-8") + }; + let read_map = |bytes: &[u8], offset: &mut usize| -> HashMap { + (0..read_u32(bytes, offset)) + .into_iter() + .map(|_| (read_str(bytes, offset), read_str(bytes, offset))) + .collect::>() + }; + Self { + min_x: read_u32(bytes, offset), + min_y: read_u32(bytes, offset), + max_x: read_u32(bytes, offset), + max_y: read_u32(bytes, offset), + is_bulleted_list: read_bool(bytes, offset), + angle: read_u32(bytes, offset), + prob: read_f32(bytes, offset), + text_color: Color { + fg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + bg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + }, + text: read_map(bytes, offset), + background: read_chunk(bytes, offset), + } + } +} diff --git a/server/to_json.py b/server/to_json.py index 911718d8d..f6bcdc6e2 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,8 +1,10 @@ import base64 +import struct from typing import Dict, List import cv2 -from pydantic import BaseModel +import numpy as np +from pydantic import BaseModel, Field from manga_translator import Context from manga_translator.utils import TextBlock @@ -22,60 +24,83 @@ #img_rendered: array #mask_raw: array #mask:array -def to_json(ctx: Context): + +class TextColor(BaseModel): + 
fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + minX: int + minY: int + maxX: int + maxY: int + is_bulleted_list: bool + angle: float | int + prob: float + text_color: TextColor + text: dict[str, str] + background: np.ndarray = Field( + ..., + description="Background image encoded as a base64 string", + examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] + ) + + class Config: + arbitrary_types_allowed = True + json_encoders = { + np.ndarray: lambda array: Translation.encode_background(array) + } + + @staticmethod + def encode_background(array: np.ndarray) -> str: + retval, buffer = cv2.imencode('.png', array) + jpg_as_text = base64.b64encode(buffer).decode("utf-8") + background = f"data:image/png;base64,{jpg_as_text}" + return background + + def to_bytes(self): + coords_bytes = struct.pack('4i', self.minX, self.minY, self.maxX, self.maxY) + is_bulleted_list_byte = struct.pack('?', self.is_bulleted_list) + angle_bytes = struct.pack('f', float(self.angle) if isinstance(self.angle, int) else self.angle) + prob_bytes = struct.pack('f', self.prob) + fg = struct.pack('3B', self.text_color.fg[0], self.text_color.fg[1], self.text_color.fg[2]) + bg = struct.pack('3B', self.text_color.bg[0], self.text_color.bg[1], self.text_color.bg[2]) + text_bytes = struct.pack('i', len(self.text.items())) + for key, value in self.text.items(): + text_bytes += struct.pack('I', len(key.encode('utf-8'))) + key.encode('utf-8') + text_bytes += struct.pack('I', len(value.encode('utf-8'))) + value.encode('utf-8') + background_bytes = struct.pack('I', len(self.background.tobytes())) + self.background.tobytes() + return coords_bytes +is_bulleted_list_byte+ angle_bytes+prob_bytes+fg + bg + text_bytes + background_bytes + +class TranslationResponse(BaseModel): + translations: List[Translation] + + def to_bytes(self): + items= [v.to_bytes() for v in self.translations] + return struct.pack('i', len(items)) + b''.join(items) + +def to_translation(ctx: 
Context) -> TranslationResponse: text_regions:list[TextBlock] = ctx.text_regions inpaint = ctx.img_inpainted translations:Dict[str, List[str]] = ctx.translations results = [] - if 'overlay_ext' in ctx: - #todo: unreachable - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' for i, blk in enumerate(text_regions): minX, minY, maxX, maxY = blk.xyxy + text_region = text_regions[i] if 'translations' in ctx: trans = {key: value[i] for key, value in translations.items()} else: trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' + overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] + trans[text_region.source_lang] = text_regions[i].text text_region.adjust_bg_color = False color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - return results - -class TextColor(BaseModel): - fg: tuple[int, int, int] - bg: tuple[int, int, int] - -class Translation(BaseModel): - text: dict[str, str] - minX: int - minY: int - maxX: int - maxY: int - textColor:TextColor - language: str - background: str + results.append(Translation(text=trans, + minX=int(minX),minY=int(minY),maxX=int(maxX),maxY=int(maxY), + background=inpaint[minY:maxY, minX:maxX], + is_bulleted_list=text_region.is_bulleted_list, + text_color=TextColor(fg=color1.tolist(), bg=color2.tolist()), + prob=text_region.prob, + angle=text_region.angle + )) + #todo: background angle + return TranslationResponse(translations=results) From cf7412e4f676927b83e2fb0eb4ef7940c1dc6262 Mon Sep 17 00:00:00 2001 
From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:15:38 +0100 Subject: [PATCH 61/75] move files into examples --- {server => examples}/response.cpp | 0 {server => examples}/response.rs | 0 server/main.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename {server => examples}/response.cpp (100%) rename {server => examples}/response.rs (100%) diff --git a/server/response.cpp b/examples/response.cpp similarity index 100% rename from server/response.cpp rename to examples/response.cpp diff --git a/server/response.rs b/examples/response.rs similarity index 100% rename from server/response.rs rename to examples/response.rs diff --git a/server/main.py b/server/main.py index 44f7b77a3..3beef79c5 100644 --- a/server/main.py +++ b/server/main.py @@ -60,7 +60,7 @@ async def json(req: Request): ctx = await get_ctx(req) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") +@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") async def bytes(req: Request): ctx = await get_ctx(req) return StreamingResponse(content=to_translation(ctx).to_bytes()) From aac997e00f3c69f4a3349d056666bf6c1d42a0f8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:44:01 +0100 Subject: [PATCH 62/75] fix docs --- server/index.html | 2 +- server/main.py | 98 +++++++++++++++++++++++------------- server/manual.html | 2 +- server/request_extraction.py | 40 ++++----------- server/sent_data_internal.py | 1 - server/to_json.py | 10 ++-- 6 files changed, 81 insertions(+), 72 deletions(-) diff --git a/server/index.html b/server/index.html index edf617609..fd1a49a11 100644 --- a/server/index.html +++ 
b/server/index.html @@ -357,7 +357,7 @@

Image/Manga Translator

} const uploadWithProgress = async (formData) => { try { - const response = await fetch(`${BASE_URI}translate/image/stream`, { + const response = await fetch(`${BASE_URI}translate/with-form/image/stream`, { method: 'POST', body: formData, }); diff --git a/server/main.py b/server/main.py index 3beef79c5..5563f0ce5 100644 --- a/server/main.py +++ b/server/main.py @@ -7,16 +7,15 @@ import sys from argparse import Namespace -from fastapi import FastAPI, Request, HTTPException, Header, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from fastapi.responses import StreamingResponse, HTMLResponse from pathlib import Path -from pydantic import BaseModel - +from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming +from server.request_extraction import get_ctx, while_streaming, TranslateRequest from server.to_json import to_translation, TranslationResponse app = FastAPI() @@ -30,14 +29,7 @@ allow_headers=["*"], ) -class TranslateRequestForm(BaseModel): - """This request can be a multipart or a json request""" - image: UploadFile - """can be a url, base64 encoded image or a multipart image""" - config: str - """in case it is a multipart this needs to be a string(json.stringify)""" - -@app.post("/register", response_description="no response") +@app.post("/register", response_description="no response", tags=["internal-api"]) async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") @@ -55,48 +47,85 @@ def transform_to_json(ctx): def transform_to_bytes(ctx): return to_translation(ctx).to_bytes() -@app.post("/translate/json", 
response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension")
-async def json(req: Request):
-    ctx = await get_ctx(req)
+@app.post("/translate/json", response_model=TranslationResponse, tags=["api", "json"],response_description="json structure inspired by the ichigo translator extension")
+async def json(req: Request, data: TranslateRequest):
+    ctx = await get_ctx(req, data.config, data.image)
+    return to_translation(ctx)
+
+@app.post("/translate/bytes", response_class=StreamingResponse, tags=["api", "json"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
+async def bytes(req: Request, data: TranslateRequest):
+    ctx = await get_ctx(req, data.config, data.image)
+    return StreamingResponse(content=to_translation(ctx).to_bytes())
+
+@app.post("/translate/image", response_description="the result image", tags=["api", "json"],response_class=StreamingResponse)
+async def image(req: Request, data: TranslateRequest) -> StreamingResponse:
+    ctx = await get_ctx(req, data.config, data.image)
+    img_byte_arr = io.BytesIO()
+    ctx.result.save(img_byte_arr, format="PNG")
+    img_byte_arr.seek(0)
+
+    return StreamingResponse(img_byte_arr, media_type="image/png")
+
+@app.post("/translate/json/stream", response_class=StreamingResponse,tags=["api", "json"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_json(req: Request, data: TranslateRequest) -> StreamingResponse:
+    return await while_streaming(req, transform_to_json, data.config, data.image)
+
+@app.post("/translate/bytes/stream", response_class=StreamingResponse, tags=["api", "json"],response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_bytes(req: Request, data: TranslateRequest)-> StreamingResponse:
+    return await while_streaming(req, transform_to_bytes,data.config, data.image)
+
+@app.post("/translate/image/stream", response_class=StreamingResponse, tags=["api", "json"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_image(req: Request, data: TranslateRequest) -> StreamingResponse:
+    return await while_streaming(req, transform_to_image, data.config, data.image)
+
+@app.post("/translate/with-form/json", response_model=TranslationResponse, tags=["api", "form"],response_description="json structure inspired by the ichigo translator extension")
+async def json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")):
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     return to_translation(ctx)
 
-@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
-async def bytes(req: Request):
-    ctx = await get_ctx(req)
+@app.post("/translate/with-form/bytes", response_class=StreamingResponse, tags=["api", "form"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
+async def bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")):
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     return StreamingResponse(content=to_translation(ctx).to_bytes())
 
-@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse)
-async def image(req: Request) -> StreamingResponse:
-    ctx = await get_ctx(req)
+@app.post("/translate/with-form/image", response_description="the result image", tags=["api", "form"],response_class=StreamingResponse)
+async def image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     img_byte_arr = io.BytesIO()
     ctx.result.save(img_byte_arr, format="PNG")
     img_byte_arr.seek(0)
 
     return StreamingResponse(img_byte_arr, media_type="image/png")
 
-@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_json(req: Request) -> StreamingResponse:
-    return await while_streaming(req, transform_to_json)
+@app.post("/translate/with-form/json/stream", response_class=StreamingResponse, tags=["api", "form"],response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_json, Config.parse_raw(config), img)
 
-@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_bytes(req: Request)-> StreamingResponse:
-    return await while_streaming(req, transform_to_bytes)
+@app.post("/translate/with-form/bytes/stream", response_class=StreamingResponse,tags=["api", "form"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}"))-> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_bytes, Config.parse_raw(config), img)
 
-@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_image(req: Request) -> StreamingResponse:
-    return await while_streaming(req, transform_to_image)
+@app.post("/translate/with-form/image/stream", response_class=StreamingResponse, tags=["api", "form"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_image, Config.parse_raw(config), img)
 
-@app.post("/queue-size", response_model=int)
+@app.post("/queue-size", response_model=int, tags=["api", "json"])
 async def queue_size() -> int:
     return len(task_queue.queue)
 
-@app.get("/", response_class=HTMLResponse)
+@app.get("/", response_class=HTMLResponse,tags=["ui"])
 async def index() -> HTMLResponse:
     html_file = Path("index.html")
     html_content = html_file.read_text()
     return 
HTMLResponse(content=html_content) -@app.get("/manual", response_class=HTMLResponse) +@app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): html_file = Path("manual.html") html_content = html_file.read_text() @@ -153,7 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: add docs #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/manual.html b/server/manual.html index 1335b0c51..2983feb64 100644 --- a/server/manual.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { + const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { method: 'POST', body: formData, /*headers: { diff --git a/server/request_extraction.py b/server/request_extraction.py index decb64bff..54516fa39 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -9,9 +9,9 @@ from PIL import Image from fastapi import Request, HTTPException from pydantic import BaseModel -from starlette.responses import StreamingResponse +from fastapi.responses import StreamingResponse -from manga_translator import Config, Context +from manga_translator import Config from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream @@ -19,7 +19,7 @@ class TranslateRequest(BaseModel): """This request can be a multipart or a json request""" image: bytes|str """can be a url, base64 encoded image or a multipart image""" - config: Config + config: Config = Config() """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -41,39 +41,17 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: raise HTTPException(status_code=422, detail=str(e)) -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - 
raise HTTPException(status_code=400, detail="Unsupported Content-Type") - -async def get_ctx(req: Request): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def get_ctx(req: Request, config: Config, image: str|bytes): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) return await wait_in_queue(task, None) -async def while_streaming(req: Request, transform): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def while_streaming(req: Request, transform, config: Config, image: bytes | str): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) messages = asyncio.Queue() diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 5720cb6c5..99f8c3730 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -1,4 +1,3 @@ -import asyncio import pickle from typing import Mapping, Optional, Callable diff --git a/server/to_json.py b/server/to_json.py index f6bcdc6e2..bdf1d4cab 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,10 +1,10 @@ import base64 import struct -from typing import Dict, List +from typing import Dict, List, Annotated import cv2 import numpy as np -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, WithJsonSchema from manga_translator import Context from manga_translator.utils import TextBlock @@ -24,6 +24,10 @@ #img_rendered: array #mask_raw: array #mask:array +NumpyNdarray = Annotated[ + np.ndarray, + WithJsonSchema({'type': 'string', "format": "base64","examples": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."]}), +] class TextColor(BaseModel): fg: tuple[int, int, int] @@ -39,7 +43,7 @@ class Translation(BaseModel): prob: float text_color: TextColor text: dict[str, str] - background: np.ndarray = Field( 
+ background: NumpyNdarray = Field( ..., description="Background image encoded as a base64 string", examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] From 589e0370bf360e82777b46b69a8bb00dad5245c7 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:08:02 +0100 Subject: [PATCH 63/75] configs --- server/index.html | 43 +++++++++++++++++++++++++++---------------- server/main.py | 2 -- server/manual.html | 9 +++++++++ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/server/index.html b/server/index.html index fd1a49a11..177885d79 100644 --- a/server/index.html +++ b/server/index.html @@ -33,10 +33,10 @@

Image/Manga Translator

@@ -47,7 +47,7 @@

Image/Manga Translator

Image/Manga Translator @@ -199,13 +199,13 @@

Image/Manga Translator

get fileUri() { return this.file ? URL.createObjectURL(this.file) : null }, - detectionResolution: 'M', - textDetector: 'auto', + detectionResolution: '1536', + textDetector: 'default', renderTextDirection: 'auto', translator: 'youdao', - validTranslators: ['youdao', 'baidu', 'google', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], + validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], getTranslatorName(key) { - if (key == 'none') + if (key === 'none') return "No Text" return key ? key[0].toUpperCase() + key.slice(1) : ""; }, @@ -307,11 +307,22 @@

Image/Manga Translator

const formData = new FormData() formData.append('image', this.file) - /*formData.append('size', this.detectionResolution) - formData.append('detector', this.textDetector) - formData.append('direction', this.renderTextDirection) - formData.append('translator', this.translator) - formData.append('target_lang', this.targetLanguage)*/ + const config = `{ + "detector": { + "detector": "${this.textDetector}", + "detection_size": ${this.detectionResolution} + }, + "render": { + "direction": "${this.renderTextDirection}" + }, + "translator": { + "translator": "${this.translator}", + "target_lang": "${this.targetLanguage}" + } + }`; + + formData.append('config', config) + const processChunk = (value) => { if (this.error) return; diff --git a/server/main.py b/server/main.py index 5563f0ce5..20d3d5833 100644 --- a/server/main.py +++ b/server/main.py @@ -182,8 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: enable config in html pages - if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/server/manual.html b/server/manual.html index 2983feb64..45c801070 100644 --- a/server/manual.html +++ b/server/manual.html @@ -15,6 +15,8 @@

Upload Image and Translate

+
+

@@ -55,10 +57,12 @@

let submitButton = document.getElementById("submit-button") const fileInput = document.getElementById('fileInput'); const generateImage = document.getElementById('generate-image'); + const configField = document.getElementById('config-json'); submitButton.classList.add("hidden"); fileInput.classList.add("hidden"); generateImage.classList.add("hidden"); + configField.classList.add("hidden"); statusField.innerHTML = ''; errorField.innerHTML = ''; const file = fileInput.files[0]; @@ -70,6 +74,10 @@

const formData = new FormData(); formData.append('image', file); + const text = configField.innerText; + if (text.length > 2) { + formData.append('config', text) + } try { const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { @@ -94,6 +102,7 @@

submitButton.classList.remove("hidden"); fileInput.classList.remove("hidden"); generateImage.classList.remove("hidden"); + configField.classList.remove("hidden"); } } From 6c6a4b558ae583437a34a1cb8ffc96b39dd9102a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:10:34 +0100 Subject: [PATCH 64/75] delete old code & move file --- manga_translator/__main__.py | 12 +----------- manga_translator/args.py | 12 ------------ manga_translator/mode/__init__.py | 0 manga_translator/{ => mode}/share.py | 0 4 files changed, 1 insertion(+), 23 deletions(-) create mode 100644 manga_translator/mode/__init__.py rename manga_translator/{ => mode}/share.py (100%) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index eb348798f..dca4740a7 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -3,7 +3,6 @@ import logging from argparse import Namespace -from manga_translator.share import MangaShare from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -65,26 +64,17 @@ async def dispatch(args: Namespace): except Exception : pass - elif args.mode == 'web': - from .server.web_main import dispatch - await dispatch(args.host, args.port, translation_params=args_dict) - - elif args.mode == 'web_client': - from manga_translator.mode.web import MangaTranslatorWeb - translator = MangaTranslatorWeb(args_dict) - await translator.listen(args_dict) - elif args.mode == 'ws': from manga_translator.mode.ws import MangaTranslatorWS translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) elif args.mode == 'shared': + from manga_translator.mode.share import MangaShare translator = MangaShare(args_dict) await translator.listen(args_dict) - if __name__ == '__main__': args = None init_logging() diff --git a/manga_translator/args.py b/manga_translator/args.py index 562059ad6..472311c6a 100644 --- a/manga_translator/args.py +++ 
b/manga_translator/args.py @@ -107,18 +107,6 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') parser.add_argument('--config-file', default=None, type=str, help='path to the config file') -# Web mode -parser_web = subparsers.add_parser('web', help='Run in web service mode') -parser_web.add_argument('--host', default='127.0.0.1', type=str, help='Host for web service') -parser_web.add_argument('--port', default=5003, type=int, help='Port for web service') -parser_web.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') - -# Web client mode -parser_web_client = subparsers.add_parser('web_client', help='Run in web client mode') -parser_web_client.add_argument('--host', default='127.0.0.1', type=str, help='Host for web client') -parser_web_client.add_argument('--port', default=5003, type=int, help='Port for web client') -parser_web_client.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web client communication') - # WebSocket mode parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') parser_ws.add_argument('--host', default='127.0.0.1', type=str, help='Host for WebSocket service') diff --git a/manga_translator/mode/__init__.py b/manga_translator/mode/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/manga_translator/share.py b/manga_translator/mode/share.py similarity index 100% rename from manga_translator/share.py rename to manga_translator/mode/share.py From cb98b6dafa5d05d40f0c6bf2f675cc4a34705d63 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:17:41 +0100 Subject: [PATCH 65/75] gpt 3.5 fix --- manga_translator/config.py | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index adfe367bd..17e51111e 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -22,7 +22,10 @@ def __init__(self, string: str): self.target_lang = None for g in string.split(';'): trans, lang = g.split(':') - translator = Translator[trans] + if trans == "gpt3.5": + translator = Translator["gpt3_5"] + else: + translator = Translator[trans] if translator not in TRANSLATORS: raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) if lang not in VALID_LANGUAGES: From e4062281696f2c570becb9e6ecc486f591de49d3 Mon Sep 17 00:00:00 2001 From: zyddnys Date: Fri, 29 Nov 2024 21:55:50 -0500 Subject: [PATCH 66/75] fix #703 --- manga_translator/rendering/text_render.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py index a8be534e7..da6301a9c 100644 --- a/manga_translator/rendering/text_render.py +++ b/manga_translator/rendering/text_render.py @@ -346,10 +346,7 @@ def put_text_vertical(font_size: int, text: str, h: int, alignment: str, fg: Tup canvas_border = np.clip(canvas_border, 0, 255) line_box = add_color(canvas_text, fg, canvas_border, bg) # rect - if bg is None : - x, y, w, h = cv2.boundingRect(canvas_text) - else : - x, y, w, h = cv2.boundingRect(canvas_border) + x, y, w, h = cv2.boundingRect(canvas_border) return line_box[y:y+h, x:x+w] def select_hyphenator(lang: str): From 1fc572cd21d541a87511d43ad99bd5223a8bb25e Mon Sep 17 00:00:00 2001 From: zyddnys Date: Fri, 29 Nov 2024 21:57:38 -0500 Subject: [PATCH 67/75] fix --- manga_translator/rendering/text_render.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py index da6301a9c..64eebe632 100644 --- a/manga_translator/rendering/text_render.py 
+++ b/manga_translator/rendering/text_render.py @@ -733,12 +733,8 @@ def put_text_horizontal(font_size: int, text: str, width: int, height: int, alig canvas_border = np.clip(canvas_border, 0, 255) line_box = add_color(canvas_text, fg, canvas_border, bg) - # rect - if bg is None : - x, y, w, h = cv2.boundingRect(canvas_text) - else : - x, y, w, h = cv2.boundingRect(canvas_border) - return line_box[y:y+height, x:x+width] + x, y, w, h = cv2.boundingRect(canvas_border) + return line_box[y:y+h, x:x+w] # def put_text(img: np.ndarray, text: str, line_count: int, x: int, y: int, w: int, h: int, fg: Tuple[int, int, int], bg: Optional[Tuple[int, int, int]]): # pass From 260dd0afbce95fa36b9cc0461dfb7e7f3e04e3c0 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 14:16:12 +0100 Subject: [PATCH 68/75] some docs --- README.md | 819 +++++++++++++++++++++++------------ README_CN.md | 139 +----- examples/config-example.json | 61 +++ manga_translator/__main__.py | 6 + manga_translator/args.py | 2 + 5 files changed, 630 insertions(+), 397 deletions(-) create mode 100644 examples/config-example.json diff --git a/README.md b/README.md index 50f7bf7aa..989316cba 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Some manga/images will never be translated, therefore this project is born. - [Options](#options) - [Language Code Reference](#language-code-reference) - [Translators Reference](#translators-reference) + - [Config Documentation](#config-file) - [GPT Config Reference](#gpt-config-reference) - [Using Gimp for rendering](#using-gimp-for-rendering) - [Api Documentation](#api-documentation) @@ -270,43 +271,22 @@ make run-web-server ## Usage -### Batch mode (default) +### Local mode ```bash -# use `--use-gpu` for speedup if you have a compatible NVIDIA GPU. -# use `--target-lang ` to specify a target language. -# use `--inpainter=none` to disable inpainting. 
-# use `--translator=none` if you only want to use inpainting (blank bubbles) # replace with the path to the image folder or file. -$ python -m manga_translator -v --translator=google -l ENG -i +$ python -m manga_translator local -v -i # results can be found under `-translated`. ``` -### Demo mode - -```bash -# saves singular image into /result folder for demonstration purposes -# use `--mode demo` to enable demo translation. -# replace with the path to the image file. -$ python -m manga_translator --mode demo -v --translator=google -l ENG -i -# result can be found in `result/`. -``` - ### Web Mode ```bash # use `--mode web` to start a web server. -$ python -m manga_translator -v --mode web --use-gpu +$ cd server && python main.py --use-gpu # the demo will be serving on http://127.0.0.1:5003 ``` -### Api Mode - -```bash -# use `--mode web` to start a web server. -$ python -m manga_translator -v --mode api --use-gpu -# the demo will be serving on http://127.0.0.1:5003 -``` ## Related Projects GUI implementation: [BallonsTranslator](https://github.com/dmMaze/BallonsTranslator) @@ -318,7 +298,7 @@ Detector: - JPN: ?? - CHS: ?? - KOR: ?? -- Using `--detector ctd` can increase the amount of text lines detected +- Using `{"detector":{"detector": "ctd"}}` can increase the amount of text lines detected OCR: @@ -352,139 +332,26 @@ Colorizer: **mc2** ### Options ```text --h, --help show this help message and exit --m, --mode {demo,batch,web,web_client,ws,api} - Run demo in single image demo mode (demo), batch - translation mode (batch), web service mode (web) --i, --input INPUT [INPUT ...] 
Path to an image file if using demo mode, or path to an - image folder if using batch mode --o, --dest DEST Path to the destination folder for translated images in - batch mode --l, --target-lang {CHS,CHT,CSY,NLD,ENG,FRA,DEU,HUN,ITA,JPN,KOR,PLK,PTB,ROM,RUS,ESP,TRK,UKR,VIN,ARA,CNR,SRP,HRV,THA,IND,FIL} - Destination language --v, --verbose Print debug info and save intermediate images in result - folder --f, --format {png,webp,jpg,xcf,psd,pdf} Output format of the translation. ---attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite - times. ---ignore-errors Skip image on encountered error. ---overwrite Overwrite already translated images in batch mode. ---skip-no-text Skip image without text (Will not be saved). ---model-dir MODEL_DIR Model directory (by default ./models in project root) ---use-gpu Turn on/off gpu ---use-gpu-limited Turn on/off gpu (excluding offline translator) ---detector {default,ctd,craft,none} Text detector used for creating a text mask from an - image, DO NOT use craft for manga, it's not designed - for it ---ocr {32px,48px,48px_ctc,mocr} Optical character recognition (OCR) model to use ---use-mocr-merge Use bbox merge when Manga OCR inference. ---inpainter {default,lama_large,lama_mpe,sd,none,original} - Inpainting model to use ---upscaler {waifu2x,esrgan,4xultrasharp} Upscaler to use. --upscale-ratio has to be set for it - to take effect ---upscale-ratio UPSCALE_RATIO Image upscale ratio applied before detection. Can - improve text detection. ---colorizer {mc2} Colorization model to use. ---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big,sakura} - Language translator to use ---translator-chain TRANSLATOR_CHAIN Output of one translator goes in another. Example: - --translator-chain "google:JPN;sugoi:ENG". ---selective-translation SELECTIVE_TRANSLATION - Select a translator based on detected language in - image. 
Note the first translation service acts as - default if the language isn't defined. Example: - --translator-chain "google:JPN;sugoi:ENG". ---revert-upscaling Downscales the previously upscaled image after - translation back to original size (Use with --upscale- - ratio). ---detection-size DETECTION_SIZE Size of image used for detection ---det-rotate Rotate the image for detection. Might improve - detection. ---det-auto-rotate Rotate the image for detection to prefer vertical - textlines. Might improve detection. ---det-invert Invert the image colors for detection. Might improve - detection. ---det-gamma-correct Applies gamma correction for detection. Might improve - detection. ---unclip-ratio UNCLIP_RATIO How much to extend text skeleton to form bounding box ---box-threshold BOX_THRESHOLD Threshold for bbox generation ---text-threshold TEXT_THRESHOLD Threshold for text detection ---min-text-length MIN_TEXT_LENGTH Minimum text length of a text region ---no-text-lang-skip Dont skip text that is seemingly already in the target - language. ---inpainting-size INPAINTING_SIZE Size of image used for inpainting (too large will - result in OOM) ---inpainting-precision {fp32,fp16,bf16} Inpainting precision for lama, use bf16 while you can. ---colorization-size COLORIZATION_SIZE Size of image used for colorization. Set to -1 to use - full image size ---denoise-sigma DENOISE_SIGMA Used by colorizer and affects color strength, range - from 0 to 255 (default 30). -1 turns it off. ---mask-dilation-offset MASK_DILATION_OFFSET By how much to extend the text mask to remove left-over - text pixels of the original image. ---font-size FONT_SIZE Use fixed font size for rendering ---font-size-offset FONT_SIZE_OFFSET Offset font size by a given amount, positive number - increase font size and vice versa ---font-size-minimum FONT_SIZE_MINIMUM Minimum output font size. Default is - image_sides_sum/200 ---font-color FONT_COLOR Overwrite the text fg/bg color detected by the OCR - model. 
Use hex string without the "#" such as FFFFFF - for a white foreground or FFFFFF:000000 to also have a - black background around the text. ---line-spacing LINE_SPACING Line spacing is font_size * this value. Default is 0.01 - for horizontal text and 0.2 for vertical. ---force-horizontal Force text to be rendered horizontally ---force-vertical Force text to be rendered vertically ---align-left Align rendered text left ---align-center Align rendered text centered ---align-right Align rendered text right ---uppercase Change text to uppercase ---lowercase Change text to lowercase ---no-hyphenation If renderer should be splitting up words using a hyphen - character (-) ---manga2eng Render english text translated from manga with some - additional typesetting. Ignores some other argument - options ---gpt-config GPT_CONFIG Path to GPT config file, more info in README ---use-mtpe Turn on/off machine translation post editing (MTPE) on - the command line (works only on linux right now) ---save-text Save extracted text and translations into a text file. ---save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path. ---filter-text FILTER_TEXT Filter regions by their text with a regex. Example - usage: --text-filter ".*badtext.*" ---pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line, - Comments can be added with `#` and `//`. - usage: //Example - dog cat #Example - abc def - abc ---post-dict FILE_PATH Path to the post-translation dictionary file. Same as above. ---skip-lang Skip translation if source image is one of the provide languages, - use comma to separate multiple languages. Example: JPN,ENG ---prep-manual Prepare for manual typesetting by outputting blank, - inpainted images, plus copies of the original for - reference ---font-path FONT_PATH Path to font file ---gimp-font GIMP_FONT Font family to use for gimp rendering. 
---host HOST Used by web module to decide which host to attach to ---port PORT Used by web module to decide which port to attach to ---nonce NONCE Used by web module as secret for securing internal web - server communication ---ws-url WS_URL Server URL for WebSocket mode ---save-quality SAVE_QUALITY Quality of saved JPEG image, range from 0 to 100 with - 100 being best ---ignore-bubble IGNORE_BUBBLE The threshold for ignoring text in non bubble areas, - with valid values ranging from 1 to 50, does not ignore - others. Recommendation 5 to 10. If it is too low, - normal bubble areas may be ignored, and if it is too - large, non bubble areas may be considered normal - bubbles +-h, --help show this help message and exit +-v, --verbose Print debug info and save intermediate images in result folder +--attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite times. +--ignore-errors Skip image on encountered error. +--model-dir MODEL_DIR Model directory (by default ./models in project root) +--use-gpu Turn on/off gpu (auto switch between mps and cuda) +--use-gpu-limited Turn on/off gpu (excluding offline translator) +--font-path FONT_PATH Path to font file +--pre-dict PRE_DICT Path to the pre-translation dictionary file +--post-dict POST_DICT Path to the post-translation dictionary file +--kernel-size KERNEL_SIZE Set the convolution kernel size of the text erasure area to + completely clean up text residues +--config-file CONFIG_FILE path to the config file ``` ### Language Code Reference -Used by the `--target-lang` or `-l` argument. +Used by the `translator/language` in the config ```yaml CHS: Chinese (Simplified) @@ -546,7 +413,535 @@ DEEPL_AUTH_KEY=xxxxxxxx... - Offline: Whether the translator can be used offline. 
- Sugoi is created by mingshiba, please support him in https://www.patreon.com/mingshiba +### Config file +run `python -m manga_translator config-help >> config-info.json` + +an example can be found in example/config-example.json +```json +{ + "$defs": { + "Alignment": { + "enum": [ + "auto", + "left", + "center", + "right" + ], + "title": "Alignment", + "type": "string" + }, + "Colorizer": { + "enum": [ + "none", + "mc2" + ], + "title": "Colorizer", + "type": "string" + }, + "ColorizerConfig": { + "properties": { + "colorization_size": { + "default": 576, + "title": "Colorization Size", + "type": "integer" + }, + "denoise_sigma": { + "default": 30, + "title": "Denoise Sigma", + "type": "integer" + }, + "colorizer": { + "$ref": "#/$defs/Colorizer", + "default": "none" + } + }, + "title": "ColorizerConfig", + "type": "object" + }, + "Detector": { + "enum": [ + "default", + "dbconvnext", + "ctd", + "craft", + "none" + ], + "title": "Detector", + "type": "string" + }, + "DetectorConfig": { + "properties": { + "detector": { + "$ref": "#/$defs/Detector", + "default": "default" + }, + "detection_size": { + "default": 1536, + "title": "Detection Size", + "type": "integer" + }, + "text_threshold": { + "default": 0.5, + "title": "Text Threshold", + "type": "number" + }, + "det_rotate": { + "default": false, + "title": "Det Rotate", + "type": "boolean" + }, + "det_auto_rotate": { + "default": false, + "title": "Det Auto Rotate", + "type": "boolean" + }, + "det_invert": { + "default": false, + "title": "Det Invert", + "type": "boolean" + }, + "det_gamma_correct": { + "default": false, + "title": "Det Gamma Correct", + "type": "boolean" + }, + "box_threshold": { + "default": 0.7, + "title": "Box Threshold", + "type": "number" + }, + "unclip_ratio": { + "default": 2.3, + "title": "Unclip Ratio", + "type": "number" + } + }, + "title": "DetectorConfig", + "type": "object" + }, + "Direction": { + "enum": [ + "auto", + "horizontal", + "vertical" + ], + "title": "Direction", + 
"type": "string" + }, + "InpaintPrecision": { + "enum": [ + "fp32", + "fp16", + "bf16" + ], + "title": "InpaintPrecision", + "type": "string" + }, + "Inpainter": { + "enum": [ + "default", + "lama_large", + "lama_mpe", + "sd", + "none", + "original" + ], + "title": "Inpainter", + "type": "string" + }, + "InpainterConfig": { + "properties": { + "inpainter": { + "$ref": "#/$defs/Inpainter", + "default": "none" + }, + "inpainting_size": { + "default": 2048, + "title": "Inpainting Size", + "type": "integer" + }, + "inpainting_precision": { + "$ref": "#/$defs/InpaintPrecision", + "default": "fp32" + } + }, + "title": "InpainterConfig", + "type": "object" + }, + "Ocr": { + "enum": [ + "32px", + "48px", + "48px_ctc", + "mocr" + ], + "title": "Ocr", + "type": "string" + }, + "OcrConfig": { + "properties": { + "use_mocr_merge": { + "default": false, + "title": "Use Mocr Merge", + "type": "boolean" + }, + "ocr": { + "$ref": "#/$defs/Ocr", + "default": "48px" + }, + "min_text_length": { + "default": 0, + "title": "Min Text Length", + "type": "integer" + }, + "ignore_bubble": { + "default": 0, + "title": "Ignore Bubble", + "type": "integer" + } + }, + "title": "OcrConfig", + "type": "object" + }, + "RenderConfig": { + "properties": { + "renderer": { + "$ref": "#/$defs/Renderer", + "default": "default" + }, + "alignment": { + "$ref": "#/$defs/Alignment", + "default": "auto" + }, + "disable_font_border": { + "default": false, + "title": "Disable Font Border", + "type": "boolean" + }, + "font_size_offset": { + "default": 0, + "title": "Font Size Offset", + "type": "integer" + }, + "font_size_minimum": { + "default": -1, + "title": "Font Size Minimum", + "type": "integer" + }, + "direction": { + "$ref": "#/$defs/Direction", + "default": "auto" + }, + "uppercase": { + "default": false, + "title": "Uppercase", + "type": "boolean" + }, + "lowercase": { + "default": false, + "title": "Lowercase", + "type": "boolean" + }, + "gimp_font": { + "default": "Sans-serif", + "title": "Gimp 
Font", + "type": "string" + }, + "no_hyphenation": { + "default": false, + "title": "No Hyphenation", + "type": "boolean" + }, + "font_color": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Font Color" + }, + "line_spacing": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Line Spacing" + }, + "font_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Font Size" + } + }, + "title": "RenderConfig", + "type": "object" + }, + "Renderer": { + "enum": [ + "default", + "manga2eng", + "none" + ], + "title": "Renderer", + "type": "string" + }, + "Translator": { + "enum": [ + "youdao", + "baidu", + "deepl", + "papago", + "caiyun", + "gpt3", + "gpt3.5", + "gpt4", + "none", + "original", + "sakura", + "deepseek", + "groq", + "offline", + "nllb", + "nllb_big", + "sugoi", + "jparacrawl", + "jparacrawl_big", + "m2m100", + "m2m100_big", + "mbart50", + "qwen2", + "qwen2_big" + ], + "title": "Translator", + "type": "string" + }, + "TranslatorConfig": { + "properties": { + "translator": { + "$ref": "#/$defs/Translator", + "default": "sugoi" + }, + "target_lang": { + "default": "ENG", + "title": "Target Lang", + "type": "string" + }, + "no_text_lang_skip": { + "default": false, + "title": "No Text Lang Skip", + "type": "boolean" + }, + "skip_lang": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Skip Lang" + }, + "gpt_config": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Gpt Config" + }, + "translator_chain": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Translator Chain" + }, + "selective_translation": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Selective Translation" + } + }, + 
"title": "TranslatorConfig", + "type": "object" + }, + "UpscaleConfig": { + "properties": { + "upscaler": { + "$ref": "#/$defs/Upscaler", + "default": "esrgan" + }, + "revert_upscaling": { + "default": false, + "title": "Revert Upscaling", + "type": "boolean" + }, + "upscale_ratio": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Upscale Ratio" + } + }, + "title": "UpscaleConfig", + "type": "object" + }, + "Upscaler": { + "enum": [ + "waifu2x", + "esrgan", + "4xultrasharp" + ], + "title": "Upscaler", + "type": "string" + } + }, + "properties": { + "filter_text": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Filter Text" + }, + "render": { + "$ref": "#/$defs/RenderConfig", + "default": { + "renderer": "default", + "alignment": "auto", + "disable_font_border": false, + "font_size_offset": 0, + "font_size_minimum": -1, + "direction": "auto", + "uppercase": false, + "lowercase": false, + "gimp_font": "Sans-serif", + "no_hyphenation": false, + "font_color": null, + "line_spacing": null, + "font_size": null + } + }, + "upscale": { + "$ref": "#/$defs/UpscaleConfig", + "default": { + "upscaler": "esrgan", + "revert_upscaling": false, + "upscale_ratio": null + } + }, + "translator": { + "$ref": "#/$defs/TranslatorConfig", + "default": { + "translator": "sugoi", + "target_lang": "ENG", + "no_text_lang_skip": false, + "skip_lang": null, + "gpt_config": null, + "translator_chain": null, + "selective_translation": null + } + }, + "detector": { + "$ref": "#/$defs/DetectorConfig", + "default": { + "detector": "default", + "detection_size": 1536, + "text_threshold": 0.5, + "det_rotate": false, + "det_auto_rotate": false, + "det_invert": false, + "det_gamma_correct": false, + "box_threshold": 0.7, + "unclip_ratio": 2.3 + } + }, + "colorizer": { + "$ref": "#/$defs/ColorizerConfig", + "default": { + "colorization_size": 576, + "denoise_sigma": 30, + "colorizer": "none" + 
} + }, + "inpainter": { + "$ref": "#/$defs/InpainterConfig", + "default": { + "inpainter": "none", + "inpainting_size": 2048, + "inpainting_precision": "fp32" + } + }, + "ocr": { + "$ref": "#/$defs/OcrConfig", + "default": { + "use_mocr_merge": false, + "ocr": "48px", + "min_text_length": 0, + "ignore_bubble": 0 + } + }, + "kernel_size": { + "default": 3, + "title": "Kernel Size", + "type": "integer" + }, + "mask_dilation_offset": { + "default": 0, + "title": "Mask Dilation Offset", + "type": "integer" + } + }, + "title": "Config", + "type": "object" +} +``` ### GPT Config Reference Used by the `--gpt-config` argument. @@ -622,125 +1017,7 @@ Limitations: ### Api Documentation -
-API V2 -
- -```bash -# use `--mode api` to start a web server. -$ python -m manga_translator -v --mode api --use-gpu -# the api will be serving on http://127.0.0.1:5003 -``` - -Api is accepting json(post) and multipart. -
-Api endpoints are `/colorize_translate`, `/inpaint_translate`, `/translate`, `/get_text`. -
-Valid arguments for the api are: - -``` -// These are taken from args.py. For more info see README.md -detector: String -ocr: String -inpainter: String -upscaler: String -translator: String -target_language: String -upscale_ratio: Integer -translator_chain: String -selective_translation: String -attempts: Integer -detection_size: Integer // 1024 => 'S', 1536 => 'M', 2048 => 'L', 2560 => 'X' -text_threshold: Float -box_threshold: Float -unclip_ratio: Float -inpainting_size: Integer -det_rotate: Bool -det_auto_rotate: Bool -det_invert: Bool -det_gamma_correct: Bool -min_text_length: Integer -colorization_size: Integer -denoise_sigma: Integer -mask_dilation_offset: Integer -ignore_bubble: Integer -gpt_config: String -filter_text: String -overlay_type: String - -// These are api specific args -direction: String // {'auto', 'h', 'v'} -base64Images: String //Image in base64 format -image: Multipart // image upload from multipart -url: String // an url string -``` - -
- -Manual translation replaces machine translation with human translators. -Basic manual translation demo can be found at when using web mode. -
-API -
- -Two modes of translation service are provided by the demo: synchronous mode and asynchronous mode.\ -In synchronous mode your HTTP POST request will finish once the translation task is finished.\ -In asynchronous mode your HTTP POST request will respond with a `task_id` immediately, you can use this `task_id` to -poll for translation task state. - -#### Synchronous mode - -1. POST a form request with form data `file:` to -2. Wait for response -3. Use the resultant `task_id` to find translation result in `result/` directory, e.g. using Nginx to expose `result/` - -#### Asynchronous mode - -1. POST a form request with form data `file:` to -2. Acquire translation `task_id` -3. Poll for translation task state by posting JSON `{"taskid": }` to -4. Translation is finished when the resultant state is either `finished`, `error` or `error-lang` -5. Find translation result in `result/` directory, e.g. using Nginx to expose `result/` - -#### Manual translation - -POST a form request with form data `file:` to -and wait for response. - -You will obtain a JSON response like this: - -```json -{ - "task_id": "12c779c9431f954971cae720eb104499", - "status": "pending", - "trans_result": [ - { - "s": "☆上司来ちゃった……", - "t": "" - } - ] -} -``` - -Fill in translated texts: - -```json -{ - "task_id": "12c779c9431f954971cae720eb104499", - "status": "pending", - "trans_result": [ - { - "s": "☆上司来ちゃった……", - "t": "☆Boss is here..." - } - ] -} -``` - -Post translated JSON to and wait for response.\ -Then you can find the translation result in `result/` directory, e.g. using Nginx to expose `result/`. - -
+Read openapi docs: `127.0.0.1:5003/docs` ## Next steps diff --git a/README_CN.md b/README_CN.md index 46f674e50..e4a9a70be 100644 --- a/README_CN.md +++ b/README_CN.md @@ -106,132 +106,19 @@ FIL: Filipino (Tagalog) ## 选项 ```text --h, --help show this help message and exit --m, --mode {demo,batch,web,web_client,ws,api} - Run demo in single image demo mode (demo), batch - translation mode (batch), web service mode (web) --i, --input INPUT [INPUT ...] Path to an image file if using demo mode, or path to an - image folder if using batch mode --o, --dest DEST Path to the destination folder for translated images in - batch mode --l, --target-lang {CHS,CHT,CSY,NLD,ENG,FRA,DEU,HUN,ITA,JPN,KOR,PLK,PTB,ROM,RUS,ESP,TRK,UKR,VIN,ARA,CNR,SRP,HRV,THA,IND,FIL} - Destination language --v, --verbose Print debug info and save intermediate images in result - folder --f, --format {png,webp,jpg,xcf,psd,pdf} Output format of the translation. ---attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite - times. ---ignore-errors Skip image on encountered error. ---overwrite Overwrite already translated images in batch mode. ---skip-no-text Skip image without text (Will not be saved). ---model-dir MODEL_DIR Model directory (by default ./models in project root) ---use-gpu Turn on/off gpu (automatic selection between mps or cuda) ---use-gpu-limited Turn on/off gpu (excluding offline translator) ---detector {default,ctd,craft,none} Text detector used for creating a text mask from an - image, DO NOT use craft for manga, it's not designed - for it ---ocr {32px,48px,48px_ctc,mocr} Optical character recognition (OCR) model to use ---use-mocr-merge Use bbox merge when Manga OCR inference. ---inpainter {default,lama_large,lama_mpe,sd,none,original} - Inpainting model to use ---upscaler {waifu2x,esrgan,4xultrasharp} Upscaler to use. --upscale-ratio has to be set for it - to take effect ---upscale-ratio UPSCALE_RATIO Image upscale ratio applied before detection. 
Can - improve text detection. ---colorizer {mc2} Colorization model to use. ---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,deepseek,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,sakura} - Language translator to use ---translator-chain TRANSLATOR_CHAIN Output of one translator goes in another. Example: - --translator-chain "google:JPN;sugoi:ENG". ---selective-translation SELECTIVE_TRANSLATION - Select a translator based on detected language in - image. Note the first translation service acts as - default if the language isn't defined. Example: - --translator-chain "google:JPN;sugoi:ENG". ---revert-upscaling Downscales the previously upscaled image after - translation back to original size (Use with --upscale- - ratio). ---detection-size DETECTION_SIZE Size of image used for detection ---det-rotate Rotate the image for detection. Might improve - detection. ---det-auto-rotate Rotate the image for detection to prefer vertical - textlines. Might improve detection. ---det-invert Invert the image colors for detection. Might improve - detection. ---det-gamma-correct Applies gamma correction for detection. Might improve - detection. ---unclip-ratio UNCLIP_RATIO How much to extend text skeleton to form bounding box ---box-threshold BOX_THRESHOLD Threshold for bbox generation ---text-threshold TEXT_THRESHOLD Threshold for text detection ---min-text-length MIN_TEXT_LENGTH Minimum text length of a text region ---no-text-lang-skip Dont skip text that is seemingly already in the target - language. ---inpainting-size INPAINTING_SIZE Size of image used for inpainting (too large will - result in OOM) ---inpainting-precision {fp32,fp16,bf16} Inpainting precision for lama, use bf16 while you can. ---colorization-size COLORIZATION_SIZE Size of image used for colorization. Set to -1 to use - full image size ---denoise-sigma DENOISE_SIGMA Used by colorizer and affects color strength, range - from 0 to 255 (default 30). 
-1 turns it off. ---mask-dilation-offset MASK_DILATION_OFFSET By how much to extend the text mask to remove left-over - text pixels of the original image. ---font-size FONT_SIZE Use fixed font size for rendering ---font-size-offset FONT_SIZE_OFFSET Offset font size by a given amount, positive number - increase font size and vice versa ---font-size-minimum FONT_SIZE_MINIMUM Minimum output font size. Default is - image_sides_sum/200 ---font-color FONT_COLOR Overwrite the text fg/bg color detected by the OCR - model. Use hex string without the "#" such as FFFFFF - for a white foreground or FFFFFF:000000 to also have a - black background around the text. ---line-spacing LINE_SPACING Line spacing is font_size * this value. Default is 0.01 - for horizontal text and 0.2 for vertical. ---force-horizontal Force text to be rendered horizontally ---force-vertical Force text to be rendered vertically ---align-left Align rendered text left ---align-center Align rendered text centered ---align-right Align rendered text right ---uppercase Change text to uppercase ---lowercase Change text to lowercase ---no-hyphenation If renderer should be splitting up words using a hyphen - character (-) ---manga2eng Render english text translated from manga with some - additional typesetting. Ignores some other argument - options ---gpt-config GPT_CONFIG Path to GPT config file, more info in README ---use-mtpe Turn on/off machine translation post editing (MTPE) on - the command line (works only on linux right now) ---save-text Save extracted text and translations into a text file. ---save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path. ---filter-text FILTER_TEXT Filter regions by their text with a regex. Example - usage: --text-filter ".*badtext.*" ---pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line, - Comments can be added with `#` and `//`. 
- usage: //Example - dog cat #Example - abc def - abc ---post-dict FILE_PATH Path to the post-translation dictionary file. Same as above. ---skip-lang Skip translation if source image is one of the provide languages, - use comma to separate multiple languages. Example: JPN,ENG ---prep-manual Prepare for manual typesetting by outputting blank, - inpainted images, plus copies of the original for - reference ---font-path FONT_PATH Path to font file ---gimp-font GIMP_FONT Font family to use for gimp rendering. ---host HOST Used by web module to decide which host to attach to ---port PORT Used by web module to decide which port to attach to ---nonce NONCE Used by web module as secret for securing internal web - server communication ---ws-url WS_URL Server URL for WebSocket mode ---save-quality SAVE_QUALITY Quality of saved JPEG image, range from 0 to 100 with - 100 being best ---ignore-bubble IGNORE_BUBBLE The threshold for ignoring text in non bubble areas, - with valid values ranging from 1 to 50, does not ignore - others. Recommendation 5 to 10. If it is too low, - normal bubble areas may be ignored, and if it is too - large, non bubble areas may be considered normal - bubbles +-h, --help show this help message and exit +-v, --verbose Print debug info and save intermediate images in result folder +--attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite times. +--ignore-errors Skip image on encountered error. 
+--model-dir MODEL_DIR Model directory (by default ./models in project root) +--use-gpu Turn on/off gpu (auto switch between mps and cuda) +--use-gpu-limited Turn on/off gpu (excluding offline translator) +--font-path FONT_PATH Path to font file +--pre-dict PRE_DICT Path to the pre-translation dictionary file +--post-dict POST_DICT Path to the post-translation dictionary file +--kernel-size KERNEL_SIZE Set the convolution kernel size of the text erasure area to + completely clean up text residues +--config-file CONFIG_FILE path to the config file ``` diff --git a/examples/config-example.json b/examples/config-example.json new file mode 100644 index 000000000..08c7b5008 --- /dev/null +++ b/examples/config-example.json @@ -0,0 +1,61 @@ +{ + "filter_text": null, + "render": { + "renderer": "default", + "alignment": "auto", + "disable_font_border": false, + "font_size_offset": 0, + "font_size_minimum": -1, + "direction": "auto", + "uppercase": false, + "lowercase": false, + "gimp_font": "Sans-serif", + "no_hyphenation": false, + "font_color": null, + "line_spacing": null, + "font_size": null + }, + "upscale": { + "upscaler": "esrgan", + "revert_upscaling": false, + "upscale_ratio": null + }, + "translator": { + "translator": "sugoi", + "target_lang": "ENG", + "no_text_lang_skip": false, + "skip_lang": null, + "gpt_config": null, + "translator_chain": null, + "selective_translation": null + }, + "detector": { + "detector": "default", + "detection_size": 1536, + "text_threshold": 0.5, + "det_rotate": false, + "det_auto_rotate": false, + "det_invert": false, + "det_gamma_correct": false, + "box_threshold": 0.7, + "unclip_ratio": 2.3 + }, + "colorizer": { + "colorization_size": 576, + "denoise_sigma": 30, + "colorizer": "none" + }, + "inpainter": { + "inpainter": "none", + "inpainting_size": 2048, + "inpainting_precision": "fp32" + }, + "ocr": { + "use_mocr_merge": false, + "ocr": "48px", + "min_text_length": 0, + "ignore_bubble": 0 + }, + "kernel_size": 3, + 
"mask_dilation_offset": 0 +} diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index dca4740a7..4edeec4ff 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -3,6 +3,7 @@ import logging from argparse import Namespace +from manga_translator import Config from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -73,6 +74,11 @@ async def dispatch(args: Namespace): from manga_translator.mode.share import MangaShare translator = MangaShare(args_dict) await translator.listen(args_dict) + elif args.mode == 'config-help': + import json + config = Config.schema() + print(json.dumps(config, indent=2)) + if __name__ == '__main__': diff --git a/manga_translator/args.py b/manga_translator/args.py index 472311c6a..172fa1914 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -120,3 +120,5 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') + +subparsers.add_parser('config-help', help='Print help information for config file') \ No newline at end of file From a458b7fa96340d3944fb9c2b990204ac5a9d0b4d Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:33:20 +0100 Subject: [PATCH 69/75] add toml support --- examples/config-example.toml | 50 ++++++++++++++++++++++++++++++++++ manga_translator/mode/local.py | 19 +++++++++++-- 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 examples/config-example.toml diff --git a/examples/config-example.toml b/examples/config-example.toml new file mode 100644 index 000000000..a17b10650 --- /dev/null +++ 
b/examples/config-example.toml @@ -0,0 +1,50 @@ +mask_dilation_offset = 0 + +[render] +renderer = "default" +alignment = "auto" +disable_font_border = false +font_size_offset = 0 +font_size_minimum = -1 +direction = "auto" +uppercase = false +lowercase = false +gimp_font = "Sans-serif" +no_hyphenation = false + +[upscale] +upscaler = "esrgan" +revert_upscaling = false + +[translator] +translator = "sugoi" +target_lang = "ENG" +no_text_lang_skip = false + +[detector] +detector = "default" +detection_size = 1536 +text_threshold = 0.5 +det_rotate = false +det_auto_rotate = false +det_invert = false +det_gamma_correct = false +box_threshold = 0.7 +unclip_ratio = 2.3 + +[colorizer] +colorization_size = 576 +denoise_sigma = 30 +colorizer = "none" + +[inpainter] +inpainter = "none" +inpainting_size = 2048 +inpainting_precision = "fp32" + +[ocr] +use_mocr_merge = false +ocr = "48px" +min_text_length = 0 +ignore_bubble = 0 + diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 564efbc0f..384e7f1fe 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -35,7 +35,6 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' params = params or {} config_file_path = params.get("config_file", None) - config_content = "{}" if config_file_path: try: @@ -43,9 +42,23 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un config_content = file.read() except Exception as e: print("Couldnt read file") - pass - config = Config(**json.loads(config_content)) + raise e + config_extension = os.path.splitext(config_file_path)[1].lower() + try: + if config_extension == ".toml": + import tomllib + config_dict = tomllib.loads(config_content) + elif config_extension == ".json": + config_dict = json.loads(config_content) + else: + raise ValueError("Unsupported configuration file format") + except Exception as e: + 
print("Failed to load configuration file") + raise e + config = Config(**config_dict) + else: + config = Config() # Handle format file_ext = params.get('format') if params.get('save_quality', 100) < 100: From cb1a5a305f0797207e9fa58b1ea7b8f5d392c52b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 17:00:21 +0100 Subject: [PATCH 70/75] issues with main.py when executing with command line --- server/main.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/main.py b/server/main.py index 20d3d5833..88081bfdf 100644 --- a/server/main.py +++ b/server/main.py @@ -7,6 +7,9 @@ import sys from argparse import Namespace +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + + from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, HTMLResponse @@ -121,13 +124,15 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse,tags=["ui"]) async def index() -> HTMLResponse: - html_file = Path("index.html") + script_directory = Path(__file__).parent + html_file = script_directory / "index.html" html_content = html_file.read_text() return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): - html_file = Path("manual.html") + script_directory = Path(__file__).parent + html_file = script_directory / "manual.html" html_content = html_file.read_text() return HTMLResponse(content=html_content) From de441a42e42c82c28f1498e0cb8d27f981a0fb83 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:21:52 +0100 Subject: [PATCH 71/75] typo --- manga_translator/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 
172fa1914..06846b944 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -105,7 +105,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by outputting blank, inpainted images, plus copies of the original for reference') parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') -parser.add_argument('--config-file', default=None, type=str, help='path to the config file') +parser_batch.add_argument('--config-file', default=None, type=str, help='path to the config file') # WebSocket mode parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') From 1b441b86756d8565dfc1e8c0c850e8e0c1e5da80 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:38:12 +0100 Subject: [PATCH 72/75] why does the order matter? --- manga_translator/args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 06846b944..72a717a1c 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -78,6 +78,8 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) +subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') + parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') @@ -90,8 +92,6 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') -subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') - # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') From cb2ed2442e8c33a68a2a850c9fca25674f85672b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:56:10 +0100 Subject: [PATCH 73/75] why does the order matter? 
--- manga_translator/__main__.py | 5 ++++- manga_translator/args.py | 39 +++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 4edeec4ff..9e4970ba1 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -4,6 +4,7 @@ from argparse import Namespace from manga_translator import Config +from manga_translator.args import parser, reparse from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -85,7 +86,9 @@ async def dispatch(args: Namespace): args = None init_logging() try: - args = parser.parse_args() + args, unknown = parser.parse_known_args() + parser.add_argument_group() + args = Namespace(**{**vars(args), **vars(reparse(unknown))}) set_log_level(level=logging.DEBUG if args.verbose else logging.INFO) logger = get_logger(args.mode) set_main_logger(logger) diff --git a/manga_translator/args.py b/manga_translator/args.py index 72a717a1c..5b5a3ec83 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -76,22 +76,37 @@ def _format_action_invocation(self, action: argparse.Action) -> str: else: return super()._format_action_invocation(action) +def general_parser(g_parser): + g_parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + g_parser.add_argument('--attempts', default=0, type=int, + help='Retry attempts on encountered error. 
-1 means infinite times.') + g_parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + g_parser.add_argument('--model-dir', default=None, type=dir_path, + help='Model directory (by default ./models in project root)') + g = g_parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') + g_parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') + g_parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') + g_parser.add_argument('--post-dict', default=None, type=file_path, + help='Path to the post-translation dictionary file') + g_parser.add_argument('--kernel-size', default=3, type=int, + help='Set the convolution kernel size of the text erasure area to completely clean up text residues') + + + +def reparse(arr: list): + p = argparse.ArgumentParser(prog='manga_translator', + description='Seamlessly translate mangas into a chosen language', + formatter_class=HelpFormatter) + general_parser(p) + return p.parse_args(arr) parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) +general_parser(parser) subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') -parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') -parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') -parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') -parser.add_argument('--model-dir', default=None, type=dir_path, help='Model directory (by default ./models in project root)') -g = parser.add_mutually_exclusive_group() -g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') -g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') -parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') -parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') -parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') -parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') - # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') From 9a19389e73c5abec2d1535ceadbba14f79ec61d1 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:58:05 +0100 Subject: [PATCH 74/75] this shouldnt be here --- manga_translator/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 9e4970ba1..43a5ca863 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -87,7 +87,6 @@ async def dispatch(args: Namespace): init_logging() try: args, unknown = parser.parse_known_args() - parser.add_argument_group() args = Namespace(**{**vars(args), **vars(reparse(unknown))}) set_log_level(level=logging.DEBUG if args.verbose else logging.INFO) logger = get_logger(args.mode) 
From e979c4386f0dcccf1be175ee86f2704a5ffe34c2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 1 Dec 2024 01:53:24 +0100 Subject: [PATCH 75/75] fixes #760 --- manga_translator/manga_translator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 7fa85c7b2..324a87fc7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -279,7 +279,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: if self.verbose: inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter,config.inpainter.inpainting_size, - self.using_gpu, self.verbose) + self.device, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -534,7 +534,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): config.mask_dilation_offset, config.ocr.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, config.inpainter.inpainting_size, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context):