From 58f1e4859192c80c16989f5aa66712dc871dff06 Mon Sep 17 00:00:00 2001 From: popcion Date: Wed, 11 Dec 2024 03:49:01 +0800 Subject: [PATCH] Update manga_translator.py fix skip_lang --- manga_translator/manga_translator.py | 39 +++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index c9d02144b..d0147fe09 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -213,18 +213,6 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- OCR await self._report_progress('ocr') ctx.textlines = await self._run_ocr(config, ctx) - - if config.translator.skip_lang is not None : - filtered_textlines = [] - skip_langs = config.translator.skip_lang.split(',') - for txtln in ctx.textlines : - try : - source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN') - except Exception : - source_language = 'UNKNOWN' - if source_language not in skip_langs : - filtered_textlines.append(txtln) - ctx.textlines = filtered_textlines if not ctx.textlines: await self._report_progress('skip-no-text', True) @@ -338,6 +326,33 @@ async def _run_ocr(self, config: Config, ctx: Context): async def _run_textline_merge(self, config: Config, ctx: Context): text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0], verbose=self.verbose) + # First, filter out languages to skip + if ctx.skip_lang is not None: + skip_langs = [lang.strip().upper() for lang in ctx.skip_lang.split(',')] + filtered_text_regions = [] + for region in text_regions: + try: + detected_lang = langdetect.detect(region.text) + source_language = LANGDETECT_MAP.get(detected_lang.lower(), 'UNKNOWN').upper() + except Exception: + source_language = 'UNKNOWN' + + # Print detected source_language and whether it's in skip_langs + # logger.info(f'Detected source language: {source_language}, in skip_langs: {source_language in skip_langs}, text: "{region.text}"') + + if source_language in skip_langs: + logger.info(f'Filtered out: {region.text}') + logger.info(f'Reason: Detected language {source_language} is in skip_langs') + continue # Skip this region + filtered_text_regions.append(region) + text_regions = filtered_text_regions + + if not text_regions: + await self._report_progress('skip-no-text', True) + # If all text regions are filtered out, return an empty list + ctx.result = ctx.upscaled + return [] + new_text_regions = [] for region in text_regions: if len(region.text) >= config.ocr.min_text_length \