Skip to content

Commit

Permalink
Update manga_translator.py
Browse files: browse the repository at this point in the history
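fix skip_lang: move the skip-language filter from the OCR textlines in _translate to the merged text regions in _run_textline_merge, and normalize language codes (strip whitespace, uppercase) before comparing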
  • Loading branch information
popcion authored Dec 10, 2024
1 parent 6faf4b3 commit 58f1e48
Showing 1 changed file with 27 additions and 12 deletions.
39 changes: 27 additions & 12 deletions manga_translator/manga_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,18 +213,6 @@ async def _translate(self, config: Config, ctx: Context) -> Context:
# -- OCR
await self._report_progress('ocr')
ctx.textlines = await self._run_ocr(config, ctx)

if config.translator.skip_lang is not None :
filtered_textlines = []
skip_langs = config.translator.skip_lang.split(',')
for txtln in ctx.textlines :
try :
source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN')
except Exception :
source_language = 'UNKNOWN'
if source_language not in skip_langs :
filtered_textlines.append(txtln)
ctx.textlines = filtered_textlines

if not ctx.textlines:
await self._report_progress('skip-no-text', True)
Expand Down Expand Up @@ -338,6 +326,33 @@ async def _run_ocr(self, config: Config, ctx: Context):
async def _run_textline_merge(self, config: Config, ctx: Context):
text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0],
verbose=self.verbose)
# First, filter out languages to skip
if ctx.skip_lang is not None:
skip_langs = [lang.strip().upper() for lang in ctx.skip_lang.split(',')]
filtered_text_regions = []
for region in text_regions:
try:
detected_lang = langdetect.detect(region.text)
source_language = LANGDETECT_MAP.get(detected_lang.lower(), 'UNKNOWN').upper()
except Exception:
source_language = 'UNKNOWN'

# Print detected source_language and whether it's in skip_langs
# logger.info(f'Detected source language: {source_language}, in skip_langs: {source_language in skip_langs}, text: "{region.text}"')

if source_language in skip_langs:
logger.info(f'Filtered out: {region.text}')
logger.info(f'Reason: Detected language {source_language} is in skip_langs')
continue # Skip this region
filtered_text_regions.append(region)
text_regions = filtered_text_regions

if not text_regions:
await self._report_progress('skip-no-text', True)
# If all text regions are filtered out, return an empty list
ctx.result = ctx.upscaled
return []

new_text_regions = []
for region in text_regions:
if len(region.text) >= config.ocr.min_text_length \
Expand Down

0 comments on commit 58f1e48

Please sign in to comment.