From 23e7766c5b6958220f524421aa226b7eeef03244 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:43:25 +0800 Subject: [PATCH] Add files via upload --- manga_translator/manga_translator.py | 47 ---------------------------- 1 file changed, 47 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 0eb182a4..dd7343bb 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -352,53 +352,6 @@ async def _run_textline_merge(self, config: Config, ctx: Context): new_text_regions = [] for region in text_regions: - - # Remove leading spaces and specified characters from each line (after pre-translation dictionary replacement) - original_text = region.text - stripped_text = original_text.lstrip('、?!') - - # Record the removed leading characters - removed_start_chars = original_text[:len(original_text) - len(stripped_text)] - if removed_start_chars: - logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"') - - # Filter condition modification: Handle incomplete brackets - # Combine left brackets and left quotation marks into a single list - left_symbols = ['(', '(', '[', '【', '{', '〔', '〈', '「', - '“', '‘', '《', '『', '"', '〝', '﹁', '﹃', - '⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«'] - - # Combine right brackets and right quotation marks into a single list - right_symbols = [')', ')', ']', '】', '}', '〕', '〉', '」', - '”', '’', '》', '』', '"', '〞', '﹂', '﹄', - '⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»'] - - # Combine all symbols - all_symbols = left_symbols + right_symbols - - # Count the number of left and right symbols - left_count = sum(stripped_text.count(s) for s in left_symbols) - right_count = sum(stripped_text.count(s) for s in right_symbols) - - # Check if the number of left and right symbols match - if left_count != right_count: - # Symbols are not paired, remove all symbols - for s in all_symbols: - stripped_text = stripped_text.replace(s, '') - logger.info(f'Removed unpaired symbols from "{stripped_text}"') - - # Check if the text ends with an Arabic numeral, "、", or "?" - stripped_text = stripped_text.rstrip() - end_char = stripped_text[-1] if stripped_text else '' - - # If the end is a specified character, remove it instead of skipping the whole sentence - if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '、']: - stripped_text = stripped_text[:-1] # Remove the last character - logger.info(f'Removed last character: {end_char} from "{stripped_text}"') - - # Update region.text - region.text = stripped_text.strip() - if len(region.text) >= config.ocr.min_text_length \ and not is_valuable_text(region.text) \ or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0):