Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
popcion authored Dec 14, 2024
1 parent 894c85f commit 23e7766
Showing 1 changed file with 0 additions and 47 deletions.
47 changes: 0 additions & 47 deletions manga_translator/manga_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,53 +352,6 @@ async def _run_textline_merge(self, config: Config, ctx: Context):

new_text_regions = []
for region in text_regions:

# Remove the leading characters "、", "?" and "!" from the region text (after pre-translation dictionary replacement)
original_text = region.text
stripped_text = original_text.lstrip('、?!')

# Record the removed leading characters
removed_start_chars = original_text[:len(original_text) - len(stripped_text)]
if removed_start_chars:
logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"')

# Filter condition modification: Handle incomplete brackets
# Combine left brackets and left quotation marks into a single list
left_symbols = ['(', '(', '[', '【', '{', '〔', '〈', '「',
'“', '‘', '《', '『', '"', '〝', '﹁', '﹃',
'⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«']

# Combine right brackets and right quotation marks into a single list
right_symbols = [')', ')', ']', '】', '}', '〕', '〉', '」',
'”', '’', '》', '』', '"', '〞', '﹂', '﹄',
'⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»']

# Combine all symbols
all_symbols = left_symbols + right_symbols

# Count the number of left and right symbols
left_count = sum(stripped_text.count(s) for s in left_symbols)
right_count = sum(stripped_text.count(s) for s in right_symbols)

# Check if the number of left and right symbols match
if left_count != right_count:
# Symbols are not paired, remove all symbols
for s in all_symbols:
stripped_text = stripped_text.replace(s, '')
logger.info(f'Removed unpaired symbols from "{stripped_text}"')

# Strip trailing whitespace, then check if the text ends with an Arabic numeral or "、"
stripped_text = stripped_text.rstrip()
end_char = stripped_text[-1] if stripped_text else ''

# If the end is a specified character, remove it instead of skipping the whole sentence
if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '、']:
stripped_text = stripped_text[:-1] # Remove the last character
logger.info(f'Removed last character: {end_char} from "{stripped_text}"')

# Update region.text
region.text = stripped_text.strip()

if len(region.text) >= config.ocr.min_text_length \
and not is_valuable_text(region.text) \
or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0):
Expand Down

0 comments on commit 23e7766

Please sign in to comment.