Skip to content

Commit

Permalink
Merge pull request #59 from dmMaze/main
Browse files Browse the repository at this point in the history
improve English text rendering suggested by #58
  • Loading branch information
zyddnys authored Apr 20, 2022
2 parents c24ef01 + 9b828fc commit 27d52a1
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 10 deletions.
Binary file added fonts/comic shanns 2.ttf
Binary file not shown.
30 changes: 27 additions & 3 deletions text_rendering/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@

from typing import List
from typing import List, Union
from utils import Quadrilateral
import numpy as np
import cv2
import math
from utils import findNextPowerOf2

import textwrap
from . import text_render
from .text_render_eng import render_textblock_list_eng
from textblockdetector.textblock import TextBlock

def fg_bg_compare(fg, bg):
Expand Down Expand Up @@ -154,4 +155,27 @@ def render(img_canvas, font_size, text_mag_ratio, trans_text, region, majority_d
canvas_region = rgba_region[:, :, 0: 3]
mask_region = rgba_region[:, :, 3: 4].astype(np.float32) / 255.0
img_canvas = np.clip((img_canvas.astype(np.float32) * (1 - mask_region) + canvas_region.astype(np.float32) * mask_region), 0, 255).astype(np.uint8)
return img_canvas
return img_canvas




async def dispatch_eng_render(img_canvas: np.ndarray, text_regions: Union[List[TextBlock], List[Quadrilateral]], translated_sentences: List[str], font_path: str) -> np.ndarray :
    """Render translated English text for the detected regions onto ``img_canvas``.

    The type of the first region decides how the whole list is handled:
    ``Quadrilateral`` regions are converted to ``TextBlock`` objects first,
    anything else is treated as a list of ``TextBlock`` and gets its
    ``translation`` attribute assigned in place.

    Args:
        img_canvas: Image to draw on; returned unchanged when there are no regions.
        text_regions: Detected regions, either all ``TextBlock`` or all ``Quadrilateral``.
        translated_sentences: Translations, paired with ``text_regions`` by position.
        font_path: Font file handed to ``render_textblock_list_eng``.

    Returns:
        Whatever ``render_textblock_list_eng`` returns, or ``img_canvas`` itself
        when ``text_regions`` is empty.
    """
    if not len(text_regions):
        return img_canvas

    if not isinstance(text_regions[0], Quadrilateral):
        # Regions are already text blocks: just attach the translations.
        for block, sentence in zip(text_regions, translated_sentences):
            block.translation = sentence
        return render_textblock_list_eng(img_canvas, text_regions, font_path, size_tol=1.2)

    converted = []
    for quad, sentence in zip(text_regions, translated_sentences):
        # Axis-aligned extent of the quadrilateral's corner points.
        left = np.min(quad.pts[:, 0])
        width = np.max(quad.pts[:, 0]) - left
        top = np.min(quad.pts[:, 1])
        height = np.max(quad.pts[:, 1]) - top
        # The quadrilateral's font size is scaled down by 0.7 for the block.
        shrunk_font_size = quad.font_size * 0.7
        # NOTE(review): the box is passed as [x, y, w, h]; confirm this is the
        # format TextBlock's first argument expects.
        converted.append(
            TextBlock(
                [left, top, width, height],
                lines=[quad.pts],
                translation=sentence,
                angle=quad.angle,
                font_size=shrunk_font_size,
            )
        )
    return render_textblock_list_eng(img_canvas, converted, font_path, size_tol=1.1)
115 changes: 115 additions & 0 deletions text_rendering/text_render_eng.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from PIL import ImageFont, ImageDraw, Image
import numpy as np
from typing import List, Union
from textblockdetector import TextBlock

from utils import Quadrilateral


class Line:
    """A single row of text to draw, with its draw position and measured length."""

    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None:
        self.text, self.pos_x, self.pos_y = text, pos_x, pos_y
        # The length is stored truncated to an integer.
        self.length = int(length)


def text_to_word_list(text: str) -> List[str]:
    """Split ``text`` into upper-cased layout units for line wrapping.

    Steps:
      1. Upper-case the text.
      2. Insert a space after ``.``, ``?`` or ``!`` when the next character
         is a letter or digit, so "HI.THERE" becomes two words.
      3. Split on any run of whitespace (spaces, tabs, newlines).
      4. Glue a one-character word to its neighbour so that it is never
         left alone on a line: whenever the current word or the following
         word has length 1, the two are joined with a space and consumed
         together.

    Args:
        text: Text to split.

    Returns:
        The list of units; empty when ``text`` is empty or only whitespace.
    """
    text = text.upper()

    # Ensure a space follows sentence punctuation that runs into the next word.
    # NOTE(review): this also splits a '.' between digits ("3.14" -> "3. 14");
    # kept as in the original.
    pieces = []
    last_index = len(text) - 1
    for ii, c in enumerate(text):
        pieces.append(c)
        if c in '.?!' and ii < last_index:
            next_c = text[ii + 1]
            if next_c.isalpha() or next_c.isnumeric():
                pieces.append(' ')

    # split() without arguments collapses whitespace runs and never yields
    # empty strings, so blank input gives [] rather than [''].
    word_list = ''.join(pieces).split()

    words = []
    word_num = len(word_list)
    ii = 0
    while ii < word_num:
        word = word_list[ii]
        has_next = ii < word_num - 1
        if has_next and (len(word) == 1 or len(word_list[ii + 1]) == 1):
            words.append(word + ' ' + word_list[ii + 1])
            ii += 2
        else:
            words.append(word)
            ii += 1
    return words

def render_textblock_list_eng(img: np.ndarray, blk_list: List[TextBlock], font_path: str, scale_quality=1.0, align_center=True, size_tol=1.0):
    """Typeset each block's translation and composite it onto the image.

    For every block the translation is split into words, wrapped greedily to
    the block width, drawn in black with a white outline on a transparent
    canvas, rotated by the block angle when that exceeds 3 degrees, shrunk
    if it does not fit the block, and pasted centred on the block.

    Args:
        img: Image array accepted by ``PIL.Image.fromarray``.
        blk_list: Blocks to render. Each must provide ``vertical``, ``angle``,
            ``font_size``, ``translation`` and ``min_rect()``. The ``angle``
            of a vertical block is decremented by 90 in place.
        font_path: Path of the TrueType font to load.
        scale_quality: Multiplier applied to the font size and to the block
            width used for wrapping.
        align_center: Centre each line horizontally when true, otherwise
            start every line at x = 0.
        size_tol: Factor applied to the block-to-text size ratio before
            deciding whether the text has to be shrunk.

    Returns:
        ``numpy`` array built from the composited PIL image.
    """
    pilimg = Image.fromarray(img)
    sw_r = 0.1  # stroke width as a fraction of the font size
    font_color = (0, 0, 0)
    stroke_color = (255, 255, 255)
    font_cache = {}  # font size -> loaded font, so each size is read from disk once

    for blk in blk_list:
        if blk.vertical:
            # NOTE(review): this changes the caller's block, so rendering the
            # same block twice shifts its angle twice; kept as in the original.
            blk.angle -= 90
        # Clamp to 1: a font cannot be loaded at size 0.
        fs = max(1, int(blk.font_size / (1 + 2*sw_r) * scale_quality))
        min_bbox = blk.min_rect(rotate_back=False)[0]
        bx, by = min_bbox[0]
        bw, bh = min_bbox[2] - min_bbox[0]
        if bw <= 0 or bh <= 0:
            # Degenerate block: there is no area to fit text into.
            continue
        cx, cy = bx + bw / 2, by + bh / 2
        bw = bw * scale_quality

        words = text_to_word_list(blk.translation)
        if not words:
            continue

        font = font_cache.get(fs)
        if font is None:
            font = font_cache[fs] = ImageFont.truetype(font_path, fs)

        sw = int(sw_r * font.size)
        line_height = int((1 + 2*sw_r) * font.getmetrics()[0])

        # The wrap width is the block width, widened if a single word is longer.
        w_list = [font.getlength(word) for word in words]
        base_length = max(max(w_list), bw)
        space_l = font.getlength(' ')

        # Greedy wrapping: append words to the current line until it overflows.
        pos_y = 0
        line = Line(words[0], 0, 0, w_list[0])
        line_lst = [line]
        for word, wl in zip(words[1:], w_list[1:]):
            added_len = int(space_l + wl + line.length)
            if added_len > base_length:
                pos_y += line_height
                line = Line(word, 0, pos_y, wl)
                line_lst.append(line)
            else:
                line.text = line.text + ' ' + word
                line.length = added_len

        # Keep the canvas at least 1x1 so later size ratios stay finite.
        canvas_w = max(1, int(base_length))
        canvas_h = max(1, line_lst[-1].pos_y + line_height)

        # Separate name for the text canvas; the original reused ``img`` here.
        txt_img = Image.new('RGBA', (canvas_w, canvas_h), color=(0, 0, 0, 0))
        d = ImageDraw.Draw(txt_img)
        d.fontmode = 'L'

        for line in line_lst:
            pos_x = int((base_length - line.length) / 2) if align_center else 0
            d.text((pos_x, line.pos_y), line.text, font=font, fill=font_color, stroke_width=sw, stroke_fill=stroke_color)

        if abs(blk.angle) > 3:
            txt_img = txt_img.rotate(-blk.angle, expand=True)
        im_w, im_h = txt_img.size
        scale = min(bh / im_h * size_tol, bw / im_w * size_tol)
        if scale < 1:
            # Never ask for a zero-sized image; resize rejects it.
            new_size = (max(1, int(im_w*scale)), max(1, int(im_h*scale)))
            txt_img = txt_img.resize(new_size)
            im_w, im_h = txt_img.size

        paste_x, paste_y = int(cx - im_w / 2), int(cy - im_h / 2)
        pilimg.paste(txt_img, (paste_x, paste_y), mask=txt_img)

    return np.array(pilimg)
6 changes: 3 additions & 3 deletions textblockdetector/textblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def min_rect(self, rotate_back=True):
min_bbox = np.array([[min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]])
if angled and rotate_back:
min_bbox = rotate_polygons(center, min_bbox, -self.angle)
return min_bbox.reshape(-1, 4, 2)
return min_bbox.reshape(-1, 4, 2).astype(np.int64)

# equivalent to qt's boundingRect, ignore angle
def bounding_rect(self):
Expand Down Expand Up @@ -365,8 +365,8 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.3, distanc
blk.lines.append(blk2.lines[0])
blk.vec = vec_sum
blk.angle = int(round(np.rad2deg(math.atan2(vec_sum[1], vec_sum[0]))))
if blk.vertical:
blk.angle -= 90
# if blk.vertical:
# blk.angle -= 90
blk.norm = np.linalg.norm(vec_sum)
blk.distance = np.append(blk.distance, blk2.distance[-1])
blk.font_size = fntsz_avg
Expand Down
14 changes: 10 additions & 4 deletions translate_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
parser.add_argument('--target-lang', default='CHS', type=str, help='destination language')
parser.add_argument('--use-ctd', action='store_true', help='use comic-text-detector for text detection')
parser.add_argument('--verbose', action='store_true', help='print debug info and save intermediate images')
parser.add_argument('--manga2eng', action='store_true', help='render English text translated from manga with some typesetting')
parser.add_argument('--eng-font', default='fonts/comic shanns 2.ttf', type=str, help='font used by manga2eng mode')
args = parser.parse_args()

def update_state(task_id, nonce, state) :
Expand Down Expand Up @@ -181,11 +183,15 @@ async def infer(
if mode == 'web' and task_id :
update_state(task_id, nonce, 'render')
# render translated texts
if detector == 'ctd' :
from text_rendering import dispatch_ctd_render
output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite)
if args.target_lang == 'ENG' and args.manga2eng:
from text_rendering import dispatch_eng_render
output = await dispatch_eng_render(np.copy(img_inpainted), text_regions, translated_sentences, args.eng_font)
else:
output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite)
if detector == 'ctd' :
from text_rendering import dispatch_ctd_render
output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite)
else:
output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite)

print(' -- Saving results')
if alpha_ch is not None :
Expand Down

0 comments on commit 27d52a1

Please sign in to comment.