Merge pull request #59 from dmMaze/main

improve English text rendering suggested by #58
zyddnys · Apr 20, 2022 · 27d52a1 · 27d52a1
2 parents c24ef01 + 9b828fc
commit 27d52a1
Show file tree

Hide file tree

Showing 5 changed files with 155 additions and 10 deletions.
diff --git a/fonts/comic shanns 2.ttf b/fonts/comic shanns 2.ttf
diff --git a/text_rendering/__init__.py b/text_rendering/__init__.py
@@ -1,12 +1,13 @@
 
-from typing import List
+from typing import List, Union
 from utils import Quadrilateral
 import numpy as np
 import cv2
 import math
 from utils import findNextPowerOf2
-
+import textwrap
 from . import text_render
+from .text_render_eng import render_textblock_list_eng
 from textblockdetector.textblock import TextBlock
 
 def fg_bg_compare(fg, bg):
@@ -154,4 +155,27 @@ def render(img_canvas, font_size, text_mag_ratio, trans_text, region, majority_d
 	canvas_region = rgba_region[:, :, 0: 3]
 	mask_region = rgba_region[:, :, 3: 4].astype(np.float32) / 255.0
 	img_canvas = np.clip((img_canvas.astype(np.float32) * (1 - mask_region) + canvas_region.astype(np.float32) * mask_region), 0, 255).astype(np.uint8)
-	return img_canvas
+	return img_canvas
+
+
+
+
+async def dispatch_eng_render(img_canvas: np.ndarray, text_regions: Union[List[TextBlock], List[Quadrilateral]], translated_sentences: List[str], font_path: str) -> np.ndarray :
+	"""Render translated sentences onto ``img_canvas`` via ``render_textblock_list_eng``.
+
+	``text_regions`` and ``translated_sentences`` are paired positionally with
+	``zip``, so surplus entries in the longer list are silently ignored. The
+	type of the first region selects the path for the whole list:
+
+	* ``Quadrilateral``: each region is converted into a new ``TextBlock``
+	  built from the axis-aligned extent of ``region.pts`` and rendered with
+	  ``size_tol=1.1``.
+	* anything else: the regions are used as ``TextBlock`` objects, their
+	  ``translation`` attribute is overwritten in place, and they are rendered
+	  with ``size_tol=1.2``.
+
+	Returns ``img_canvas`` itself when ``text_regions`` is empty, otherwise the
+	value returned by ``render_textblock_list_eng``.
+
+	The function is declared ``async`` but its body contains no ``await``.
+	"""
+	if len(text_regions) == 0:
+		return img_canvas
+
+	# Only the first element is inspected; the list is assumed to be homogeneous.
+	if isinstance(text_regions[0], 	Quadrilateral):
+		blk_list = []
+		for region, tr in zip(text_regions, translated_sentences):
+			# Axis-aligned extent of the quadrilateral's corner points.
+			x = np.min(region.pts[:, 0])
+			w = np.max(region.pts[:, 0]) - x
+			y = np.min(region.pts[:, 1])
+			h = np.max(region.pts[:, 1]) - y
+			# The region's font size is reduced to 70% before rendering.
+			font_size = region.font_size * 0.7
+			# NOTE(review): the box is passed as [x, y, width, height]; confirm this
+			# matches the box layout TextBlock expects for its first argument.
+			blk = TextBlock([x, y, w, h], lines=[region.pts], translation=tr, angle=region.angle, font_size=font_size)
+			blk_list.append(blk)
+		return render_textblock_list_eng(img_canvas, blk_list, font_path, size_tol=1.1)
+
+	# TextBlock path: attach each translation to its block (mutates the caller's objects).
+	for blk, tr in zip(text_regions, translated_sentences):
+		blk.translation = tr
+	return render_textblock_list_eng(img_canvas, text_regions, font_path, size_tol=1.2)
diff --git a/text_rendering/text_render_eng.py b/text_rendering/text_render_eng.py
@@ -0,0 +1,115 @@
+from PIL import ImageFont, ImageDraw, Image
+import numpy as np
+from typing import List, Union
+from textblockdetector import TextBlock
+
+from utils import Quadrilateral
+
+
+class Line:
+    """A single line of text to draw: its content, offset, and measured length.
+
+    ``render_textblock_list_eng`` builds one instance per wrapped line, reading
+    ``text``, ``pos_y`` and ``length`` when drawing.
+    """
+
+    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None:
+        self.text = text
+        self.pos_x = pos_x
+        self.pos_y = pos_y
+        # Stored as an int: a fractional measured length is truncated here.
+        self.length = int(length)
+
+
+def text_to_word_list(text: str) -> List[str]:
+    """Split ``text`` into upper-cased chunks used as line-wrapping units.
+
+    Steps:
+
+    1. Upper-case the text and replace each ``'  '`` (two spaces) with one.
+    2. Insert a space after ``.``, ``?`` or ``!`` when the next character is
+       alphabetic or numeric.
+    3. Split on single spaces.
+    4. Walk the pieces left to right; when a piece or its successor is exactly
+       one character long, join the two with a space into one chunk and skip
+       the successor.
+
+    Returns the list of chunks. For an empty string the result is ``['']``,
+    not an empty list, because ``''.split(' ')`` yields ``['']``.
+    """
+    # NOTE(review): a single replace pass turns three spaces into two, so runs
+    # of 3+ spaces still produce empty strings after split(' ').
+    text = text.upper().replace('  ', ' ')
+    processed_text = ''
+
+    # simple way to ensure spaces between words after sentence punctuation
+    text_len = len(text)
+    for ii, c in enumerate(text):
+            # The final character is never followed by an inserted space.
+            if c in ['.', '?', '!'] and ii < text_len - 1:
+                next_c = text[ii + 1]
+                if next_c.isalpha() or next_c.isnumeric():
+                    processed_text += c + ' '
+                else:
+                    processed_text += c
+            else:
+                processed_text += c
+    word_list = processed_text.split(' ')
+    words = []
+    skip_next = False
+    word_num = len(word_list)
+    for ii, word in enumerate(word_list):
+        # The previous iteration already merged this piece into its chunk.
+        if skip_next:
+            skip_next = False
+            continue
+        if ii < word_num - 1:
+            # Keep one-character pieces attached to a neighbour so they are
+            # never wrapped onto a line of their own. Merging is pairwise only:
+            # the merged chunk is not merged again with the piece after it.
+            if len(word) == 1 or len(word_list[ii + 1]) == 1:
+                skip_next = True
+                word = word + ' ' + word_list[ii + 1]
+        words.append(word)
+    return words
+
+def render_textblock_list_eng(img: np.ndarray, blk_list: List[TextBlock], font_path: str, scale_quality=1.0, align_center=True, size_tol=1.0):
+    """Draw each block's ``translation`` onto ``img`` and return the result.
+
+    For every block the text is split with ``text_to_word_list``, greedily
+    wrapped into lines, drawn in black with a white stroke on a transparent
+    RGBA canvas, optionally rotated, shrunk if it exceeds the block's box, and
+    pasted centred on the box using its own alpha channel as mask.
+
+    Args:
+        img: image array converted with ``Image.fromarray``.
+        blk_list: blocks to render; ``vertical``, ``angle``, ``font_size``,
+            ``translation`` and ``min_rect`` are read from each one.
+        font_path: path handed to ``ImageFont.truetype``.
+        scale_quality: multiplier applied to the font size and the box width.
+        align_center: centre each line within the canvas width when true,
+            otherwise start every line at x = 0.
+        size_tol: multiplier applied to the box/text ratios that decide
+            whether (and how far) the rendered text is shrunk.
+
+    Returns:
+        A new array built from the composited PIL image.
+
+    Side effect: ``blk.angle`` is decremented by 90 in place for every block
+    whose ``vertical`` attribute is truthy.
+    """
+    pilimg = Image.fromarray(img)
+    for blk in blk_list:
+        # NOTE(review): this mutates the block, so rendering the same block
+        # list twice subtracts 90 twice.
+        if blk.vertical:
+            blk.angle -= 90
+        # Stroke width as a fraction of the font size.
+        sw_r = 0.1
+        # Shrink the font so glyph plus stroke on both sides fits the nominal size.
+        fs = int(blk.font_size / (1 + 2*sw_r) * scale_quality) 
+        # Four corner points of the unrotated box; corners 0 and 2 are used as
+        # opposite corners (presumably top-left and bottom-right, verify in min_rect).
+        min_bbox = blk.min_rect(rotate_back=False)[0]
+        bx, by = min_bbox[0]
+        bw, bh = min_bbox[2] - min_bbox[0]
+        cx, cy = bx + bw / 2, by + bh / 2
+        # NOTE(review): only the width is scaled by scale_quality; bh is left as is.
+        bw = bw * scale_quality
+
+        font = ImageFont.truetype(font_path, fs)
+        words = text_to_word_list(blk.translation)
+        # NOTE(review): text_to_word_list appears to return [''] for empty
+        # text, in which case this guard would not trigger.
+        if not len(words):
+            continue
+
+        base_length = -1
+        w_list = []
+
+        sw = int(sw_r * font.size)
+        # First element of getmetrics() (the ascent per Pillow docs), padded for the stroke.
+        line_height = int((1 + 2*sw_r) * font.getmetrics()[0])
+
+        # Measure every chunk and remember the widest one.
+        for word in words:
+            wl = font.getlength(word)
+            w_list.append(wl)
+            if wl > base_length:
+                base_length = wl
+        # Wrap width: the box width, or the widest chunk if that is larger.
+        base_length = max(base_length, bw)
+        space_l = font.getlength(' ')
+        pos_x, pos_y = 0, 0
+        line = Line(words[0], 0, 0, w_list[0])
+        line_lst = [line]
+        # Greedy wrapping: append to the current line until it would overflow.
+        for word, wl in zip(words[1:], w_list[1:]):
+            added_len = int(space_l + wl + line.length)
+            if added_len > base_length:
+                pos_y += line_height
+                line = Line(word, 0, pos_y, wl)
+                line_lst.append(line)
+            else:
+                line.text = line.text + ' ' + word
+                line.length = added_len
+        last_line = line_lst[-1]
+        canvas_h = last_line.pos_y + line_height
+        canvas_w = int(base_length)
+
+        font_color = (0, 0, 0)
+        stroke_color = (255, 255, 255)
+        # From here on `img` is rebound to the per-block canvas; the input
+        # array was already captured in `pilimg` above.
+        img = Image.new('RGBA', (canvas_w, canvas_h), color = (0, 0, 0, 0))
+        d = ImageDraw.Draw(img)
+        d.fontmode = 'L'
+
+        for line in line_lst:
+            pos_x = int((base_length - line.length) / 2) if align_center else 0
+            d.text((pos_x, line.pos_y), line.text, font=font, fill=font_color, stroke_width=sw, stroke_fill=stroke_color)
+
+        # Angles of 3 degrees or less are drawn unrotated.
+        if abs(blk.angle) > 3:
+            img = img.rotate(-blk.angle, expand=True)
+        im_w, im_h = img.size
+        # Shrink only (never enlarge) so the text fits the box within size_tol.
+        scale = min(bh / im_h * size_tol, bw / im_w * size_tol)
+        if scale < 1:
+            img = img.resize((int(im_w*scale), int(im_h*scale)))
+
+        im_w, im_h = img.size
+        # Centre the rendered text on the box centre.
+        paste_x, paste_y = int(cx - im_w / 2), int(cy - im_h / 2)
+        pilimg.paste(img, (paste_x, paste_y), mask=img)
+
+    return np.array(pilimg)
diff --git a/textblockdetector/textblock.py b/textblockdetector/textblock.py
@@ -131,7 +131,7 @@ def min_rect(self, rotate_back=True):
         min_bbox = np.array([[min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]])
         if angled and rotate_back:
             min_bbox = rotate_polygons(center, min_bbox, -self.angle)
-        return min_bbox.reshape(-1, 4, 2)
+        return min_bbox.reshape(-1, 4, 2).astype(np.int64)
 
     # equivalent to qt's boundingRect, ignore angle
     def bounding_rect(self):
@@ -365,8 +365,8 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.3, distanc
     blk.lines.append(blk2.lines[0])
     blk.vec = vec_sum
     blk.angle = int(round(np.rad2deg(math.atan2(vec_sum[1], vec_sum[0]))))
-    if blk.vertical:
-        blk.angle -= 90
+    # if blk.vertical:
+    #     blk.angle -= 90
     blk.norm = np.linalg.norm(vec_sum)
     blk.distance = np.append(blk.distance, blk2.distance[-1])
     blk.font_size = fntsz_avg

diff --git a/translate_demo.py b/translate_demo.py
@@ -36,6 +36,8 @@
 parser.add_argument('--target-lang', default='CHS', type=str, help='destination language')
 parser.add_argument('--use-ctd', action='store_true', help='use comic-text-detector for text detection')
 parser.add_argument('--verbose', action='store_true', help='print debug info and save intermediate images')
+parser.add_argument('--manga2eng', action='store_true', help='render English text translated from manga with some typesetting')
+parser.add_argument('--eng-font', default='fonts/comic shanns 2.ttf', type=str, help='font used by manga2eng mode')
 args = parser.parse_args()
 
 def update_state(task_id, nonce, state) :
@@ -181,11 +183,15 @@ async def infer(
 		if mode == 'web' and task_id :
 			update_state(task_id, nonce, 'render')
 		# render translated texts
-		if detector == 'ctd' :
-			from text_rendering import dispatch_ctd_render
-			output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite)
+		if args.target_lang == 'ENG' and args.manga2eng:
+			from text_rendering import dispatch_eng_render
+			output = await dispatch_eng_render(np.copy(img_inpainted), text_regions, translated_sentences, args.eng_font)
 		else:
-			output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite)
+			if detector == 'ctd' :
+				from text_rendering import dispatch_ctd_render
+				output = await dispatch_ctd_render(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, text_regions, render_text_direction_overwrite)
+			else:
+				output = await dispatch_rendering(np.copy(img_inpainted), args.text_mag_ratio, translated_sentences, textlines, text_regions, render_text_direction_overwrite)
 
 		print(' -- Saving results')
 		if alpha_ch is not None :