
Commit

Merge branch 'main' of github.com:zyddnys/manga-image-translator
zyddnys committed Nov 28, 2023
2 parents c09a8e8 + be068d1 commit 5e2c7aa
Showing 35 changed files with 80 additions and 80 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -413,7 +413,7 @@ THA: Thai
 --selective-translation SELECTIVE_TRANSLATION
 Select a translator based on detected language in
 image. Note the first translation service acts as
-default if the language isnt defined. Example:
+default if the language isn't defined. Example:
 --translator-chain "google:JPN;sugoi:ENG".
 --revert-upscaling Downscales the previously upscaled image after
 translation back to original size (Use with --upscale-
@@ -589,7 +589,7 @@ A list of what needs to be done next, you're welcome to contribute.
 This works for images without speech bubbles, but making it impossible to decide where to put translated English text. I have no idea how to solve this.
 4. [Ryota et al.](https://arxiv.org/abs/2012.14271) proposed using multimodal machine translation, maybe we can add ViT features for building custom NMT models.
 5. Make this project works for video(rewrite code in C++ and use GPU/other hardware NN accelerator).\
-Used for detecting hard subtitles in videos, generting ass file and remove them completetly.
+Used for detecting hard subtitles in videos, generating ass file and remove them completely.
 6. ~~Mask refinement based using non deep learning algorithms, I am currently testing out CRF based algorithm.~~
 7. ~~Angled text region merge is not currently supported~~
 8. Create pip repository
2 changes: 1 addition & 1 deletion README_CN.md
@@ -145,7 +145,7 @@ THA: Thai
 --selective-translation SELECTIVE_TRANSLATION
 Select a translator based on detected language in
 image. Note the first translation service acts as
-default if the language isnt defined. Example:
+default if the language isn't defined. Example:
 --translator-chain "google:JPN;sugoi:ENG".
 --revert-upscaling Downscales the previously upscaled image after
 translation back to original size (Use with --upscale-
4 changes: 2 additions & 2 deletions manga_translator/args.py
@@ -42,7 +42,7 @@ def dir_path(string):
 return s

 # def choice_chain(choices):
-# """Argument type for string chains from choices seperated by ':'. Example: 'choice1:choice2:choice3'"""
+# """Argument type for string chains from choices separated by ':'. Example: 'choice1:choice2:choice3'"""
 # def _func(string):
 # if choices is not None:
 # for s in string.split(':') or ['']:
@@ -113,7 +113,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str:
 g = parser.add_mutually_exclusive_group()
 g.add_argument('--translator', default='google', type=str, choices=TRANSLATORS, help='Language translator to use')
 g.add_argument('--translator-chain', default=None, type=translator_chain, help='Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".')
-g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isnt defined. Example: --translator-chain "google:JPN;sugoi:ENG".')
+g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. Example: --translator-chain "google:JPN;sugoi:ENG".')

 parser.add_argument('--revert-upscaling', action='store_true', help='Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).')
 parser.add_argument('--detection-size', default=1536, type=int, help='Size of image used for detection')
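Editor's note: the three flags above sit in an argparse mutually exclusive group, so passing any two of them together is rejected at parse time. A minimal runnable sketch of the same pattern (standalone; plain str stands in for the repo's translator_chain type):

    import argparse

    parser = argparse.ArgumentParser()
    g = parser.add_mutually_exclusive_group()
    g.add_argument('--translator', default='google', type=str)
    g.add_argument('--translator-chain', default=None, type=str)       # e.g. "google:JPN;sugoi:ENG"
    g.add_argument('--selective-translation', default=None, type=str)  # per-detected-language choice

    args = parser.parse_args(['--translator-chain', 'google:JPN;sugoi:ENG'])  # accepted
    # parser.parse_args(['--translator', 'google', '--translator-chain', 'x:y'])  # error: mutually exclusive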
6 changes: 3 additions & 3 deletions manga_translator/detection/common.py
@@ -28,8 +28,8 @@ async def detect(self, image: np.ndarray, detect_size: int, text_threshold: floa
 self.logger.debug('Adding border')
 image = self._add_border(image, minimum_image_size)
 if invert:
-self.logger.debug('Adding invertion')
-image = self._add_invertion(image)
+self.logger.debug('Adding inversion')
+image = self._add_inversion(image)
 if gamma_correct:
 self.logger.debug('Adding gamma correction')
 image = self._add_gamma_correction(image)
@@ -112,7 +112,7 @@ def _remove_rotation(self, textlines, raw_mask, mask, img_w, img_h):
 textlines[i] = Quadrilateral(rotated_pts, txtln.text, txtln.prob)
 return textlines, raw_mask, mask

-def _add_invertion(self, image: np.ndarray):
+def _add_inversion(self, image: np.ndarray):
 return cv2.bitwise_not(image)

 def _add_gamma_correction(self, image: np.ndarray):
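Editor's note: the renamed helper simply inverts pixel intensities (cv2.bitwise_not computes 255 - v for uint8), which lets the detector handle light-on-dark text. A one-line demonstration:

    import numpy as np
    import cv2

    img = np.array([[0, 128, 255]], dtype=np.uint8)
    print(cv2.bitwise_not(img))  # [[255 127   0]]; each uint8 value v becomes 255 - v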
8 changes: 4 additions & 4 deletions manga_translator/detection/ctd_utils/textmask.py
@@ -98,9 +98,9 @@ def merge_mask_list(mask_list, pred_mask, blk: Quadrilateral = None, pred_thresh
 continue
 x1, y1, x2, y2 = x, y, x+w, y+h
 label_local = labels[y1: y2, x1: x2]
-label_cordinates = np.where(label_local==label_index)
+label_coordinates = np.where(label_local==label_index)
 tmp_merged = np.zeros_like(label_local, np.uint8)
-tmp_merged[label_cordinates] = 255
+tmp_merged[label_coordinates] = 255
 tmp_merged = cv2.bitwise_or(mask_merged[y1: y2, x1: x2], tmp_merged)
 xor_merged = cv2.bitwise_xor(tmp_merged, pred_mask[y1: y2, x1: x2]).sum()
 xor_origin = cv2.bitwise_xor(mask_merged[y1: y2, x1: x2], pred_mask[y1: y2, x1: x2]).sum()
@@ -121,9 +121,9 @@ def merge_mask_list(mask_list, pred_mask, blk: Quadrilateral = None, pred_thresh
 if area < area_thresh:
 x1, y1, x2, y2 = x, y, x+w, y+h
 label_local = labels[y1: y2, x1: x2]
-label_cordinates = np.where(label_local==label_index)
+label_coordinates = np.where(label_local==label_index)
 tmp_merged = np.zeros_like(label_local, np.uint8)
-tmp_merged[label_cordinates] = 255
+tmp_merged[label_coordinates] = 255
 tmp_merged = cv2.bitwise_or(mask_merged[y1: y2, x1: x2], tmp_merged)
 xor_merged = cv2.bitwise_xor(tmp_merged, pred_mask[y1: y2, x1: x2]).sum()
 xor_origin = cv2.bitwise_xor(mask_merged[y1: y2, x1: x2], pred_mask[y1: y2, x1: x2]).sum()
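Editor's note: around these lines, merge_mask_list accepts a connected component only if OR-ing it into the running mask reduces the XOR disagreement with the network's predicted mask. A self-contained sketch of that acceptance test (hypothetical function name, same idea):

    import numpy as np
    import cv2

    def try_merge_component(mask_merged, component, pred_mask):
        # Tentatively OR the component into the accumulated mask...
        tmp_merged = cv2.bitwise_or(mask_merged, component)
        # ...and keep it only if disagreement with the prediction shrinks.
        xor_merged = cv2.bitwise_xor(tmp_merged, pred_mask).sum()
        xor_origin = cv2.bitwise_xor(mask_merged, pred_mask).sum()
        return tmp_merged if xor_merged < xor_origin else mask_merged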
18 changes: 9 additions & 9 deletions manga_translator/detection/ctd_utils/utils/db_utils.py
@@ -49,7 +49,7 @@ def __call__(self, batch, pred, is_output_polygon=False, height=None, width=None
 pred:
 binary: text region segmentation map, with shape (N, H, W)
 thresh: [if exists] thresh hold prediction with shape (N, H, W)
-thresh_binary: [if exists] binarized with threshhold, (N, H, W)
+thresh_binary: [if exists] binarized with threshold, (N, H, W)
 '''
 pred = pred[:, 0, :, :]
 segmentation = self.binarize(pred)
@@ -522,12 +522,12 @@ def shrink_polygon_pyclipper(polygon, shrink_ratio):
 subject = [tuple(l) for l in polygon]
 padding = pyclipper.PyclipperOffset()
 padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-shrinked = padding.Execute(-distance)
-if shrinked == []:
-shrinked = np.array(shrinked)
+shrunk = padding.Execute(-distance)
+if shrunk == []:
+shrunk = np.array(shrunk)
 else:
-shrinked = np.array(shrinked[0]).reshape(-1, 2)
-return shrinked
+shrunk = np.array(shrunk[0]).reshape(-1, 2)
+return shrunk

 class MakeShrinkMap():
 r'''
@@ -563,12 +563,12 @@ def __call__(self, data: dict) -> dict:
 cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
 ignore_tags[i] = True
 else:
-shrinked = self.shrink_func(polygon, self.shrink_ratio)
-if shrinked.size == 0:
+shrunk = self.shrink_func(polygon, self.shrink_ratio)
+if shrunk.size == 0:
 cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
 ignore_tags[i] = True
 continue
-cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+cv2.fillPoly(gt, [shrunk.astype(np.int32)], 1)

 data['shrink_map'] = gt
 data['shrink_mask'] = mask
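Editor's note: shrink_polygon_pyclipper offsets a polygon inward, the trick DBNet-style detectors use to build shrink maps for training. A minimal runnable sketch of the same pyclipper calls (fixed offset distance; the real code derives it from the polygon's area, perimeter, and shrink_ratio):

    import numpy as np
    import pyclipper

    polygon = [(0, 0), (100, 0), (100, 100), (0, 100)]
    padding = pyclipper.PyclipperOffset()
    padding.AddPath(polygon, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    shrunk = padding.Execute(-10)              # negative distance offsets inward
    print(np.array(shrunk[0]).reshape(-1, 2))  # approximately the square (10,10)..(90,90)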
6 changes: 3 additions & 3 deletions manga_translator/detection/default_utils/craft_utils.py
@@ -8,12 +8,12 @@
 import cv2
 import math

-""" auxilary functions """
+""" auxiliary functions """
 # unwarp corodinates
 def warpCoord(Minv, pt):
 out = np.matmul(Minv, (pt[0], pt[1], 1))
 return np.array([out[0]/out[2], out[1]/out[2]])
-""" end of auxilary functions """
+""" end of auxiliary functions """


 def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
@@ -168,7 +168,7 @@ def getPoly_core(boxes, labels, mapper, linkmap):
 # calc median maximum of pivot points
 half_char_h = np.median(seg_height) * expand_ratio / 2

-# calc gradiant and apply to make horizontal pivots
+# calc gradient and apply to make horizontal pivots
 new_pp = []
 for i, (x, cy) in enumerate(pp):
 dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
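Editor's note: warpCoord undoes a perspective warp for a single point by multiplying with the inverse matrix in homogeneous coordinates, then dividing by the last component. A standalone check with assumed toy corner points:

    import numpy as np
    import cv2

    src = np.float32([[0, 0], [100, 0], [100, 100], [0, 100]])
    dst = np.float32([[10, 5], [110, 0], [105, 95], [0, 100]])
    Minv = np.linalg.inv(cv2.getPerspectiveTransform(src, dst))

    def warpCoord(Minv, pt):
        out = np.matmul(Minv, (pt[0], pt[1], 1))
        return np.array([out[0] / out[2], out[1] / out[2]])

    print(warpCoord(Minv, (10, 5)))  # approximately [0, 0], the original corner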
2 changes: 1 addition & 1 deletion manga_translator/detection/default_utils/dbnet_utils.py
@@ -25,7 +25,7 @@ def __call__(self, batch, pred, is_output_polygon=False):
 pred:
 binary: text region segmentation map, with shape (N, H, W)
 thresh: [if exists] thresh hold prediction with shape (N, H, W)
-thresh_binary: [if exists] binarized with threshhold, (N, H, W)
+thresh_binary: [if exists] binarized with threshold, (N, H, W)
 '''
 pred = pred[:, 0, :, :]
 segmentation = self.binarize(pred)
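Editor's note: for reference, binarize in this DBNet-style post-processing is conventionally a fixed threshold on the probability map; a hedged sketch (threshold value assumed):

    import numpy as np

    def binarize(pred: np.ndarray, thresh: float = 0.3) -> np.ndarray:
        # (N, H, W) probability map to boolean segmentation map
        return pred > thresh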
2 changes: 1 addition & 1 deletion manga_translator/inpainting/inpainting_attn.py
@@ -138,7 +138,7 @@ class GlobalAttention(nn.Module):

 def __init__(self, in_dim):
 super(GlobalAttention, self).__init__()
-self.chanel_in = in_dim
+self.channel_in = in_dim

 self.query_conv = ScaledWSConv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
 self.key_conv = ScaledWSConv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
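Editor's note: GlobalAttention's 1x1 convolutions produce per-pixel query and key maps for self-attention over all spatial positions. A minimal sketch of that pattern, with plain nn.Conv2d standing in for the repo's ScaledWSConv2d:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class TinyGlobalAttention(nn.Module):
        def __init__(self, in_dim: int):
            super().__init__()
            self.query_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)
            self.key_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)
            self.value_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            n, c, h, w = x.shape
            q = self.query_conv(x).flatten(2).transpose(1, 2)  # (N, HW, C)
            k = self.key_conv(x).flatten(2)                    # (N, C, HW)
            v = self.value_conv(x).flatten(2).transpose(1, 2)  # (N, HW, C)
            attn = F.softmax(q @ k / c ** 0.5, dim=-1)         # attention over positions
            return (attn @ v).transpose(1, 2).reshape(n, c, h, w)

    out = TinyGlobalAttention(8)(torch.randn(1, 8, 16, 16))    # same shape as the input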
6 changes: 3 additions & 3 deletions manga_translator/inpainting/inpainting_lama.py
@@ -33,7 +33,7 @@ async def _load(self, device: str):
 self.model = self.model.cuda()


-class DepthWiseSeperableConv(nn.Module):
+class DepthWiseSeparableConv(nn.Module):
 def __init__(self, in_dim, out_dim, *args, **kwargs):
 super().__init__()
 if 'groups' in kwargs:
@@ -91,7 +91,7 @@ def __init__(self, in_dim, out_dim, kernel_size, dilation_num=3, comb_mode='sum'
 self.cat_in = False
 self.in_dims = [in_dim] * dilation_num

-conv_type = DepthWiseSeperableConv if use_depthwise else nn.Conv2d
+conv_type = DepthWiseSeparableConv if use_depthwise else nn.Conv2d
 dilation = min_dilation
 for i in range(dilation_num):
 if isinstance(padding, int):
@@ -158,7 +158,7 @@ def get_conv_block_ctor(kind='default'):
 if kind == 'default':
 return nn.Conv2d
 if kind == 'depthwise':
-return DepthWiseSeperableConv
+return DepthWiseSeparableConv
 if kind == 'multidilated':
 return MultidilatedConv
 raise ValueError(f'Unknown convolutional block kind {kind}')
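Editor's note: a depthwise separable convolution splits a standard convolution into a per-channel (depthwise, groups=in_dim) pass followed by a 1x1 pointwise mix, which is the idea behind the renamed class. A minimal sketch:

    import torch
    import torch.nn as nn

    class DepthWiseSeparableConv(nn.Module):
        def __init__(self, in_dim: int, out_dim: int, kernel_size: int = 3, padding: int = 1):
            super().__init__()
            # one filter per input channel (depthwise)
            self.depthwise = nn.Conv2d(in_dim, in_dim, kernel_size, padding=padding, groups=in_dim)
            # 1x1 convolution mixes channels (pointwise)
            self.pointwise = nn.Conv2d(in_dim, out_dim, kernel_size=1)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.pointwise(self.depthwise(x))

    y = DepthWiseSeparableConv(8, 16)(torch.randn(1, 8, 32, 32))  # -> (1, 16, 32, 32)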
2 changes: 1 addition & 1 deletion manga_translator/inpainting/ldm/models/diffusion/plms.py
@@ -233,7 +233,7 @@ def get_x_prev_and_pred_x0(e_t, index):
 # 2nd order Pseudo Linear Multistep (Adams-Bashforth)
 e_t_prime = (3 * e_t - old_eps[-1]) / 2
 elif len(old_eps) == 2:
-# 3nd order Pseudo Linear Multistep (Adams-Bashforth)
+# 3rd order Pseudo Linear Multistep (Adams-Bashforth)
 e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
 elif len(old_eps) >= 3:
 # 4nd order Pseudo Linear Multistep (Adams-Bashforth)
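Editor's note: these branches apply standard Adams-Bashforth extrapolation weights to the sampler's noise estimates; the 4th-order case, truncated above, conventionally uses the (55, -59, 37, -9)/24 coefficients. A hedged sketch of the ladder (the real sampler's first step uses a separate 2nd-order kickoff not shown here):

    def plms_e_t_prime(e_t, old_eps):
        if len(old_eps) == 1:   # 2nd order
            return (3 * e_t - old_eps[-1]) / 2
        if len(old_eps) == 2:   # 3rd order
            return (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
        # 4th order (assumed standard Adams-Bashforth coefficients)
        return (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24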
6 changes: 3 additions & 3 deletions manga_translator/inpainting/ldm/modules/attention.py
@@ -12,9 +12,9 @@
 try:
 import xformers
 import xformers.ops
-XFORMERS_IS_AVAILBLE = False
+XFORMERS_IS_AVAILABLE = False
 except ImportError:
-XFORMERS_IS_AVAILBLE = False
+XFORMERS_IS_AVAILABLE = False

 # CrossAttn precision handling
 import os
@@ -251,7 +251,7 @@ class BasicTransformerBlock(nn.Module):
 def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True,
 disable_self_attn=False):
 super().__init__()
-attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILBLE else "softmax"
+attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILABLE else "softmax"
 assert attn_mode in self.ATTENTION_MODES
 attn_cls = self.ATTENTION_MODES[attn_mode]
 self.disable_self_attn = disable_self_attn
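Editor's note: this fork pins the flag to False in both branches, keeping the xformers path disabled even when the import succeeds. For comparison, the usual shape of the optional-dependency idiom:

    try:
        import xformers
        import xformers.ops
        XFORMERS_IS_AVAILABLE = True   # upstream ldm enables it here; this fork forces False
    except ImportError:
        XFORMERS_IS_AVAILABLE = False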
@@ -11,9 +11,9 @@
 try:
 import xformers
 import xformers.ops
-XFORMERS_IS_AVAILBLE = False
+XFORMERS_IS_AVAILABLE = False
 except:
-XFORMERS_IS_AVAILBLE = False
+XFORMERS_IS_AVAILABLE = False
 print("No module 'xformers'. Proceeding without it.")

@@ -279,7 +279,7 @@ def forward(self, x, context=None, mask=None):

 def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
 assert attn_type in ["vanilla", "vanilla-xformers", "memory-efficient-cross-attn", "linear", "none"], f'attn_type {attn_type} unknown'
-if XFORMERS_IS_AVAILBLE and attn_type == "vanilla":
+if XFORMERS_IS_AVAILABLE and attn_type == "vanilla":
 attn_type = "vanilla-xformers"
 print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
 if attn_type == "vanilla":
@@ -345,7 +345,7 @@ def count_flops_attn(model, _x, y):

 class QKVAttentionLegacy(nn.Module):
 """
-A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping
+A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
 """

 def __init__(self, n_heads):
4 changes: 2 additions & 2 deletions manga_translator/inpainting/ldm/modules/ema.py
@@ -3,14 +3,14 @@


 class LitEma(nn.Module):
-def __init__(self, model, decay=0.9999, use_num_upates=True):
+def __init__(self, model, decay=0.9999, use_num_updates=True):
 super().__init__()
 if decay < 0.0 or decay > 1.0:
 raise ValueError('Decay must be between 0 and 1')

 self.m_name2s_name = {}
 self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32))
-self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates
+self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates
 else torch.tensor(-1, dtype=torch.int))

 for name, p in model.named_parameters():
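Editor's note: LitEma tracks exponential moving averages of the model weights; with use_num_updates enabled, the effective decay is warmed up so early averages are not dominated by the random init. A hedged sketch of the warm-up rule used by upstream LitEma:

    def ema_update(shadow: float, param: float, decay: float, num_updates: int) -> float:
        # warm-up: effective decay grows toward `decay` as updates accumulate
        decay_eff = min(decay, (1 + num_updates) / (10 + num_updates))
        return decay_eff * shadow + (1.0 - decay_eff) * param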
@@ -170,7 +170,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var
 [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1]))
 Z = np.stack([X, Y], 2)[:, :, :, None]

-# Calcualte Gaussian for every pixel of the kernel
+# Calculate Gaussian for every pixel of the kernel
 ZZ = Z - MU
 ZZ_t = ZZ.transpose(0, 1, 3, 2)
 raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise)
@@ -613,7 +613,7 @@ def degradation_bsrgan_variant(image, sf=4, isp_model=None):
 return example


-# TODO incase there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc...
+# TODO in case there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc...
 def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True, lq_patchsize=64, isp_model=None):
 """
 This is an extended degradation model by combining
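Editor's note: gen_kernel evaluates a 2-D Gaussian at every kernel pixel through the quadratic form (z - mu)^T Sigma^(-1) (z - mu), exactly the ZZ_t @ INV_SIGMA @ ZZ product above. A compact runnable sketch with assumed toy parameters (isotropic covariance, no noise term):

    import numpy as np

    k = 15
    X, Y = np.meshgrid(range(k), range(k))
    Z = np.stack([X, Y], 2)[:, :, :, None]          # (k, k, 2, 1) pixel coordinates
    MU = np.array([k // 2, k // 2]).reshape(1, 1, 2, 1)
    INV_SIGMA = np.linalg.inv(np.diag([4.0, 4.0]))  # Sigma = diag(4, 4)

    ZZ = Z - MU
    ZZ_t = ZZ.transpose(0, 1, 3, 2)                 # (k, k, 1, 2)
    raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ))
    kernel = raw_kernel / raw_kernel.sum()          # normalize to sum to 1
    print(kernel.shape, round(kernel.sum(), 6))     # (15, 15) 1.0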
@@ -169,7 +169,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var
 [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1]))
 Z = np.stack([X, Y], 2)[:, :, :, None]

-# Calcualte Gaussian for every pixel of the kernel
+# Calculate Gaussian for every pixel of the kernel
 ZZ = Z - MU
 ZZ_t = ZZ.transpose(0, 1, 3, 2)
 raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise)
@@ -59,7 +59,7 @@ def surf(Z, cmap='rainbow', figsize=None):

 '''
 # --------------------------------------------
-# get image pathes
+# get image paths
 # --------------------------------------------
 '''

@@ -122,14 +122,14 @@ def imssave(imgs, img_path):
 cv2.imwrite(new_path, img)


-def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000):
+def split_imageset(original_dataroot, target_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000):
 """
 split the large images from original_dataroot into small overlapped images with size (p_size)x(p_size),
-and save them into taget_dataroot; only the images with larger size than (p_max)x(p_max)
-will be splitted.
+and save them into target_dataroot; only the images with larger size than (p_max)x(p_max)
+will be split.
 Args:
 original_dataroot:
-taget_dataroot:
+target_dataroot:
 p_size: size of small images
 p_overlap: patch size in training is a good choice
 p_max: images with smaller size than (p_max)x(p_max) keep unchanged.
@@ -139,8 +139,8 @@ def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800,
 # img_name, ext = os.path.splitext(os.path.basename(img_path))
 img = imread_uint(img_path, n_channels=n_channels)
 patches = patches_from_image(img, p_size, p_overlap, p_max)
-imssave(patches, os.path.join(taget_dataroot,os.path.basename(img_path)))
-#if original_dataroot == taget_dataroot:
+imssave(patches, os.path.join(target_dataroot,os.path.basename(img_path)))
+#if original_dataroot == target_dataroot:
 #del img_path

 '''
@@ -180,7 +180,7 @@ def mkdir_and_rename(path):


 # --------------------------------------------
-# get uint8 image of size HxWxn_channles (RGB)
+# get uint8 image of size HxWxn_channels (RGB)
 # --------------------------------------------
 def imread_uint(path, n_channels=3):
 # input: path
@@ -215,7 +215,7 @@ def imwrite(img, img_path):


 # --------------------------------------------
-# get single image of size HxWxn_channles (BGR)
+# get single image of size HxWxn_channels (BGR)
 # --------------------------------------------
 def read_img(path):
 # read image by cv2
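Editor's note: split_imageset saves large images as overlapping p_size x p_size tiles. A self-contained sketch of the overlap-tiling idea (hypothetical helper, not the repo's patches_from_image):

    import numpy as np

    def tile_with_overlap(img: np.ndarray, p_size: int = 800, p_overlap: int = 96):
        h, w = img.shape[:2]
        step = p_size - p_overlap
        patches = []
        for y in range(0, max(h - p_overlap, 1), step):
            for x in range(0, max(w - p_overlap, 1), step):
                # clamp so edge tiles stay fully inside the image
                y0 = min(y, max(h - p_size, 0))
                x0 = min(x, max(w - p_size, 0))
                patches.append(img[y0:y0 + p_size, x0:x0 + p_size])
        return patches

    print(len(tile_with_overlap(np.zeros((1000, 1600, 3)))))  # 6 tiles of 800x800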
@@ -125,7 +125,7 @@ def get_size(self, width, height):
 # fit height
 scale_width = scale_height
 elif self.__resize_method == "minimal":
-# scale as least as possbile
+# scale as least as possible
 if abs(1 - scale_width) < abs(1 - scale_height):
 # fit width
 scale_height = scale_width
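Editor's note: the "minimal" method keeps whichever axis scale deviates least from 1.0, so the image is resized as little as possible while preserving aspect. The decision in isolation (simplified from the surrounding method):

    def minimal_scale(scale_width: float, scale_height: float):
        if abs(1 - scale_width) < abs(1 - scale_height):
            scale_height = scale_width   # fit width
        else:
            scale_width = scale_height   # fit height
        return scale_width, scale_height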
2 changes: 1 addition & 1 deletion manga_translator/inpainting/ldm/util.py
@@ -23,7 +23,7 @@ def log_txt_as_img(wh, xc, size=10):
 try:
 draw.text((0, 0), lines, fill="black", font=font)
 except UnicodeEncodeError:
-print("Cant encode string for logging. Skipping.")
+print("Can't encode string for logging. Skipping.")

 txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0
 txts.append(txt)
4 changes: 2 additions & 2 deletions manga_translator/manga_translator.py
@@ -23,7 +23,7 @@
 from .args import DEFAULT_ARGS, translator_chain
 from .utils import (
 BASE_PATH,
-LANGAUGE_ORIENTATION_PRESETS,
+LANGUAGE_ORIENTATION_PRESETS,
 ModelWrapper,
 Context,
 PriorityLock,
@@ -543,7 +543,7 @@ async def _run_text_rendering(self, ctx: Context):
 if ctx.renderer == 'none':
 output = ctx.img_inpainted
 # manga2eng currently only supports horizontal left to right rendering
-elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGAUGE_ORIENTATION_PRESETS.get(
+elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get(
 ctx.text_regions[0].target_lang) == 'h':
 output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, ctx.font_path, ctx.line_spacing)
 else:
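Editor's note: the renamed constant is a language-to-orientation lookup; the manga2eng renderer is chosen only when the target language renders horizontally. A hedged sketch of the dispatch shape (illustrative entries and helper, not the repo's actual table):

    LANGUAGE_ORIENTATION_PRESETS = {'ENG': 'h', 'JPN': 'v'}  # illustrative entries

    def pick_renderer(renderer: str, target_lang: str) -> str:
        if renderer == 'manga2eng' and LANGUAGE_ORIENTATION_PRESETS.get(target_lang) == 'h':
            return 'manga2eng'  # horizontal left-to-right rendering only
        return 'default'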