diff --git a/README.md b/README.md index 539af5e7e..70c828966 100644 --- a/README.md +++ b/README.md @@ -413,7 +413,7 @@ THA: Thai --selective-translation SELECTIVE_TRANSLATION Select a translator based on detected language in image. Note the first translation service acts as - default if the language isnt defined. Example: + default if the language isn't defined. Example: --translator-chain "google:JPN;sugoi:ENG". --revert-upscaling Downscales the previously upscaled image after translation back to original size (Use with --upscale- @@ -589,7 +589,7 @@ A list of what needs to be done next, you're welcome to contribute. This works for images without speech bubbles, but making it impossible to decide where to put translated English text. I have no idea how to solve this. 4. [Ryota et al.](https://arxiv.org/abs/2012.14271) proposed using multimodal machine translation, maybe we can add ViT features for building custom NMT models. 5. Make this project works for video(rewrite code in C++ and use GPU/other hardware NN accelerator).\ - Used for detecting hard subtitles in videos, generting ass file and remove them completetly. + Used for detecting hard subtitles in videos, generating ass file and remove them completely. 6. ~~Mask refinement based using non deep learning algorithms, I am currently testing out CRF based algorithm.~~ 7. ~~Angled text region merge is not currently supported~~ 8. Create pip repository diff --git a/README_CN.md b/README_CN.md index e5a485519..79f1f4260 100644 --- a/README_CN.md +++ b/README_CN.md @@ -145,7 +145,7 @@ THA: Thai --selective-translation SELECTIVE_TRANSLATION Select a translator based on detected language in image. Note the first translation service acts as - default if the language isnt defined. Example: + default if the language isn't defined. Example: --translator-chain "google:JPN;sugoi:ENG". 
--revert-upscaling Downscales the previously upscaled image after translation back to original size (Use with --upscale- diff --git a/manga_translator/args.py b/manga_translator/args.py index e7841218e..dc549c1ec 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -42,7 +42,7 @@ def dir_path(string): return s # def choice_chain(choices): -# """Argument type for string chains from choices seperated by ':'. Example: 'choice1:choice2:choice3'""" +# """Argument type for string chains from choices separated by ':'. Example: 'choice1:choice2:choice3'""" # def _func(string): # if choices is not None: # for s in string.split(':') or ['']: @@ -113,7 +113,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: g = parser.add_mutually_exclusive_group() g.add_argument('--translator', default='google', type=str, choices=TRANSLATORS, help='Language translator to use') g.add_argument('--translator-chain', default=None, type=translator_chain, help='Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".') -g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isnt defined. Example: --translator-chain "google:JPN;sugoi:ENG".') +g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".') parser.add_argument('--revert-upscaling', action='store_true', help='Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).') parser.add_argument('--detection-size', default=1536, type=int, help='Size of image used for detection') diff --git a/manga_translator/detection/common.py b/manga_translator/detection/common.py index 3cb6fea33..a4b7f4f67 100644 --- a/manga_translator/detection/common.py +++ b/manga_translator/detection/common.py @@ -28,8 +28,8 @@ async def detect(self, image: np.ndarray, detect_size: int, text_threshold: floa self.logger.debug('Adding border') image = self._add_border(image, minimum_image_size) if invert: - self.logger.debug('Adding invertion') - image = self._add_invertion(image) + self.logger.debug('Adding inversion') + image = self._add_inversion(image) if gamma_correct: self.logger.debug('Adding gamma correction') image = self._add_gamma_correction(image) @@ -112,7 +112,7 @@ def _remove_rotation(self, textlines, raw_mask, mask, img_w, img_h): textlines[i] = Quadrilateral(rotated_pts, txtln.text, txtln.prob) return textlines, raw_mask, mask - def _add_invertion(self, image: np.ndarray): + def _add_inversion(self, image: np.ndarray): return cv2.bitwise_not(image) def _add_gamma_correction(self, image: np.ndarray): diff --git a/manga_translator/detection/ctd_utils/textmask.py b/manga_translator/detection/ctd_utils/textmask.py index ee388835a..ddadc0f2a 100644 --- a/manga_translator/detection/ctd_utils/textmask.py +++ b/manga_translator/detection/ctd_utils/textmask.py @@ -98,9 +98,9 @@ def merge_mask_list(mask_list, pred_mask, blk: Quadrilateral = None, pred_thresh continue x1, y1, x2, y2 = x, y, x+w, y+h label_local = labels[y1: y2, x1: x2] - label_cordinates = np.where(label_local==label_index) + label_coordinates = np.where(label_local==label_index) tmp_merged = np.zeros_like(label_local, np.uint8) - tmp_merged[label_cordinates] = 
255 + tmp_merged[label_coordinates] = 255 tmp_merged = cv2.bitwise_or(mask_merged[y1: y2, x1: x2], tmp_merged) xor_merged = cv2.bitwise_xor(tmp_merged, pred_mask[y1: y2, x1: x2]).sum() xor_origin = cv2.bitwise_xor(mask_merged[y1: y2, x1: x2], pred_mask[y1: y2, x1: x2]).sum() @@ -121,9 +121,9 @@ def merge_mask_list(mask_list, pred_mask, blk: Quadrilateral = None, pred_thresh if area < area_thresh: x1, y1, x2, y2 = x, y, x+w, y+h label_local = labels[y1: y2, x1: x2] - label_cordinates = np.where(label_local==label_index) + label_coordinates = np.where(label_local==label_index) tmp_merged = np.zeros_like(label_local, np.uint8) - tmp_merged[label_cordinates] = 255 + tmp_merged[label_coordinates] = 255 tmp_merged = cv2.bitwise_or(mask_merged[y1: y2, x1: x2], tmp_merged) xor_merged = cv2.bitwise_xor(tmp_merged, pred_mask[y1: y2, x1: x2]).sum() xor_origin = cv2.bitwise_xor(mask_merged[y1: y2, x1: x2], pred_mask[y1: y2, x1: x2]).sum() diff --git a/manga_translator/detection/ctd_utils/utils/db_utils.py b/manga_translator/detection/ctd_utils/utils/db_utils.py index 71db510d1..4631e43bf 100644 --- a/manga_translator/detection/ctd_utils/utils/db_utils.py +++ b/manga_translator/detection/ctd_utils/utils/db_utils.py @@ -49,7 +49,7 @@ def __call__(self, batch, pred, is_output_polygon=False, height=None, width=None pred: binary: text region segmentation map, with shape (N, H, W) thresh: [if exists] thresh hold prediction with shape (N, H, W) - thresh_binary: [if exists] binarized with threshhold, (N, H, W) + thresh_binary: [if exists] binarized with threshold, (N, H, W) ''' pred = pred[:, 0, :, :] segmentation = self.binarize(pred) @@ -522,12 +522,12 @@ def shrink_polygon_pyclipper(polygon, shrink_ratio): subject = [tuple(l) for l in polygon] padding = pyclipper.PyclipperOffset() padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - shrinked = padding.Execute(-distance) - if shrinked == []: - shrinked = np.array(shrinked) + shrunk = padding.Execute(-distance) 
+ if shrunk == []: + shrunk = np.array(shrunk) else: - shrinked = np.array(shrinked[0]).reshape(-1, 2) - return shrinked + shrunk = np.array(shrunk[0]).reshape(-1, 2) + return shrunk class MakeShrinkMap(): r''' @@ -563,12 +563,12 @@ def __call__(self, data: dict) -> dict: cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0) ignore_tags[i] = True else: - shrinked = self.shrink_func(polygon, self.shrink_ratio) - if shrinked.size == 0: + shrunk = self.shrink_func(polygon, self.shrink_ratio) + if shrunk.size == 0: cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0) ignore_tags[i] = True continue - cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1) + cv2.fillPoly(gt, [shrunk.astype(np.int32)], 1) data['shrink_map'] = gt data['shrink_mask'] = mask diff --git a/manga_translator/detection/default_utils/craft_utils.py b/manga_translator/detection/default_utils/craft_utils.py index 89107660c..1ff48f9f0 100644 --- a/manga_translator/detection/default_utils/craft_utils.py +++ b/manga_translator/detection/default_utils/craft_utils.py @@ -8,12 +8,12 @@ import cv2 import math -""" auxilary functions """ +""" auxiliary functions """ # unwarp corodinates def warpCoord(Minv, pt): out = np.matmul(Minv, (pt[0], pt[1], 1)) return np.array([out[0]/out[2], out[1]/out[2]]) -""" end of auxilary functions """ +""" end of auxiliary functions """ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text): @@ -168,7 +168,7 @@ def getPoly_core(boxes, labels, mapper, linkmap): # calc median maximum of pivot points half_char_h = np.median(seg_height) * expand_ratio / 2 - # calc gradiant and apply to make horizontal pivots + # calc gradient and apply to make horizontal pivots new_pp = [] for i, (x, cy) in enumerate(pp): dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0] diff --git a/manga_translator/detection/default_utils/dbnet_utils.py b/manga_translator/detection/default_utils/dbnet_utils.py index ba97d0398..6dba8a6cd 100644 --- 
a/manga_translator/detection/default_utils/dbnet_utils.py +++ b/manga_translator/detection/default_utils/dbnet_utils.py @@ -25,7 +25,7 @@ def __call__(self, batch, pred, is_output_polygon=False): pred: binary: text region segmentation map, with shape (N, H, W) thresh: [if exists] thresh hold prediction with shape (N, H, W) - thresh_binary: [if exists] binarized with threshhold, (N, H, W) + thresh_binary: [if exists] binarized with threshold, (N, H, W) ''' pred = pred[:, 0, :, :] segmentation = self.binarize(pred) diff --git a/manga_translator/inpainting/inpainting_attn.py b/manga_translator/inpainting/inpainting_attn.py index 735cb0d7c..87e4f2020 100644 --- a/manga_translator/inpainting/inpainting_attn.py +++ b/manga_translator/inpainting/inpainting_attn.py @@ -138,7 +138,7 @@ class GlobalAttention(nn.Module): def __init__(self, in_dim): super(GlobalAttention, self).__init__() - self.chanel_in = in_dim + self.channel_in = in_dim self.query_conv = ScaledWSConv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) self.key_conv = ScaledWSConv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) diff --git a/manga_translator/inpainting/inpainting_lama.py b/manga_translator/inpainting/inpainting_lama.py index 289863b08..e4291255c 100644 --- a/manga_translator/inpainting/inpainting_lama.py +++ b/manga_translator/inpainting/inpainting_lama.py @@ -33,7 +33,7 @@ async def _load(self, device: str): self.model = self.model.cuda() -class DepthWiseSeperableConv(nn.Module): +class DepthWiseSeparableConv(nn.Module): def __init__(self, in_dim, out_dim, *args, **kwargs): super().__init__() if 'groups' in kwargs: @@ -91,7 +91,7 @@ def __init__(self, in_dim, out_dim, kernel_size, dilation_num=3, comb_mode='sum' self.cat_in = False self.in_dims = [in_dim] * dilation_num - conv_type = DepthWiseSeperableConv if use_depthwise else nn.Conv2d + conv_type = DepthWiseSeparableConv if use_depthwise else nn.Conv2d dilation = min_dilation for i in range(dilation_num): if 
isinstance(padding, int): @@ -158,7 +158,7 @@ def get_conv_block_ctor(kind='default'): if kind == 'default': return nn.Conv2d if kind == 'depthwise': - return DepthWiseSeperableConv + return DepthWiseSeparableConv if kind == 'multidilated': return MultidilatedConv raise ValueError(f'Unknown convolutional block kind {kind}') diff --git a/manga_translator/inpainting/ldm/models/diffusion/plms.py b/manga_translator/inpainting/ldm/models/diffusion/plms.py index 7002a365d..ddb47f961 100644 --- a/manga_translator/inpainting/ldm/models/diffusion/plms.py +++ b/manga_translator/inpainting/ldm/models/diffusion/plms.py @@ -233,7 +233,7 @@ def get_x_prev_and_pred_x0(e_t, index): # 2nd order Pseudo Linear Multistep (Adams-Bashforth) e_t_prime = (3 * e_t - old_eps[-1]) / 2 elif len(old_eps) == 2: - # 3nd order Pseudo Linear Multistep (Adams-Bashforth) + # 3rd order Pseudo Linear Multistep (Adams-Bashforth) e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 elif len(old_eps) >= 3: # 4nd order Pseudo Linear Multistep (Adams-Bashforth) diff --git a/manga_translator/inpainting/ldm/modules/attention.py b/manga_translator/inpainting/ldm/modules/attention.py index 5bd4f8dfe..dd17e8249 100644 --- a/manga_translator/inpainting/ldm/modules/attention.py +++ b/manga_translator/inpainting/ldm/modules/attention.py @@ -12,9 +12,9 @@ try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False except ImportError: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False # CrossAttn precision handling import os @@ -251,7 +251,7 @@ class BasicTransformerBlock(nn.Module): def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True, disable_self_attn=False): super().__init__() - attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILBLE else "softmax" + attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILABLE else "softmax" assert attn_mode in self.ATTENTION_MODES attn_cls = 
self.ATTENTION_MODES[attn_mode] self.disable_self_attn = disable_self_attn diff --git a/manga_translator/inpainting/ldm/modules/diffusionmodules/model.py b/manga_translator/inpainting/ldm/modules/diffusionmodules/model.py index d2ecd42da..21872015e 100644 --- a/manga_translator/inpainting/ldm/modules/diffusionmodules/model.py +++ b/manga_translator/inpainting/ldm/modules/diffusionmodules/model.py @@ -11,9 +11,9 @@ try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False except: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False print("No module 'xformers'. Proceeding without it.") @@ -279,7 +279,7 @@ def forward(self, x, context=None, mask=None): def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None): assert attn_type in ["vanilla", "vanilla-xformers", "memory-efficient-cross-attn", "linear", "none"], f'attn_type {attn_type} unknown' - if XFORMERS_IS_AVAILBLE and attn_type == "vanilla": + if XFORMERS_IS_AVAILABLE and attn_type == "vanilla": attn_type = "vanilla-xformers" print(f"making attention of type '{attn_type}' with {in_channels} in_channels") if attn_type == "vanilla": diff --git a/manga_translator/inpainting/ldm/modules/diffusionmodules/openaimodel.py b/manga_translator/inpainting/ldm/modules/diffusionmodules/openaimodel.py index 76392045b..be0c6fd55 100644 --- a/manga_translator/inpainting/ldm/modules/diffusionmodules/openaimodel.py +++ b/manga_translator/inpainting/ldm/modules/diffusionmodules/openaimodel.py @@ -345,7 +345,7 @@ def count_flops_attn(model, _x, y): class QKVAttentionLegacy(nn.Module): """ - A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + A module which performs QKV attention. 
Matches legacy QKVAttention + input/output heads shaping """ def __init__(self, n_heads): diff --git a/manga_translator/inpainting/ldm/modules/ema.py b/manga_translator/inpainting/ldm/modules/ema.py index ca513a12e..ccf1f119d 100644 --- a/manga_translator/inpainting/ldm/modules/ema.py +++ b/manga_translator/inpainting/ldm/modules/ema.py @@ -3,14 +3,14 @@ class LitEma(nn.Module): - def __init__(self, model, decay=0.9999, use_num_upates=True): + def __init__(self, model, decay=0.9999, use_num_updates=True): super().__init__() if decay < 0.0 or decay > 1.0: raise ValueError('Decay must be between 0 and 1') self.m_name2s_name = {} self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) - self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates + self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates else torch.tensor(-1, dtype=torch.int)) for name, p in model.named_parameters(): diff --git a/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan.py b/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan.py index 7283f22e8..ebd8c4065 100644 --- a/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan.py +++ b/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan.py @@ -170,7 +170,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1])) Z = np.stack([X, Y], 2)[:, :, :, None] - # Calcualte Gaussian for every pixel of the kernel + # Calculate Gaussian for every pixel of the kernel ZZ = Z - MU ZZ_t = ZZ.transpose(0, 1, 3, 2) raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise) @@ -613,7 +613,7 @@ def degradation_bsrgan_variant(image, sf=4, isp_model=None): return example -# TODO incase there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc... 
+# TODO in case there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc... def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True, lq_patchsize=64, isp_model=None): """ This is an extended degradation model by combining diff --git a/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan_light.py b/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan_light.py index 4b79c57c0..7f93c6d0c 100644 --- a/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan_light.py +++ b/manga_translator/inpainting/ldm/modules/image_degradation/bsrgan_light.py @@ -169,7 +169,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1])) Z = np.stack([X, Y], 2)[:, :, :, None] - # Calcualte Gaussian for every pixel of the kernel + # Calculate Gaussian for every pixel of the kernel ZZ = Z - MU ZZ_t = ZZ.transpose(0, 1, 3, 2) raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise) diff --git a/manga_translator/inpainting/ldm/modules/image_degradation/utils_image.py b/manga_translator/inpainting/ldm/modules/image_degradation/utils_image.py index 2a7e43a60..154eecc96 100644 --- a/manga_translator/inpainting/ldm/modules/image_degradation/utils_image.py +++ b/manga_translator/inpainting/ldm/modules/image_degradation/utils_image.py @@ -59,7 +59,7 @@ def surf(Z, cmap='rainbow', figsize=None): ''' # -------------------------------------------- -# get image pathes +# get image paths # -------------------------------------------- ''' @@ -122,14 +122,14 @@ def imssave(imgs, img_path): cv2.imwrite(new_path, img) -def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000): +def split_imageset(original_dataroot, target_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000): """ split the large images from original_dataroot into small overlapped images with size 
(p_size)x(p_size), - and save them into taget_dataroot; only the images with larger size than (p_max)x(p_max) - will be splitted. + and save them into target_dataroot; only the images with larger size than (p_max)x(p_max) + will be split. Args: original_dataroot: - taget_dataroot: + target_dataroot: p_size: size of small images p_overlap: patch size in training is a good choice p_max: images with smaller size than (p_max)x(p_max) keep unchanged. @@ -139,8 +139,8 @@ def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800, # img_name, ext = os.path.splitext(os.path.basename(img_path)) img = imread_uint(img_path, n_channels=n_channels) patches = patches_from_image(img, p_size, p_overlap, p_max) - imssave(patches, os.path.join(taget_dataroot,os.path.basename(img_path))) - #if original_dataroot == taget_dataroot: + imssave(patches, os.path.join(target_dataroot,os.path.basename(img_path))) + #if original_dataroot == target_dataroot: #del img_path ''' @@ -180,7 +180,7 @@ def mkdir_and_rename(path): # -------------------------------------------- -# get uint8 image of size HxWxn_channles (RGB) +# get uint8 image of size HxWxn_channels (RGB) # -------------------------------------------- def imread_uint(path, n_channels=3): # input: path @@ -215,7 +215,7 @@ def imwrite(img, img_path): # -------------------------------------------- -# get single image of size HxWxn_channles (BGR) +# get single image of size HxWxn_channels (BGR) # -------------------------------------------- def read_img(path): # read image by cv2 diff --git a/manga_translator/inpainting/ldm/modules/midas/midas/transforms.py b/manga_translator/inpainting/ldm/modules/midas/midas/transforms.py index 37d68afce..7b50bc153 100644 --- a/manga_translator/inpainting/ldm/modules/midas/midas/transforms.py +++ b/manga_translator/inpainting/ldm/modules/midas/midas/transforms.py @@ -125,7 +125,7 @@ def get_size(self, width, height): # fit height scale_width = scale_height elif self.__resize_method == 
"minimal": - # scale as least as possbile + # scale as least as possible if abs(1 - scale_width) < abs(1 - scale_height): # fit width scale_height = scale_width diff --git a/manga_translator/inpainting/ldm/util.py b/manga_translator/inpainting/ldm/util.py index 047ee9e9b..5a5071690 100644 --- a/manga_translator/inpainting/ldm/util.py +++ b/manga_translator/inpainting/ldm/util.py @@ -23,7 +23,7 @@ def log_txt_as_img(wh, xc, size=10): try: draw.text((0, 0), lines, fill="black", font=font) except UnicodeEncodeError: - print("Cant encode string for logging. Skipping.") + print("Can't encode string for logging. Skipping.") txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 txts.append(txt) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 3e79fb9b3..266ad02fe 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -23,7 +23,7 @@ from .args import DEFAULT_ARGS, translator_chain from .utils import ( BASE_PATH, - LANGAUGE_ORIENTATION_PRESETS, + LANGUAGE_ORIENTATION_PRESETS, ModelWrapper, Context, PriorityLock, @@ -543,7 +543,7 @@ async def _run_text_rendering(self, ctx: Context): if ctx.renderer == 'none': output = ctx.img_inpainted # manga2eng currently only supports horizontal left to right rendering - elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGAUGE_ORIENTATION_PRESETS.get( + elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get( ctx.text_regions[0].target_lang) == 'h': output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, ctx.font_path, ctx.line_spacing) else: diff --git a/manga_translator/ocr/model_32px.py b/manga_translator/ocr/model_32px.py index c532cbcd3..4ca05c655 100644 --- a/manga_translator/ocr/model_32px.py +++ b/manga_translator/ocr/model_32px.py @@ -435,7 +435,7 @@ def next_token_batch( # S, N, E memory = torch.stack([memory[:, idx, :] for idx in [item.memory_idx for item in hyps]], dim 
= 1) for l, layer in enumerate(decoders.layers): - # TODO: keys and values are recomputed everytime + # TODO: keys and values are recomputed every time # L - 1, N, E combined_activations = torch.cat([item.cached_activations[l] for item in hyps], dim = 1) # L, N, E diff --git a/manga_translator/ocr/model_48px.py b/manga_translator/ocr/model_48px.py index f8b3d43bf..0fce5e8ba 100644 --- a/manga_translator/ocr/model_48px.py +++ b/manga_translator/ocr/model_48px.py @@ -462,7 +462,7 @@ def next_token_batch( # N, L, E memory = torch.stack([memory[idx, :, :] for idx in [item.memory_idx for item in hyps]], dim = 0) for l, layer in enumerate(decoders): - # TODO: keys and values are recomputed everytime + # TODO: keys and values are recomputed every time # N, L - 1, E combined_activations = torch.cat([item.cached_activations[l] for item in hyps], dim = 0) # N, L, E diff --git a/manga_translator/ocr/model_ocr_large.py b/manga_translator/ocr/model_ocr_large.py index 620e3238b..7ad958da5 100644 --- a/manga_translator/ocr/model_ocr_large.py +++ b/manga_translator/ocr/model_ocr_large.py @@ -307,7 +307,7 @@ def next_token_batch( # S, N, E memory = torch.stack([memory[:, idx, :] for idx in [item.memory_idx for item in hyps]], dim = 1) for l, layer in enumerate(decoders.layers): - # TODO: keys and values are recomputed everytime + # TODO: keys and values are recomputed every time # L - 1, N, E combined_activations = torch.cat([item.cached_activations[l] for item in hyps], dim = 1) # L, N, E diff --git a/manga_translator/rendering/gimp_render.py b/manga_translator/rendering/gimp_render.py index 57b91c362..9425b88e7 100644 --- a/manga_translator/rendering/gimp_render.py +++ b/manga_translator/rendering/gimp_render.py @@ -40,7 +40,7 @@ {angle} """ -save_tempaltes = { +save_templates = { "xcf": '( gimp-xcf-save RUN-NONINTERACTIVE image background_layer "{out_file}" "{out_file}" )', "psd": '( file-psd-save RUN-NONINTERACTIVE image background_layer "{out_file}" "{out_file}" 0 0 )', "pdf": 
'( file-pdf-save RUN-NONINTERACTIVE image background_layer "{out_file}" "{out_file}" TRUE TRUE TRUE )', @@ -125,7 +125,7 @@ def gimp_render(out_file, ctx: Context): text_init=text_init, text=text, extension=extension, - save=save_tempaltes[extension].format(out_file=out_file.replace("\\", "\\\\")), + save=save_templates[extension].format(out_file=out_file.replace("\\", "\\\\")), create_mask=( create_mask.format(mask_file=mask_file.replace("\\", "\\\\")) if ctx.gimp_mask is not None diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py index 3d7242a72..e15b7e0c9 100644 --- a/manga_translator/rendering/text_render.py +++ b/manga_translator/rendering/text_render.py @@ -62,7 +62,7 @@ } def CJK_Compatibility_Forms_translate(cdpt: str, direction: int): - """direction: 0 - horizonal, 1 - vertical""" + """direction: 0 - horizontal, 1 - vertical""" if cdpt == 'ー' and direction == 1: return 'ー', 90 if cdpt in CJK_V2H: @@ -468,7 +468,7 @@ def get_present_syllables(line_idx, word_pos): # Step 1: - # Arrange words without hyphenating unless neccessary + # Arrange words without hyphenating unless necessary i = 0 while True: @@ -510,7 +510,7 @@ def get_present_syllables(line_idx, word_pos): # Step 2: # Compare two adjacent lines and try to hyphenate backwards - # Avoid hyphenation if max_lines isnt fully used + # Avoid hyphenation if max_lines isn't fully used if hyphenate and len(line_words_list) > max_lines: line_idx = 0 while line_idx < len(line_words_list) - 1: @@ -635,7 +635,7 @@ def get_present_syllables(line_idx, word_pos): # print(line_text, get_string_width(font_size, line_text), line_width_list[i]) # assert(line_width_list[i] == get_string_width(font_size, line_text)) - # Shouldnt be needed but there is apparently still a bug somewhere (See #458) + # Shouldn't be needed but there is apparently still a bug somewhere (See #458) line_width_list[i] = get_string_width(font_size, line_text) line_text_list.append(line_text) diff 
--git a/manga_translator/rendering/text_render_eng.py b/manga_translator/rendering/text_render_eng.py index 991ff7d9a..909500250 100644 --- a/manga_translator/rendering/text_render_eng.py +++ b/manga_translator/rendering/text_render_eng.py @@ -391,7 +391,7 @@ def update_enlarged_xyxy(region): # Adjust enlarge ratios relative to each other to reduce intersections for region in text_regions: - # If it wasnt changed below already + # If it wasn't changed below already if region.enlarge_ratio == 1: # The larger the aspect ratio the more it should try to enlarge the bubble region.enlarge_ratio = min(max(region.xywh[2] / region.xywh[3], region.xywh[3] / region.xywh[2]) * 1.5, 3) diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py index fb22918af..15f4ce21b 100644 --- a/manga_translator/server/web_main.py +++ b/manga_translator/server/web_main.py @@ -561,7 +561,7 @@ async def dispatch(host: str, port: int, nonce: str = None, translation_params: while True: await asyncio.sleep(1) - # Restart client if OOM or similar errors occured + # Restart client if OOM or similar errors occurred if client_process.poll() is not None: # if client_process.poll() == 0: # break diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index 417b16511..d410da063 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -103,7 +103,7 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]): for i, query in enumerate(queries): prompt += f'\n<|{i+1-i_offset}|>{query}' - # If prompt is growing too large and theres still a lot of text left + # If prompt is growing too large and there's still a lot of text left # split off the rest of the queries into new prompts. 
# 1 token = ~4 characters according to https://platform.openai.com/tokenizer # TODO: potentially add summarizations from special requests as context information diff --git a/manga_translator/utils/bubble.py b/manga_translator/utils/bubble.py index b4b80e4b6..e5c694197 100644 --- a/manga_translator/utils/bubble.py +++ b/manga_translator/utils/bubble.py @@ -26,7 +26,7 @@ def check_color(image): def is_ignore(region_img, ignore_bubble = 0): """ Principle: Normally, white bubbles and their text boxes are mostly white, while black bubbles and their text boxes are mostly black. We calculate the ratio of white or black pixels around the text block to the total pixels, and judge whether the area is a normal bubble area or not. Based on the value of the --ignore-bubble parameter, if the ratio is greater than the base value and less than (100-base value), then it is considered a non-bubble area. - The normal range for ingore-bubble is 1-50, and other values are considered not input. The recommended value for ingore-bubble is 10. The smaller it is, the more likely it is to recognize normal bubbles as image text and skip them. The larger it is, the more likely it is to recognize image text as normal bubbles. + The normal range for ignore-bubble is 1-50, and other values are considered not input. The recommended value for ignore-bubble is 10. The smaller it is, the more likely it is to recognize normal bubbles as image text and skip them. The larger it is, the more likely it is to recognize image text as normal bubbles. Assuming ignore-bubble = 10 The text block is surrounded by white if it is <10, and the text block is very likely to be a normal white bubble. 
diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py index cd7a8ae8a..9ccc88e10 100644 --- a/manga_translator/utils/generic.py +++ b/manga_translator/utils/generic.py @@ -81,7 +81,7 @@ def repeating_sequence(s: str): def is_whitespace(ch): """Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically contorl characters but we treat them + # \t, \n, and \r are technically control characters but we treat them # as whitespace since they are generally considered as such. if ch == " " or ch == "\t" or ch == "\n" or ch == "\r" or ord(ch) == 0: return True diff --git a/manga_translator/utils/inference.py b/manga_translator/utils/inference.py index 188f22cca..973fd8bbd 100644 --- a/manga_translator/utils/inference.py +++ b/manga_translator/utils/inference.py @@ -83,7 +83,7 @@ class ModelWrapper(ABC): hash - Hash of downloaded file, Can be obtained upon ModelVerificationException - file - File download destination, If set to '.' the filename will be infered + file - File download destination, If set to '.' 
the filename will be inferred from the url (fallback is `model_id` value) archive - Dict that contains all files/folders that are to be extracted from diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py index 3f5b10184..17b4dfc20 100644 --- a/manga_translator/utils/textblock.py +++ b/manga_translator/utils/textblock.py @@ -14,7 +14,7 @@ # LANGCLS2IDX = {'eng': 0, 'ja': 1, 'unknown': 2} # determines render direction -LANGAUGE_ORIENTATION_PRESETS = { +LANGUAGE_ORIENTATION_PRESETS = { 'CHS': 'auto', 'CHT': 'auto', 'CSY': 'h', @@ -193,7 +193,7 @@ def real_area(self) -> float: lines = self.lines.reshape((-1, 2)) return MultiPoint([tuple(l) for l in lines]).convex_hull.area - def normalizd_width_list(self) -> List[float]: + def normalized_width_list(self) -> List[float]: polygons = self.unrotated_polygons width_list = [] for polygon in polygons: @@ -327,7 +327,7 @@ def get_font_colors(self, bgr=False): def direction(self): """Render direction determined through used language or aspect ratio.""" if self._direction not in ('h', 'v', 'hr', 'vr'): - d = LANGAUGE_ORIENTATION_PRESETS.get(self.target_lang) + d = LANGUAGE_ORIENTATION_PRESETS.get(self.target_lang) if d in ('h', 'v', 'hr', 'vr'): return d @@ -486,7 +486,7 @@ def sort_regions(regions: List[TextBlock], right_to_left=True) -> List[TextBlock # vertical = norm_v > norm_h # else: # vertical = norm_v > norm_h * 2 -# # calcuate distance between textlines and origin +# # calculate distance between textlines and origin # if vertical: # primary_vec, primary_norm = v, norm_v # distance_vectors = center_pnts - np.array([[im_w, 0]], dtype=np.float64) # vertical manga text is read from right to left, so origin is (imw, 0) diff --git a/training/ocr/custom_ctc.cc b/training/ocr/custom_ctc.cc index b5ebf1044..a595a6c35 100644 --- a/training/ocr/custom_ctc.cc +++ b/training/ocr/custom_ctc.cc @@ -5,7 +5,7 @@ // We mostly follow Graves. // 1. 
Graves et al: http://www.cs.toronto.edu/~graves/icml_2006.pdf // Note from zyddnys: -// Added regression capability to CTC loss, currectly we use L2 regression, future L1 regression maybe added +// Added regression capability to CTC loss, currently we use L2 regression, future L1 regression may be added // Two BLANKS where BLANK is the BLANK in CTC, BLANK_1 means regression part of this target is ignored // We use the equations from above link, but note that [1] has 1-based indexing and we (of course) use 0-based. // Graves et al call the probabilities y, we use log_probs (also calling them inputs) diff --git a/training/ocr/custom_ctc_kernel.cu b/training/ocr/custom_ctc_kernel.cu index 4338b3b3b..efe766ca1 100644 --- a/training/ocr/custom_ctc_kernel.cu +++ b/training/ocr/custom_ctc_kernel.cu @@ -5,9 +5,9 @@ // We mostly follow Graves. // 1. Graves et al: http://www.cs.toronto.edu/~graves/icml_2006.pdf // Note from zyddnys: -// Added regression capability to CTC loss, currectly we use L2 regression, future L1 regression maybe added +// Added regression capability to CTC loss, currently we use L2 regression, future L1 regression may be added // Two BLANKS where BLANK is the BLANK in CTC, BLANK_1 means regression part of this target is ignored -// Many kernels are splitted into multiple kernels to prevent CUDA too much resources requested error +// Many kernels are split into multiple kernels to prevent CUDA too much resources requested error // We use the equations from above link, but note that [1] has 1-based indexing and we (of course) use 0-based. // Graves et al call the probabilities y, we use log_probs (also calling them inputs) // A few optimizations (similar to those here, but also some I didn't take) are described in @@ -84,7 +84,7 @@ __device__ inline scalar_t custom_distance_forward(scalar_t x, scalar_t mu) { // so if l is l_0 l_1 ... l_(tl-1) then this looks up idx in // l' = BLANK l_0 BLANK l_1 BLANK ... 
BLANK l_(tl-1) BLANK // - note that no bound-checking is done -// - it is important to only call it witth idx == 0 if the target length is 0 +// - it is important to only call it with idx == 0 if the target length is 0 // - __restrict__ impact to be measured, see // https://devblogs.nvidia.com/cuda-pro-tip-optimize-pointer-aliasing/ template @@ -646,7 +646,7 @@ ctc_loss_backward_log_beta_gpu_kernel(scalar_t* __restrict__ log_beta_data, int64_t tg_batch_offset = tg_batch_offsets[b]; - // "first" row, the beta initiaization before eq (10) (t=target_length - differes per batch) + // "first" row, the beta initialization before eq (10) (t=target_length - differs per batch) for (int64_t block_s = 2*max_target_length - (2*max_target_length % blockDim.x); block_s >= 0; block_s -= blockDim.x) { int64_t s = threadIdx.x + block_s; scalar_t lb; @@ -1224,7 +1224,7 @@ std::tuple custom_ctc_loss_backward_gpu_template( C10_CUDA_KERNEL_LAUNCH_CHECK(); // catch launch errors } - // zero those invalid graident elements due to padding + // zero those invalid gradient elements due to padding { int threads_input = max_threads; while (threads_input / 2 >= log_probs.size(1)) {