From 52e420a4e9bef149826ba9dae73c141d8dc89c63 Mon Sep 17 00:00:00 2001 From: PhuNH <1079742+PhuNH@users.noreply.github.com> Date: Sun, 3 Sep 2023 11:58:40 +0200 Subject: [PATCH] Apply a better structure for renderers --- mdit_py_i18n/renderer_i18n.py | 35 +++-- mdit_py_i18n/renderer_l10n.py | 243 ++++++++++++++++------------------ 2 files changed, 134 insertions(+), 144 deletions(-) diff --git a/mdit_py_i18n/renderer_i18n.py b/mdit_py_i18n/renderer_i18n.py index d2ff2bb..7f11b9d 100644 --- a/mdit_py_i18n/renderer_i18n.py +++ b/mdit_py_i18n/renderer_i18n.py @@ -14,7 +14,7 @@ from .utils import DomainExtractionProtocol -class _MdCtx: +class MdCtx: def __init__(self, env: EnvType): self.path: str = env['path'] self.domain_e: DomainExtractionProtocol = env['domain_extraction'] @@ -23,16 +23,6 @@ def add_entry(self, msgid: str, line_number: int, comment: str = '', msgctxt: st self.domain_e.add_entry(self.path, msgid, line_number, comment, msgctxt) -def _link_ref(env: EnvType, md_ctx: _MdCtx): - refs = env.get('references', {}).items() - if len(refs) == 0: - return - for ref, details in refs: - if title := details.get('title', ''): - # TODO: line number? - md_ctx.add_entry(title, 0) - - class RendererMarkdownI18N: """ Implements `RendererProtocol` @@ -56,30 +46,39 @@ def render(self, tokens: Sequence[Token], _options: OptionsDict, env: EnvType): - 'domain_extraction': an object compatible with `DomainExtractionProtocol` :return: None """ - md_ctx = _MdCtx(env) + md_ctx = MdCtx(env) for i, token in enumerate(tokens): if token.type in self.rules: r = self.rules[token.type](tokens, i, md_ctx) if r == -1: break + self._link_ref(env, md_ctx) + + @staticmethod + def _link_ref(env: EnvType, md_ctx: MdCtx): + refs = env.get('references', {}).items() + if len(refs) == 0: + return + for ref, details in refs: + if title := details.get('title', ''): + # TODO: line number? + md_ctx.add_entry(title, 0) - _link_ref(env, md_ctx) - @classmethod - def front_matter(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx): + def front_matter(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx): token = tokens[idx] md_ctx.domain_e.render_front_matter(md_ctx.path, token.content, token.markup) @classmethod - def inline(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx): + def inline(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx): token = tokens[idx] content = utils.SPACES_PATTERN.sub(' ', token.content.replace('\n', ' ')) if content and not utils.SPACES_PATTERN.fullmatch(content): md_ctx.add_entry(content, token.map[0] + 1) @classmethod - def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx): + def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx): token = tokens[idx] try: lexer = lexers.get_lexer_by_name(token.info) @@ -121,6 +120,6 @@ def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx): md_ctx.add_entry(comment, comment_line_num) @classmethod - def html_block(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx): + def html_block(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx): token = tokens[idx] md_ctx.add_entry(token.content, token.map[0] + 1) diff --git a/mdit_py_i18n/renderer_l10n.py b/mdit_py_i18n/renderer_l10n.py index 94142b0..fa1fcf1 100644 --- a/mdit_py_i18n/renderer_l10n.py +++ b/mdit_py_i18n/renderer_l10n.py @@ -21,7 +21,7 @@ ORDERED_LIST_MARKUPS = {'.', ')'} -class _MdCtx: +class MdCtx: def __init__(self, env: EnvType): self.line_indent = '' self.indent_1st_line = '' # list item, definition detail, atx heading @@ -54,95 +54,6 @@ class _FenceCtx: next_indent: str = '' # a second field when 'indent' is busy -# TODO: parameterize: fence code comment i18n, wrap width, comment identifiers -# TODO: multiline comment? -def _fence_comment(fence_ctx: _FenceCtx, md_ctx: _MdCtx, content_result: L10NResult): - localized_comment = md_ctx.domain_g.l10n_func(fence_ctx.comment) - if localized_comment is not fence_ctx.comment: - content_result.l10n_count += 1 - content_result.total_count += 1 - subsequent_indent = ' ' * len(fence_ctx.indent) if fence_ctx.indent.strip() else fence_ctx.indent - comment_lines = textwrap.wrap(localized_comment, - 100, - initial_indent=f'{fence_ctx.indent}// ', - subsequent_indent=f'{subsequent_indent}// ') - for line in comment_lines: - fence_ctx.localized += f'{line}\n' - fence_ctx.comment = '' - - -def _fence(token: Token, md_ctx: _MdCtx, content_result: L10NResult): - try: - lexer = lexers.get_lexer_by_name(token.info) - except util.ClassNotFound: - lexer = lexers.guess_lexer(token.content) - code_toks = lexer.get_tokens(token.content) - - # values to use in _fence_comment function - fence_ctx = _FenceCtx() - # number of the last line with a comment token - last_comment_line_num = 0 - # the token starts with one line of the fence, then the content. +1: 0-base -> 1-base - line_num = token.map[0] + 1 + 1 - - # concatenate comment tokens until either a non-comment token or a blank line or end of token stream - for tok_type, tok_val in code_toks: - if tok_type == pygments.token.Token.Comment.Single: - # when another comment is already being parsed and there's a blank line - if fence_ctx.comment and line_num - last_comment_line_num > 1: - _fence_comment(fence_ctx, md_ctx, content_result) - last_nl = fence_ctx.next_indent.rfind('\n') - fence_ctx.localized += fence_ctx.next_indent[:last_nl + 1] - fence_ctx.indent = fence_ctx.next_indent[last_nl + 1:] - if fence_ctx.comment != '': - fence_ctx.comment += ' ' - if comment_match := utils.SINGLE_COMMENT_PATTERN.match(tok_val): - fence_ctx.comment += comment_match.group(2).strip() - last_comment_line_num = line_num - else: - if fence_ctx.comment: - if tok_val.strip(): - _fence_comment(fence_ctx, md_ctx, content_result) - fence_ctx.indent = fence_ctx.next_indent + tok_val - fence_ctx.next_indent = '' - else: - fence_ctx.next_indent = tok_val - else: - last_nl = tok_val.rfind('\n') - if last_nl != -1: - fence_ctx.localized += fence_ctx.indent + tok_val[:last_nl + 1] - fence_ctx.indent = tok_val[last_nl + 1:] - else: - fence_ctx.indent += tok_val - line_num += tok_val.count('\n') - if fence_ctx.comment: - _fence_comment(fence_ctx, md_ctx, content_result) - return fence_ctx.localized - - -def _front_matter(token: Token, env: EnvType) -> L10NResult: - domain_g: DomainGenerationProtocol = env['domain_generation'] - fm_result = domain_g.render_front_matter(token.content, token.markup) - return fm_result - - -def _link_ref(env: EnvType, md_ctx: _MdCtx, content_result: L10NResult): - refs = env.get('references', {}).items() - if len(refs) == 0: - return - content_result.localized += '\n' - for ref, details in refs: - href = details['href'] - content_result.localized += f'[{ref}]: {href}' - if title := details.get('title', ''): - localized_title = md_ctx.domain_g.l10n_func(title) - if localized_title is not title: - content_result.l10n_count += 1 - content_result.total_count += 1 - content_result.localized += f' "{localized_title}"' - content_result.localized += '\n' - - class RendererMarkdownL10N: """ Implements `RendererProtocol` @@ -164,27 +75,109 @@ def render(self, tokens: Sequence[Token], _options: OptionsDict, env: EnvType) - :param env: containing 'domain_generation' an object compatible with `DomainGenerationProtocol` :return: an `L10NResult` """ - if tokens[0].type == 'front_matter': - fm_result = _front_matter(tokens[0], env) + md_ctx = MdCtx(env) + + if (token := tokens[0]).type == 'front_matter': + fm_result = md_ctx.domain_g.render_front_matter(token.content, token.markup) tokens = tokens[1:] else: fm_result = L10NResult('', 0, 0) - md_ctx = _MdCtx(env) content_result = L10NResult('', 0, 0) - for i, token in enumerate(tokens): if token.type in self.rules: r = self.rules[token.type](tokens, i, md_ctx, content_result) if r == -1: break - - _link_ref(env, md_ctx, content_result) + self._link_ref(env, md_ctx, content_result) return fm_result, content_result + @staticmethod + def _link_ref(env: EnvType, md_ctx: MdCtx, content_result: L10NResult): + refs = env.get('references', {}).items() + if len(refs) == 0: + return + content_result.localized += '\n' + for ref, details in refs: + href = details['href'] + content_result.localized += f'[{ref}]: {href}' + if title := details.get('title', ''): + localized_title = md_ctx.domain_g.l10n_func(title) + if localized_title is not title: + content_result.l10n_count += 1 + content_result.total_count += 1 + content_result.localized += f' "{localized_title}"' + content_result.localized += '\n' + + # TODO: parameterize: fence code comment i18n, wrap width, comment identifiers + # TODO: multiline comment? + @staticmethod + def _fence_comment(fence_ctx: _FenceCtx, md_ctx: MdCtx, content_result: L10NResult): + localized_comment = md_ctx.domain_g.l10n_func(fence_ctx.comment) + if localized_comment is not fence_ctx.comment: + content_result.l10n_count += 1 + content_result.total_count += 1 + subsequent_indent = ' ' * len(fence_ctx.indent) if fence_ctx.indent.strip() else fence_ctx.indent + comment_lines = textwrap.wrap(localized_comment, + 100, + initial_indent=f'{fence_ctx.indent}// ', + subsequent_indent=f'{subsequent_indent}// ') + for line in comment_lines: + fence_ctx.localized += f'{line}\n' + fence_ctx.comment = '' + + @classmethod + def _fence(cls, token: Token, md_ctx: MdCtx, content_result: L10NResult): + try: + lexer = lexers.get_lexer_by_name(token.info) + except util.ClassNotFound: + lexer = lexers.guess_lexer(token.content) + code_toks = lexer.get_tokens(token.content) + + # values to use in _fence_comment function + fence_ctx = _FenceCtx() + # number of the last line with a comment token + last_comment_line_num = 0 + # the token starts with one line of the fence, then the content. +1: 0-base -> 1-base + line_num = token.map[0] + 1 + 1 + + # concatenate comment tokens until either a non-comment token or a blank line or end of token stream + for tok_type, tok_val in code_toks: + if tok_type == pygments.token.Token.Comment.Single: + # when another comment is already being parsed and there's a blank line + if fence_ctx.comment and line_num - last_comment_line_num > 1: + cls._fence_comment(fence_ctx, md_ctx, content_result) + last_nl = fence_ctx.next_indent.rfind('\n') + fence_ctx.localized += fence_ctx.next_indent[:last_nl + 1] + fence_ctx.indent = fence_ctx.next_indent[last_nl + 1:] + if fence_ctx.comment != '': + fence_ctx.comment += ' ' + if comment_match := utils.SINGLE_COMMENT_PATTERN.match(tok_val): + fence_ctx.comment += comment_match.group(2).strip() + last_comment_line_num = line_num + else: + if fence_ctx.comment: + if tok_val.strip(): + cls._fence_comment(fence_ctx, md_ctx, content_result) + fence_ctx.indent = fence_ctx.next_indent + tok_val + fence_ctx.next_indent = '' + else: + fence_ctx.next_indent = tok_val + else: + last_nl = tok_val.rfind('\n') + if last_nl != -1: + fence_ctx.localized += fence_ctx.indent + tok_val[:last_nl + 1] + fence_ctx.indent = tok_val[last_nl + 1:] + else: + fence_ctx.indent += tok_val + line_num += tok_val.count('\n') + if fence_ctx.comment: + cls._fence_comment(fence_ctx, md_ctx, content_result) + return fence_ctx.localized + @classmethod - def inline(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def inline(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): token = tokens[idx] content = utils.SPACES_PATTERN.sub(' ', token.content.replace('\n', ' ')) if content and not utils.SPACES_PATTERN.fullmatch(content): @@ -202,18 +195,18 @@ def inline(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_resul # blockquote # TODO: blockquote inside list @classmethod - def blockquote_open(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, _content_result: L10NResult): + def blockquote_open(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, _content_result: L10NResult): token = tokens[idx] md_ctx.line_indent = f'{md_ctx.get_line_indent()}{token.markup} ' @classmethod - def blockquote_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def blockquote_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): md_ctx.line_indent = md_ctx.line_indent[:-2] content_result.localized += f'{md_ctx.line_indent}\n' # heading @classmethod - def heading_open(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, _content_result: L10NResult): + def heading_open(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, _content_result: L10NResult): token = tokens[idx] if token.markup not in SETEXT_HEADING_MARKUPS: md_ctx.indent_1st_line += f'{token.markup} ' @@ -224,7 +217,7 @@ def heading_open(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, _conten md_ctx.setext_heading = token.markup @classmethod - def heading_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def heading_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): if md_ctx.setext_heading: content_result.localized += f'\n{md_ctx.get_line_indent()}{md_ctx.setext_heading}' md_ctx.setext_heading = '' @@ -235,7 +228,7 @@ def heading_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, cont # thematic break @classmethod - def hr(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def hr(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): token = tokens[idx] # always use '_' here to differentiate this from setext headings and bullet list items content_result.localized += f'{md_ctx.get_line_indent()}{len(token.markup) * "_"}\n' @@ -243,7 +236,7 @@ def hr(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L # list # TODO: loose lists? @classmethod - def list_item_open(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, _content_result: L10NResult): + def list_item_open(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, _content_result: L10NResult): token = tokens[idx] markup = f'{token.info}{token.markup}' if token.markup in ORDERED_LIST_MARKUPS else f'{token.markup}' md_ctx.indent_1st_line += f'{markup} ' @@ -256,7 +249,7 @@ def list_item_open(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, _cont def list_item_close(cls, _tokens: Sequence[Token], _idx: int, - md_ctx: _MdCtx, + md_ctx: MdCtx, _content_result: L10NResult): latest_len = md_ctx.indents.pop() md_ctx.line_indent = md_ctx.line_indent[:-latest_len] @@ -265,7 +258,7 @@ def list_item_close(cls, def bullet_list_close(cls, tokens: Sequence[Token], idx: int, - md_ctx: _MdCtx, + md_ctx: MdCtx, content_result: L10NResult): # add a blank line when next token is not a closing one if idx < len(tokens) - 1 and tokens[idx + 1].nesting != -1: @@ -275,15 +268,13 @@ def bullet_list_close(cls, def ordered_list_close(cls, tokens: Sequence[Token], idx: int, - md_ctx: _MdCtx, + md_ctx: MdCtx, content_result: L10NResult): - # add a blank line when next token is not a closing one - if idx < len(tokens) - 1 and tokens[idx + 1].nesting != -1: - content_result.localized += f'{md_ctx.line_indent}\n' + cls.bullet_list_close(tokens, idx, md_ctx, content_result) # paragraph @classmethod - def paragraph_close(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def paragraph_close(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): content_result.localized += '\n' if idx < len(tokens) - 1: next_token = tokens[idx + 1] @@ -295,16 +286,16 @@ def paragraph_close(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, cont # indented code block @classmethod - def code_block(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def code_block(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): token = tokens[idx] localized_code_block = token.content.replace('\n', f'\n{md_ctx.line_indent} ') content_result.localized += f'{md_ctx.get_line_indent()} {localized_code_block}\n' # fenced code block @classmethod - def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): token = tokens[idx] - localized_fence = _fence(token, md_ctx, content_result) + localized_fence = cls._fence(token, md_ctx, content_result) localized_fence = localized_fence.replace('\n', f'\n{md_ctx.line_indent}') # a newline is at the end of token.content already, so we only need to append token.markup there content_result.localized += f'''{md_ctx.get_line_indent()}{token.markup}{token.info} @@ -313,7 +304,7 @@ def fence(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result # html block @classmethod - def html_block(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def html_block(cls, tokens: Sequence[Token], idx: int, md_ctx: MdCtx, content_result: L10NResult): token = tokens[idx] localized_html = md_ctx.domain_g.l10n_func(token.content) if localized_html is not token.content: @@ -324,59 +315,59 @@ def html_block(cls, tokens: Sequence[Token], idx: int, md_ctx: _MdCtx, content_r # table @classmethod - def table_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, _content_result: L10NResult): + def table_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, _content_result: L10NResult): md_ctx.in_table = True @classmethod - def tr_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def tr_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): content_result.localized += f'{md_ctx.get_line_indent()}|' @classmethod - def th_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def th_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): content_result.localized += ' ' md_ctx.table_sep += '| --- ' @classmethod - def th_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: _MdCtx, content_result: L10NResult): + def th_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: MdCtx, content_result: L10NResult): content_result.localized += ' |' @classmethod - def thead_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def thead_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): md_ctx.table_sep += '|\n' content_result.localized += f'{md_ctx.line_indent}{md_ctx.table_sep}' md_ctx.table_sep = '' @classmethod - def td_open(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: _MdCtx, content_result: L10NResult): + def td_open(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: MdCtx, content_result: L10NResult): content_result.localized += ' ' @classmethod - def td_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: _MdCtx, content_result: L10NResult): + def td_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: MdCtx, content_result: L10NResult): content_result.localized += ' |' @classmethod - def tr_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: _MdCtx, content_result: L10NResult): + def tr_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: MdCtx, content_result: L10NResult): content_result.localized += '\n' @classmethod - def table_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def table_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): content_result.localized += f'{md_ctx.line_indent}\n' md_ctx.in_table = False # definition list @classmethod - def dd_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, _content_result: L10NResult): + def dd_open(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, _content_result: L10NResult): md_ctx.indent_1st_line += f': ' md_ctx.indent_1st_line_len += 2 md_ctx.line_indent += ' ' * 2 md_ctx.indents.append(2) @classmethod - def dd_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: _MdCtx, content_result: L10NResult): + def dd_close(cls, _tokens: Sequence[Token], _idx: int, md_ctx: MdCtx, content_result: L10NResult): latest_len = md_ctx.indents.pop() md_ctx.line_indent = md_ctx.line_indent[:-latest_len] content_result.localized += f'{md_ctx.line_indent}\n' @classmethod - def dt_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: _MdCtx, content_result: L10NResult): + def dt_close(cls, _tokens: Sequence[Token], _idx: int, _md_ctx: MdCtx, content_result: L10NResult): content_result.localized += '\n'