From 6ef9b10a3a1a032b995d47b51eca99e0d9fc0581 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 27 Aug 2023 14:49:07 +0900 Subject: [PATCH 1/8] Improve line breaking behavior for east asian characters This commit aims to produce more natural line breaks in the rendered output. --- extension/cjk_test.go | 4 ++-- renderer/html/html.go | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/extension/cjk_test.go b/extension/cjk_test.go index e97bb72..40a7a10 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -140,7 +140,7 @@ func TestEastAsianLineBreaks(t *testing.T) { No: no, Description: "Soft line breaks between a western character and an east asian wide character are rendered as a newline", Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです", - Expected: "

太郎は\\ 「こんにちわ」\\ と言ったa\nんです

", + Expected: "

太郎は\\ 「こんにちわ」\\ と言ったaんです

", }, t, ) @@ -152,7 +152,7 @@ func TestEastAsianLineBreaks(t *testing.T) { No: no, Description: "Soft line breaks between an east asian wide character and a western character are rendered as a newline", Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです", - Expected: "

太郎は\\ 「こんにちわ」\\ と言った\nbんです

", + Expected: "

太郎は\\ 「こんにちわ」\\ と言ったbんです

", }, t, ) diff --git a/renderer/html/html.go b/renderer/html/html.go index 3503688..73f7835 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -669,8 +669,7 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 { thisLastRune := util.ToRune(value, len(value)-1) siblingFirstRune, _ := utf8.DecodeRune(siblingText) - if !(util.IsEastAsianWideRune(thisLastRune) && - util.IsEastAsianWideRune(siblingFirstRune)) { + if !util.IsEastAsianWideRune(thisLastRune) && !util.IsEastAsianWideRune(siblingFirstRune) { _ = w.WriteByte('\n') } } From 6cbcfebb712e8d060c2fdf6a59e7c214475e945a Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 10 Sep 2023 01:52:36 +0900 Subject: [PATCH 2/8] Add a WorksEvenWithOneSide option to EastAsianLineBreak --- README.md | 1 + extension/cjk.go | 39 ++++++++++++++++++++++++++---- extension/cjk_test.go | 33 ++++++++++++++++++++++++++ renderer/html/html.go | 55 +++++++++++++++++++++++++++++++++++-------- 4 files changed, 113 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index eaaf27e..4c36a37 100644 --- a/README.md +++ b/README.md @@ -380,6 +380,7 @@ This extension provides additional options for CJK users. | Functional option | Type | Description | | ----------------- | ---- | ----------- | | `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. | +| `extension.WithWorksEvenWithOneSide` | `-` | A functional option for `WithEastAsianLineBreaks` indicates that a softline break is ignored even if only one side of the break is east asian wide character. | | `extension.WithEscapedSpace` | `-` | Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec). 
With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around an emphasis like `太郎は\ **「こんにちわ」**\ といった`. | diff --git a/extension/cjk.go b/extension/cjk.go index 14bcde1..b1e5dcd 100644 --- a/extension/cjk.go +++ b/extension/cjk.go @@ -9,11 +9,28 @@ import ( // A CJKOption sets options for CJK support mostly for HTML based renderers. type CJKOption func(*cjk) +// A EastAsianLineBreaksOption sets options for east asian line breaks. +type EastAsianLineBreaksOption func(*eastAsianLineBreaks) + // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // between east asian wide characters should be ignored. -func WithEastAsianLineBreaks() CJKOption { +func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) CJKOption { return func(c *cjk) { - c.EastAsianLineBreaks = true + e := &eastAsianLineBreaks{ + Enabled: true, + } + for _, opt := range opts { + opt(e) + } + c.EastAsianLineBreaks = e + } +} + +// WithWorksEvenWithOneSide is a functional option that indicates that a softline break +// is ignored even if only one side of the break is east asian wide character. +func WithWorksEvenWithOneSide() EastAsianLineBreaksOption { + return func(e *eastAsianLineBreaks) { + e.WorksEvenWithOneSide = true } } @@ -25,10 +42,15 @@ func WithEscapedSpace() CJKOption { } type cjk struct { - EastAsianLineBreaks bool + EastAsianLineBreaks *eastAsianLineBreaks EscapedSpace bool } +type eastAsianLineBreaks struct { + Enabled bool + WorksEvenWithOneSide bool +} + // CJK is a goldmark extension that provides functionalities for CJK languages. 
var CJK = NewCJK(WithEastAsianLineBreaks(), WithEscapedSpace()) @@ -42,8 +64,15 @@ func NewCJK(opts ...CJKOption) goldmark.Extender { } func (e *cjk) Extend(m goldmark.Markdown) { - if e.EastAsianLineBreaks { - m.Renderer().AddOptions(html.WithEastAsianLineBreaks()) + if e.EastAsianLineBreaks != nil { + if e.EastAsianLineBreaks.Enabled { + opts := []html.EastAsianLineBreaksOption{} + if e.EastAsianLineBreaks.WorksEvenWithOneSide { + opts = append(opts, html.WithWorksEvenWithOneSide()) + } + m.Renderer().AddOptions(html.WithEastAsianLineBreaks(opts...)) + } + } if e.EscapedSpace { m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace()))) diff --git a/extension/cjk_test.go b/extension/cjk_test.go index 40a7a10..0f7797e 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -208,4 +208,37 @@ func TestEastAsianLineBreaks(t *testing.T) { }, t, ) + + // WithWorksEvenWithOneSide option + markdown = goldmark.New(goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithUnsafe(), + ), + goldmark.WithExtensions( + NewCJK(WithEastAsianLineBreaks(WithWorksEvenWithOneSide())), + ), + ) + no = 9 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between a western character and an east asian wide character are ignored", + Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです", + Expected: "

太郎は\\ 「こんにちわ」\\ と言ったaんです

", + }, + t, + ) + + no = 10 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between an east asian wide character and a western character are ignored", + Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです", + Expected: "

太郎は\\ 「こんにちわ」\\ と言ったbんです

", + }, + t, + ) } diff --git a/renderer/html/html.go b/renderer/html/html.go index 73f7835..d74c51f 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -16,7 +16,7 @@ import ( type Config struct { Writer Writer HardWraps bool - EastAsianLineBreaks bool + EastAsianLineBreaks eastAsianLineBreaks XHTML bool Unsafe bool } @@ -26,7 +26,7 @@ func NewConfig() Config { return Config{ Writer: DefaultWriter, HardWraps: false, - EastAsianLineBreaks: false, + EastAsianLineBreaks: eastAsianLineBreaks{}, XHTML: false, Unsafe: false, } @@ -38,7 +38,7 @@ func (c *Config) SetOption(name renderer.OptionName, value interface{}) { case optHardWraps: c.HardWraps = value.(bool) case optEastAsianLineBreaks: - c.EastAsianLineBreaks = value.(bool) + c.EastAsianLineBreaks = value.(eastAsianLineBreaks) case optXHTML: c.XHTML = value.(bool) case optUnsafe: @@ -103,24 +103,51 @@ func WithHardWraps() interface { // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks. const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" +type eastAsianLineBreaks struct { + Enabled bool + WorksEvenWithOneSide bool +} + type withEastAsianLineBreaks struct { + worksEvenWithOneSide bool } +type EastAsianLineBreaksOption func(*withEastAsianLineBreaks) + func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) { - c.Options[optEastAsianLineBreaks] = true + c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ + Enabled: true, + WorksEvenWithOneSide: o.worksEvenWithOneSide, + } } func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) { - c.EastAsianLineBreaks = true + c.EastAsianLineBreaks = eastAsianLineBreaks{ + Enabled: true, + WorksEvenWithOneSide: o.worksEvenWithOneSide, + } } // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // between east asian wide characters should be ignored. 
-func WithEastAsianLineBreaks() interface { +func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) interface { renderer.Option Option } { - return &withEastAsianLineBreaks{} + w := &withEastAsianLineBreaks{} + for _, opt := range opts { + opt(w) + } + + return w +} + +// WithWorksEvenWithOneSide is a functional option that indicates that a softline break +// is ignored even if only one side of the break is east asian wide character. +func WithWorksEvenWithOneSide() EastAsianLineBreaksOption { + return func(o *withEastAsianLineBreaks) { + o.worksEvenWithOneSide = true + } } // XHTML is an option name used in WithXHTML. @@ -663,14 +690,22 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en _, _ = w.WriteString("
\n") } } else if n.SoftLineBreak() { - if r.EastAsianLineBreaks && len(value) != 0 { + if r.EastAsianLineBreaks.Enabled && len(value) != 0 { sibling := node.NextSibling() if sibling != nil && sibling.Kind() == ast.KindText { if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 { thisLastRune := util.ToRune(value, len(value)-1) siblingFirstRune, _ := utf8.DecodeRune(siblingText) - if !util.IsEastAsianWideRune(thisLastRune) && !util.IsEastAsianWideRune(siblingFirstRune) { - _ = w.WriteByte('\n') + if r.EastAsianLineBreaks.WorksEvenWithOneSide { + if !(util.IsEastAsianWideRune(thisLastRune) || + util.IsEastAsianWideRune(siblingFirstRune)) { + _ = w.WriteByte('\n') + } + } else { + if !(util.IsEastAsianWideRune(thisLastRune) && + util.IsEastAsianWideRune(siblingFirstRune)) { + _ = w.WriteByte('\n') + } } } } From 2367b9ff46e5223d52ef0399eaed2c331fbb66e2 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 10 Sep 2023 15:17:16 +0900 Subject: [PATCH 3/8] add comments --- renderer/html/html.go | 1 + 1 file changed, 1 insertion(+) diff --git a/renderer/html/html.go b/renderer/html/html.go index d74c51f..0ca7c52 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -112,6 +112,7 @@ type withEastAsianLineBreaks struct { worksEvenWithOneSide bool } +// A EastAsianLineBreaksOption sets options for east asian line breaks. 
type EastAsianLineBreaksOption func(*withEastAsianLineBreaks) func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) { From dc2230c2354a4ebbad910486064c202ff376b4ad Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 10 Sep 2023 18:48:02 +0900 Subject: [PATCH 4/8] fix tests --- extension/cjk_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extension/cjk_test.go b/extension/cjk_test.go index 0f7797e..e4336f3 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -140,7 +140,7 @@ func TestEastAsianLineBreaks(t *testing.T) { No: no, Description: "Soft line breaks between a western character and an east asian wide character are rendered as a newline", Markdown: "太郎は\\ **「こんにちわ」**\\ と言ったa\nんです", - Expected: "

太郎は\\ 「こんにちわ」\\ と言ったaんです

", + Expected: "

太郎は\\ 「こんにちわ」\\ と言ったa\nんです

", }, t, ) @@ -152,7 +152,7 @@ func TestEastAsianLineBreaks(t *testing.T) { No: no, Description: "Soft line breaks between an east asian wide character and a western character are rendered as a newline", Markdown: "太郎は\\ **「こんにちわ」**\\ と言った\nbんです", - Expected: "

太郎は\\ 「こんにちわ」\\ と言ったbんです

", + Expected: "

太郎は\\ 「こんにちわ」\\ と言った\nbんです

", }, t, ) From 9d0b1b6bb83ef4abe51755dcc3f46a6930084288 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sat, 23 Sep 2023 20:49:54 +0900 Subject: [PATCH 5/8] Define `EastAsianLineBreaksStyle` to specify behavior of line breaking --- extension/cjk.go | 42 ++++++++++---------- extension/cjk_test.go | 4 +- renderer/html/html.go | 89 +++++++++++++++++++++++++------------------ 3 files changed, 75 insertions(+), 60 deletions(-) diff --git a/extension/cjk.go b/extension/cjk.go index b1e5dcd..9cf5381 100644 --- a/extension/cjk.go +++ b/extension/cjk.go @@ -9,31 +9,31 @@ import ( // A CJKOption sets options for CJK support mostly for HTML based renderers. type CJKOption func(*cjk) -// A EastAsianLineBreaksOption sets options for east asian line breaks. -type EastAsianLineBreaksOption func(*eastAsianLineBreaks) +// A EastAsianLineBreaksStyle is a style of east asian line breaks. +type EastAsianLineBreaksStyle int + +const ( + EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota + EastAsianLineBreaksCSS3Draft +) + +type EastAsianLineBreaksFunction func() // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // between east asian wide characters should be ignored. -func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) CJKOption { +func WithEastAsianLineBreaks(style ...EastAsianLineBreaksStyle) CJKOption { return func(c *cjk) { e := &eastAsianLineBreaks{ - Enabled: true, + Enabled: true, + EastAsianLineBreaksStyle: EastAsianLineBreaksStyleSimple, } - for _, opt := range opts { - opt(e) + for _, s := range style { + e.EastAsianLineBreaksStyle = s } c.EastAsianLineBreaks = e } } -// WithWorksEvenWithOneSide is a functional option that indicates that a softline break -// is ignored even if only one side of the break is east asian wide character. 
-func WithWorksEvenWithOneSide() EastAsianLineBreaksOption { - return func(e *eastAsianLineBreaks) { - e.WorksEvenWithOneSide = true - } -} - // WithEscapedSpace is a functional option that indicates that a '\' escaped half-space(0x20) should not be rendered. func WithEscapedSpace() CJKOption { return func(c *cjk) { @@ -47,8 +47,8 @@ type cjk struct { } type eastAsianLineBreaks struct { - Enabled bool - WorksEvenWithOneSide bool + Enabled bool + EastAsianLineBreaksStyle EastAsianLineBreaksStyle } // CJK is a goldmark extension that provides functionalities for CJK languages. @@ -66,13 +66,13 @@ func NewCJK(opts ...CJKOption) goldmark.Extender { func (e *cjk) Extend(m goldmark.Markdown) { if e.EastAsianLineBreaks != nil { if e.EastAsianLineBreaks.Enabled { - opts := []html.EastAsianLineBreaksOption{} - if e.EastAsianLineBreaks.WorksEvenWithOneSide { - opts = append(opts, html.WithWorksEvenWithOneSide()) + style := html.EastAsianLineBreaksStyleSimple + switch e.EastAsianLineBreaks.EastAsianLineBreaksStyle { + case EastAsianLineBreaksCSS3Draft: + style = html.EastAsianLineBreaksCSS3Draft } - m.Renderer().AddOptions(html.WithEastAsianLineBreaks(opts...)) + m.Renderer().AddOptions(html.WithEastAsianLineBreaks(style)) } - } if e.EscapedSpace { m.Renderer().AddOptions(html.WithWriter(html.NewWriter(html.WithEscapedSpace()))) diff --git a/extension/cjk_test.go b/extension/cjk_test.go index e4336f3..7091284 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -209,13 +209,13 @@ func TestEastAsianLineBreaks(t *testing.T) { t, ) - // WithWorksEvenWithOneSide option + // test with EastAsianLineBreaksCSS3Draft markdown = goldmark.New(goldmark.WithRendererOptions( html.WithXHTML(), html.WithUnsafe(), ), goldmark.WithExtensions( - NewCJK(WithEastAsianLineBreaks(WithWorksEvenWithOneSide())), + NewCJK(WithEastAsianLineBreaks(EastAsianLineBreaksCSS3Draft)), ), ) no = 9 diff --git a/renderer/html/html.go b/renderer/html/html.go index 0ca7c52..e2629cc 100644 --- 
a/renderer/html/html.go +++ b/renderer/html/html.go @@ -103,52 +103,75 @@ func WithHardWraps() interface { // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks. const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" +type EastAsianLineBreaksStyle int + +const ( + EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota + EastAsianLineBreaksCSS3Draft +) + +type eastAsianLineBreaksFunction interface { + SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool +} + +type eastAsianLineBreaksSimple struct{} + +func (e *eastAsianLineBreaksSimple) SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool { + return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune)) +} + +type eastAsianLineBreaksCSS3Draft struct{} + +func (e *eastAsianLineBreaksCSS3Draft) SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool { + return !(util.IsEastAsianWideRune(thisLastRune) || util.IsEastAsianWideRune(siblingFirstRune)) +} + type eastAsianLineBreaks struct { - Enabled bool - WorksEvenWithOneSide bool + Enabled bool + EastAsianLineBreaksFunction eastAsianLineBreaksFunction } type withEastAsianLineBreaks struct { - worksEvenWithOneSide bool + eastAsianLineBreaksStyle EastAsianLineBreaksStyle } -// A EastAsianLineBreaksOption sets options for east asian line breaks. 
-type EastAsianLineBreaksOption func(*withEastAsianLineBreaks) - func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) { - c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ - Enabled: true, - WorksEvenWithOneSide: o.worksEvenWithOneSide, + switch o.eastAsianLineBreaksStyle { + case EastAsianLineBreaksStyleSimple: + c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ + Enabled: true, + EastAsianLineBreaksFunction: &eastAsianLineBreaksSimple{}, + } + case EastAsianLineBreaksCSS3Draft: + c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ + Enabled: true, + EastAsianLineBreaksFunction: &eastAsianLineBreaksCSS3Draft{}, + } } } func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) { - c.EastAsianLineBreaks = eastAsianLineBreaks{ - Enabled: true, - WorksEvenWithOneSide: o.worksEvenWithOneSide, + switch o.eastAsianLineBreaksStyle { + case EastAsianLineBreaksStyleSimple: + c.EastAsianLineBreaks = eastAsianLineBreaks{ + Enabled: true, + EastAsianLineBreaksFunction: &eastAsianLineBreaksSimple{}, + } + case EastAsianLineBreaksCSS3Draft: + c.EastAsianLineBreaks = eastAsianLineBreaks{ + Enabled: true, + EastAsianLineBreaksFunction: &eastAsianLineBreaksCSS3Draft{}, + } } } // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // between east asian wide characters should be ignored. -func WithEastAsianLineBreaks(opts ...EastAsianLineBreaksOption) interface { +func WithEastAsianLineBreaks(style EastAsianLineBreaksStyle) interface { renderer.Option Option } { - w := &withEastAsianLineBreaks{} - for _, opt := range opts { - opt(w) - } - - return w -} - -// WithWorksEvenWithOneSide is a functional option that indicates that a softline break -// is ignored even if only one side of the break is east asian wide character. 
-func WithWorksEvenWithOneSide() EastAsianLineBreaksOption { - return func(o *withEastAsianLineBreaks) { - o.worksEvenWithOneSide = true - } + return &withEastAsianLineBreaks{style} } // XHTML is an option name used in WithXHTML. @@ -697,16 +720,8 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 { thisLastRune := util.ToRune(value, len(value)-1) siblingFirstRune, _ := utf8.DecodeRune(siblingText) - if r.EastAsianLineBreaks.WorksEvenWithOneSide { - if !(util.IsEastAsianWideRune(thisLastRune) || - util.IsEastAsianWideRune(siblingFirstRune)) { - _ = w.WriteByte('\n') - } - } else { - if !(util.IsEastAsianWideRune(thisLastRune) && - util.IsEastAsianWideRune(siblingFirstRune)) { - _ = w.WriteByte('\n') - } + if r.EastAsianLineBreaks.EastAsianLineBreaksFunction.SoftLineBreak(thisLastRune, siblingFirstRune) { + _ = w.WriteByte('\n') } } } From 792af6819ea0fd79b877e98307f7e86eb0af1187 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 24 Sep 2023 14:25:34 +0900 Subject: [PATCH 6/8] Update README.md --- README.md | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4c36a37..3b00089 100644 --- a/README.md +++ b/README.md @@ -379,11 +379,48 @@ This extension provides additional options for CJK users. | Functional option | Type | Description | | ----------------- | ---- | ----------- | -| `extension.WithEastAsianLineBreaks` | `-` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. | -| `extension.WithWorksEvenWithOneSide` | `-` | A functional option for `WithEastAsianLineBreaks` indicates that a softline break is ignored even if only one side of the break is east asian wide character. 
| +| `extension.WithEastAsianLineBreaks` | `...extension.EastAsianLineBreaksStyle` | Soft line breaks are rendered as a newline. Some asian users will see it as an unnecessary space. With this option, soft line breaks between east asian wide characters will be ignored. | | `extension.WithEscapedSpace` | `-` | Without spaces around an emphasis started with east asian punctuations, it is not interpreted as an emphasis(as defined in CommonMark spec). With this option, you can avoid this inconvenient behavior by putting 'not rendered' spaces around an emphasis like `太郎は\ **「こんにちわ」**\ といった`. | - +#### Styles of Line Breaking + +| Style | Description | +| ----- | ----------- | +| `EastAsianLineBreaksStyleSimple` | Soft line breaks are ignored if both sides of the break are east asian wide character. This behavior is the same as [`east_asian_line_breaks`](https://pandoc.org/MANUAL.html#extension-east_asian_line_breaks) in Pandoc. | +| `EastAsianLineBreaksCSS3Draft` | Soft line breaks are ignored even if only one side of the break is east asian wide character. | + +#### Example of `EastAsianLineBreaksStyleSimple` + +Input Markdown: + +```md +私はプログラマーです。 +東京の会社に勤めています。 +GoでWebアプリケーションを開発しています。 +``` + +Output: + +```md +

私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。

+``` + +#### Example of `EastAsianLineBreaksCSS3Draft` + +Input Markdown: + +```md +私はプログラマーです。 +東京の会社に勤めています。 +GoでWebアプリケーションを開発しています。 +``` + +Output: + +```md +

私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。

+``` + Security -------------------- By default, goldmark does not render raw HTML or potentially-dangerous URLs. From 8c6830d73b581db2c59181391bba051c1350d8ba Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 24 Sep 2023 15:07:17 +0900 Subject: [PATCH 7/8] fix errors of lints --- extension/cjk.go | 6 ++++-- renderer/html/html.go | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/extension/cjk.go b/extension/cjk.go index 9cf5381..aabafad 100644 --- a/extension/cjk.go +++ b/extension/cjk.go @@ -13,12 +13,14 @@ type CJKOption func(*cjk) type EastAsianLineBreaksStyle int const ( + // EastAsianLineBreaksStyleSimple is a style where soft line breaks are ignored + // if both sides of the break are east asian wide characters. EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota + // EastAsianLineBreaksCSS3Draft is a style where soft line breaks are ignored + // even if only one side of the break is an east asian wide character. EastAsianLineBreaksCSS3Draft ) -type EastAsianLineBreaksFunction func() - // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks // between east asian wide characters should be ignored. func WithEastAsianLineBreaks(style ...EastAsianLineBreaksStyle) CJKOption { diff --git a/renderer/html/html.go b/renderer/html/html.go index e2629cc..2da6362 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -103,10 +103,15 @@ func WithHardWraps() interface { // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks. const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" +// A EastAsianLineBreaksStyle is a style of east asian line breaks. type EastAsianLineBreaksStyle int const ( + // EastAsianLineBreaksStyleSimple is a style where soft line breaks are ignored + // if both sides of the break are east asian wide characters. 
EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota + // EastAsianLineBreaksCSS3Draft is a style where soft line breaks are ignored + // even if only one side of the break is an east asian wide character. EastAsianLineBreaksCSS3Draft ) From 6b3067e7e71473178654aa960a2dfbf049e99fd4 Mon Sep 17 00:00:00 2001 From: OMOTO Tsukasa Date: Sun, 22 Oct 2023 14:33:49 +0900 Subject: [PATCH 8/8] Implements CSS3Draft --- README.md | 6 +- extension/cjk_test.go | 31 ++- renderer/html/html.go | 54 ++++- util/util.go | 16 -- util/util_cjk.go | 469 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 547 insertions(+), 29 deletions(-) create mode 100644 util/util_cjk.go diff --git a/README.md b/README.md index 3b00089..8d9d83f 100644 --- a/README.md +++ b/README.md @@ -387,7 +387,7 @@ This extension provides additional options for CJK users. | Style | Description | | ----- | ----------- | | `EastAsianLineBreaksStyleSimple` | Soft line breaks are ignored if both sides of the break are east asian wide character. This behavior is the same as [`east_asian_line_breaks`](https://pandoc.org/MANUAL.html#extension-east_asian_line_breaks) in Pandoc. | -| `EastAsianLineBreaksCSS3Draft` | Soft line breaks are ignored even if only one side of the break is east asian wide character. | +| `EastAsianLineBreaksCSS3Draft` | This option implements CSS text level3 [Segment Break Transformation Rules](https://drafts.csswg.org/css-text-3/#line-break-transform) with [some enhancements](https://github.com/w3c/csswg-drafts/issues/5086). | #### Example of `EastAsianLineBreaksStyleSimple` @@ -401,7 +401,7 @@ GoでWebアプリケーションを開発しています。 Output: -```md +```html

私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。

``` @@ -417,7 +417,7 @@ GoでWebアプリケーションを開発しています。 Output: -```md +```html

私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。

``` diff --git a/extension/cjk_test.go b/extension/cjk_test.go index 7091284..0eaa26c 100644 --- a/extension/cjk_test.go +++ b/extension/cjk_test.go @@ -177,6 +177,7 @@ func TestEastAsianLineBreaks(t *testing.T) { t, ) + // Tests with EastAsianLineBreaksStyleSimple markdown = goldmark.New(goldmark.WithRendererOptions( html.WithXHTML(), html.WithUnsafe(), @@ -208,8 +209,19 @@ func TestEastAsianLineBreaks(t *testing.T) { }, t, ) + no = 9 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between an east asian wide character and a western character are ignored", + Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。", + Expected: "

私はプログラマーです。東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。

", + }, + t, + ) - // test with EastAsianLineBreaksCSS3Draft + // Tests with EastAsianLineBreaksCSS3Draft markdown = goldmark.New(goldmark.WithRendererOptions( html.WithXHTML(), html.WithUnsafe(), @@ -218,7 +230,7 @@ func TestEastAsianLineBreaks(t *testing.T) { NewCJK(WithEastAsianLineBreaks(EastAsianLineBreaksCSS3Draft)), ), ) - no = 9 + no = 10 testutil.DoTestCase( markdown, testutil.MarkdownTestCase{ @@ -230,7 +242,7 @@ func TestEastAsianLineBreaks(t *testing.T) { t, ) - no = 10 + no = 11 testutil.DoTestCase( markdown, testutil.MarkdownTestCase{ @@ -241,4 +253,17 @@ func TestEastAsianLineBreaks(t *testing.T) { }, t, ) + + no = 12 + testutil.DoTestCase( + markdown, + testutil.MarkdownTestCase{ + No: no, + Description: "Soft line breaks between an east asian wide character and a western character are ignored", + Markdown: "私はプログラマーです。\n東京の会社に勤めています。\nGoでWebアプリケーションを開発しています。", + Expected: "

私はプログラマーです。東京の会社に勤めています。GoでWebアプリケーションを開発しています。

", + }, + t, + ) + } diff --git a/renderer/html/html.go b/renderer/html/html.go index 2da6362..b3ce071 100644 --- a/renderer/html/html.go +++ b/renderer/html/html.go @@ -5,6 +5,7 @@ import ( "bytes" "fmt" "strconv" + "unicode" "unicode/utf8" "github.com/yuin/goldmark/ast" @@ -107,15 +108,13 @@ const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" type EastAsianLineBreaksStyle int const ( - // EastAsianLineBreaksStyleSimple is a style where soft line breaks are ignored - // if both sides of the break are east asian wide characters. + // EastAsianLineBreaksStyleSimple follows east_asian_line_breaks in Pandoc. EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota - // EastAsianLineBreaksCSS3Draft is a style where soft line breaks are ignored - // even if only one side of the break is an east asian wide character. + // EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements. EastAsianLineBreaksCSS3Draft ) -type eastAsianLineBreaksFunction interface { +type eastAsianLineBreaker interface { SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool } @@ -128,12 +127,53 @@ func (e *eastAsianLineBreaksSimple) SoftLineBreak(thisLastRune rune, siblingFirs type eastAsianLineBreaksCSS3Draft struct{} func (e *eastAsianLineBreaksCSS3Draft) SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool { - return !(util.IsEastAsianWideRune(thisLastRune) || util.IsEastAsianWideRune(siblingFirstRune)) + // Implements CSS text level3 Segment Break Transformation Rules with some enhancements. + // References: + // - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform + // - https://github.com/w3c/csswg-drafts/issues/5086 + + // Rule1: + // If the character immediately before or immediately after the segment break is + // the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space. 
+ if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' { + return false + } + + // Rule2: + // Otherwise, if the East Asian Width property of both the character before and after the segment break is + // F, W, or H (not A), and neither side is Hangul, then the segment break is removed. + thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune) + siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune) + if (thisLastRuneEastAsianWidth == "F" || + thisLastRuneEastAsianWidth == "W" || + thisLastRuneEastAsianWidth == "H") && + (siblingFirstRuneEastAsianWidth == "F" || + siblingFirstRuneEastAsianWidth == "W" || + siblingFirstRuneEastAsianWidth == "H") { + return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune) + } + + // Rule3: + // Otherwise, if either the character before or after the segment break belongs to + // the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000, + // then the segment break is removed. + if util.IsSpaceDiscardingUnicodeRune(thisLastRune) || + unicode.IsPunct(thisLastRune) || + thisLastRune == '\u3000' || + util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) || + unicode.IsPunct(siblingFirstRune) || + siblingFirstRune == '\u3000' { + return false + } + + // Rule4: + // Otherwise, the segment break is converted to a space (U+0020). + return true } type eastAsianLineBreaks struct { Enabled bool - EastAsianLineBreaksFunction eastAsianLineBreaksFunction + EastAsianLineBreaksFunction eastAsianLineBreaker } type withEastAsianLineBreaks struct { diff --git a/util/util.go b/util/util.go index 1bfc585..9bf09ad 100644 --- a/util/util.go +++ b/util/util.go @@ -836,22 +836,6 @@ func IsAlphaNumeric(c byte) bool { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' } -// IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false. 
-func IsEastAsianWideRune(r rune) bool { - // https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation - var CJKSymbolsAndPunctuation = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x3000, 0x303F, 1}, - }, - } - return unicode.Is(unicode.Hiragana, r) || - unicode.Is(unicode.Katakana, r) || - unicode.Is(unicode.Han, r) || - unicode.Is(unicode.Lm, r) || - unicode.Is(unicode.Hangul, r) || - unicode.Is(CJKSymbolsAndPunctuation, r) -} - // A BufWriter is a subset of the bufio.Writer . type BufWriter interface { io.Writer diff --git a/util/util_cjk.go b/util/util_cjk.go new file mode 100644 index 0000000..d758107 --- /dev/null +++ b/util/util_cjk.go @@ -0,0 +1,469 @@ +package util + +import "unicode" + +var cjkRadicalsSupplement = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x2E80, 0x2EFF, 1}, + }, +} + +var kangxiRadicals = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x2F00, 0x2FDF, 1}, + }, +} + +var ideographicDescriptionCharacters = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x2FF0, 0x2FFF, 1}, + }, +} + +var cjkSymbolsAndPunctuation = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3000, 0x303F, 1}, + }, +} + +var hiragana = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3040, 0x309F, 1}, + }, +} + +var katakana = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x30A0, 0x30FF, 1}, + }, +} + +var kanbun = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3130, 0x318F, 1}, + {0x3190, 0x319F, 1}, + }, +} + +var cjkStrokes = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x31C0, 0x31EF, 1}, + }, +} + +var katakanaPhoneticExtensions = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x31F0, 0x31FF, 1}, + }, +} + +var cjkCompatibility = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3300, 0x33FF, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionA = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x3400, 0x4DBF, 1}, + }, +} + +var cjkUnifiedIdeographs = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x4E00, 0x9FFF, 1}, + 
}, +} + +var yiSyllables = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xA000, 0xA48F, 1}, + }, +} + +var yiRadicals = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xA490, 0xA4CF, 1}, + }, +} + +var cjkCompatibilityIdeographs = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xF900, 0xFAFF, 1}, + }, +} + +var verticalForms = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xFE10, 0xFE1F, 1}, + }, +} + +var cjkCompatibilityForms = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xFE30, 0xFE4F, 1}, + }, +} + +var smallFormVariants = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xFE50, 0xFE6F, 1}, + }, +} + +var halfwidthAndFullwidthForms = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0xFF00, 0xFFEF, 1}, + }, +} + +var kanaSupplement = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x1B000, 0x1B0FF, 1}, + }, +} + +var kanaExtendedA = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x1B100, 0x1B12F, 1}, + }, +} + +var smallKanaExtension = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x1B130, 0x1B16F, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionB = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x20000, 0x2A6DF, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionC = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x2A700, 0x2B73F, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionD = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x2B740, 0x2B81F, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionE = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x2B820, 0x2CEAF, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionF = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x2CEB0, 0x2EBEF, 1}, + }, +} + +var cjkCompatibilityIdeographsSupplement = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x2F800, 0x2FA1F, 1}, + }, +} + +var cjkUnifiedIdeographsExtensionG = &unicode.RangeTable{ + R32: []unicode.Range32{ + {0x30000, 0x3134F, 1}, + }, +} + +// IsEastAsianWideRune returns true if the given rune is an east asian wide character,
otherwise false. +func IsEastAsianWideRune(r rune) bool { + return unicode.Is(unicode.Hiragana, r) || + unicode.Is(unicode.Katakana, r) || + unicode.Is(unicode.Han, r) || + unicode.Is(unicode.Lm, r) || + unicode.Is(unicode.Hangul, r) || + unicode.Is(cjkSymbolsAndPunctuation, r) +} + +// IsSpaceDiscardingUnicodeRune returns true if the given rune is space-discarding unicode character, otherwise false. +// See https://www.w3.org/TR/2020/WD-css-text-3-20200429/#space-discard-set +func IsSpaceDiscardingUnicodeRune(r rune) bool { + return unicode.Is(cjkRadicalsSupplement, r) || + unicode.Is(kangxiRadicals, r) || + unicode.Is(ideographicDescriptionCharacters, r) || + unicode.Is(cjkSymbolsAndPunctuation, r) || + unicode.Is(hiragana, r) || + unicode.Is(katakana, r) || + unicode.Is(kanbun, r) || + unicode.Is(cjkStrokes, r) || + unicode.Is(katakanaPhoneticExtensions, r) || + unicode.Is(cjkCompatibility, r) || + unicode.Is(cjkUnifiedIdeographsExtensionA, r) || + unicode.Is(cjkUnifiedIdeographs, r) || + unicode.Is(yiSyllables, r) || + unicode.Is(yiRadicals, r) || + unicode.Is(cjkCompatibilityIdeographs, r) || + unicode.Is(verticalForms, r) || + unicode.Is(cjkCompatibilityForms, r) || + unicode.Is(smallFormVariants, r) || + unicode.Is(halfwidthAndFullwidthForms, r) || + unicode.Is(kanaSupplement, r) || + unicode.Is(kanaExtendedA, r) || + unicode.Is(smallKanaExtension, r) || + unicode.Is(cjkUnifiedIdeographsExtensionB, r) || + unicode.Is(cjkUnifiedIdeographsExtensionC, r) || + unicode.Is(cjkUnifiedIdeographsExtensionD, r) || + unicode.Is(cjkUnifiedIdeographsExtensionE, r) || + unicode.Is(cjkUnifiedIdeographsExtensionF, r) || + unicode.Is(cjkCompatibilityIdeographsSupplement, r) || + unicode.Is(cjkUnifiedIdeographsExtensionG, r) +} + +// EastAsianWidth returns the east asian width of the given rune. 
// See https://www.unicode.org/reports/tr11/tr11-36.html
//
// The result is one of the UAX #11 property values: "F" (Fullwidth),
// "H" (Halfwidth), "W" (Wide), "Na" (Narrow) or "A" (Ambiguous). Every rune
// that is not listed below, including negative and out-of-range values,
// is reported as "N" (Neutral).
//
// NOTE(review): the ranges look like they were transcribed from an older
// EastAsianWidth.txt than the revision cited above; confirm before relying
// on the result for recently assigned code points.
func EastAsianWidth(r rune) string {
	// Fast path: nothing below U+00A1 is listed except printable ASCII,
	// which is Narrow. This keeps the common case away from the long
	// range lists that follow.
	if r < 0x00A1 {
		if 0x0020 <= r && r <= 0x007E {
			return "Na"
		}
		return "N"
	}

	switch {
	// Fullwidth.
	case r == 0x3000,
		0xFF01 <= r && r <= 0xFF60,
		0xFFE0 <= r && r <= 0xFFE6:
		return "F"

	// Halfwidth.
	case r == 0x20A9,
		0xFF61 <= r && r <= 0xFFBE,
		0xFFC2 <= r && r <= 0xFFC7,
		0xFFCA <= r && r <= 0xFFCF,
		0xFFD2 <= r && r <= 0xFFD7,
		0xFFDA <= r && r <= 0xFFDC,
		0xFFE8 <= r && r <= 0xFFEE:
		return "H"

	// Wide.
	case 0x1100 <= r && r <= 0x115F,
		0x11A3 <= r && r <= 0x11A7,
		0x11FA <= r && r <= 0x11FF,
		0x2329 <= r && r <= 0x232A,
		0x2E80 <= r && r <= 0x2E99,
		0x2E9B <= r && r <= 0x2EF3,
		0x2F00 <= r && r <= 0x2FD5,
		0x2FF0 <= r && r <= 0x2FFB,
		0x3001 <= r && r <= 0x303E,
		0x3041 <= r && r <= 0x3096,
		0x3099 <= r && r <= 0x30FF,
		0x3105 <= r && r <= 0x312D,
		0x3131 <= r && r <= 0x318E,
		0x3190 <= r && r <= 0x31BA,
		0x31C0 <= r && r <= 0x31E3,
		0x31F0 <= r && r <= 0x321E,
		0x3220 <= r && r <= 0x3247,
		0x3250 <= r && r <= 0x32FE,
		0x3300 <= r && r <= 0x4DBF,
		0x4E00 <= r && r <= 0xA48C,
		0xA490 <= r && r <= 0xA4C6,
		0xA960 <= r && r <= 0xA97C,
		0xAC00 <= r && r <= 0xD7A3,
		0xD7B0 <= r && r <= 0xD7C6,
		0xD7CB <= r && r <= 0xD7FB,
		0xF900 <= r && r <= 0xFAFF,
		0xFE10 <= r && r <= 0xFE19,
		0xFE30 <= r && r <= 0xFE52,
		0xFE54 <= r && r <= 0xFE66,
		0xFE68 <= r && r <= 0xFE6B,
		0x1B000 <= r && r <= 0x1B001,
		0x1F200 <= r && r <= 0x1F202,
		0x1F210 <= r && r <= 0x1F23A,
		0x1F240 <= r && r <= 0x1F248,
		0x1F250 <= r && r <= 0x1F251,
		// Previously two overlapping ranges (0x20000-0x2F73F and
		// 0x2B740-0x2FFFD); their union is this single contiguous range.
		0x20000 <= r && r <= 0x2FFFD,
		0x30000 <= r && r <= 0x3FFFD:
		return "W"

	// Narrow (printable ASCII is handled by the fast path above).
	case 0x00A2 <= r && r <= 0x00A3,
		0x00A5 <= r && r <= 0x00A6,
		r == 0x00AC,
		r == 0x00AF,
		0x27E6 <= r && r <= 0x27ED,
		0x2985 <= r && r <= 0x2986:
		return "Na"

	// Ambiguous.
	case r == 0x00A1,
		r == 0x00A4,
		0x00A7 <= r && r <= 0x00A8,
		r == 0x00AA,
		0x00AD <= r && r <= 0x00AE,
		0x00B0 <= r && r <= 0x00B4,
		0x00B6 <= r && r <= 0x00BA,
		0x00BC <= r && r <= 0x00BF,
		r == 0x00C6,
		r == 0x00D0,
		0x00D7 <= r && r <= 0x00D8,
		0x00DE <= r && r <= 0x00E1,
		r == 0x00E6,
		0x00E8 <= r && r <= 0x00EA,
		0x00EC <= r && r <= 0x00ED,
		r == 0x00F0,
		0x00F2 <= r && r <= 0x00F3,
		0x00F7 <= r && r <= 0x00FA,
		r == 0x00FC,
		r == 0x00FE,
		r == 0x0101,
		r == 0x0111,
		r == 0x0113,
		r == 0x011B,
		0x0126 <= r && r <= 0x0127,
		r == 0x012B,
		0x0131 <= r && r <= 0x0133,
		r == 0x0138,
		0x013F <= r && r <= 0x0142,
		r == 0x0144,
		0x0148 <= r && r <= 0x014B,
		r == 0x014D,
		0x0152 <= r && r <= 0x0153,
		0x0166 <= r && r <= 0x0167,
		r == 0x016B,
		r == 0x01CE,
		r == 0x01D0,
		r == 0x01D2,
		r == 0x01D4,
		r == 0x01D6,
		r == 0x01D8,
		r == 0x01DA,
		r == 0x01DC,
		r == 0x0251,
		r == 0x0261,
		r == 0x02C4,
		r == 0x02C7,
		0x02C9 <= r && r <= 0x02CB,
		r == 0x02CD,
		r == 0x02D0,
		0x02D8 <= r && r <= 0x02DB,
		r == 0x02DD,
		r == 0x02DF,
		0x0300 <= r && r <= 0x036F,
		0x0391 <= r && r <= 0x03A1,
		0x03A3 <= r && r <= 0x03A9,
		0x03B1 <= r && r <= 0x03C1,
		0x03C3 <= r && r <= 0x03C9,
		r == 0x0401,
		0x0410 <= r && r <= 0x044F,
		r == 0x0451,
		r == 0x2010,
		0x2013 <= r && r <= 0x2016,
		0x2018 <= r && r <= 0x2019,
		0x201C <= r && r <= 0x201D,
		0x2020 <= r && r <= 0x2022,
		0x2024 <= r && r <= 0x2027,
		r == 0x2030,
		0x2032 <= r && r <= 0x2033,
		r == 0x2035,
		r == 0x203B,
		r == 0x203E,
		r == 0x2074,
		r == 0x207F,
		0x2081 <= r && r <= 0x2084,
		r == 0x20AC,
		r == 0x2103,
		r == 0x2105,
		r == 0x2109,
		r == 0x2113,
		r == 0x2116,
		0x2121 <= r && r <= 0x2122,
		r == 0x2126,
		r == 0x212B,
		0x2153 <= r && r <= 0x2154,
		0x215B <= r && r <= 0x215E,
		0x2160 <= r && r <= 0x216B,
		0x2170 <= r && r <= 0x2179,
		r == 0x2189,
		0x2190 <= r && r <= 0x2199,
		0x21B8 <= r && r <= 0x21B9,
		r == 0x21D2,
		r == 0x21D4,
		r == 0x21E7,
		r == 0x2200,
		0x2202 <= r && r <= 0x2203,
		0x2207 <= r && r <= 0x2208,
		r == 0x220B,
		r == 0x220F,
		r == 0x2211,
		r == 0x2215,
		r == 0x221A,
		0x221D <= r && r <= 0x2220,
		r == 0x2223,
		r == 0x2225,
		0x2227 <= r && r <= 0x222C,
		r == 0x222E,
		0x2234 <= r && r <= 0x2237,
		0x223C <= r && r <= 0x223D,
		r == 0x2248,
		r == 0x224C,
		r == 0x2252,
		0x2260 <= r && r <= 0x2261,
		0x2264 <= r && r <= 0x2267,
		0x226A <= r && r <= 0x226B,
		0x226E <= r && r <= 0x226F,
		0x2282 <= r && r <= 0x2283,
		0x2286 <= r && r <= 0x2287,
		r == 0x2295,
		r == 0x2299,
		r == 0x22A5,
		r == 0x22BF,
		r == 0x2312,
		0x2460 <= r && r <= 0x24E9,
		0x24EB <= r && r <= 0x254B,
		0x2550 <= r && r <= 0x2573,
		0x2580 <= r && r <= 0x258F,
		0x2592 <= r && r <= 0x2595,
		0x25A0 <= r && r <= 0x25A1,
		0x25A3 <= r && r <= 0x25A9,
		0x25B2 <= r && r <= 0x25B3,
		0x25B6 <= r && r <= 0x25B7,
		0x25BC <= r && r <= 0x25BD,
		0x25C0 <= r && r <= 0x25C1,
		0x25C6 <= r && r <= 0x25C8,
		r == 0x25CB,
		0x25CE <= r && r <= 0x25D1,
		0x25E2 <= r && r <= 0x25E5,
		r == 0x25EF,
		0x2605 <= r && r <= 0x2606,
		r == 0x2609,
		0x260E <= r && r <= 0x260F,
		0x2614 <= r && r <= 0x2615,
		r == 0x261C,
		r == 0x261E,
		r == 0x2640,
		r == 0x2642,
		0x2660 <= r && r <= 0x2661,
		0x2663 <= r && r <= 0x2665,
		0x2667 <= r && r <= 0x266A,
		0x266C <= r && r <= 0x266D,
		r == 0x266F,
		0x269E <= r && r <= 0x269F,
		0x26BE <= r && r <= 0x26BF,
		0x26C4 <= r && r <= 0x26CD,
		0x26CF <= r && r <= 0x26E1,
		r == 0x26E3,
		0x26E8 <= r && r <= 0x26FF,
		r == 0x273D,
		r == 0x2757,
		0x2776 <= r && r <= 0x277F,
		0x2B55 <= r && r <= 0x2B59,
		0x3248 <= r && r <= 0x324F,
		0xE000 <= r && r <= 0xF8FF,
		0xFE00 <= r && r <= 0xFE0F,
		r == 0xFFFD,
		0x1F100 <= r && r <= 0x1F10A,
		0x1F110 <= r && r <= 0x1F12D,
		0x1F130 <= r && r <= 0x1F169,
		0x1F170 <= r && r <= 0x1F19A,
		0xE0100 <= r && r <= 0xE01EF,
		0xF0000 <= r && r <= 0xFFFFD,
		0x100000 <= r && r <= 0x10FFFD:
		return "A"

	default:
		return "N"
	}
}