-
Notifications
You must be signed in to change notification settings - Fork 258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Define line break styles for east asian characters as options #411
Merged
yuin
merged 8 commits into
yuin:master
from
henry0312:update_cond_east_asian_line_breaks
Oct 28, 2023
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
6ef9b10
Improve line breaking behavior for east asian characters
henry0312 6cbcfeb
Add a WorksEvenWithOneSide option to EastAsianLineBreak
henry0312 2367b9f
add comments
henry0312 dc2230c
fix tests
henry0312 9d0b1b6
Define `EastAsianLineBreaksStyle` to specify behavior of line breaking
henry0312 792af68
Update README.md
henry0312 8c6830d
fix errors of lints
henry0312 6b3067e
Implements CSS3Draft
henry0312 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ import ( | |
"bytes" | ||
"fmt" | ||
"strconv" | ||
"unicode" | ||
"unicode/utf8" | ||
|
||
"github.com/yuin/goldmark/ast" | ||
|
@@ -16,7 +17,7 @@ import ( | |
type Config struct { | ||
Writer Writer | ||
HardWraps bool | ||
EastAsianLineBreaks bool | ||
EastAsianLineBreaks eastAsianLineBreaks | ||
XHTML bool | ||
Unsafe bool | ||
} | ||
|
@@ -26,7 +27,7 @@ func NewConfig() Config { | |
return Config{ | ||
Writer: DefaultWriter, | ||
HardWraps: false, | ||
EastAsianLineBreaks: false, | ||
EastAsianLineBreaks: eastAsianLineBreaks{}, | ||
XHTML: false, | ||
Unsafe: false, | ||
} | ||
|
@@ -38,7 +39,7 @@ func (c *Config) SetOption(name renderer.OptionName, value interface{}) { | |
case optHardWraps: | ||
c.HardWraps = value.(bool) | ||
case optEastAsianLineBreaks: | ||
c.EastAsianLineBreaks = value.(bool) | ||
c.EastAsianLineBreaks = value.(eastAsianLineBreaks) | ||
case optXHTML: | ||
c.XHTML = value.(bool) | ||
case optUnsafe: | ||
|
@@ -103,24 +104,119 @@ func WithHardWraps() interface { | |
// EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks. | ||
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks" | ||
|
||
// EastAsianLineBreaksStyle selects the rule set used to decide whether a soft line break next to east asian characters is kept. | ||
type EastAsianLineBreaksStyle int | ||
|
||
const ( | ||
// EastAsianLineBreaksStyleSimple follows east_asian_line_breaks in Pandoc. It is the zero value of EastAsianLineBreaksStyle. | ||
EastAsianLineBreaksStyleSimple EastAsianLineBreaksStyle = iota | ||
// EastAsianLineBreaksCSS3Draft follows the CSS Text Level 3 "Segment Break Transformation Rules" with some enhancements. | ||
EastAsianLineBreaksCSS3Draft | ||
) | ||
|
||
// eastAsianLineBreaker decides, from the last rune before a soft line break and the first rune after it, | ||
// whether the break is written out (true) or dropped (false). | ||
type eastAsianLineBreaker interface { | ||
SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool | ||
} | ||
|
||
type eastAsianLineBreaksSimple struct{} | ||
|
||
func (e *eastAsianLineBreaksSimple) SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool { | ||
return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune)) | ||
} | ||
|
||
type eastAsianLineBreaksCSS3Draft struct{} | ||
|
||
func (e *eastAsianLineBreaksCSS3Draft) SoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool { | ||
// Implements CSS text level3 Segment Break Transformation Rules with some enhancements. | ||
// References: | ||
// - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform | ||
// - https://github.com/w3c/csswg-drafts/issues/5086 | ||
|
||
// Rule1: | ||
// If the character immediately before or immediately after the segment break is | ||
// the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space. | ||
if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' { | ||
return false | ||
} | ||
|
||
// Rule2: | ||
// Otherwise, if the East Asian Width property of both the character before and after the segment break is | ||
// F, W, or H (not A), and neither side is Hangul, then the segment break is removed. | ||
thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune) | ||
siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune) | ||
if (thisLastRuneEastAsianWidth == "F" || | ||
thisLastRuneEastAsianWidth == "W" || | ||
thisLastRuneEastAsianWidth == "H") && | ||
(siblingFirstRuneEastAsianWidth == "F" || | ||
siblingFirstRuneEastAsianWidth == "W" || | ||
siblingFirstRuneEastAsianWidth == "H") { | ||
return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune) | ||
} | ||
|
||
// Rule3: | ||
// Otherwise, if either the character before or after the segment break belongs to | ||
// the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000, | ||
// then the segment break is removed. | ||
if util.IsSpaceDiscardingUnicodeRune(thisLastRune) || | ||
unicode.IsPunct(thisLastRune) || | ||
thisLastRune == '\u3000' || | ||
util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) || | ||
unicode.IsPunct(siblingFirstRune) || | ||
siblingFirstRune == '\u3000' { | ||
return false | ||
} | ||
|
||
// Rule4: | ||
// Otherwise, the segment break is converted to a space (U+0020). | ||
return true | ||
} | ||
|
||
// eastAsianLineBreaks is the value held by the EastAsianLineBreaks option. | ||
// Its zero value is disabled and has a nil EastAsianLineBreaksFunction. | ||
type eastAsianLineBreaks struct { | ||
// Enabled turns the east asian handling of soft line breaks on. | ||
Enabled bool | ||
// EastAsianLineBreaksFunction decides whether a soft line break is written; the renderer only calls it when Enabled is true. | ||
EastAsianLineBreaksFunction eastAsianLineBreaker | ||
} | ||
|
||
// withEastAsianLineBreaks is the option value returned by WithEastAsianLineBreaks; | ||
// it carries the requested line break style. | ||
type withEastAsianLineBreaks struct { | ||
eastAsianLineBreaksStyle EastAsianLineBreaksStyle | ||
} | ||
|
||
func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) { | ||
c.Options[optEastAsianLineBreaks] = true | ||
switch o.eastAsianLineBreaksStyle { | ||
case EastAsianLineBreaksStyleSimple: | ||
c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ | ||
Enabled: true, | ||
EastAsianLineBreaksFunction: &eastAsianLineBreaksSimple{}, | ||
} | ||
case EastAsianLineBreaksCSS3Draft: | ||
c.Options[optEastAsianLineBreaks] = eastAsianLineBreaks{ | ||
Enabled: true, | ||
EastAsianLineBreaksFunction: &eastAsianLineBreaksCSS3Draft{}, | ||
} | ||
} | ||
} | ||
|
||
func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) { | ||
c.EastAsianLineBreaks = true | ||
switch o.eastAsianLineBreaksStyle { | ||
case EastAsianLineBreaksStyleSimple: | ||
c.EastAsianLineBreaks = eastAsianLineBreaks{ | ||
Enabled: true, | ||
EastAsianLineBreaksFunction: &eastAsianLineBreaksSimple{}, | ||
} | ||
case EastAsianLineBreaksCSS3Draft: | ||
c.EastAsianLineBreaks = eastAsianLineBreaks{ | ||
Enabled: true, | ||
EastAsianLineBreaksFunction: &eastAsianLineBreaksCSS3Draft{}, | ||
} | ||
} | ||
} | ||
|
||
// WithEastAsianLineBreaks is a functional option that indicates whether soft line breaks | ||
// between east asian wide characters should be ignored. | ||
func WithEastAsianLineBreaks() interface { | ||
func WithEastAsianLineBreaks(style EastAsianLineBreaksStyle) interface { | ||
renderer.Option | ||
Option | ||
} { | ||
return &withEastAsianLineBreaks{} | ||
return &withEastAsianLineBreaks{style} | ||
} | ||
|
||
// XHTML is an option name used in WithXHTML. | ||
|
@@ -663,14 +759,13 @@ func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, en | |
_, _ = w.WriteString("<br>\n") | ||
} | ||
} else if n.SoftLineBreak() { | ||
if r.EastAsianLineBreaks && len(value) != 0 { | ||
if r.EastAsianLineBreaks.Enabled && len(value) != 0 { | ||
sibling := node.NextSibling() | ||
if sibling != nil && sibling.Kind() == ast.KindText { | ||
if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 { | ||
thisLastRune := util.ToRune(value, len(value)-1) | ||
siblingFirstRune, _ := utf8.DecodeRune(siblingText) | ||
if !(util.IsEastAsianWideRune(thisLastRune) && | ||
util.IsEastAsianWideRune(siblingFirstRune)) { | ||
if r.EastAsianLineBreaks.EastAsianLineBreaksFunction.SoftLineBreak(thisLastRune, siblingFirstRune) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am convinced that this is one of the things we should implement. |
||
_ = w.WriteByte('\n') | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have a better name than
EastAsianLineBreaksCSS3Draft?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems your implementation does not satisfy CSS3 draft rules. We may have choices...
I prefer 1.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for your thorough feedback.
I’m on board with option 1 and will give the CSS text level 3 rules and additional enhancements a shot. Admittedly, I'm not a pro with this CSS issue, so while I'll do my best, I might miss some nuances. Any extra guidance or pointers while I work through this would be awesome!
Will keep you posted on the progress. Talk to you soon!