From 2ec4ca0a0239feea7e7742a112722507064f2a0b Mon Sep 17 00:00:00 2001 From: Michael Lorant Date: Fri, 26 Jan 2024 15:01:30 +1100 Subject: [PATCH] Replace go-runewidth with uniseg Replace the use of `RuneWidth` and `StringWidth` from `mattn/go-runewidth` with equivalent functions from `rivo/uniseg`. It is important to be aware that using `RuneWidth` will not be accurate as the width of a rune cannot be determined in isolation. This requires a shift to thinking about grapheme clusters instead. Unfortunately, due to the complexity of identifying grapheme clusters, there have been some significant performance regressions in two functions: - PrintableRuneWidth: 10x slower - TruncateString: 4x slower Two other functions have had performance improvements: - MarginString: 2x faster - PaddingString: 2x faster The documentation for `rivo/uniseg` mentions the use of `Step` and `StepString` performing "orders of magnitude faster" than using the `NewGraphemes` method. However, implementing these changes only resulted in a 10% performance increase. Signed-off-by: Michael Lorant --- ansi/buffer.go | 22 +++++++++++----------- go.mod | 2 +- go.sum | 6 ++---- padding/padding.go | 4 ++-- truncate/truncate.go | 25 ++++++++++++++----------- wrap/wrap.go | 37 ++++++++++++++++++++----------------- 6 files changed, 50 insertions(+), 46 deletions(-) diff --git a/ansi/buffer.go b/ansi/buffer.go index 471bcaf..dfea99a 100644 --- a/ansi/buffer.go +++ b/ansi/buffer.go @@ -3,7 +3,7 @@ package ansi import ( "bytes" - "github.com/mattn/go-runewidth" + "github.com/rivo/uniseg" ) // Buffer is a buffer aware of ANSI escape sequences. @@ -19,22 +19,22 @@ func (w Buffer) PrintableRuneWidth() int { // PrintableRuneWidth returns the cell width of the given string. 
func PrintableRuneWidth(s string) int { - var n int + var n []rune var ansi bool for _, c := range s { - if c == Marker { + switch { + case c == Marker: // ANSI escape sequence ansi = true - } else if ansi { - if IsTerminator(c) { - // ANSI sequence terminated - ansi = false - } - } else { - n += runewidth.RuneWidth(c) + case ansi && IsTerminator(c): + // ANSI sequence terminated + ansi = false + case ansi: + default: + n = append(n, c) } } - return n + return uniseg.StringWidth(string(n)) } diff --git a/go.mod b/go.mod index 8aa39aa..55aca69 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,4 @@ module github.com/muesli/reflow go 1.13 -require github.com/mattn/go-runewidth v0.0.14 +require github.com/rivo/uniseg v0.4.4 diff --git a/go.sum b/go.sum index 2579e19..60b0d09 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,2 @@ -github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= -github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= +github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= diff --git a/padding/padding.go b/padding/padding.go index 707ec46..2ece223 100644 --- a/padding/padding.go +++ b/padding/padding.go @@ -5,8 +5,8 @@ import ( "io" "strings" - "github.com/mattn/go-runewidth" "github.com/muesli/reflow/ansi" + "github.com/rivo/uniseg" ) type PaddingFunc func(w io.Writer) @@ -71,7 +71,7 @@ func (w *Writer) Write(b []byte) (int, error) { w.ansi = false } } else { - w.lineLen += runewidth.StringWidth(string(c)) + w.lineLen += uniseg.StringWidth(string(c)) if c == '\n' { // end of current line diff --git a/truncate/truncate.go b/truncate/truncate.go index 5aab5f8..6905a6c 100644 --- a/truncate/truncate.go +++ 
b/truncate/truncate.go @@ -4,7 +4,7 @@ import ( "bytes" "io" - "github.com/mattn/go-runewidth" + "github.com/rivo/uniseg" "github.com/muesli/reflow/ansi" ) @@ -79,17 +79,20 @@ func (w *Writer) Write(b []byte) (int, error) { w.width -= uint(tw) var curWidth uint - for _, c := range string(b) { - if c == ansi.Marker { + gr := uniseg.NewGraphemes(string(b)) + for gr.Next() { + rs := gr.Runes() + + switch { + case len(rs) == 1 && rs[0] == ansi.Marker: // ANSI escape sequence w.ansi = true - } else if w.ansi { - if ansi.IsTerminator(c) { - // ANSI sequence terminated - w.ansi = false - } - } else { - curWidth += uint(runewidth.RuneWidth(c)) + case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]): + // ANSI sequence terminated + w.ansi = false + case w.ansi: + default: + curWidth += uint(gr.Width()) } if curWidth > w.width { @@ -100,7 +103,7 @@ func (w *Writer) Write(b []byte) (int, error) { return n, err } - _, err := w.ansiWriter.Write([]byte(string(c))) + _, err := w.ansiWriter.Write([]byte(gr.Str())) if err != nil { return 0, err } diff --git a/wrap/wrap.go b/wrap/wrap.go index b6f2a80..3d3ea53 100644 --- a/wrap/wrap.go +++ b/wrap/wrap.go @@ -5,8 +5,8 @@ import ( "strings" "unicode" - "github.com/mattn/go-runewidth" "github.com/muesli/reflow/ansi" + "github.com/rivo/uniseg" ) var ( @@ -67,6 +67,7 @@ func String(s string, limit int) string { func (w *Wrap) Write(b []byte) (int, error) { s := strings.Replace(string(b), "\t", strings.Repeat(" ", w.TabWidth), -1) + if !w.KeepNewlines { s = strings.Replace(s, "\n", "", -1) } @@ -78,37 +79,39 @@ func (w *Wrap) Write(b []byte) (int, error) { return w.buf.Write(b) } - for _, c := range s { - if c == ansi.Marker { + gr := uniseg.NewGraphemes(s) + for gr.Next() { + rs := gr.Runes() + + switch { + case len(rs) == 1 && rs[0] == ansi.Marker: w.ansi = true - } else if w.ansi { - if ansi.IsTerminator(c) { - w.ansi = false - } - } else if inGroup(w.Newline, c) { + case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]): + w.ansi = 
false + case w.ansi: + case len(rs) == 1 && inGroup(w.Newline, rs[0]): w.addNewLine() w.forcefulNewline = false continue - } else { - width := runewidth.RuneWidth(c) - - if w.lineLen+width > w.Limit { + default: + if w.lineLen + gr.Width() > w.Limit { w.addNewLine() w.forcefulNewline = true } - if w.lineLen == 0 { - if w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(c) { + switch { + case w.lineLen == 0: + if len(rs) == 1 && w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(rs[0]) { continue } - } else { + default: w.forcefulNewline = false } - w.lineLen += width + w.lineLen += gr.Width() } - _, _ = w.buf.WriteRune(c) + _, _ = w.buf.WriteString(gr.Str()) } return len(b), nil