From 471d31ba4a69d25ab0799b5875a39351620607b6 Mon Sep 17 00:00:00 2001
From: Ayman Bagabas
Date: Tue, 12 Mar 2024 21:55:28 -0400
Subject: [PATCH] feat(term): ansi: implement ANSI aware truncation

This implements an ANSI and wide-characters aware truncation algorithm
that uses the newly merged [ANSI parser state machine][statemachine] and
the fantastic library uniseg.

Since this is using the ANSI state machine, it's compatible with `CSI m`
(SGR) style sequences, `OSC 8` (hyperlinks), and basically any other
escape sequence supported in the state machine (DCS, ESC, SOS, PM, APC).

Related: https://github.com/muesli/reflow/pull/71

[statemachine]: https://github.com/charmbracelet/x/blob/main/exp/term/ansi/parser/transition_table.go
---
 exp/term/ansi/truncate.go      | 110 +++++++++++++++++++++++++++++++
 exp/term/ansi/truncate_test.go |  66 +++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 exp/term/ansi/truncate.go
 create mode 100644 exp/term/ansi/truncate_test.go

diff --git a/exp/term/ansi/truncate.go b/exp/term/ansi/truncate.go
new file mode 100644
index 00000000..e105470c
--- /dev/null
+++ b/exp/term/ansi/truncate.go
@@ -0,0 +1,110 @@
+package ansi
+
+import (
+	"bytes"
+
+	. "github.com/charmbracelet/x/exp/term/ansi/parser"
+	"github.com/rivo/uniseg"
+)
+
+// Truncate truncates a string to a given length, adding a tail to the
+// end if the string is longer than the given length.
+// This function is aware of ANSI escape codes and will not break them, and
+// accounts for wide-characters (such as East Asian characters and emojis).
+//
+// The length is measured in cells and includes the width of the tail: the
+// tail width is subtracted from length before any content is collected. If
+// the tail alone is wider than length, an empty string is returned.
+func Truncate(s string, length int, tail string) string {
+	tw := StringWidth(tail)
+	length -= tw
+	if length < 0 {
+		return ""
+	}
+
+	var cluster []byte
+	var buf bytes.Buffer
+	curWidth := 0
+	ignoring := false
+	gstate := -1
+	pstate := GroundState // initial state
+	b := []byte(s)
+	i := 0
+
+	// Here we iterate over the bytes of the string and collect printable
+	// characters and runes. We also keep track of the width of the string
+	// in cells.
+	// Once we reach the given length, we start ignoring characters and only
+	// collect ANSI escape codes until we reach the end of the next escape
+	// code, or end of string.
+	for i < len(b) {
+		state, action := Table.Transition(pstate, b[i])
+
+		switch action {
+		case CollectAction:
+			// This action happens when we transition to the Utf8State.
+			if w := utf8ByteLen(b[i]); w > 1 {
+				var width int
+				cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate)
+
+				// increment the index by the length of the cluster
+				i += len(cluster)
+
+				// Are we ignoring? Skip to the next byte
+				if ignoring {
+					continue
+				}
+
+				// Is this gonna be too wide?
+				// If so write the tail and stop collecting. A cluster that
+				// fits exactly is still written; the tail is only emitted
+				// once a cluster actually has to be dropped, so it can never
+				// end up in front of text that was kept.
+				if curWidth+width > length {
+					ignoring = true
+					buf.WriteString(tail)
+					continue
+				}
+
+				curWidth += width
+				buf.Write(cluster)
+
+				// Done collecting, now we're back in the ground state.
+				pstate = GroundState
+				continue
+			}
+
+			// Collecting sequence intermediate bytes. The byte has to be
+			// consumed here: nothing else advances i on this path, and
+			// leaving it in place would feed the same byte to the state
+			// machine again on the next iteration.
+			buf.WriteByte(b[i])
+			i++
+		case PrintAction:
+			// Is this gonna be too wide?
+			// If so write the tail and stop collecting.
+			if curWidth >= length && !ignoring {
+				ignoring = true
+				buf.WriteString(tail)
+			}
+
+			// Skip to the next byte if we're ignoring
+			if ignoring {
+				i++
+				continue
+			}
+
+			// collects printable ASCII
+			curWidth++
+			fallthrough
+		default:
+			buf.WriteByte(b[i])
+			i++
+		}
+
+		// Transition to the next state.
+		pstate = state
+	}
+
+	return buf.String()
+}
diff --git a/exp/term/ansi/truncate_test.go b/exp/term/ansi/truncate_test.go
new file mode 100644
index 00000000..4fe18891
--- /dev/null
+++ b/exp/term/ansi/truncate_test.go
@@ -0,0 +1,66 @@
+package ansi
+
+import (
+	"testing"
+)
+
+// tcases lists the inputs for TestTruncate: each input is truncated to width
+// cells using tail and compared against expect.
+var tcases = []struct {
+	name   string
+	input  string
+	tail   string
+	width  int
+	expect string
+}{
+	{"empty", "", "", 0, ""},
+	{"simple", "foobar", "", 3, "foo"},
+	{"passthrough", "foobar", "", 10, "foobar"},
+	{"ascii", "hello", "", 3, "hel"},
+	{"emoji", "👋", "", 2, "👋"},
+	{"wideemoji", "🫧", "", 2, "🫧"},
+	{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", "", 8, "\x1b[31mhello 👋\x1b[0m"},
+	{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", "", 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
+	{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", "", 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
+	{"style_tail", "\x1B[38;5;219mHiya!", "…", 3, "\x1B[38;5;219mHi…"},
+	{"double_style_tail", "\x1B[38;5;219mHiya!\x1B[38;5;219mHello", "…", 7, "\x1B[38;5;219mHiya!\x1B[38;5;219mH…"},
+	{"noop", "\x1B[7m--", "", 2, "\x1B[7m--"},
+	{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", "", 3, "\x1B[38;2;249;38;114m你\x1B[0m"},
+	{"double_width_rune", "你", "", 1, ""},
+	{"double_width_runes", "你好", "", 2, "你"},
+	{"spaces_only", "    ", "…", 2, " …"},
+	{"longer_tail", "foo", "...", 2, ""},
+	{"same_tail_width", "foo", "...", 3, "..."},
+	{"same_tail_width_control", "\x1b[31mfoo\x1b[0m", "...", 3, "\x1b[31m...\x1b[0m"},
+	{"same_width", "foo", "", 3, "foo"},
+	{"truncate_with_tail", "foobar", ".", 4, "foo."},
+	{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", "", 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
+	{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", "…", 4, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo…"},
+	{"wide_exact_fit_tail", "你好", "…", 3, "你…"},
+	{"wide_fits_before_tail", "你", "…", 3, "你"},
+	{"esc_intermediate", "\x1b(Bfoobar", "", 3, "\x1b(Bfoo"},
+}
+
+// TestTruncate runs every entry of tcases as a subtest.
+func TestTruncate(t *testing.T) {
+	for i, c := range tcases {
+		t.Run(c.name, func(t *testing.T) {
+			if result := Truncate(c.input, c.width, c.tail); result != c.expect {
+				t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result)
+			}
+		})
+	}
+}
+
+// BenchmarkTruncateString measures Truncate on a short ASCII string.
+func BenchmarkTruncateString(b *testing.B) {
+	// Configure the benchmark before going parallel: ResetTimer has global
+	// effect and must not be called from inside the RunParallel body.
+	b.ReportAllocs()
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			Truncate("foo", 2, "")
+		}
+	})
+}