Skip to content

Commit

Permalink
feat(term): ansi: implement ANSI aware truncation
Browse files Browse the repository at this point in the history
This implements an ANSI and wide-characters aware truncation algorithm
that uses the newly merged [ANSI parser state machine][statemachine] and
the fantastic library uniseg.

Since this is using the ANSI state machine, it's compatible with `CSI m`
(SGR) style sequences, `OSC 8` (hyperlinks), and basically any other
escape sequence supported in the state machine (DCS, ESC, SOS, PM, APC).

Related: muesli/reflow#71

[statemachine]: https://github.com/charmbracelet/x/blob/main/exp/term/ansi/parser/transition_table.go
  • Loading branch information
aymanbagabas committed Mar 13, 2024
1 parent 8cc69f8 commit 471d31b
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 0 deletions.
116 changes: 116 additions & 0 deletions exp/term/ansi/truncate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package ansi

import (
"bytes"

. "github.com/charmbracelet/x/exp/term/ansi/parser"
"github.com/rivo/uniseg"
)

// Truncate truncates a string to a given length, measured in terminal cells,
// adding a tail to the end when content has to be dropped.
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide characters (such as East Asian characters and emojis).
//
// The width of tail is reserved out of length up front, so the printable
// content that is kept never exceeds length minus the tail width. If the tail
// itself is wider than length, the empty string is returned.
//
// Escape sequences are always copied to the output, including those that
// appear after the cut-off point, so that styles and hyperlinks opened before
// the cut are still terminated.
func Truncate(s string, length int, tail string) string {
	// Reserve room for the tail; what remains is the budget for content.
	length -= StringWidth(tail)
	if length < 0 {
		return ""
	}

	var buf bytes.Buffer
	curWidth := 0
	ignoring := false
	pstate := GroundState // initial state
	b := []byte(s)
	i := 0

	// Here we iterate over the bytes of the string and collect printable
	// characters and grapheme clusters, keeping track of the width of the
	// output in cells.
	// Once the budget is used up we write the tail, start ignoring printable
	// content, and only keep copying escape sequences until end of string.
	for i < len(b) {
		state, action := Table.Transition(pstate, b[i])

		switch {
		case action == CollectAction && utf8ByteLen(b[i]) > 1:
			// Lead byte of a multi-byte UTF-8 sequence: hand the rest of the
			// input to uniseg to get the whole grapheme cluster and its width.
			// The state is always -1 because bytes between two clusters may
			// have been consumed by the ANSI parser, which would make a state
			// carried over from the previous call stale.
			cluster, _, width, _ := uniseg.FirstGraphemeCluster(b[i:], -1)

			// The cluster is consumed whether or not it is kept.
			i += len(cluster)

			// Already past the cut-off: drop the cluster.
			if ignoring {
				continue
			}

			// Is this gonna be too wide?
			// If so write the tail and stop collecting. A cluster that
			// exactly fills the remaining budget is still kept.
			if curWidth+width > length {
				ignoring = true
				buf.WriteString(tail)
				continue
			}

			curWidth += width
			buf.Write(cluster)

			// Done collecting, now we're back in the ground state.
			pstate = GroundState
			continue

		case action == PrintAction:
			// Is this gonna be too wide?
			// If so write the tail and stop collecting.
			if curWidth >= length && !ignoring {
				ignoring = true
				buf.WriteString(tail)
			}

			// Skip to the next byte if we're ignoring.
			if ignoring {
				i++
				continue
			}

			// Printable ASCII occupies exactly one cell.
			curWidth++
		}

		// Printable ASCII that fits, and every byte belonging to an escape
		// sequence (including collected intermediates and private markers),
		// is copied through and consumed exactly once.
		buf.WriteByte(b[i])
		i++

		// Transition to the next state.
		pstate = state
	}

	return buf.String()
}
57 changes: 57 additions & 0 deletions exp/term/ansi/truncate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package ansi

import (
"testing"
)

// tcases is the table of inputs and expected outputs used by TestTruncate.
// Each entry is passed to Truncate as (input, width, tail) and the result is
// compared against expect.
var tcases = []struct {
name string // sub-test name passed to t.Run
input string // string handed to Truncate
tail string // tail argument handed to Truncate
width int // length argument handed to Truncate
expect string // exact string Truncate must return
}{
{"empty", "", "", 0, ""},
{"simple", "foobar", "", 3, "foo"},
{"passthrough", "foobar", "", 10, "foobar"},
{"ascii", "hello", "", 3, "hel"},
{"emoji", "👋", "", 2, "👋"},
{"wideemoji", "🫧", "", 2, "🫧"},
{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", "", 8, "\x1b[31mhello 👋\x1b[0m"},
{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", "", 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", "", 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
{"style_tail", "\x1B[38;5;219mHiya!", "…", 3, "\x1B[38;5;219mHi…"},
{"double_style_tail", "\x1B[38;5;219mHiya!\x1B[38;5;219mHello", "…", 7, "\x1B[38;5;219mHiya!\x1B[38;5;219mH…"},
{"noop", "\x1B[7m--", "", 2, "\x1B[7m--"},
{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", "", 3, "\x1B[38;2;249;38;114m你\x1B[0m"},
{"double_width_rune", "你", "", 1, ""},
{"double_width_runes", "你好", "", 2, "你"},
{"spaces_only", " ", "…", 2, " …"},
{"longer_tail", "foo", "...", 2, ""},
{"same_tail_width", "foo", "...", 3, "..."},
{"same_tail_width_control", "\x1b[31mfoo\x1b[0m", "...", 3, "\x1b[31m...\x1b[0m"},
{"same_width", "foo", "", 3, "foo"},
{"truncate_with_tail", "foobar", ".", 4, "foo."},
{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", "", 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", "…", 4, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo…"},
}

// TestTruncate feeds every entry of tcases through Truncate as a named
// sub-test and reports any result that differs from the expected string.
func TestTruncate(t *testing.T) {
	for idx, tc := range tcases {
		t.Run(tc.name, func(t *testing.T) {
			got := Truncate(tc.input, tc.width, tc.tail)
			if got == tc.expect {
				return
			}
			// The reported case number is 1-based.
			t.Errorf("test case %d failed: expected %q, got %q", idx+1, tc.expect, got)
		})
	}
}

// BenchmarkTruncateString measures Truncate on a short ASCII string, running
// the calls from parallel goroutines and reporting allocations.
func BenchmarkTruncateString(b *testing.B) {
	// Benchmark-wide settings are applied once, before the workers start:
	// timer functions have global effect and must not be called from the
	// RunParallel body, which runs in every worker goroutine.
	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			Truncate("foo", 2, "")
		}
	})
}

0 comments on commit 471d31b

Please sign in to comment.