charmbracelet · aymanbagabas · Mar 21, 2024 · Mar 14, 2024 · Mar 19, 2024
diff --git a/exp/term/ansi/wrap.go b/exp/term/ansi/wrap.go
@@ -0,0 +1,246 @@
+package ansi
+
+import (
+	"bytes"
+	"unicode"
+	"unicode/utf8"
+
+	. "github.com/charmbracelet/x/exp/term/ansi/parser"
+	"github.com/rivo/uniseg"
+)
+
+// Wrap wraps a string or a block of text to a given line length, breaking word
+// boundaries. This will preserve ANSI escape codes and will account for
+// wide-characters in the string.
+// When preserveSpace is true, spaces at the beginning of a line will be
+// preserved.
+//
+// A limit below 1 disables wrapping and s is returned unchanged. Bytes whose
+// parser action is neither print nor execute are copied to the output without
+// adding to the line width.
+func Wrap(s string, limit int, preserveSpace bool) string {
+	if limit < 1 {
+		return s
+	}
+
+	var (
+		cluster      []byte
+		buf          bytes.Buffer
+		curWidth     int
+		forceNewline bool
+		pstate       = GroundState // initial state
+		b            = []byte(s)
+		i            = 0
+	)
+
+	// addNewline ends the current output line and resets its width.
+	addNewline := func() {
+		buf.WriteByte('\n')
+		curWidth = 0
+	}
+
+	for i < len(b) {
+		state, action := Table.Transition(pstate, b[i])
+
+		switch action {
+		case CollectAction:
+			if utf8ByteLen(b[i]) <= 1 {
+				// Collect sequence intermediate bytes
+				buf.WriteByte(b[i])
+				break
+			}
+
+			// Always start from an unknown grapheme state (-1). The clusters
+			// handled here are not necessarily adjacent in the input, so a
+			// state returned by an earlier call (including one whose cluster
+			// was skipped below) could throw the width calculation off.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			i += len(cluster)
+
+			if curWidth+width > limit {
+				addNewline()
+			}
+			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
+				// Skip spaces at the beginning of a line
+				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
+					pstate = GroundState
+					continue
+				}
+			}
+			buf.Write(cluster)
+			curWidth += width
+
+			// The whole cluster has been consumed, so bypass the shared
+			// state update and index increment at the bottom of the loop.
+			pstate = GroundState
+			continue
+		case PrintAction, ExecuteAction:
+			if b[i] == '\n' {
+				addNewline()
+				forceNewline = false
+				break
+			}
+
+			if curWidth+1 > limit {
+				addNewline()
+				forceNewline = true
+			}
+
+			// Skip spaces at the beginning of a line. forceNewline limits
+			// this to lines begun by the wrap above; it is cleared by an
+			// explicit newline and by the first character that is kept.
+			if curWidth == 0 {
+				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
+					break
+				}
+				forceNewline = false
+			}
+
+			buf.WriteByte(b[i])
+			curWidth++
+		default:
+			buf.WriteByte(b[i])
+		}
+
+		// We manage the UTF8 state separately manually above.
+		if pstate != Utf8State {
+			pstate = state
+		}
+		i++
+	}
+
+	return buf.String()
+}
+
+// Wordwrap wraps a string or a block of text to a given line length, not
+// breaking word boundaries. This will preserve ANSI escape codes and will
+// account for wide-characters in the string.
+// The breakpoints string is a list of characters that are considered
+// breakpoints for word wrapping. A hyphen (-) is always considered a
+// breakpoint.
+//
+// A limit below 1 disables wrapping and s is returned unchanged. A newline is
+// never inserted inside a word, so a word wider than limit makes its line
+// exceed limit.
+func Wordwrap(s string, limit int, breakpoints string) string {
+	if limit < 1 {
+		return s
+	}
+
+	// Add a hyphen to the breakpoints
+	breakpoints += "-"
+
+	// buf is the finished output. word and space hold the pending word and
+	// the whitespace seen before it; they are copied to buf only once it is
+	// known which line they belong on. wordLen and curWidth are the widths of
+	// the pending word and of the current output line.
+	var (
+		cluster  []byte
+		buf      bytes.Buffer
+		word     bytes.Buffer
+		space    bytes.Buffer
+		curWidth int
+		wordLen  int
+		pstate   = GroundState // initial state
+		b        = []byte(s)
+		i        = 0
+	)
+
+	// addSpace flushes the pending whitespace to the output. Its width is
+	// taken to be its length in bytes.
+	addSpace := func() {
+		curWidth += space.Len()
+		buf.Write(space.Bytes())
+		space.Reset()
+	}
+
+	// addWord flushes the pending whitespace followed by the pending word.
+	addWord := func() {
+		if word.Len() == 0 {
+			return
+		}
+		addSpace()
+		curWidth += wordLen
+		buf.Write(word.Bytes())
+		word.Reset()
+		wordLen = 0
+	}
+
+	// addNewline starts a new output line and drops any pending whitespace.
+	addNewline := func() {
+		buf.WriteByte('\n')
+		curWidth = 0
+		space.Reset()
+	}
+
+	for i < len(b) {
+		state, action := Table.Transition(pstate, b[i])
+
+		switch action {
+		case CollectAction:
+			if utf8ByteLen(b[i]) <= 1 {
+				// Collect sequence intermediate bytes
+				word.WriteByte(b[i])
+				break
+			}
+
+			// Always start from an unknown grapheme state (-1). The clusters
+			// handled here are not necessarily adjacent in the input, so a
+			// state returned by an earlier call could skew the cluster
+			// boundary and width reported for this one.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			i += len(cluster)
+
+			r, _ := utf8.DecodeRune(cluster)
+			switch {
+			case r != utf8.RuneError && unicode.IsSpace(r):
+				addWord()
+				space.WriteRune(r)
+			case bytes.ContainsAny(cluster, breakpoints):
+				addSpace()
+				addWord()
+				buf.Write(cluster)
+				// The breakpoint is printed on the current line, so it has
+				// to count toward the line width like any other cluster.
+				curWidth += width
+			default:
+				word.Write(cluster)
+				wordLen += width
+				if curWidth+space.Len()+wordLen > limit &&
+					wordLen < limit {
+					addNewline()
+				}
+			}
+
+			// The whole cluster has been consumed, so bypass the shared
+			// state update and index increment at the bottom of the loop.
+			pstate = GroundState
+			continue
+		case PrintAction, ExecuteAction:
+			r := rune(b[i])
+			switch {
+			case r == '\n':
+				if wordLen == 0 {
+					// No word characters are pending: keep the pending
+					// whitespace when it still fits on the line, otherwise
+					// drop it.
+					if curWidth+space.Len() > limit {
+						curWidth = 0
+					} else {
+						buf.Write(space.Bytes())
+					}
+					space.Reset()
+				}
+
+				addWord()
+				addNewline()
+			case unicode.IsSpace(r):
+				addWord()
+				space.WriteByte(b[i])
+			case runeContainsAny(r, breakpoints):
+				addSpace()
+				addWord()
+				buf.WriteByte(b[i])
+				// The breakpoint takes up one cell on the current line.
+				curWidth++
+			default:
+				word.WriteByte(b[i])
+				wordLen++
+				if curWidth+space.Len()+wordLen > limit &&
+					wordLen < limit {
+					addNewline()
+				}
+			}
+
+		default:
+			word.WriteByte(b[i])
+		}
+		// We manage the UTF8 state separately manually above.
+		if pstate != Utf8State {
+			pstate = state
+		}
+		i++
+	}
+
+	// Flush whatever word is still pending at the end of the input.
+	addWord()
+
+	return buf.String()
+}
+
+// runeContainsAny reports whether r equals one of the runes encoded in s.
+// It returns false when s is empty.
+func runeContainsAny(r rune, s string) bool {
+	// Peel one rune at a time off the front of s until a match is found or
+	// nothing is left.
+	for len(s) > 0 {
+		c, size := utf8.DecodeRuneInString(s)
+		if c == r {
+			return true
+		}
+		s = s[size:]
+	}
+	return false
+}
diff --git a/exp/term/ansi/wrap_test.go b/exp/term/ansi/wrap_test.go
@@ -0,0 +1,110 @@
+package ansi_test
+
+import (
+	"testing"
+
+	"github.com/charmbracelet/x/exp/term/ansi"
+)
+
+// cases is the table driving TestWrap. Each entry is run as a subtest that
+// calls ansi.Wrap(input, limit, preserveSpace) and compares the result with
+// expected. Entries are positional, so the values follow the field order
+// below.
+var cases = []struct {
+	name          string // subtest name
+	input         string // text passed to ansi.Wrap
+	limit         int    // line width passed to ansi.Wrap
+	expected      string // exact output ansi.Wrap must produce
+	preserveSpace bool   // preserveSpace argument passed to ansi.Wrap
+}{
+	{"empty string", "", 0, "", true},
+	{"passthrough", "foobar\n ", 0, "foobar\n ", true},
+	{"pass", "foo", 4, "foo", true},
+	{"simple", "foobarfoo", 4, "foob\narfo\no", true},
+	{"lf", "f\no\nobar", 3, "f\no\noba\nr", true},
+	{"lf_space", "foo bar\n  baz", 3, "foo\n ba\nr\n  b\naz", true},
+	{"tab", "foo\tbar", 3, "foo\n\tba\nr", true},
+	{"unicode_space", "foo\xc2\xa0bar", 3, "foo\nbar", false},
+	{"style_nochange", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", true},
+	{"style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", true},
+	{"style_lf", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 8, "I really\n\x1b[38;2;249;38;114mlove\x1b[0m Go!", false},
+	{"style_emoji", "I really \x1B[38;2;249;38;114mlove u🫧\x1B[0m", 8, "I really\n\x1b[38;2;249;38;114mlove u🫧\x1b[0m", false},
+	{"hyperlink", "I really \x1B]8;;https://example.com/\x1B\\love\x1B]8;;\x1B\\ Go!", 10, "I really \x1b]8;;https://example.com/\x1b\\l\nove\x1b]8;;\x1b\\ Go!", false},
+	{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", 3, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo\nbar", false},
+	{"begin_with_space", " foo", 4, " foo", false},
+	{"style_dont_affect_wrap", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", false},
+	{"preserve_style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", false},
+	{"emoji", "foo🫧foobar", 4, "foo\n🫧fo\nobar", false},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
+}
+
+// TestWrap checks ansi.Wrap against every entry of cases, running each entry
+// as a subtest named after it.
+func TestWrap(t *testing.T) {
+	for idx, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := ansi.Wrap(tc.input, tc.limit, tc.preserveSpace)
+			if got == tc.expected {
+				return
+			}
+			// Report the 1-based position of the entry in the table.
+			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
+		})
+	}
+}
+
+// wwCases is the table driving TestWordwrap. Each entry is run as a subtest
+// that calls ansi.Wordwrap(input, limit, breakPoints) and compares the result
+// with expected. Entries are positional, so the values follow the field order
+// below (breakPoints comes before expected).
+var wwCases = []struct {
+	name        string // subtest name
+	input       string // text passed to ansi.Wordwrap
+	limit       int    // line width passed to ansi.Wordwrap
+	breakPoints string // breakpoints argument passed to ansi.Wordwrap
+	expected    string // exact output ansi.Wordwrap must produce
+}{
+	{"empty string", "", 0, "", ""},
+	{"passthrough", "foobar\n ", 0, "", "foobar\n "},
+	{"pass", "foo", 3, "", "foo"},
+	{"toolong", "foobarfoo", 4, "", "foobarfoo"},
+	{"white space", "foo bar foo", 4, "", "foo\nbar\nfoo"},
+	{"broken_at_spaces", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
+	{"hyphen", "foo-foobar", 4, "-", "foo-\nfoobar"},
+	{"emoji_breakpoint", "foo😃 foobar", 4, "😃", "foo😃\nfoobar"},
+	{"wide_emoji_breakpoint", "foo🫧 foobar", 4, "🫧", "foo🫧\nfoobar"},
+	{"space_breakpoint", "foo --bar", 9, "-", "foo --bar"},
+	{"simple", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
+	{"limit", "foo bar", 5, "", "foo\nbar"},
+	{"remove white spaces", "foo    \nb   ar   ", 4, "", "foo\nb\nar"},
+	{"white space trail width", "foo\nb\t a\n bar", 4, "", "foo\nb\t a\n bar"},
+	{"explicit_line_break", "foo bar foo\n", 4, "", "foo\nbar\nfoo\n"},
+	{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", 4, "", "\nfoo\nbar\n\n\nfoo\n"},
+	{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo  \nbar    ", 6, "", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar"},
+	{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m"},
+	{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust\nanother\ntest\x1B[38;2;249;38;114m)\x1B[0m"},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\"},
+}
+
+// TestWordwrap checks ansi.Wordwrap against every entry of wwCases, running
+// each entry as a subtest named after it.
+func TestWordwrap(t *testing.T) {
+	for idx, tc := range wwCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := ansi.Wordwrap(tc.input, tc.limit, tc.breakPoints)
+			if got == tc.expected {
+				return
+			}
+			// Report the 1-based position of the entry in the table.
+			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
+		})
+	}
+}
+
+// TestWrapWordwrap feeds the output of ansi.Wordwrap through ansi.Wrap with
+// the same limit and compares the result with a fixed string. It is skipped
+// for now as work in progress.
+func TestWrapWordwrap(t *testing.T) {
+	t.Skip("WIP")
+	input := "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog."
+	limit := 16
+	// expected is used for both the comparison and the failure message so
+	// that the message always shows the value that was actually compared.
+	// NOTE(review): the test is still WIP; confirm this is the intended
+	// output before enabling it.
+	expected := "the quick brown\nfoxxxxxxxxxxxxx\nxxxx jumped over\nthe lazy dog."
+
+	output := ansi.Wordwrap(input, limit, "")
+	t.Logf("output: %q", output)
+	output = ansi.Wrap(output, limit, false)
+	if output != expected {
+		t.Errorf("expected %q, got %q", expected, output)
+	}
+}
+
+// Scratch note: the sentence used by TestWrapWordwrap laid out over four
+// lines without splitting any word. It is bound to the blank identifier, so
+// nothing can refer to it.
+// NOTE(review): presumably the word-wrapped form at limit 16 — confirm.
+const _ = `
+ the quick brown
+ foxxxxxxxxxxxxxxxx
+ jumped over the
+ lazy dog.
+`
+
+// Scratch note: the sentence used by TestWrapWordwrap laid out over four
+// lines with words split across line ends. It is bound to the blank
+// identifier, so nothing can refer to it.
+// NOTE(review): presumably the hard-wrapped form at limit 16 — confirm.
+const _ = `
+ the quick brown
+ foxxxxxxxxxxxxxx
+ xx jumped over t
+ he lazy dog.
+`