Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(term): ansi: implement wrap and wordwrap #51

Merged
merged 2 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions exp/term/ansi/wrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
package ansi

import (
"bytes"
"unicode"
"unicode/utf8"

. "github.com/charmbracelet/x/exp/term/ansi/parser"
"github.com/rivo/uniseg"
)

// Wrap wraps a string or a block of text to a given line length, breaking word
// boundaries. This will preserve ANSI escape codes and will account for
// wide-characters in the string.
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved.
//
// A limit smaller than 1 disables wrapping and s is returned unchanged.
// Newlines already present in s are kept and reset the current line width.
func Wrap(s string, limit int, preserveSpace bool) string {
	if limit < 1 {
		return s
	}

	var (
		cluster      []byte
		buf          bytes.Buffer
		curWidth     int          // width written on the current output line
		forceNewline bool         // true while the current line was started by the wrapper itself
		pstate       = GroundState // initial state
		b            = []byte(s)
		i            = 0
	)

	addNewline := func() {
		buf.WriteByte('\n')
		curWidth = 0
	}

	for i < len(b) {
		state, action := Table.Transition(pstate, b[i])

		switch action {
		case CollectAction:
			if utf8ByteLen(b[i]) > 1 {
				// Multi-byte UTF-8 lead byte: consume the whole grapheme
				// cluster at once. Every lookup starts from a fresh state
				// (-1): other bytes may have been processed since the
				// previous cluster, so a carried-over state would be stale
				// and could throw the width calculation off.
				var width int
				cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
				i += len(cluster)

				if curWidth+width > limit {
					addNewline()
				}
				// A single rune occupies at most 4 bytes, so longer clusters
				// are never treated as a lone space.
				if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
					// Skip spaces at the beginning of a line
					if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
						pstate = GroundState
						continue
					}
				}
				buf.Write(cluster)
				curWidth += width

				// The cluster was consumed here, so the parser goes back to
				// the ground state and i is not advanced again below.
				pstate = GroundState
				continue
			}
			// Collect sequence intermediate bytes
			buf.WriteByte(b[i])
		case PrintAction, ExecuteAction:
			if b[i] == '\n' {
				// Explicit line break from the input: leading spaces on the
				// next line are not stripped.
				addNewline()
				forceNewline = false
				break
			}

			if curWidth+1 > limit {
				addNewline()
				forceNewline = true
			}

			// Skip spaces at the beginning of a line
			if curWidth == 0 {
				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
					break
				}
				forceNewline = false
			}

			buf.WriteByte(b[i])
			curWidth++
		default:
			// Any other action: copy the byte through without counting width.
			buf.WriteByte(b[i])
		}

		// We manage the UTF8 state separately manually above.
		if pstate != Utf8State {
			pstate = state
		}
		i++
	}

	return buf.String()
}

// Wordwrap wraps a string or a block of text to a given line length, not
// breaking word boundaries. This will preserve ANSI escape codes and will
// account for wide-characters in the string.
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
func Wordwrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}

// Add a hyphen to the breakpoints
breakpoints += "-"

var (
cluster []byte
buf bytes.Buffer
word bytes.Buffer
space bytes.Buffer
curWidth int
wordLen int
gstate = -1
pstate = GroundState // initial state
b = []byte(s)
i = 0
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
)

addSpace := func() {
curWidth += space.Len()
buf.Write(space.Bytes())
space.Reset()
}

addWord := func() {
if word.Len() == 0 {
return
}
addSpace()
curWidth += wordLen
buf.Write(word.Bytes())
word.Reset()
wordLen = 0
}

addNewline := func() {
buf.WriteByte('\n')
curWidth = 0
space.Reset()
}

for i < len(b) {
state, action := Table.Transition(pstate, b[i])
// log.Printf("pstate: %s, state: %s, action: %s, code: %q", StateNames[pstate], StateNames[state], ActionNames[action], b[i])
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
// log.Printf("curWidth: %d, limit: %d", curWidth, limit)
// log.Printf("word: %q, wordLen: %d, space: %q", word.String(), wordLen, space.String())

switch action {
case CollectAction:
if w := utf8ByteLen(b[i]); w > 1 {
var width int
cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate)
// log.Printf("cluster: %q, width: %d, buf: %q", cluster, width, b[i:])
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
if r != utf8.RuneError && unicode.IsSpace(r) {
addWord()
space.WriteRune(r)
} else if bytes.ContainsAny(cluster, breakpoints) {
addSpace()
addWord()
buf.Write(cluster)
} else {
word.Write(cluster)
wordLen += width
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {
addNewline()
}
}

pstate = GroundState
continue
} else {
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
// Collect sequence intermediate bytes
word.WriteByte(b[i])
}
case PrintAction, ExecuteAction:
r := rune(b[i])
if r == '\n' {
aymanbagabas marked this conversation as resolved.
Show resolved Hide resolved
if wordLen == 0 {
if curWidth+space.Len() > limit {
curWidth = 0
} else {
buf.Write(space.Bytes())
}
space.Reset()
}

addWord()
addNewline()
} else if unicode.IsSpace(r) {
addWord()
space.WriteByte(b[i])
} else if runeContainsAny(r, breakpoints) {
addSpace()
addWord()
buf.WriteByte(b[i])
} else {
word.WriteByte(b[i])
wordLen++
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {
addNewline()
}
}

default:
word.WriteByte(b[i])
}
// We manage the UTF8 state separately manually above.
if pstate != Utf8State {
pstate = state
}
i++
}

addWord()

return buf.String()
}

// runeContainsAny reports whether the rune r occurs among the runes of s.
func runeContainsAny(r rune, s string) bool {
	found := false
	for _, candidate := range s {
		if candidate == r {
			found = true
			break
		}
	}
	return found
}
110 changes: 110 additions & 0 deletions exp/term/ansi/wrap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package ansi_test

import (
"testing"

"github.com/charmbracelet/x/exp/term/ansi"
)

// cases is the table driving TestWrap. Each entry names the scenario and gives
// the input string, the line-length limit, the output Wrap is expected to
// produce, and the preserveSpace flag passed to Wrap.
var cases = []struct {
name string
input string
limit int
expected string
preserveSpace bool
}{
{"empty string", "", 0, "", true},
{"passthrough", "foobar\n ", 0, "foobar\n ", true},
{"pass", "foo", 4, "foo", true},
{"simple", "foobarfoo", 4, "foob\narfo\no", true},
{"lf", "f\no\nobar", 3, "f\no\noba\nr", true},
{"lf_space", "foo bar\n baz", 3, "foo\n ba\nr\n b\naz", true},
{"tab", "foo\tbar", 3, "foo\n\tba\nr", true},
{"unicode_space", "foo\xc2\xa0bar", 3, "foo\nbar", false},
{"style_nochange", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", true},
{"style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", true},
{"style_lf", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 8, "I really\n\x1b[38;2;249;38;114mlove\x1b[0m Go!", false},
{"style_emoji", "I really \x1B[38;2;249;38;114mlove u🫧\x1B[0m", 8, "I really\n\x1b[38;2;249;38;114mlove u🫧\x1b[0m", false},
{"hyperlink", "I really \x1B]8;;https://example.com/\x1B\\love\x1B]8;;\x1B\\ Go!", 10, "I really \x1b]8;;https://example.com/\x1b\\l\nove\x1b]8;;\x1b\\ Go!", false},
{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", 3, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo\nbar", false},
{"begin_with_space", " foo", 4, " foo", false},
{"style_dont_affect_wrap", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", false},
{"preserve_style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", false},
{"emoji", "foo🫧foobar", 4, "foo\n🫧fo\nobar", false},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
}

// TestWrap runs every entry of the cases table through ansi.Wrap as a named
// subtest and reports any output that differs from the expected string.
func TestWrap(t *testing.T) {
	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := ansi.Wrap(tc.input, tc.limit, tc.preserveSpace)
			if got == tc.expected {
				return
			}
			// Case numbers in the message are 1-based.
			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
		})
	}
}

// wwCases is the table driving TestWordwrap. Each entry names the scenario and
// gives the input string, the line-length limit, the extra breakpoint
// characters passed to Wordwrap, and the output Wordwrap is expected to
// produce.
var wwCases = []struct {
name string
input string
limit int
breakPoints string
expected string
}{
{"empty string", "", 0, "", ""},
{"passthrough", "foobar\n ", 0, "", "foobar\n "},
{"pass", "foo", 3, "", "foo"},
{"toolong", "foobarfoo", 4, "", "foobarfoo"},
{"white space", "foo bar foo", 4, "", "foo\nbar\nfoo"},
{"broken_at_spaces", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
{"hyphen", "foo-foobar", 4, "-", "foo-\nfoobar"},
{"emoji_breakpoint", "foo😃 foobar", 4, "😃", "foo😃\nfoobar"},
{"wide_emoji_breakpoint", "foo🫧 foobar", 4, "🫧", "foo🫧\nfoobar"},
{"space_breakpoint", "foo --bar", 9, "-", "foo --bar"},
{"simple", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
{"limit", "foo bar", 5, "", "foo\nbar"},
{"remove white spaces", "foo \nb ar ", 4, "", "foo\nb\nar"},
{"white space trail width", "foo\nb\t a\n bar", 4, "", "foo\nb\t a\n bar"},
{"explicit_line_break", "foo bar foo\n", 4, "", "foo\nbar\nfoo\n"},
{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", 4, "", "\nfoo\nbar\n\n\nfoo\n"},
{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo \nbar ", 6, "", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar"},
{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m"},
{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust\nanother\ntest\x1B[38;2;249;38;114m)\x1B[0m"},
{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\"},
}

// TestWordwrap runs every entry of the wwCases table through ansi.Wordwrap as
// a named subtest and reports any output that differs from the expected
// string.
func TestWordwrap(t *testing.T) {
	for idx := range wwCases {
		tc := wwCases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := ansi.Wordwrap(tc.input, tc.limit, tc.breakPoints)
			if got == tc.expected {
				return
			}
			// Case numbers in the message are 1-based.
			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
		})
	}
}

// TestWrapWordwrap pipes the output of ansi.Wordwrap into ansi.Wrap with the
// same limit and compares the combined result against a fixed string. The
// test is currently skipped ("WIP").
func TestWrapWordwrap(t *testing.T) {
	t.Skip("WIP")
	input := "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog."
	limit := 16
	// The expectation is declared once so the comparison and the failure
	// message always agree.
	// NOTE(review): the failure message used to print a different literal
	// ("the quick brown\nfoxxxxxxxxxxxxxx\nxx jumped over\nthe lazy dog.")
	// than the one actually compared; confirm which is intended before
	// un-skipping this test.
	const want = "the quick brown\nfoxxxxxxxxxxxxx\nxxxx jumped over\nthe lazy dog."
	output := ansi.Wordwrap(input, limit, "")
	t.Logf("output: %q", output)
	output = ansi.Wrap(output, limit, false)
	if output != want {
		t.Errorf("expected %q, got %q", want, output)
	}
}

// Blank-identifier constant: it cannot be referenced by any code.
// NOTE(review): this looks like a reference sample of the TestWrapWordwrap
// input after word wrapping (the long word kept whole on its own line) —
// confirm before relying on it.
const _ = `
the quick brown
foxxxxxxxxxxxxxxxx
jumped over the
lazy dog.
`

// Blank-identifier constant: it cannot be referenced by any code.
// NOTE(review): this looks like a reference sample of the TestWrapWordwrap
// input after hard wrapping at 16 columns (words split mid-way) — confirm
// before relying on it.
const _ = `
the quick brown
foxxxxxxxxxxxxxx
xx jumped over t
he lazy dog.
`
Loading