From f94dd9b68d1f31b2561e560e0e597a1645c6283c Mon Sep 17 00:00:00 2001
From: rsteube
Date: Sun, 3 Dec 2023 11:26:06 +0100
Subject: [PATCH] tmp

---
 shlex.go      |  5 +++--
 shlex_test.go | 42 +++++++++++++++++++++---------------------
 tokenslice.go |  5 +----
 3 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/shlex.go b/shlex.go
index fc75d01..c74807c 100644
--- a/shlex.go
+++ b/shlex.go
@@ -62,7 +62,8 @@ func (t *Token) Equal(other *Token) bool {
 		t.RawValue != other.RawValue,
 		t.Index != other.Index,
 		t.State != other.State,
-		t.WordbreakType != other.WordbreakType:
+		t.WordbreakType != other.WordbreakType,
+		t.WordbreakIndex != other.WordbreakIndex:
 		return false
 	default:
 		return true
@@ -231,7 +232,7 @@ func newTokenizer(r io.Reader) *tokenizer {
 func (t *tokenizer) scanStream() (*Token, error) {
 	previousState := t.state
 	t.state = START_STATE
-	token := &Token{WordbreakIndex: -1}
+	token := &Token{}
 	var nextRune rune
 	var nextRuneType runeTokenClass
 	var err error
diff --git a/shlex_test.go b/shlex_test.go
index 168571e..43acd84 100644
--- a/shlex_test.go
+++ b/shlex_test.go
@@ -50,27 +50,27 @@ func init() {
 func TestTokenizer(t *testing.T) {
 	testInput := strings.NewReader(testString)
 	expectedTokens := []*Token{
-		{WORD_TOKEN, "one", "one", 0, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "two", "two", 4, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "three four", "\"three four\"", 8, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "five \"six\"", "\"five \\\"six\\\"\"", 21, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "seven#eight", "seven#eight", 36, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{COMMENT_TOKEN, " nine # ten", "# nine # ten", 48, START_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "eleven", "eleven", 62, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "twelve\\", "'twelve\\'", 69, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "thirteen", "thirteen", 79, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORDBREAK_TOKEN, "=", "=", 87, WORDBREAK_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "13", "13", 88, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "fourteen/14", "fourteen/14", 91, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORDBREAK_TOKEN, "|", "|", 103, WORDBREAK_STATE, WORDBREAK_PIPE, -1},
-		{WORDBREAK_TOKEN, "||", "||", 105, WORDBREAK_STATE, WORDBREAK_LIST_OR, -1},
-		{WORDBREAK_TOKEN, "|", "|", 108, WORDBREAK_STATE, WORDBREAK_PIPE, -1},
-		{WORD_TOKEN, "after", "after", 109, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORD_TOKEN, "before", "before", 115, IN_WORD_STATE, WORDBREAK_UNKNOWN, -1},
-		{WORDBREAK_TOKEN, "|", "|", 121, WORDBREAK_STATE, WORDBREAK_PIPE, -1},
-		{WORDBREAK_TOKEN, "&", "&", 123, WORDBREAK_STATE, WORDBREAK_LIST_ASYNC, -1},
-		{WORDBREAK_TOKEN, ";", ";", 125, WORDBREAK_STATE, WORDBREAK_LIST_SEQUENTIAL, -1},
-		{WORD_TOKEN, "", "", 126, START_STATE, WORDBREAK_UNKNOWN, -1},
+		{WORD_TOKEN, "one", "one", 0, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "two", "two", 4, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "three four", "\"three four\"", 8, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "five \"six\"", "\"five \\\"six\\\"\"", 21, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "seven#eight", "seven#eight", 36, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{COMMENT_TOKEN, " nine # ten", "# nine # ten", 48, START_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "eleven", "eleven", 62, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "twelve\\", "'twelve\\'", 69, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "thirteen", "thirteen", 79, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORDBREAK_TOKEN, "=", "=", 87, WORDBREAK_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "13", "13", 88, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "fourteen/14", "fourteen/14", 91, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORDBREAK_TOKEN, "|", "|", 103, WORDBREAK_STATE, WORDBREAK_PIPE, 0},
+		{WORDBREAK_TOKEN, "||", "||", 105, WORDBREAK_STATE, WORDBREAK_LIST_OR, 0},
+		{WORDBREAK_TOKEN, "|", "|", 108, WORDBREAK_STATE, WORDBREAK_PIPE, 0},
+		{WORD_TOKEN, "after", "after", 109, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORD_TOKEN, "before", "before", 115, IN_WORD_STATE, WORDBREAK_UNKNOWN, 0},
+		{WORDBREAK_TOKEN, "|", "|", 121, WORDBREAK_STATE, WORDBREAK_PIPE, 0},
+		{WORDBREAK_TOKEN, "&", "&", 123, WORDBREAK_STATE, WORDBREAK_LIST_ASYNC, 0},
+		{WORDBREAK_TOKEN, ";", ";", 125, WORDBREAK_STATE, WORDBREAK_LIST_SEQUENTIAL, 0},
+		{WORD_TOKEN, "", "", 126, START_STATE, WORDBREAK_UNKNOWN, 0},
 	}
 
 	tokenizer := newTokenizer(testInput)
diff --git a/tokenslice.go b/tokenslice.go
index 6dc73d3..dc3c552 100644
--- a/tokenslice.go
+++ b/tokenslice.go
@@ -2,7 +2,6 @@ package shlex
 
 import (
 	"strconv"
-	"strings"
 )
 
 type TokenSlice []Token
@@ -102,9 +101,7 @@ func (t TokenSlice) WordbreakPrefix() string {
 	case QUOTING_STATE, QUOTING_ESCAPING_STATE, ESCAPING_QUOTED_STATE:
 		found = true
 		// TODO add value up to last opening quote to prefix
-		if index := strings.LastIndexAny(last.RawValue, `"'`); index > -1 { // TODO index needs to be stored during scanning
-			prefix = last.RawValue[:index] // TODO test - this is wrong (needs to be value up to rawvalue index -> just rescan the substring)
-		}
+		prefix = last.RawValue[:last.WordbreakIndex-last.Index-1] // TODO test - this is wrong (needs to be value up to rawvalue index -> just rescan the substring)
 	}
 
 	for i := len(t) - 2; i >= 0; i-- {
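
Note, not part of the patch: the remaining TODO in WordbreakPrefix points at rescanning the substring instead of slicing RawValue directly. Below is a minimal package-internal sketch of that rescan idea, assuming the internals visible in this patch (newTokenizer, scanStream, the Token fields) and that WordbreakIndex holds an absolute input position, as the patched slice expression implies. The helper name prefixByRescan, the EOF handling, and the exact offset arithmetic are assumptions; it would also need the "strings" import this patch removes from tokenslice.go.

// prefixByRescan sketches the "just rescan the substring" idea from the
// TODO in WordbreakPrefix. Assumption: WordbreakIndex is an absolute
// position in the input stream, so subtracting last.Index maps it into
// RawValue (the patched slice expression does the same).
func prefixByRescan(last Token) string {
	// With this patch the zero value of WordbreakIndex is 0 instead of -1,
	// so an index not past the token start means "no wordbreak recorded".
	if last.WordbreakIndex <= last.Index {
		return ""
	}

	// Raw text of the token up to the wordbreak position.
	raw := last.RawValue[:last.WordbreakIndex-last.Index]

	// Rescan only that substring and keep the decoded Value of the last
	// token it yields; quotes and escapes are then resolved by the
	// tokenizer itself rather than by string slicing. Assumes scanStream
	// returns a non-nil error such as io.EOF once the input is exhausted.
	prefix := ""
	tok := newTokenizer(strings.NewReader(raw))
	for {
		token, err := tok.scanStream()
		if err != nil {
			break
		}
		prefix = token.Value
	}
	return prefix
}

This would keep quote/escape decoding in one place (the tokenizer) instead of duplicating it in WordbreakPrefix, which appears to be what the TODO is after.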