-
-
Notifications
You must be signed in to change notification settings - Fork 401
/
delegate.go
152 lines (139 loc) · 3.55 KB
/
delegate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package chroma
import (
"bytes"
)
type delegatingLexer struct {
root Lexer
language Lexer
}
// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexer as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
func DelegatingLexer(root Lexer, language Lexer) Lexer {
return &delegatingLexer{
root: root,
language: language,
}
}
func (d *delegatingLexer) AnalyseText(text string) float32 {
return d.root.AnalyseText(text)
}
func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
d.root.SetAnalyser(analyser)
return d
}
func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
d.root.SetRegistry(r)
d.language.SetRegistry(r)
return d
}
func (d *delegatingLexer) Config() *Config {
return d.language.Config()
}
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
start, end int
tokens []Token
}
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
tokens, err := Tokenise(Coalesce(d.language), options, text)
if err != nil {
return nil, err
}
// Compute insertions and gather "Other" tokens.
others := &bytes.Buffer{}
insertions := []*insertion{}
var insert *insertion
offset := 0
var last Token
for _, t := range tokens {
if t.Type == Other {
if last != EOF && insert != nil && last.Type != Other {
insert.end = offset
}
others.WriteString(t.Value)
} else {
if last == EOF || last.Type == Other {
insert = &insertion{start: offset}
insertions = append(insertions, insert)
}
insert.tokens = append(insert.tokens, t)
}
last = t
offset += len(t.Value)
}
if len(insertions) == 0 {
return d.root.Tokenise(options, text)
}
// Lex the other tokens.
rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
if err != nil {
return nil, err
}
// Interleave the two sets of tokens.
var out []Token
offset = 0 // Offset into text.
tokenIndex := 0
nextToken := func() Token {
if tokenIndex >= len(rootTokens) {
return EOF
}
t := rootTokens[tokenIndex]
tokenIndex++
return t
}
insertionIndex := 0
nextInsertion := func() *insertion {
if insertionIndex >= len(insertions) {
return nil
}
i := insertions[insertionIndex]
insertionIndex++
return i
}
t := nextToken()
i := nextInsertion()
for t != EOF || i != nil {
// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
var l Token
l, t = splitToken(t, i.start-offset)
if l != EOF {
out = append(out, l)
offset += len(l.Value)
}
out = append(out, i.tokens...)
offset += i.end - i.start
if t == EOF {
t = nextToken()
}
i = nextInsertion()
} else {
out = append(out, t)
offset += len(t.Value)
t = nextToken()
}
}
return Literator(out...), nil
}
func splitToken(t Token, offset int) (l Token, r Token) {
if t == EOF {
return EOF, EOF
}
if offset == 0 {
return EOF, t
}
if offset == len(t.Value) {
return t, EOF
}
l = t.Clone()
r = t.Clone()
l.Value = l.Value[:offset]
r.Value = r.Value[offset:]
return
}