|
|
|
@ -156,14 +156,14 @@ func Tokenize(text string, delimiter Delimiter) []Token {
|
|
|
|
|
// FIXME performance
|
|
|
|
|
var tokens []string
|
|
|
|
|
if delimiter.regex != nil {
|
|
|
|
|
for len(text) > 0 {
|
|
|
|
|
loc := delimiter.regex.FindStringIndex(text)
|
|
|
|
|
if len(loc) < 2 {
|
|
|
|
|
loc = []int{0, len(text)}
|
|
|
|
|
}
|
|
|
|
|
last := util.Max(loc[1], 1)
|
|
|
|
|
tokens = append(tokens, text[:last])
|
|
|
|
|
text = text[last:]
|
|
|
|
|
locs := delimiter.regex.FindAllStringIndex(text, -1)
|
|
|
|
|
begin := 0
|
|
|
|
|
for _, loc := range locs {
|
|
|
|
|
tokens = append(tokens, text[begin:loc[1]])
|
|
|
|
|
begin = loc[1]
|
|
|
|
|
}
|
|
|
|
|
if begin < len(text) {
|
|
|
|
|
tokens = append(tokens, text[begin:])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return withPrefixLengths(tokens, 0)
|
|
|
|
|