Avoid unconditionally storing input as runes

When --with-nth is used, fzf used to preprocess each line and store the
result as a rune array, which was wasteful when the line contained only
ASCII characters.
Junegunn Choi
commit c9f16b6430 · parent bc9d2abdb6 · pull/967/merge

src/core.go
@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
 	ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
 		return util.ToChars(data), nil
 	}
-	ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
-		return util.RunesToChars(data), nil
-	}
 	if opts.Ansi {
 		if opts.Theme != nil {
 			var state *ansiState
@@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
 				return util.RunesToChars([]rune(trimmed)), nil
 			}
 		}
-		ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
-			return ansiProcessor([]byte(string(data)))
-		}
 	}

 	// Chunk list
@@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
 		})
 	} else {
 		chunkList = NewChunkList(func(data []byte, index int) Item {
-			tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+			tokens := Tokenize(string(data), opts.Delimiter)
 			trans := Transform(tokens, opts.WithNth)
+			transformed := joinTokens(trans)
 			if len(header) < opts.HeaderLines {
-				header = append(header, string(joinTokens(trans)))
+				header = append(header, transformed)
 				eventBox.Set(EvtHeader, header)
 				return nilItem
 			}
-			textRunes := joinTokens(trans)
-			trimmed, colors := ansiProcessorRunes(textRunes)
+			trimmed, colors := ansiProcessor([]byte(transformed))
 			trimmed.Index = int32(index)
 			return Item{text: trimmed, colors: colors, origText: &data}
 		})
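Taken together with the tokenizer changes below, the --with-nth path now stays in string/byte land end to end: the transformed tokens are joined into a string and handed straight to the byte-oriented ansiProcessor, so the rune-based ansiProcessorRunes variant could be dropped. A hypothetical in-package test sketch of that flow (TestWithNthFlowSketch is not part of this commit; Tokenize, Transform, splitNth, and joinTokens are the functions shown in the diffs):

package fzf

import "testing"

// Hypothetical sketch (not in this commit): exercises the post-commit
// --with-nth flow, where Tokenize takes a plain string and joinTokens
// returns one, so no intermediate []rune is allocated along the way.
func TestWithNthFlowSketch(t *testing.T) {
	tokens := Tokenize("alpha beta gamma", Delimiter{}) // AWK-style split
	trans := Transform(tokens, splitNth("2.."))         // keep fields 2 and on
	if joined := joinTokens(trans); joined != "beta gamma" {
		t.Errorf("unexpected join: %q", joined)
	}
}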

src/options_test.go
@@ -6,7 +6,6 @@ import (
 	"testing"

 	"github.com/junegunn/fzf/src/tui"
-	"github.com/junegunn/fzf/src/util"
 )

 func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {
 func TestDelimiterRegexString(t *testing.T) {
 	delim := delimiterRegexp("*")
-	tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+	tokens := Tokenize("-*--*---**---", delim)
 	if delim.regex != nil ||
 		tokens[0].text.ToString() != "-*" ||
 		tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {
 func TestDelimiterRegexRegex(t *testing.T) {
 	delim := delimiterRegexp("--\\*")
-	tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+	tokens := Tokenize("-*--*---**---", delim)
 	if delim.str != nil ||
 		tokens[0].text.ToString() != "-*--*" ||
 		tokens[1].text.ToString() != "---*" ||

src/pattern.go
@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 		return *item.transformed
 	}

-	tokens := Tokenize(item.text, p.delimiter)
+	tokens := Tokenize(item.text.ToString(), p.delimiter)
 	ret := Transform(tokens, p.nth)
 	item.transformed = &ret
 	return ret

src/pattern_test.go
@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {
 func TestOrigTextAndTransformed(t *testing.T) {
 	pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
-	tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+	tokens := Tokenize("junegunn", Delimiter{})
 	trans := Transform(tokens, []Range{Range{1, 1}})

 	origBytes := []byte("junegunn.choi")

src/terminal.go
@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
 	for idx, item := range items {
 		chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
-		tokens := Tokenize(chars, delimiter)
+		tokens := Tokenize(chars.ToString(), delimiter)
 		trans := Transform(tokens, ranges)
 		str := string(joinTokens(trans))
 		if delimiter.str != nil {

src/tokenizer.go
@@ -1,6 +1,7 @@
 package fzf

 import (
+	"bytes"
 	"regexp"
 	"strconv"
 	"strings"
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
 	return newRange(n, n), true
 }

-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
 	ret := make([]Token, len(tokens))

 	prefixLength := begin
-	for idx, token := range tokens {
-		// NOTE: &tokens[idx] instead of &tokens
-		ret[idx] = Token{&tokens[idx], int32(prefixLength)}
-		prefixLength += token.Length()
+	for idx := range tokens {
+		chars := util.ToChars([]byte(tokens[idx]))
+		ret[idx] = Token{&chars, int32(prefixLength)}
+		prefixLength += chars.Length()
 	}
 	return ret
 }
@@ -92,16 +93,15 @@ const (
 	awkWhite
 )

-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
 	// 9, 32
-	ret := []util.Chars{}
+	ret := []string{}
 	prefixLength := 0
 	state := awkNil
-	numChars := input.Length()
 	begin := 0
 	end := 0
-	for idx := 0; idx < numChars; idx++ {
-		r := input.Get(idx)
+	for idx := 0; idx < len(input); idx++ {
+		r := input[idx]
 		white := r == 9 || r == 32
 		switch state {
 		case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
 			if white {
 				end = idx + 1
 			} else {
-				ret = append(ret, input.Slice(begin, end))
+				ret = append(ret, input[begin:end])
 				state, begin, end = awkBlack, idx, idx+1
 			}
 		}
 	}
 	if begin < end {
-		ret = append(ret, input.Slice(begin, end))
+		ret = append(ret, input[begin:end])
 	}
 	return ret, prefixLength
 }

 // Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
 	if delimiter.str == nil && delimiter.regex == nil {
 		// AWK-style (\S+\s*)
 		tokens, prefixLength := awkTokenizer(text)
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
 	}

 	if delimiter.str != nil {
-		return withPrefixLengths(text.Split(*delimiter.str), 0)
+		return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
 	}

 	// FIXME performance
 	var tokens []string
 	if delimiter.regex != nil {
-		str := text.ToString()
-		for len(str) > 0 {
-			loc := delimiter.regex.FindStringIndex(str)
+		for len(text) > 0 {
+			loc := delimiter.regex.FindStringIndex(text)
 			if loc == nil {
-				loc = []int{0, len(str)}
+				loc = []int{0, len(text)}
 			}
 			last := util.Max(loc[1], 1)
-			tokens = append(tokens, str[:last])
-			str = str[last:]
+			tokens = append(tokens, text[:last])
+			text = text[last:]
 		}
 	}
-	asRunes := make([]util.Chars, len(tokens))
-	for i, token := range tokens {
-		asRunes[i] = util.RunesToChars([]rune(token))
-	}
-	return withPrefixLengths(asRunes, 0)
+	return withPrefixLengths(tokens, 0)
 }

-func joinTokens(tokens []Token) []rune {
-	ret := []rune{}
+func joinTokens(tokens []Token) string {
+	var output bytes.Buffer
 	for _, token := range tokens {
-		ret = append(ret, token.text.ToRunes()...)
+		output.WriteString(token.text.ToString())
 	}
-	return ret
+	return output.String()
 }

 // Transform is used to transform the input when --with-nth option is given
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
 		if r.begin == r.end {
 			idx := r.begin
 			if idx == rangeEllipsis {
-				chars := util.RunesToChars(joinTokens(tokens))
+				chars := util.ToChars([]byte(joinTokens(tokens)))
 				parts = append(parts, &chars)
 			} else {
 				if idx < 0 {
@@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
 	var merged util.Chars
 	switch len(parts) {
 	case 0:
-		merged = util.RunesToChars([]rune{})
+		merged = util.ToChars([]byte{})
 	case 1:
 		merged = *parts[0]
 	default:
-		runes := []rune{}
+		var output bytes.Buffer
 		for _, part := range parts {
-			runes = append(runes, part.ToRunes()...)
+			output.WriteString(part.ToString())
 		}
-		merged = util.RunesToChars(runes)
+		merged = util.ToChars([]byte(output.String()))
 	}

 	var prefixLength int32
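A note on the awkTokenizer rewrite above: switching from input.Get(idx) (a rune) to input[idx] (a byte) is safe because the only delimiters are ASCII tab (9) and space (32), and in UTF-8 every byte of a multi-byte sequence is >= 0x80, so a byte-wise scan can never split a rune. A standalone demonstration of that property (simplified: unlike fzf's tokenizer, it drops the whitespace instead of attaching it to the preceding token):

package main

import "fmt"

func main() {
	s := "가 나\t다" // multi-byte runes separated by ASCII whitespace
	var tokens []string
	begin := 0
	for i := 0; i < len(s); i++ {
		// Byte-wise scan: bytes 9 and 32 never occur inside a multi-byte
		// UTF-8 sequence, so slicing here cannot cut a rune in half.
		if s[i] == 9 || s[i] == 32 {
			if begin < i {
				tokens = append(tokens, s[begin:i])
			}
			begin = i + 1
		}
	}
	if begin < len(s) {
		tokens = append(tokens, s[begin:])
	}
	fmt.Printf("%q\n", tokens) // ["가" "나" "다"]
}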

src/tokenizer_test.go
@@ -2,8 +2,6 @@ package fzf
 import (
 	"testing"
-
-	"github.com/junegunn/fzf/src/util"
 )

 func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
 func TestTokenize(t *testing.T) {
 	// AWK-style
 	input := "  abc:  def:  ghi  "
-	tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+	tokens := Tokenize(input, Delimiter{})
 	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
 		t.Errorf("%s", tokens)
 	}

 	// With delimiter
-	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+	tokens = Tokenize(input, delimiterRegexp(":"))
 	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
-		t.Errorf("%s", tokens)
+		t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
 	}

 	// With delimiter regex
-	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+	tokens = Tokenize(input, delimiterRegexp("\\s+"))
 	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
 		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
 		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
 func TestTransform(t *testing.T) {
 	input := "  abc:  def:  ghi:  jkl"
 	{
-		tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+		tokens := Tokenize(input, Delimiter{})
 		{
 			ranges := splitNth("1,2,3")
 			tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
 		}
 	}
 	{
-		tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+		tokens := Tokenize(input, delimiterRegexp(":"))
 		{
 			ranges := splitNth("1..2,3,2..,1")
 			tx := Transform(tokens, ranges)

src/util/chars.go
@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
 	}
 	return
 }
-
-func (chars *Chars) Slice(b int, e int) Chars {
-	if runes := chars.optionalRunes(); runes != nil {
-		return RunesToChars(runes[b:e])
-	}
-	return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
-	delim := []rune(delimiter)
-	numChars := chars.Length()
-	numDelim := len(delim)
-	begin := 0
-	ret := make([]Chars, 0, 1)
-
-	for index := 0; index < numChars; {
-		if index+numDelim <= numChars {
-			match := true
-			for off, d := range delim {
-				if chars.Get(index+off) != d {
-					match = false
-					break
-				}
-			}
-			// Found the delimiter
-			if match {
-				incr := Max(numDelim, 1)
-				ret = append(ret, chars.Slice(begin, index+incr))
-				index += incr
-				begin = index
-				continue
-			}
-		} else {
-			// Impossible to find the delimiter in the remaining substring
-			break
-		}
-		index++
-	}
-	if begin < numChars || len(ret) == 0 {
-		ret = append(ret, chars.Slice(begin, numChars))
-	}
-	return ret
-}
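The removed Chars.Split is replaced in Tokenize by strings.SplitAfter, which likewise keeps the delimiter attached to each token. One behavioral wrinkle worth noting: when the input ends with the delimiter, SplitAfter also yields a trailing empty string, which the old Chars.Split (per the removed TestSplit below) did not produce; withPrefixLengths simply turns it into an empty token.

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Delimiter kept on each token, like the removed Chars.Split:
	fmt.Printf("%q\n", strings.SplitAfter("abc:def::", ":"))
	// ["abc:" "def:" ":" ""] — note the extra trailing empty token,
	// which the removed Chars.Split did not emit.
}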

src/util/chars_test.go
@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
 	check("  h   o  ", 5)
 	check(" ", 0)
 }
-
-func TestSplit(t *testing.T) {
-	check := func(str string, delim string, tokens ...string) {
-		input := ToChars([]byte(str))
-		result := input.Split(delim)
-		if len(result) != len(tokens) {
-			t.Errorf(
-				"Invalid Split result for '%s': %d tokens found (expected %d): %s",
-				str, len(result), len(tokens), result)
-		}
-		for idx, token := range tokens {
-			if result[idx].ToString() != token {
-				t.Errorf("Invalid Split result for '%s': %s (expected %s)",
-					str, result[idx].ToString(), token)
-			}
-		}
-	}
-	check("abc:def::", ":", "abc:", "def:", ":")
-	check("abc:def::", "-", "abc:def::")
-	check("abc", "", "a", "b", "c")
-	check("abc", "a", "a", "bc")
-	check("abc", "ab", "ab", "c")
-	check("abc", "abc", "abc")
-	check("abc", "abcd", "abc")
-	check("", "abcd", "")
-}
