From 41b3511ad9a885504a41eb4292fbceeba86e56f0 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Mon, 1 Apr 2024 23:38:46 +0900 Subject: [PATCH] Improve ingestion performance (by around 20%) --- BUILD.md | 2 +- CHANGELOG.md | 1 + go.mod | 2 +- src/constants.go | 1 + src/core.go | 13 ++++--- src/reader.go | 89 +++++++++++++++++++++++++++++++++++++++--------- 6 files changed, 85 insertions(+), 23 deletions(-) diff --git a/BUILD.md b/BUILD.md index 18630e14..eabffadb 100644 --- a/BUILD.md +++ b/BUILD.md @@ -6,7 +6,7 @@ Build instructions ### Prerequisites -- Go 1.18 or above +- Go 1.20 or above ### Using Makefile diff --git a/CHANGELOG.md b/CHANGELOG.md index c90242fb..2dc315ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ CHANGELOG 0.49.0 ------ +- Ingestion performance improved by around 20% - Added two environment variables exported to the child processes - `FZF_PREVIEW_LABEL` - `FZF_BORDER_LABEL` diff --git a/go.mod b/go.mod index 851d24c0..7cdfa80f 100644 --- a/go.mod +++ b/go.mod @@ -17,4 +17,4 @@ require ( golang.org/x/text v0.14.0 // indirect ) -go 1.17 +go 1.20 diff --git a/src/constants.go b/src/constants.go index f5f8a939..faf6a0e5 100644 --- a/src/constants.go +++ b/src/constants.go @@ -14,6 +14,7 @@ const ( // Reader readerBufferSize = 64 * 1024 + readerSlabSize = 128 * 1024 readerPollIntervalMin = 10 * time.Millisecond readerPollIntervalStep = 5 * time.Millisecond readerPollIntervalMax = 50 * time.Millisecond diff --git a/src/core.go b/src/core.go index f6c5a50d..1006109d 100644 --- a/src/core.go +++ b/src/core.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "time" + "unsafe" "github.com/junegunn/fzf/src/util" ) @@ -18,6 +19,10 @@ Matcher -> EvtSearchFin -> Terminal (update list) Matcher -> EvtHeader -> Terminal (update header) */ +func ustring(data []byte) string { + return unsafe.String(unsafe.SliceData(data), len(data)) +} + // Run starts fzf func Run(opts *Options, version string, revision string) { sort := opts.Sort > 0 @@ -45,7 +50,7 @@ func Run(opts *Options, version string, revision string) { if opts.Theme.Colored { ansiProcessor = func(data []byte) (util.Chars, *[]ansiOffset) { prevLineAnsiState = lineAnsiState - trimmed, offsets, newState := extractColor(string(data), lineAnsiState, nil) + trimmed, offsets, newState := extractColor(ustring(data), lineAnsiState, nil) lineAnsiState = newState return util.ToChars([]byte(trimmed)), offsets } @@ -53,7 +58,7 @@ func Run(opts *Options, version string, revision string) { // When color is disabled but ansi option is given, // we simply strip out ANSI codes from the input ansiProcessor = func(data []byte) (util.Chars, *[]ansiOffset) { - trimmed, _, _ := extractColor(string(data), nil, nil) + trimmed, _, _ := extractColor(ustring(data), nil, nil) return util.ToChars([]byte(trimmed)), nil } } @@ -66,7 +71,7 @@ func Run(opts *Options, version string, revision string) { if len(opts.WithNth) == 0 { chunkList = NewChunkList(func(item *Item, data []byte) bool { if len(header) < opts.HeaderLines { - header = append(header, string(data)) + header = append(header, ustring(data)) eventBox.Set(EvtHeader, header) return false } @@ -77,7 +82,7 @@ func Run(opts *Options, version string, revision string) { }) } else { chunkList = NewChunkList(func(item *Item, data []byte) bool { - tokens := Tokenize(string(data), opts.Delimiter) + tokens := Tokenize(ustring(data), opts.Delimiter) if opts.Ansi && opts.Theme.Colored && len(tokens) > 1 { var ansiState *ansiState if prevLineAnsiState != nil { diff --git a/src/reader.go b/src/reader.go index 47102ec1..3ebe3521 100644 --- a/src/reader.go +++ b/src/reader.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "sync" "sync/atomic" "time" @@ -111,31 +112,85 @@ func (r *Reader) ReadSource(root string, opts walkerOpts, ignores []string) { } func (r *Reader) feed(src io.Reader) { + readerSlabSize, ae := strconv.Atoi(os.Getenv("SLAB_KB")) + if ae != nil { + readerSlabSize = 128 * 1024 + } else { + readerSlabSize *= 1024 + } + readerBufferSize, be := strconv.Atoi(os.Getenv("BUF_KB")) + if be != nil { + readerBufferSize = 64 * 1024 + } else { + readerBufferSize *= 1024 + } + + slab := make([]byte, readerSlabSize) + pointer := 0 delim := byte('\n') if r.delimNil { delim = '\000' } reader := bufio.NewReaderSize(src, readerBufferSize) + + // We do not put a slice longer than 10% of the slab to reduce fragmentation + maxBytes := readerBufferSize / 10 + for { - // ReadBytes returns err != nil if and only if the returned data does not - // end in delim. - bytea, err := reader.ReadBytes(delim) - byteaLen := len(bytea) - if byteaLen > 0 { - if err == nil { - // get rid of carriage return if under Windows: - if util.IsWindows() && byteaLen >= 2 && bytea[byteaLen-2] == byte('\r') { - bytea = bytea[:byteaLen-2] - } else { - bytea = bytea[:byteaLen-1] + var frags [][]byte + fragsLen := 0 + for { + bytea, err := reader.ReadSlice(delim) + if err == bufio.ErrBufferFull { + // Could not find the delimiter in the reader buffer. + // Need to collect the fragments and merge them later. + frags = append(frags, bytea) + fragsLen += len(bytea) + } else { + byteaLen := len(bytea) + if err == nil { + // No errors. Found the delimiter. + if util.IsWindows() && byteaLen >= 2 && bytea[byteaLen-2] == byte('\r') { + bytea = bytea[:byteaLen-2] + byteaLen -= 2 + } else { + bytea = bytea[:byteaLen-1] + byteaLen-- + } } + + itemLen := fragsLen + byteaLen + pointer += itemLen + var slice []byte + if itemLen <= maxBytes { // We can use the slab + // Allocate a new slab if it doesn't fit + if pointer > readerSlabSize { + slab = make([]byte, readerSlabSize) + pointer = itemLen + } + slice = slab[pointer-itemLen : pointer] + } else { // We can't use the slab because the item is too large + slice = make([]byte, itemLen) + } + + if len(frags) > 0 { + // Collect the fragments + n := 0 + for _, frag := range frags { + n += copy(slice[n:], frag) + } + copy(slice[n:], bytea) + } else if byteaLen > 0 { + copy(slice, bytea) + } + if (err == nil || itemLen > 0) && r.pusher(slice) { + atomic.StoreInt32(&r.event, int32(EvtReadNew)) + } + if err != nil { + return + } + break } - if r.pusher(bytea) { - atomic.StoreInt32(&r.event, int32(EvtReadNew)) - } - } - if err != nil { - break } } }