From 5234c3759adfb68ec7ec5232347ae0d9b4e91243 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Tue, 2 Apr 2024 01:02:49 +0900 Subject: [PATCH] Improve ingestion performance (by around 40%) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary fzf --sync --bind load:accept < 27M-lines ran 1.16 ± 0.01 times faster than fzf-41b3511 --sync --bind load:accept < 27M-lines 1.44 ± 0.01 times faster than fzf-0.48.1 --sync --bind load:accept < 27M-lines --- CHANGELOG.md | 2 +- src/reader.go | 129 ++++++++++++++++++++++++-------------------------- 2 files changed, 63 insertions(+), 68 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dc315ce..d5f66a62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ CHANGELOG 0.49.0 ------ -- Ingestion performance improved by around 20% +- Ingestion performance improved by around 40% - Added two environment variables exported to the child processes - `FZF_PREVIEW_LABEL` - `FZF_BORDER_LABEL` diff --git a/src/reader.go b/src/reader.go index 3ebe3521..85a988bb 100644 --- a/src/reader.go +++ b/src/reader.go @@ -1,13 +1,12 @@ package fzf import ( - "bufio" + "bytes" "context" "io" "os" "os/exec" "path/filepath" - "strconv" "sync" "sync/atomic" "time" @@ -112,86 +111,82 @@ func (r *Reader) ReadSource(root string, opts walkerOpts, ignores []string) { } func (r *Reader) feed(src io.Reader) { - readerSlabSize, ae := strconv.Atoi(os.Getenv("SLAB_KB")) - if ae != nil { - readerSlabSize = 128 * 1024 - } else { - readerSlabSize *= 1024 - } - readerBufferSize, be := strconv.Atoi(os.Getenv("BUF_KB")) - if be != nil { - readerBufferSize = 64 * 1024 - } else { - readerBufferSize *= 1024 - } + /* + readerSlabSize, ae := strconv.Atoi(os.Getenv("SLAB_KB")) + if ae != nil { + readerSlabSize = 128 * 1024 + } else { + readerSlabSize *= 1024 + } + readerBufferSize, be := strconv.Atoi(os.Getenv("BUF_KB")) + if be != nil { + readerBufferSize = 64 * 1024 + } else { + readerBufferSize *= 1024 + } + */ - slab := make([]byte, readerSlabSize) - pointer := 0 delim := byte('\n') if r.delimNil { delim = '\000' } - reader := bufio.NewReaderSize(src, readerBufferSize) - - // We do not put a slice longer than 10% of the slab to reduce fragmentation - maxBytes := readerBufferSize / 10 + slab := make([]byte, readerSlabSize) + leftover := []byte{} + var err error for { - var frags [][]byte - fragsLen := 0 - for { - bytea, err := reader.ReadSlice(delim) - if err == bufio.ErrBufferFull { - // Could not find the delimiter in the reader buffer. - // Need to collect the fragments and merge them later. - frags = append(frags, bytea) - fragsLen += len(bytea) - } else { - byteaLen := len(bytea) - if err == nil { - // No errors. Found the delimiter. - if util.IsWindows() && byteaLen >= 2 && bytea[byteaLen-2] == byte('\r') { - bytea = bytea[:byteaLen-2] - byteaLen -= 2 - } else { - bytea = bytea[:byteaLen-1] - byteaLen-- - } - } + n := 0 + scope := slab[:util.Min(len(slab), readerBufferSize)] + for i := 0; i < 100; i++ { + n, err = src.Read(scope) + if n > 0 || err != nil { + break + } + } - itemLen := fragsLen + byteaLen - pointer += itemLen - var slice []byte - if itemLen <= maxBytes { // We can use the slab - // Allocate a new slab if it doesn't fit - if pointer > readerSlabSize { - slab = make([]byte, readerSlabSize) - pointer = itemLen - } - slice = slab[pointer-itemLen : pointer] - } else { // We can't use the slab because the item is too large - slice = make([]byte, itemLen) - } + // We're not making any progress after 100 tries. Stop. + if n == 0 && err == nil { + break + } - if len(frags) > 0 { - // Collect the fragments - n := 0 - for _, frag := range frags { - n += copy(slice[n:], frag) - } - copy(slice[n:], bytea) - } else if byteaLen > 0 { - copy(slice, bytea) + buf := slab[:n] + slab = slab[n:] + + for len(buf) > 0 { + if i := bytes.IndexByte(buf, delim); i >= 0 { + // Found the delimiter + slice := buf[:i+1] + buf = buf[i+1:] + if util.IsWindows() && len(slice) >= 2 && slice[len(slice)-2] == byte('\r') { + slice = slice[:len(slice)-2] + } else { + slice = slice[:len(slice)-1] } - if (err == nil || itemLen > 0) && r.pusher(slice) { - atomic.StoreInt32(&r.event, int32(EvtReadNew)) + if len(leftover) > 0 { + slice = append(leftover, slice...) + leftover = []byte{} } - if err != nil { - return + if (err == nil || len(slice) > 0) && r.pusher(slice) { + atomic.StoreInt32(&r.event, int32(EvtReadNew)) } + } else { + // Could not find the delimiter in the buffer + leftover = append(leftover, buf...) break } } + + if err == io.EOF { + leftover = append(leftover, buf...) + break + } + + if len(slab) == 0 { + slab = make([]byte, readerSlabSize) + } + } + if len(leftover) > 0 && r.pusher(leftover) { + atomic.StoreInt32(&r.event, int32(EvtReadNew)) } }