From c3a7a24eeaa5b7451e652df68683b5e8b24cde6b Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Sat, 30 Jul 2022 22:11:21 +0900 Subject: [PATCH] Tweak bonus points to word boundaries Close https://github.com/junegunn/fzf.vim/issues/1004 # jobs/latency.js is favored over job_latency.js printf 'job_latency.js\njobs/latency.js' | fzf -qlatency --- CHANGELOG.md | 17 +++++++++++ src/algo/algo.go | 69 ++++++++++++++++++++++++++++++++----------- src/algo/algo_test.go | 45 ++++++++++++++-------------- 3 files changed, 91 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14b548d5..8a234e41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,23 @@ CHANGELOG ========= +0.32.0 +------ +- Updated the scoring algorithm + - Different bonus points to different categories of word boundaries + (listed higher to lower bonus point) + - Word after whitespace characters or beginning of the string + - Word after common delimiter characters (`/,:;|`) + - Word after other non-word characters + ```sh + # `foo/bar.sh` is preferred over `foo-bar.sh` on `bar` + fzf --query bar --height 4 << EOF + foo-bar.sh + foo/bar.sh + EOF + ``` +- Bug fixes and improvements + 0.31.0 ------ - Added support for an alternative preview window layout that is activated diff --git a/src/algo/algo.go b/src/algo/algo.go index 40fb2af6..15214a68 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -89,6 +89,9 @@ import ( var DEBUG bool +const delimiterChars = "/,:;|" +const whiteChars = " \t\n\v\f\r\x85\xA0" + func indexAt(index int, max int, forward bool) int { if forward { return index @@ -117,6 +120,12 @@ const ( // in web2 dictionary and my file system. 
bonusBoundary = scoreMatch / 2 + // Extra bonus for word boundary after whitespace character or beginning of the string + bonusBoundaryWhite = bonusBoundary + 2 + + // Extra bonus for word boundary after slash, comma, colon, semi-colon, and pipe + bonusBoundaryDelimiter = bonusBoundary + 1 + // Although bonus point for non-word characters is non-contextual, we need it // for computing bonus points for consecutive chunks starting with a non-word // character. @@ -143,7 +152,9 @@ const ( type charClass int const ( - charNonWord charClass = iota + charWhite charClass = iota + charNonWord + charDelimiter charLower charUpper charLetter @@ -181,6 +192,10 @@ func charClassOfAscii(char rune) charClass { return charUpper } else if char >= '0' && char <= '9' { return charNumber + } else if strings.IndexRune(whiteChars, char) >= 0 { + return charWhite + } else if strings.IndexRune(delimiterChars, char) >= 0 { + return charDelimiter } return charNonWord } @@ -194,6 +209,10 @@ func charClassOfNonAscii(char rune) charClass { return charNumber } else if unicode.IsLetter(char) { return charLetter + } else if unicode.IsSpace(char) { + return charWhite + } else if strings.IndexRune(delimiterChars, char) >= 0 { + return charDelimiter } return charNonWord } @@ -206,22 +225,33 @@ func charClassOf(char rune) charClass { } func bonusFor(prevClass charClass, class charClass) int16 { - if prevClass == charNonWord && class != charNonWord { - // Word boundary - return bonusBoundary - } else if prevClass == charLower && class == charUpper || + if class > charNonWord { + if prevClass == charWhite { + // Word boundary after whitespace + return bonusBoundaryWhite + } else if prevClass == charDelimiter { + // Word boundary after a delimiter character + return bonusBoundaryDelimiter + } else if prevClass == charNonWord { + // Word boundary + return bonusBoundary + } + } + if prevClass == charLower && class == charUpper || prevClass != charNumber && class == charNumber { // camelCase letter123 return 
bonusCamel123 } else if class == charNonWord { return bonusNonWord + } else if class == charWhite { + return bonusBoundaryWhite } return 0 } func bonusAt(input *util.Chars, idx int) int16 { if idx == 0 { - return bonusBoundary + return bonusBoundaryWhite } return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx))) } @@ -377,7 +407,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util. // Phase 2. Calculate bonus for each point maxScore, maxScorePos := int16(0), 0 pidx, lastIdx := 0, 0 - pchar0, pchar, prevH0, prevClass, inGap := pattern[0], pattern[0], int16(0), charNonWord, false + pchar0, pchar, prevH0, prevClass, inGap := pattern[0], pattern[0], int16(0), charWhite, false Tsub := T[idx:] H0sub, C0sub, Bsub := H0[idx:][:len(Tsub)], C0[idx:][:len(Tsub)], B[idx:][:len(Tsub)] for off, char := range Tsub { @@ -417,7 +447,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util. C0sub[off] = 1 if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) { maxScore, maxScorePos = score, idx+off - if forward && bonus == bonusBoundary { + if forward && bonus >= bonusBoundary { break } } @@ -486,11 +516,14 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util. s1 = Hdiag[off] + scoreMatch b := Bsub[off] consecutive = Cdiag[off] + 1 - // Break consecutive chunk - if b == bonusBoundary { - consecutive = 1 - } else if consecutive > 1 { - b = util.Max16(b, util.Max16(bonusConsecutive, B[col-int(consecutive)+1])) + if consecutive > 1 { + fb := B[col-int(consecutive)+1] + // Break consecutive chunk + if b >= bonusBoundary && b > fb { + consecutive = 1 + } else { + b = util.Max16(b, util.Max16(bonusConsecutive, fb)) + } } if s1+b < s2 { s1 += Bsub[off] @@ -555,7 +588,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util. 
func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) pos := posArray(withPos, len(pattern)) - prevClass := charNonWord + prevClass := charWhite if sidx > 0 { prevClass = charClassOf(text.Get(sidx - 1)) } @@ -583,7 +616,7 @@ func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, patter firstBonus = bonus } else { // Break consecutive chunk - if bonus == bonusBoundary { + if bonus >= bonusBoundary && bonus > firstBonus { firstBonus = bonus } bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) @@ -741,7 +774,7 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text *uti if bonus > bestBonus { bestPos, bestBonus = index, bonus } - if bonus == bonusBoundary { + if bonus >= bonusBoundary { break } index -= pidx - 1 @@ -877,8 +910,8 @@ func EqualMatch(caseSensitive bool, normalize bool, forward bool, text *util.Cha match = runesStr == string(pattern) } if match { - return Result{trimmedLen, trimmedLen + lenPattern, (scoreMatch+bonusBoundary)*lenPattern + - (bonusFirstCharMultiplier-1)*bonusBoundary}, nil + return Result{trimmedLen, trimmedLen + lenPattern, (scoreMatch+bonusBoundaryWhite)*lenPattern + + (bonusFirstCharMultiplier-1)*bonusBoundaryWhite}, nil } return Result{-1, -1, 0}, nil } diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go index 218ca1f1..2dbe3833 100644 --- a/src/algo/algo_test.go +++ b/src/algo/algo_test.go @@ -45,29 +45,29 @@ func TestFuzzyMatch(t *testing.T) { assertMatch(t, fn, false, forward, "fooBarbaz1", "oBZ", 2, 9, scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3) assertMatch(t, fn, false, forward, "foo bar baz", "fbb", 0, 9, - scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ - bonusBoundary*2+2*scoreGapStart+4*scoreGapExtension) + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+ + 
bonusBoundaryWhite*2+2*scoreGapStart+4*scoreGapExtension) assertMatch(t, fn, false, forward, "/AutomatorDocument.icns", "rdoc", 9, 13, scoreMatch*4+bonusCamel123+bonusConsecutive*2) assertMatch(t, fn, false, forward, "/man1/zshcompctl.1", "zshc", 6, 10, - scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) + scoreMatch*4+bonusBoundaryDelimiter*bonusFirstCharMultiplier+bonusBoundaryDelimiter*3) assertMatch(t, fn, false, forward, "/.oh-my-zsh/cache", "zshc", 8, 13, - scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3+scoreGapStart) + scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*2+scoreGapStart+bonusBoundaryDelimiter) assertMatch(t, fn, false, forward, "ab0123 456", "12356", 3, 10, scoreMatch*5+bonusConsecutive*3+scoreGapStart+scoreGapExtension) assertMatch(t, fn, false, forward, "abc123 456", "12356", 3, 10, scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+bonusConsecutive+scoreGapStart+scoreGapExtension) assertMatch(t, fn, false, forward, "foo/bar/baz", "fbb", 0, 9, - scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ - bonusBoundary*2+2*scoreGapStart+4*scoreGapExtension) + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+ + bonusBoundaryDelimiter*2+2*scoreGapStart+4*scoreGapExtension) assertMatch(t, fn, false, forward, "fooBarBaz", "fbb", 0, 7, - scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+ bonusCamel123*2+2*scoreGapStart+2*scoreGapExtension) assertMatch(t, fn, false, forward, "foo barbaz", "fbb", 0, 8, - scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary+ + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite+ scoreGapStart*2+scoreGapExtension*3) assertMatch(t, fn, false, forward, "fooBar Baz", "foob", 0, 4, - scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) + scoreMatch*4+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite*3) assertMatch(t, fn, 
false, forward, "xFoo-Bar Baz", "foo-b", 1, 6, scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+ bonusNonWord+bonusBoundary) @@ -75,14 +75,14 @@ func TestFuzzyMatch(t *testing.T) { assertMatch(t, fn, true, forward, "fooBarbaz", "oBz", 2, 9, scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3) assertMatch(t, fn, true, forward, "Foo/Bar/Baz", "FBB", 0, 9, - scoreMatch*3+bonusBoundary*(bonusFirstCharMultiplier+2)+ + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryDelimiter*2+ scoreGapStart*2+scoreGapExtension*4) assertMatch(t, fn, true, forward, "FooBarBaz", "FBB", 0, 7, - scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusCamel123*2+ + scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusCamel123*2+ scoreGapStart*2+scoreGapExtension*2) assertMatch(t, fn, true, forward, "FooBar Baz", "FooB", 0, 4, - scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*2+ - util.Max(bonusCamel123, bonusBoundary)) + scoreMatch*4+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite*2+ + util.Max(bonusCamel123, bonusBoundaryWhite)) // Consecutive bonus updated assertMatch(t, fn, true, forward, "foo-bar", "o-ba", 2, 6, @@ -98,10 +98,10 @@ func TestFuzzyMatch(t *testing.T) { func TestFuzzyMatchBackward(t *testing.T) { assertMatch(t, FuzzyMatchV1, false, true, "foobar fb", "fb", 0, 4, - scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+ + scoreMatch*2+bonusBoundaryWhite*bonusFirstCharMultiplier+ scoreGapStart+scoreGapExtension) assertMatch(t, FuzzyMatchV1, false, false, "foobar fb", "fb", 7, 9, - scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary) + scoreMatch*2+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite) } func TestExactMatchNaive(t *testing.T) { @@ -114,9 +114,9 @@ func TestExactMatchNaive(t *testing.T) { assertMatch(t, ExactMatchNaive, false, dir, "/AutomatorDocument.icns", "rdoc", 9, 13, scoreMatch*4+bonusCamel123+bonusConsecutive*2) assertMatch(t, ExactMatchNaive, 
false, dir, "/man1/zshcompctl.1", "zshc", 6, 10, - scoreMatch*4+bonusBoundary*(bonusFirstCharMultiplier+3)) + scoreMatch*4+bonusBoundaryDelimiter*(bonusFirstCharMultiplier+3)) assertMatch(t, ExactMatchNaive, false, dir, "/.oh-my-zsh/cache", "zsh/c", 8, 13, - scoreMatch*5+bonusBoundary*(bonusFirstCharMultiplier+4)) + scoreMatch*5+bonusBoundary*(bonusFirstCharMultiplier+3)+bonusBoundaryDelimiter) } } @@ -128,7 +128,7 @@ func TestExactMatchNaiveBackward(t *testing.T) { } func TestPrefixMatch(t *testing.T) { - score := (scoreMatch+bonusBoundary)*3 + bonusBoundary*(bonusFirstCharMultiplier-1) + score := scoreMatch*3 + bonusBoundaryWhite*bonusFirstCharMultiplier + bonusBoundaryWhite*2 for _, dir := range []bool{true, false} { assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1, 0) @@ -156,9 +156,10 @@ func TestSuffixMatch(t *testing.T) { // Strip trailing white space from the string assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz", 6, 9, scoreMatch*3+bonusConsecutive*2) + // Only when the pattern doesn't end with a space assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz ", 6, 10, - scoreMatch*4+bonusConsecutive*2+bonusNonWord) + scoreMatch*4+bonusConsecutive*2+bonusBoundaryWhite) } } @@ -182,9 +183,9 @@ func TestNormalize(t *testing.T) { input, pattern, sidx, eidx, score) } } - test("Só Danço Samba", "So", 0, 2, 56, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, ExactMatchNaive) - test("Só Danço Samba", "sodc", 0, 7, 89, FuzzyMatchV1, FuzzyMatchV2) - test("Danço", "danco", 0, 5, 128, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, SuffixMatch, ExactMatchNaive, EqualMatch) + test("Só Danço Samba", "So", 0, 2, 62, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, ExactMatchNaive) + test("Só Danço Samba", "sodc", 0, 7, 97, FuzzyMatchV1, FuzzyMatchV2) + test("Danço", "danco", 0, 5, 140, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, SuffixMatch, ExactMatchNaive, EqualMatch) } func TestLongString(t *testing.T) {