From 5c25984ea01677eb759501a5cff3fa63d2d460d8 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Tue, 14 Apr 2015 21:45:37 +0900 Subject: [PATCH] Fix Unicode case handling (#186) --- CHANGELOG.md | 7 +++++++ install | 2 +- src/algo/algo.go | 37 +++++++++++++++++++++++++------------ src/constants.go | 2 +- src/pattern.go | 12 +++++++++--- test/test_go.rb | 7 +++++++ 6 files changed, 50 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b1cf32..edaf6749 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ CHANGELOG ========= +0.9.8 +----- + +### Bug fixes + +- Fixed Unicode case handling (#186) + 0.9.7 ----- diff --git a/install b/install index 3b8dbe6f..c47b6ec2 100755 --- a/install +++ b/install @@ -1,6 +1,6 @@ #!/usr/bin/env bash -version=0.9.7 +version=0.9.8 cd $(dirname $BASH_SOURCE) fzf_base=$(pwd) diff --git a/src/algo/algo.go b/src/algo/algo.go index 60c436e5..36c8d873 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -1,6 +1,9 @@ package algo -import "strings" +import ( + "strings" + "unicode" +) /* * String matching algorithms here do not use strings.ToLower to avoid @@ -34,8 +37,17 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) { for index, char := range runes { // This is considerably faster than blindly applying strings.ToLower to the // whole string - if !caseSensitive && char >= 65 && char <= 90 { - char += 32 + if !caseSensitive { + // Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable + // difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go + // compiler as of now does not inline non-leaf functions.) + if char >= 'A' && char <= 'Z' { + char += 32 + runes[index] = char + } else if char > unicode.MaxASCII { + char = unicode.To(unicode.LowerCase, char) + runes[index] = char + } } if char == pattern[pidx] { if sidx < 0 { @@ -52,9 +64,6 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) { pidx-- for index := eidx - 1; index >= sidx; index-- { char := runes[index] - if !caseSensitive && char >= 65 && char <= 90 { - char += 32 - } if char == pattern[pidx] { if pidx--; pidx < 0 { sidx = index @@ -110,8 +119,12 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in pidx := 0 for index := 0; index < numRunes; index++ { char := runes[index] - if !caseSensitive && char >= 65 && char <= 90 { - char += 32 + if !caseSensitive { + if char >= 'A' && char <= 'Z' { + char += 32 + } else if char > unicode.MaxASCII { + char = unicode.To(unicode.LowerCase, char) + } } if pattern[pidx] == char { pidx++ @@ -135,8 +148,8 @@ func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) { for index, r := range pattern { char := runes[index] - if !caseSensitive && char >= 65 && char <= 90 { - char += 32 + if !caseSensitive { + char = unicode.ToLower(char) } if char != r { return -1, -1 @@ -156,8 +169,8 @@ func SuffixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) { for index, r := range pattern { char := runes[index+diff] - if !caseSensitive && char >= 65 && char <= 90 { - char += 32 + if !caseSensitive { + char = unicode.ToLower(char) } if char != r { return -1, -1 diff --git a/src/constants.go b/src/constants.go index 5cd6d80b..36804d0b 100644 --- a/src/constants.go +++ b/src/constants.go @@ -5,7 +5,7 @@ import ( ) // Current version -const Version = "0.9.7" +const Version = "0.9.8" // fzf events const ( diff --git a/src/pattern.go b/src/pattern.go index fbb70c5f..e6bda5f3 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -4,12 +4,11 @@ import ( "regexp" "sort" "strings" + "unicode" "github.com/junegunn/fzf/src/algo" ) -const uppercaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - // fuzzy // 'exact // ^exact-prefix @@ -91,7 +90,14 @@ func BuildPattern(mode Mode, caseMode Case, switch caseMode { case CaseSmart: - if !strings.ContainsAny(asString, uppercaseLetters) { + hasUppercase := false + for _, r := range runes { + if unicode.IsUpper(r) { + hasUppercase = true + break + } + } + if !hasUppercase { runes, caseSensitive = []rune(strings.ToLower(asString)), false } case CaseIgnore: diff --git a/test/test_go.rb b/test/test_go.rb index 94093ff1..a4b19700 100644 --- a/test/test_go.rb +++ b/test/test_go.rb @@ -470,6 +470,13 @@ class TestGoFZF < TestBase tmux.send_keys :Enter assert_equal ['111', '11'], readonce.split($/) end + + def test_unicode_case + assert_equal %w[СТРОКА2 Строка4], + `printf "строКА1\\nСТРОКА2\\nстрока3\\nСтрока4" | fzf -fС`.split($/) + assert_equal %w[строКА1 СТРОКА2 строка3 Строка4], + `printf "строКА1\\nСТРОКА2\\nстрока3\\nСтрока4" | fzf -fс`.split($/) + end end module TestShell