You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
zk/internal/util/fts5/fts5.go

104 lines
2.3 KiB
Go

package fts5
import "strings"
// ConvertQuery transforms a Google-like query into a SQLite FTS5 one.
//
// The query is scanned rune by rune and rewritten as follows:
//   - Every term is wrapped in double quotes, except the bare keywords AND,
//     OR and NOT, which are emitted as-is.
//   - Text inside explicit double quotes is taken literally: no operator or
//     separator is interpreted there.
//   - Spaces, tabs, newlines and parentheses end the current term and are
//     copied to the output unchanged.
//   - A - at the start of a term becomes " NOT " and | becomes " OR ".
//   - A ^ or * at the start of a term is copied to the output before the
//     quoted term, and a trailing * on an unquoted term is moved after the
//     closing quote (foo* -> "foo"*).
//   - The text preceding a : is emitted unquoted, as a column filter
//     (col:foo -> col:"foo").
//   - A + at the start of a term is dropped.
//
// An unterminated quote extends to the end of the query.
func ConvertQuery(query string) string {
	// Converted query. A builder avoids reallocating the whole output for
	// every rune appended.
	var out strings.Builder
	out.Grow(len(query))
	// Current term being read.
	var term strings.Builder
	// Indicates whether the current term was explicitly quoted in the query.
	inQuote := false

	// Finishes the current term and writes it to the output after quoting it.
	closeTerm := func() {
		if term.Len() == 0 {
			return
		}
		t := term.String()
		term.Reset()

		if !inQuote {
			// Boolean keywords are passed through unquoted, otherwise FTS5
			// would treat them as plain search terms.
			switch t {
			case "AND", "OR", "NOT":
				out.WriteString(t)
				return
			}
		}

		// If the term has a wildcard suffix, it is a prefix token. We make
		// sure that the * is not quoted or it will be ignored by the FTS5
		// tokenizer.
		isPrefixToken := !inQuote && strings.HasSuffix(t, "*")
		if isPrefixToken {
			t = strings.TrimSuffix(t, "*")
		}
		out.WriteByte('"')
		out.WriteString(t)
		out.WriteByte('"')
		if isPrefixToken {
			out.WriteByte('*')
		}
	}

	for _, c := range query {
		switch {
		// Explicit quotes.
		case c == '"':
			if inQuote { // Already in a quoted term? Then it's a closing quote.
				closeTerm()
			}
			inQuote = !inQuote

		// Inside explicit quotes every rune is literal, including the
		// operator characters handled by the cases below.
		case inQuote:
			term.WriteRune(c)

		// Passthrough for ^ and * when they are at the start of a term, to allow:
		//   ^foo   -> ^"foo"
		//   "foo"* -> "foo"*
		case term.Len() == 0 && (c == '^' || c == '*'):
			out.WriteRune(c)

		// Passthrough for FTS5's column filters, e.g.
		//   col:foo -> col:"foo"
		case c == ':':
			out.WriteString(term.String())
			out.WriteRune(c)
			term.Reset()

		// - is an alias to NOT, but only at the start of a term, to allow
		// compound words such as "well-known".
		case c == '-' && term.Len() == 0:
			out.WriteString(" NOT ")

		// | is an alias to OR.
		case c == '|':
			closeTerm()
			out.WriteString(" OR ")

		// FTS5's + is ignored because it doesn't bring much to the syntax,
		// compared to explicit quotes.
		case c == '+' && term.Len() == 0:

		// Whitespaces and parentheses terminate the current term and are
		// kept in the output.
		case c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')':
			closeTerm()
			out.WriteRune(c)

		default:
			term.WriteRune(c)
		}
	}

	// Flush the last term; this also covers an unterminated quote.
	closeTerm()

	return out.String()
}