|
|
@ -36,6 +36,8 @@ type term struct {
|
|
|
|
origText []rune
|
|
|
|
origText []rune
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// termSet is a group of terms from an extended-search query that are treated
// as alternatives to one another. parseTerms keeps a term in the current set
// when it follows a "|" token and starts a new set otherwise; extendedMatch
// walks the terms of a set in order and records at most one offset per set,
// so a full match has exactly one offset for every termSet in the pattern.
type termSet []term
|
|
|
|
|
|
|
|
|
|
|
|
// Pattern represents search pattern
|
|
|
|
// Pattern represents search pattern
|
|
|
|
type Pattern struct {
|
|
|
|
type Pattern struct {
|
|
|
|
fuzzy bool
|
|
|
|
fuzzy bool
|
|
|
@ -43,8 +45,8 @@ type Pattern struct {
|
|
|
|
caseSensitive bool
|
|
|
|
caseSensitive bool
|
|
|
|
forward bool
|
|
|
|
forward bool
|
|
|
|
text []rune
|
|
|
|
text []rune
|
|
|
|
terms []term
|
|
|
|
termSets []termSet
|
|
|
|
hasInvTerm bool
|
|
|
|
cacheable bool
|
|
|
|
delimiter Delimiter
|
|
|
|
delimiter Delimiter
|
|
|
|
nth []Range
|
|
|
|
nth []Range
|
|
|
|
procFun map[termType]func(bool, bool, []rune, []rune) (int, int)
|
|
|
|
procFun map[termType]func(bool, bool, []rune, []rune) (int, int)
|
|
|
@ -88,14 +90,20 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
|
|
|
|
return cached
|
|
|
|
return cached
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
caseSensitive, hasInvTerm := true, false
|
|
|
|
caseSensitive, cacheable := true, true
|
|
|
|
terms := []term{}
|
|
|
|
termSets := []termSet{}
|
|
|
|
|
|
|
|
|
|
|
|
if extended {
|
|
|
|
if extended {
|
|
|
|
terms = parseTerms(fuzzy, caseMode, asString)
|
|
|
|
termSets = parseTerms(fuzzy, caseMode, asString)
|
|
|
|
for _, term := range terms {
|
|
|
|
Loop:
|
|
|
|
if term.inv {
|
|
|
|
for _, termSet := range termSets {
|
|
|
|
hasInvTerm = true
|
|
|
|
for idx, term := range termSet {
|
|
|
|
|
|
|
|
// If the query contains inverse search terms or OR operators,
|
|
|
|
|
|
|
|
// we cannot cache the search scope
|
|
|
|
|
|
|
|
if idx > 0 || term.inv {
|
|
|
|
|
|
|
|
cacheable = false
|
|
|
|
|
|
|
|
break Loop
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
@ -113,8 +121,8 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
|
|
|
|
caseSensitive: caseSensitive,
|
|
|
|
caseSensitive: caseSensitive,
|
|
|
|
forward: forward,
|
|
|
|
forward: forward,
|
|
|
|
text: []rune(asString),
|
|
|
|
text: []rune(asString),
|
|
|
|
terms: terms,
|
|
|
|
termSets: termSets,
|
|
|
|
hasInvTerm: hasInvTerm,
|
|
|
|
cacheable: cacheable,
|
|
|
|
nth: nth,
|
|
|
|
nth: nth,
|
|
|
|
delimiter: delimiter,
|
|
|
|
delimiter: delimiter,
|
|
|
|
procFun: make(map[termType]func(bool, bool, []rune, []rune) (int, int))}
|
|
|
|
procFun: make(map[termType]func(bool, bool, []rune, []rune) (int, int))}
|
|
|
@ -129,9 +137,11 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
|
|
|
|
return ptr
|
|
|
|
return ptr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func parseTerms(fuzzy bool, caseMode Case, str string) []term {
|
|
|
|
func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
|
|
|
|
tokens := _splitRegex.Split(str, -1)
|
|
|
|
tokens := _splitRegex.Split(str, -1)
|
|
|
|
terms := []term{}
|
|
|
|
sets := []termSet{}
|
|
|
|
|
|
|
|
set := termSet{}
|
|
|
|
|
|
|
|
switchSet := false
|
|
|
|
for _, token := range tokens {
|
|
|
|
for _, token := range tokens {
|
|
|
|
typ, inv, text := termFuzzy, false, token
|
|
|
|
typ, inv, text := termFuzzy, false, token
|
|
|
|
lowerText := strings.ToLower(text)
|
|
|
|
lowerText := strings.ToLower(text)
|
|
|
@ -145,6 +155,11 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []term {
|
|
|
|
typ = termExact
|
|
|
|
typ = termExact
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if text == "|" {
|
|
|
|
|
|
|
|
switchSet = false
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if strings.HasPrefix(text, "!") {
|
|
|
|
if strings.HasPrefix(text, "!") {
|
|
|
|
inv = true
|
|
|
|
inv = true
|
|
|
|
text = text[1:]
|
|
|
|
text = text[1:]
|
|
|
@ -173,15 +188,23 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []term {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if len(text) > 0 {
|
|
|
|
if len(text) > 0 {
|
|
|
|
terms = append(terms, term{
|
|
|
|
if switchSet {
|
|
|
|
|
|
|
|
sets = append(sets, set)
|
|
|
|
|
|
|
|
set = termSet{}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
set = append(set, term{
|
|
|
|
typ: typ,
|
|
|
|
typ: typ,
|
|
|
|
inv: inv,
|
|
|
|
inv: inv,
|
|
|
|
text: []rune(text),
|
|
|
|
text: []rune(text),
|
|
|
|
caseSensitive: caseSensitive,
|
|
|
|
caseSensitive: caseSensitive,
|
|
|
|
origText: origText})
|
|
|
|
origText: origText})
|
|
|
|
|
|
|
|
switchSet = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return terms
|
|
|
|
if len(set) > 0 {
|
|
|
|
|
|
|
|
sets = append(sets, set)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return sets
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// IsEmpty returns true if the pattern is effectively empty
|
|
|
|
// IsEmpty returns true if the pattern is effectively empty
|
|
|
@ -189,7 +212,7 @@ func (p *Pattern) IsEmpty() bool {
|
|
|
|
if !p.extended {
|
|
|
|
if !p.extended {
|
|
|
|
return len(p.text) == 0
|
|
|
|
return len(p.text) == 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return len(p.terms) == 0
|
|
|
|
return len(p.termSets) == 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// AsString returns the search query in string type
|
|
|
|
// AsString returns the search query in string type
|
|
|
@ -203,11 +226,10 @@ func (p *Pattern) CacheKey() string {
|
|
|
|
return p.AsString()
|
|
|
|
return p.AsString()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cacheableTerms := []string{}
|
|
|
|
cacheableTerms := []string{}
|
|
|
|
for _, term := range p.terms {
|
|
|
|
for _, termSet := range p.termSets {
|
|
|
|
if term.inv {
|
|
|
|
if len(termSet) == 1 && !termSet[0].inv {
|
|
|
|
continue
|
|
|
|
cacheableTerms = append(cacheableTerms, string(termSet[0].origText))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cacheableTerms = append(cacheableTerms, string(term.origText))
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return strings.Join(cacheableTerms, " ")
|
|
|
|
return strings.Join(cacheableTerms, " ")
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -218,7 +240,7 @@ func (p *Pattern) Match(chunk *Chunk) []*Item {
|
|
|
|
|
|
|
|
|
|
|
|
// ChunkCache: Exact match
|
|
|
|
// ChunkCache: Exact match
|
|
|
|
cacheKey := p.CacheKey()
|
|
|
|
cacheKey := p.CacheKey()
|
|
|
|
if !p.hasInvTerm { // Because we're excluding Inv-term from cache key
|
|
|
|
if p.cacheable {
|
|
|
|
if cached, found := _cache.Find(chunk, cacheKey); found {
|
|
|
|
if cached, found := _cache.Find(chunk, cacheKey); found {
|
|
|
|
return cached
|
|
|
|
return cached
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -243,7 +265,7 @@ Loop:
|
|
|
|
|
|
|
|
|
|
|
|
matches := p.matchChunk(space)
|
|
|
|
matches := p.matchChunk(space)
|
|
|
|
|
|
|
|
|
|
|
|
if !p.hasInvTerm {
|
|
|
|
if p.cacheable {
|
|
|
|
_cache.Add(chunk, cacheKey, matches)
|
|
|
|
_cache.Add(chunk, cacheKey, matches)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return matches
|
|
|
|
return matches
|
|
|
@ -260,7 +282,7 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
for _, item := range *chunk {
|
|
|
|
for _, item := range *chunk {
|
|
|
|
if offsets := p.extendedMatch(item); len(offsets) == len(p.terms) {
|
|
|
|
if offsets := p.extendedMatch(item); len(offsets) == len(p.termSets) {
|
|
|
|
matches = append(matches, dupItem(item, offsets))
|
|
|
|
matches = append(matches, dupItem(item, offsets))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -275,7 +297,7 @@ func (p *Pattern) MatchItem(item *Item) bool {
|
|
|
|
return sidx >= 0
|
|
|
|
return sidx >= 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
offsets := p.extendedMatch(item)
|
|
|
|
offsets := p.extendedMatch(item)
|
|
|
|
return len(offsets) == len(p.terms)
|
|
|
|
return len(offsets) == len(p.termSets)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func dupItem(item *Item, offsets []Offset) *Item {
|
|
|
|
func dupItem(item *Item, offsets []Offset) *Item {
|
|
|
@ -301,15 +323,20 @@ func (p *Pattern) basicMatch(item *Item) (int, int, int) {
|
|
|
|
func (p *Pattern) extendedMatch(item *Item) []Offset {
|
|
|
|
func (p *Pattern) extendedMatch(item *Item) []Offset {
|
|
|
|
input := p.prepareInput(item)
|
|
|
|
input := p.prepareInput(item)
|
|
|
|
offsets := []Offset{}
|
|
|
|
offsets := []Offset{}
|
|
|
|
for _, term := range p.terms {
|
|
|
|
Loop:
|
|
|
|
pfun := p.procFun[term.typ]
|
|
|
|
for _, termSet := range p.termSets {
|
|
|
|
if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
|
|
|
|
for _, term := range termSet {
|
|
|
|
if term.inv {
|
|
|
|
pfun := p.procFun[term.typ]
|
|
|
|
|
|
|
|
if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
|
|
|
|
|
|
|
|
if term.inv {
|
|
|
|
|
|
|
|
break Loop
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)})
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
} else if term.inv {
|
|
|
|
|
|
|
|
offsets = append(offsets, Offset{0, 0, 0})
|
|
|
|
break
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)})
|
|
|
|
|
|
|
|
} else if term.inv {
|
|
|
|
|
|
|
|
offsets = append(offsets, Offset{0, 0, 0})
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return offsets
|
|
|
|
return offsets
|
|
|
|