[readline] do a fuzzy match if the prefix match fails to find anything

Prototype for #541
This commit is contained in:
Timothy Stack 2018-09-13 14:27:49 -07:00
parent def35d1ae7
commit f241704a93
13 changed files with 368 additions and 23 deletions

10
NEWS
View File

@ -1,4 +1,14 @@
lnav v0.8.5:
Interface Changes:
* The auto-complete behavior in the prompt has been modified to fall back
to a fuzzy search if the prefix search finds no matches. For example,
typing in ":fin" and pressing TAB would previously not do anything.
Now, the ":fin" will be completed to ":filter-in " since that is a
strong fuzzy match. If there are multiple matches, as would happen
with ":dfil", readline's menu-complete behavior will be engaged and
you can press TAB to cycle through the options.
lnav v0.8.4:
Features:
* Added the ':comment' command that can be used to attach a comment to a

View File

@ -13,6 +13,7 @@ set(diag_STAT_SRCS
file_vtab.cc
fs-extension-functions.cc
fstat_vtab.cc
fts_fuzzy_match.cc
grep_proc.cc
help_text_formatter.cc
hist_source.cc
@ -108,6 +109,7 @@ set(diag_STAT_SRCS
filter_observer.hh
format-text-files.hh
fstat_vtab.hh
fts_fuzzy_match.hh
grep_highlighter.hh
help.hh
help_text_formatter.hh

View File

@ -162,6 +162,7 @@ noinst_HEADERS = \
filter_observer.hh \
format-text-files.hh \
fstat_vtab.hh \
fts_fuzzy_match.hh \
grep_highlighter.hh \
grep_proc.hh \
help.hh \
@ -279,6 +280,7 @@ libdiag_a_SOURCES = \
file_vtab.cc \
fstat_vtab.cc \
fs-extension-functions.cc \
fts_fuzzy_match.cc \
grep_proc.cc \
help_text_formatter.cc \
hist_source.cc \

178
src/fts_fuzzy_match.cc Normal file
View File

@ -0,0 +1,178 @@
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
#include "config.h"
#include "fts_fuzzy_match.hh"
namespace fts {

// "Private" implementation
namespace fuzzy_internal {

    // Capacity of the scratch buffers that carry match indices between
    // recursion levels.
    static const int SCRATCH_MATCH_CAPACITY = 256;

    // The <ctype.h> classifiers have undefined behaviour when handed a negative
    // value other than EOF, which is what a plain `char` holding a non-ASCII
    // byte converts to on platforms where `char` is signed.  Always go through
    // `unsigned char` first.
    static inline int lower_of(char ch)
    {
        return ::tolower(static_cast<unsigned char>(ch));
    }

    static inline bool is_lower(char ch)
    {
        return ::islower(static_cast<unsigned char>(ch)) != 0;
    }

    static inline bool is_upper(char ch)
    {
        return ::isupper(static_cast<unsigned char>(ch)) != 0;
    }

    // Score a complete match.
    //
    //   strBegin   - start of the candidate string
    //   strLength  - number of characters in the candidate string
    //   matches    - offsets (into strBegin) of the matched characters
    //   matchCount - number of valid entries in `matches` (must be >= 1)
    //
    // Offsets are stored as uint8_t, so they wrap for candidates longer than
    // 255 characters.
    static int compute_score(const char * strBegin, int strLength,
                             const uint8_t * matches, int matchCount)
    {
        const int sequential_bonus = 15;            // bonus for adjacent matches
        const int separator_bonus = 30;             // bonus if match occurs after a separator
        const int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
        const int first_letter_bonus = 15;          // bonus if the first letter is matched
        const int leading_letter_penalty = -5;      // penalty applied for every letter in str before the first match
        const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
        const int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't match

        int score = 100;

        // Leading letter penalty, capped so late matches are not punished forever
        int penalty = leading_letter_penalty * matches[0];
        if (penalty < max_leading_letter_penalty)
            penalty = max_leading_letter_penalty;
        score += penalty;

        // Unmatched letter penalty
        score += unmatched_letter_penalty * (strLength - matchCount);

        // Ordering bonuses
        for (int i = 0; i < matchCount; ++i) {
            const uint8_t currIdx = matches[i];

            // Sequential
            if (i > 0 && currIdx == (matches[i - 1] + 1))
                score += sequential_bonus;

            if (currIdx > 0) {
                const char neighbor = strBegin[currIdx - 1];
                const char curr = strBegin[currIdx];

                // Camel case
                if (is_lower(neighbor) && is_upper(curr))
                    score += camel_bonus;

                // Separator
                if (neighbor == '_' || neighbor == ' ')
                    score += separator_bonus;
            }
            else {
                // First letter
                score += first_letter_bonus;
            }
        }

        return score;
    }

    // Recursive worker behind fts::fuzzy_match().
    //
    // Walks `pattern` and `str` in step.  Every time a pattern character is
    // found, it first recurses to explore the alternative of skipping that
    // occurrence, then records the occurrence and continues.  The best scoring
    // alternative wins; on a tie the match found at this level is kept.
    //
    //   pattern/str     - remaining pattern and remaining candidate text
    //   outScore        - written only when the function returns true
    //   strBegin        - start of the whole candidate, used to compute offsets
    //   srcMatches      - matches recorded by the caller (may be null); copied
    //                     into `matches` on the first match at this level
    //   matches         - output buffer for match offsets
    //   maxMatches      - capacity of `matches`
    //   nextMatch       - number of entries already recorded
    //   recursionCount  - shared call counter
    //   recursionLimit  - calls at or beyond this count give up and return false
    static bool fuzzy_match_recursive(const char * pattern, const char * str, int & outScore,
                                      const char * strBegin, uint8_t const * srcMatches,
                                      uint8_t * matches, int maxMatches, int nextMatch,
                                      int & recursionCount, int recursionLimit)
    {
        // Count recursions
        ++recursionCount;
        if (recursionCount >= recursionLimit)
            return false;

        // Detect end of strings
        if (*pattern == '\0' || *str == '\0')
            return false;

        // Recursion params
        bool recursiveMatch = false;
        uint8_t bestRecursiveMatches[SCRATCH_MATCH_CAPACITY];
        int bestRecursiveScore = 0;

        // Loop through pattern and str looking for a match
        bool first_match = true;
        while (*pattern != '\0' && *str != '\0') {
            if (lower_of(*pattern) == lower_of(*str)) {
                // Supplied matches buffer was too short
                if (nextMatch >= maxMatches)
                    return false;

                // Bring the caller's recorded matches into our own buffer
                // before we start appending to it.
                if (first_match && srcMatches) {
                    memcpy(matches, srcMatches, nextMatch);
                    first_match = false;
                }

                // Recursive call that "skips" this match
                uint8_t recursiveMatches[SCRATCH_MATCH_CAPACITY];
                int recursiveScore;
                if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin,
                                          matches, recursiveMatches, SCRATCH_MATCH_CAPACITY,
                                          nextMatch, recursionCount, recursionLimit)) {
                    // Pick best recursive score
                    if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
                        memcpy(bestRecursiveMatches, recursiveMatches, SCRATCH_MATCH_CAPACITY);
                        bestRecursiveScore = recursiveScore;
                    }
                    recursiveMatch = true;
                }

                // Advance
                matches[nextMatch++] = (uint8_t)(str - strBegin);
                ++pattern;
            }
            ++str;
        }

        // Determine if full pattern was matched
        const bool matched = (*pattern == '\0');

        if (matched) {
            // Iterate str to end so the full length is known
            while (*str != '\0')
                ++str;

            outScore = compute_score(strBegin, (int)(str - strBegin), matches, nextMatch);
        }

        // Return best result
        if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
            // Recursive score is better than "this".  Never copy more than the
            // scratch buffer holds, even when the caller's buffer is larger.
            const int copyLen = maxMatches < SCRATCH_MATCH_CAPACITY
                                    ? maxMatches
                                    : SCRATCH_MATCH_CAPACITY;
            memcpy(matches, bestRecursiveMatches, copyLen);
            outScore = bestRecursiveScore;
            return true;
        }

        // Either "this" score is at least as good as the recursive one, or
        // there was no match at all.
        return matched;
    }

} // namespace fuzzy_internal

// Public interface

// Returns true if every character of `pattern` appears in `str`, in order,
// ignoring case.  An empty pattern always matches.
bool fuzzy_match_simple(char const * pattern, char const * str) {
    while (*pattern != '\0' && *str != '\0') {
        if (fuzzy_internal::lower_of(*pattern) == fuzzy_internal::lower_of(*str))
            ++pattern;
        ++str;
    }

    return *pattern == '\0';
}

// Scored match for callers that do not need the match positions.
// `outScore` is written only when the function returns true.
bool fuzzy_match(char const * pattern, char const * str, int & outScore) {
    uint8_t matches[256];

    return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
}

// Scored match that also reports, in `matches`, the offsets within `str` of
// the characters that matched.  `maxMatches` is the capacity of `matches`; a
// pattern that needs more entries than that does not match.  `outScore` is
// written only when the function returns true.
bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches) {
    int recursionCount = 0;
    int recursionLimit = 10;

    return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr,
                                                 matches, maxMatches, 0,
                                                 recursionCount, recursionLimit);
}

} // namespace fts

47
src/fts_fuzzy_match.hh Normal file
View File

@ -0,0 +1,47 @@
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
//
// VERSION
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
// 0.1.0 (2016-03-28) Initial release
//
// AUTHOR
// Forrest Smith
//
// NOTES
// Compiling
// The implementation lives in fts_fuzzy_match.cc; compile that file and include this header wherever the functions are used.
//
// fuzzy_match_simple(...)
// Returns true if each character in pattern is found sequentially within str
//
// fuzzy_match(...)
// Returns true if pattern is found AND calculates a score.
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
// Score values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H
#include <cstdint> // uint8_t
#include <ctype.h> // ::tolower, ::toupper
#include <cstring> // memcpy
#include <cstdio>
// Public interface
namespace fts {
// Returns true when every character of `pattern` occurs in `str` in the same
// order, compared case-insensitively.  No score is produced.
bool fuzzy_match_simple(char const * pattern, char const * str);
// Scored match.  Returns true when `pattern` matches `str`; `outScore` is
// assigned only in that case.  Match positions are tracked in an internal
// 256-entry buffer and discarded.
bool fuzzy_match(char const * pattern, char const * str, int & outScore);
// Scored match that also fills `matches` with the offsets (relative to the
// start of `str`) of the matched characters.  `maxMatches` is the capacity of
// `matches`; the match fails if the pattern needs more entries than that.
// `outScore` is assigned only when the function returns true.
bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches);
}
#endif // FTS_FUZZY_MATCH_H

View File

@ -282,8 +282,14 @@ void listview_curses::do_update(void)
}
if (this->lv_show_bottom_border) {
mvwchgat(this->lv_window,
this->lv_y + height - 1, this->lv_x, width - 1, A_UNDERLINE, 0, NULL);
cchar_t row_ch[width];
int y = this->lv_y + height - 1;
mvwin_wchnstr(this->lv_window, y, this->lv_x, row_ch, width - 1);
for (int lpc = 0; lpc < width - 1; lpc++) {
row_ch[lpc].attr |= A_UNDERLINE;
}
mvwadd_wchnstr(this->lv_window, y, this->lv_x, row_ch, width - 1);
}
this->lv_needs_update = false;

View File

@ -528,32 +528,43 @@ void rl_display_matches(void *dummy, readline_curses *rc)
unsigned long width;
__attribute((unused))
unsigned long height;
int max_len, cols, rows;
int max_len, cols;
getmaxyx(lnav_data.ld_window, height, width);
max_len = rc->get_max_match_length() + 2;
cols = max(1UL, width / max_len);
rows = (matches.size() + cols - 1) / cols;
if (matches.empty()) {
lnav_data.ld_match_source.clear();
}
else if (cols == 1) {
lnav_data.ld_match_source.replace_with(rc->get_matches());
}
else {
std::vector<std::string> horiz_matches;
string current_match = rc->get_match_string();
int curr_col = 0;
attr_line_t al;
bool add_nl = false;
horiz_matches.resize(rows);
for (size_t lpc = 0; lpc < matches.size(); lpc++) {
int curr_row = lpc % rows;
horiz_matches[curr_row].append(matches[lpc]);
horiz_matches[curr_row].append(
max_len - matches[lpc].length(), ' ');
for (auto match : matches) {
if (add_nl) {
al.append(1, '\n');
add_nl = false;
}
lnav_data.ld_match_source.replace_with(horiz_matches);
if (match == current_match) {
al.append(match, &view_curses::VC_STYLE, A_REVERSE);
} else {
al.append(match);
}
curr_col += 1;
if (curr_col < cols) {
int padding = max_len - match.size();
al.append(padding, ' ');
} else {
curr_col = 0;
add_nl = true;
}
}
lnav_data.ld_match_source.replace_with(al);
}
tc.reload_data();

View File

@ -62,6 +62,7 @@
#include "ansi_scrubber.hh"
#include "readline_curses.hh"
#include "spookyhash/SpookyV2.h"
#include "fts_fuzzy_match.hh"
using namespace std;
@ -71,6 +72,7 @@ static sig_atomic_t got_winch = 0;
static readline_curses *child_this;
static sig_atomic_t looping = 1;
static const int HISTORY_SIZE = 256;
static int completion_start;
static const char *RL_INIT[] = {
/*
@ -79,6 +81,11 @@ static const char *RL_INIT[] = {
*/
"set horizontal-scroll-mode on",
"set bell-style none",
"set show-all-if-ambiguous on",
"set show-all-if-unmodified on",
"set menu-complete-display-prefix on",
"TAB: menu-complete",
"\"\\x0b\": menu-complete-backward",
NULL
};
@ -237,6 +244,37 @@ char *readline_context::completion_generator(const char *text, int state)
matches.push_back(*iter);
}
}
if (matches.empty()) {
vector<pair<int, string>> fuzzy_matches;
for (iter = arg_possibilities->begin();
iter != arg_possibilities->end();
++iter) {
const char *poss_str = iter->c_str();
int score;
if (fts::fuzzy_match(text, poss_str, score) && score > 0) {
log_debug("match score %d %s %s", score, text, poss_str);
fuzzy_matches.emplace_back(score, *iter);
}
}
if (!fuzzy_matches.empty()) {
stable_sort(begin(fuzzy_matches), end(fuzzy_matches),
[](auto l, auto r) { return r.first < l.first; });
int highest = fuzzy_matches[0].first;
for (auto pair : fuzzy_matches) {
if (highest - pair.first < 10) {
matches.push_back(pair.second);
} else {
break;
}
}
}
}
}
if (matches.size() == 1) {
@ -246,8 +284,8 @@ char *readline_context::completion_generator(const char *text, int state)
last_match_str_valid = false;
if (sendstring(child_this->rc_command_pipe[readline_curses::RCF_SLAVE],
"m:0:0",
5) == -1) {
"m:0:0:0",
7) == -1) {
_exit(1);
}
}
@ -267,6 +305,7 @@ char **readline_context::attempted_completion(const char *text,
{
char **retval = NULL;
completion_start = start;
if (start == 0 && loaded_context->rc_possibilities.find("__command") !=
loaded_context->rc_possibilities.end()) {
arg_possibilities = &loaded_context->rc_possibilities["__command"];
@ -322,6 +361,19 @@ static int rubout_char_or_abort(int count, int key)
}
}
/**
 * Readline key handler: while the loaded context has a "__command" set of
 * possibilities and the line does not yet contain a space, the key triggers
 * menu completion; in every other case the key is inserted as typed.
 *
 * @param count Repeat count forwarded to the readline function that is called.
 * @param key   The key forwarded to the readline function that is called.
 * @return Whatever rl_menu_complete() or rl_insert() returns.
 */
int readline_context::command_complete(int count, int key)
{
    auto &possibilities = loaded_context->rc_possibilities;
    const bool has_command_set =
        possibilities.find("__command") != possibilities.end();

    // Only scan the line for a space when there are commands to complete.
    if (has_command_set && strchr(rl_line_buffer, ' ') == nullptr) {
        return rl_menu_complete(count, key);
    }

    return rl_insert(count, key);
}
readline_curses::readline_curses()
: rc_active_context(-1),
rc_child(-1),
@ -377,6 +429,7 @@ readline_curses::readline_curses()
stifle_history(HISTORY_SIZE);
rl_add_defun("rubout-char-or-abort", rubout_char_or_abort, '\b');
// rl_add_defun("command-complete", readline_context::command_complete, ' ');
for (int lpc = 0; RL_INIT[lpc]; lpc++) {
snprintf(buffer, sizeof(buffer), "%s", RL_INIT[lpc]);
@ -425,7 +478,9 @@ void readline_curses::store_matches(
}
}
else {
rc = snprintf(msg, sizeof(msg), "m:%d:%d", num_matches, max_len);
rc = snprintf(msg, sizeof(msg),
"m:%d:%d:%d",
completion_start, num_matches, max_len);
if (sendstring(child_this->rc_command_pipe[RCF_SLAVE], msg, rc) == -1) {
_exit(1);
}
@ -503,10 +558,18 @@ void readline_curses::start(void)
SpookyHash::Hash128(rl_line_buffer, rl_end, &h1, &h2);
if (rl_last_func == readline_context::command_complete) {
rl_last_func = rl_menu_complete;
}
bool complete_done = (
rl_last_func != rl_menu_complete &&
rl_last_func != rl_backward_menu_complete);
if (h1 == last_h1 && h2 == last_h2) {
// do nothing
} else if (sendcmd(this->rc_command_pipe[RCF_SLAVE],
'l',
complete_done ? 'l': 'c',
rl_line_buffer,
rl_end) != 0) {
perror("line: write failed");
@ -735,8 +798,10 @@ void readline_curses::check_poll_set(const vector<struct pollfd> &pollfds)
}
}
else if (msg[0] == 'm') {
if (sscanf(msg, "m:%d:%d", &this->rc_matches_remaining,
&this->rc_max_match_length) != 2) {
if (sscanf(msg, "m:%d:%d:%d",
&this->rc_match_start,
&this->rc_matches_remaining,
&this->rc_max_match_length) != 3) {
require(0);
}
this->rc_matches.clear();
@ -783,6 +848,12 @@ void readline_curses::check_poll_set(const vector<struct pollfd> &pollfds)
this->rc_display_match.invoke(this);
break;
case 'c':
this->rc_line_buffer = &msg[2];
this->rc_change.invoke(this);
this->rc_display_match.invoke(this);
break;
case 'n':
this->rc_display_next.invoke(this);
break;

View File

@ -203,6 +203,7 @@ public:
return this->rc_highlighter;
};
static int command_complete(int, int);
private:
static char **attempted_completion(const char *text, int start, int end);
static char *completion_generator(const char *text, int state);
@ -383,6 +384,19 @@ public:
return this->rc_matches;
};
/**
 * @return The stored rc_match_start value, which get_match_string() uses as
 * the offset into the line buffer where the text being completed begins.
 */
int get_match_start() const {
return this->rc_match_start;
}
/**
 * @return The word in the line buffer that starts at rc_match_start and runs
 * up to (but not including) the next space, or to the end of the line when
 * there is no space.  Returns an empty string if rc_match_start does not
 * refer to a position inside the current line buffer.
 */
std::string get_match_string() const {
    const std::string &line = this->rc_line_buffer;

    // The offset and the line buffer are stored separately, so the offset may
    // be stale with respect to the current line.  substr() throws
    // std::out_of_range when the position is past the end, so bail out early.
    if (this->rc_match_start < 0 ||
        static_cast<size_t>(this->rc_match_start) > line.size()) {
        return std::string();
    }

    const size_t start = static_cast<size_t>(this->rc_match_start);
    const size_t space_index = line.find(' ', start);

    if (space_index == std::string::npos) {
        return line.substr(start);
    }
    return line.substr(start, space_index - start);
}
int get_max_match_length() const {
return this->rc_max_match_length;
};
@ -408,6 +422,7 @@ private:
std::string rc_line_buffer;
time_t rc_value_expiration;
std::string rc_alt_value;
int rc_match_start{0};
int rc_matches_remaining;
int rc_max_match_length;
std::vector<std::string> rc_matches;

View File

@ -516,7 +516,6 @@ void view_curses::mvwattrline(WINDOW *window,
}
if (attr_range.lr_end > attr_range.lr_start) {
string_attrs_t::const_iterator range_iter;
int awidth = attr_range.length();
int color_pair;

View File

@ -177,6 +177,9 @@ const char *vt52_curses::map_input(int ch, int &len_out)
case 0x7f:
ch = BACKSPACE;
break;
case KEY_BTAB:
ch = BACKTAB;
break;
}
this->vc_map_buffer = (char)ch;
retval = &this->vc_map_buffer; /* XXX probably shouldn't do this. */

View File

@ -118,6 +118,7 @@ public:
const static char BACKSPACE = 8; /*< VT52 Backspace key value. */
const static char BELL = 7; /*< VT52 Bell value. */
const static char STX = 2; /*< VT52 Start-of-text value. */
const static char BACKTAB = 11;
protected: