[sql] extract() assumed too much about elements

pull/1037/head
Tim Stack 2 years ago
parent c9ec288853
commit e047386441

@ -91,6 +91,12 @@ map_elements_to_json2(yajl_gen gen,
int col = 0;
for (auto& iter : *el) {
if (iter.e_token != DNT_PAIR) {
log_warning("dropping non-pair element: %s",
dp.get_element_string(iter).c_str());
continue;
}
const data_parser::element& pvalue = iter.get_pair_value();
if (pvalue.value_token() == DT_INVALID) {
@ -139,6 +145,11 @@ map_elements_to_json(yajl_gen gen,
std::vector<std::string> names;
for (auto& iter : *el) {
if (iter.e_token != DNT_PAIR) {
unique_names = false;
continue;
}
const auto& pvalue = iter.get_pair_value();
if (pvalue.value_token() == DT_INVALID) {
@ -154,9 +165,8 @@ map_elements_to_json(yajl_gen gen,
if (names | lnav::itertools::find(key_str)) {
unique_names = false;
break;
} else {
names.push_back(key_str);
}
names.push_back(key_str);
}
names.clear();
@ -187,7 +197,8 @@ elements_to_json(yajl_gen gen,
if (key_str.empty()
&& el->front().get_pair_value().value_token()
== DNT_GROUP) {
== DNT_GROUP)
{
element_to_json(gen, dp, el->front().get_pair_value());
} else {
yajlpp_map singleton_map(gen);

@ -21,7 +21,7 @@
"pattern": "^(?<timestamp>[A-Z][a-z]{2}\\s+\\d+\\s+\\d+:\\d+:\\d+) (?<host>[^\\s]+) (?<facility>\\w+)\\.(?<level>\\w+) (?<module>dnsmasq-dhcp[A-Za-z0-9\\.\\-]*)(?:\\[(?<ID>\\d+)\\])?: (?<dhcp_op>DHCP[^(]+)(?:\\((?<dhcp_iface>[^)]*)\\)) (?:(?<dhcp_ip>(?:\\d{1,3}\\.){3}\\d{1,3}) )?(?<dhcp_mac>(?:[0-9a-f]{2}:)+[0-9a-f]{2})(?: (?<body>.*))?$"
},
"other": {
"pattern": "^(?<timestamp>[A-Z][a-z]{2}\\s+\\d+\\s+\\d+:\\d+:\\d+) (?<host>[^\\s]+) (?<facility>\\w+)\\.(?<level>\\w+) (?<module>(?!kernel|dnsmasq-dhcp)[A-Za-z0-9/\\.\\-]*)(?:\\[(?<ID>\\d+)\\])?: (?:\\[apply-config\\])?(?:\\[(?:\\s*(?<ellapsed>\\d+\\.\\d+))\\]\\s)?(?<body>.*)$"
"pattern": "^(?<timestamp>[A-Z][a-z]{2}\\s+\\d+\\s+\\d+:\\d+:\\d+) (?<host>[^\\s]+) (?<facility>\\w+)\\.(?<level>\\w+) (?<module>(?!kernel|dnsmasq-dhcp)[A-Za-z0-9\\.\\-]*)(?:\\[(?<ID>\\d+)\\])?: (?:\\[apply-config\\])?(?:\\[(?:\\s*(?<ellapsed>\\d+\\.\\d+))\\]\\s)?(?<body>.*)$"
}
},
"level-field": "level",

@ -303,7 +303,7 @@ rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
}
}
viu.allocate_args(1);
viu.allocate_args(FSTAT_COL_PATTERN, FSTAT_COL_PATTERN, 1);
return SQLITE_OK;
}

@ -2742,14 +2742,15 @@ regexp_capture(*string*, *pattern*)
.. _regexp_capture_into_json:
regexp_capture_into_json(*string*, *pattern*)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
regexp_capture_into_json(*string*, *pattern*, *\[options\]*)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A table-valued function that executes a regular-expression over a string and returns the captured values as a JSON object. If the regex only matches a subset of the input string, it will be rerun on the remaining parts of the string until no more matches are found.
**Parameters**
* **string\*** --- The string to match against the given pattern.
* **pattern\*** --- The regular expression to match.
* **options** --- A JSON object with the following option: convert-numbers - True (default) if text that looks like numeric data should be converted to JSON numbers, false if they should be captured as strings.
**Examples**
To extract the key/value pairs 'a'/1 and 'b'/2 from the string 'a=1; b=2':

@ -34,12 +34,14 @@
#include "base/lnav_log.hh"
#include "column_namer.hh"
#include "config.h"
#include "lnav_util.hh"
#include "pcrepp/pcrepp.hh"
#include "scn/scn.h"
#include "sql_help.hh"
#include "sql_util.hh"
#include "vtab_module.hh"
#include "yajlpp/yajlpp.hh"
#include "yajlpp/yajlpp_def.hh"
enum {
RC_COL_MATCH_INDEX,
@ -202,7 +204,7 @@ rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
}
}
viu.allocate_args(2);
viu.allocate_args(RC_COL_VALUE, RC_COL_PATTERN, 2);
return SQLITE_OK;
}
@ -253,8 +255,20 @@ enum {
RCJ_COL_CONTENT,
RCJ_COL_VALUE,
RCJ_COL_PATTERN,
RCJ_COL_FLAGS,
};
/**
 * Options that can be supplied as the optional third argument of the
 * regexp_capture_into_json() table-valued function.
 */
struct regexp_capture_flags {
    /*
     * When true (the default), captured text that scans as a number is
     * emitted as a JSON number; when false, the capture is emitted as a
     * JSON string.
     */
    bool convert_numbers = true;
};
// JSON handlers used to parse the "flags" argument into a
// regexp_capture_flags value.  The only property registered here is
// "convert-numbers", which is bound to regexp_capture_flags::convert_numbers.
const typed_json_path_container<regexp_capture_flags>
regexp_capture_flags_handlers
= typed_json_path_container<regexp_capture_flags>{
yajlpp::property_handler("convert-numbers")
.for_field(&regexp_capture_flags::convert_numbers),
};
struct regexp_capture_into_json {
static constexpr const char* NAME = "regexp_capture_into_json";
static constexpr const char* CREATE_STMT = R"(
@ -265,7 +279,8 @@ CREATE TABLE regexp_capture_into_json (
match_index INTEGER,
content TEXT,
value TEXT HIDDEN,
pattern TEXT HIDDEN
pattern TEXT HIDDEN,
flags TEXT HIDDEN
);
)";
@ -280,6 +295,8 @@ CREATE TABLE regexp_capture_into_json (
bool c_matched{false};
size_t c_match_index{0};
sqlite3_int64 c_rowid{0};
std::string c_flags_string;
nonstd::optional<regexp_capture_flags> c_flags;
cursor(sqlite3_vtab* vt) : base({vt}) { this->c_context.set_count(0); }
@ -332,7 +349,7 @@ CREATE TABLE regexp_capture_into_json (
if (!cap->is_valid()) {
yajl_gen_null(gen);
} else {
} else if (!vc.c_flags || vc.c_flags->convert_numbers) {
auto cap_view = vc.c_input->to_string_view(cap);
auto scan_int_res
= scn::scan_value<int64_t>(cap_view);
@ -354,6 +371,10 @@ CREATE TABLE regexp_capture_into_json (
yajl_gen_pstring(
gen, cap_view.data(), cap_view.length());
} else {
yajl_gen_pstring(gen,
vc.c_input->get_substr_start(cap),
cap->length());
}
}
}
@ -384,6 +405,14 @@ CREATE TABLE regexp_capture_into_json (
ctx, str.c_str(), str.length(), SQLITE_TRANSIENT);
break;
}
case RCJ_COL_FLAGS: {
if (!vc.c_flags) {
sqlite3_result_null(ctx);
} else {
to_sqlite(ctx, vc.c_flags_string);
}
break;
}
}
return SQLITE_OK;
@ -404,12 +433,13 @@ rcjBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
switch (iter->iColumn) {
case RCJ_COL_VALUE:
case RCJ_COL_PATTERN:
case RCJ_COL_FLAGS:
viu.column_used(iter);
break;
}
}
viu.allocate_args(2);
viu.allocate_args(RCJ_COL_VALUE, RCJ_COL_FLAGS, 2);
return SQLITE_OK;
}
@ -422,14 +452,16 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor,
{
auto* pCur = (regexp_capture_into_json::cursor*) pVtabCursor;
if (argc != 2) {
if (argc < 2 || argc > 3) {
pCur->c_content.clear();
pCur->c_pattern.clear();
pCur->c_flags_string.clear();
pCur->c_flags = nonstd::nullopt;
return SQLITE_OK;
}
auto byte_count = sqlite3_value_bytes(argv[0]);
auto blob = (const char*) sqlite3_value_blob(argv[0]);
const auto* blob = (const char*) sqlite3_value_blob(argv[0]);
pCur->c_content_as_blob = (sqlite3_value_type(argv[0]) == SQLITE_BLOB);
pCur->c_content.assign(blob, byte_count);
@ -442,6 +474,32 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor,
return SQLITE_ERROR;
}
pCur->c_flags_string.clear();
pCur->c_flags = nonstd::nullopt;
if (argc == 3) {
static const intern_string_t FLAGS_SRC = intern_string::lookup("flags");
const auto flags_json = from_sqlite<string_fragment>()(argc, argv, 2);
if (!flags_json.empty()) {
const auto parse_res
= regexp_capture_flags_handlers.parser_for(FLAGS_SRC).of(
flags_json);
if (parse_res.isErr()) {
auto um = lnav::console::user_message::error(
"unable to parse flags")
.with_reason(parse_res.unwrapErr()[0]);
pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
"%s%s", sqlitepp::ERROR_PREFIX, lnav::to_json(um).c_str());
return SQLITE_ERROR;
}
pCur->c_flags_string = flags_json.to_string();
pCur->c_flags = parse_res.unwrap();
}
}
pCur->c_pattern = re_res.unwrap();
pCur->c_namer
= std::make_unique<column_namer>(column_namer::language::JSON);
@ -527,6 +585,13 @@ register_regexp_vtab(sqlite3* db)
.with_parameter(
{"string", "The string to match against the given pattern."})
.with_parameter({"pattern", "The regular expression to match."})
.with_parameter(help_text{
"options",
"A JSON object with the following option: "
"convert-numbers - True (default) if text that looks like "
"numeric data should be converted to JSON numbers, "
"false if they should be captured as strings."}
.optional())
.with_result({
"match_index",
"The match iteration. This value will increase "

@ -65,3 +65,45 @@ sqlite3_error_to_user_message(sqlite3* db)
return lnav::console::user_message::error("SQL statement failed")
.with_reason(errmsg);
}
/**
 * Note that the constraint referenced by `iter` is going to be used.
 *
 * Widens the tracked [min, max] range of used column numbers to include the
 * constraint's column, sets the bit for the constraint's index in idxNum,
 * and bumps the count of used columns.
 *
 * @param iter Iterator positioned on the constraint being used.
 */
void
vtab_index_usage::column_used(
    const vtab_index_constraints::const_iterator& iter)
{
    const auto column = iter->iColumn;

    if (column < this->viu_min_column) {
        this->viu_min_column = column;
    }
    if (column > this->viu_max_column) {
        this->viu_max_column = column;
    }

    // Remember which entry of the constraint array was selected so that
    // allocate_args() can find it again.
    this->viu_index_info.idxNum |= (1L << iter.i_index);
    ++this->viu_used_column_count;
}
/**
 * Assign argv slots to the constraints previously passed to column_used().
 *
 * The plan is only accepted when the lowest used column is exactly `low`,
 * no used column is above `high`, and at least `required` constraints were
 * used.  Otherwise the cost and row estimates are set to 2147483647 and no
 * argv slots are assigned (presumably to steer the query planner away from
 * this plan -- confirm against the SQLite xBestIndex documentation).
 *
 * @param low      Column number that must be the smallest one used.
 * @param high     Largest column number that is allowed to be used.
 * @param required Minimum number of constraints that must have been used.
 */
void
vtab_index_usage::allocate_args(int low, int high, int required)
{
    auto& info = this->viu_index_info;

    const bool acceptable = this->viu_min_column == low
        && this->viu_max_column <= high
        && this->viu_used_column_count >= required;

    if (!acceptable) {
        info.estimatedCost = 2147483647;
        info.estimatedRows = 2147483647;
        return;
    }

    // Walk the columns in ascending order so that argvIndex values are
    // handed out by column number rather than by constraint position.
    int next_arg = 0;
    for (int column = 0; column <= this->viu_max_column; ++column) {
        for (int idx = 0; idx < info.nConstraint; ++idx) {
            // Only constraints on this column that were flagged by
            // column_used() receive an argument slot.
            if (info.aConstraint[idx].iColumn == column
                && (info.idxNum & (1L << idx)))
            {
                next_arg += 1;
                info.aConstraintUsage[idx].argvIndex = next_arg;
            }
        }
    }

    info.estimatedCost = 1.0;
    info.estimatedRows = 1;
}

@ -568,47 +568,14 @@ public:
{
}
void column_used(const vtab_index_constraints::const_iterator& iter)
{
this->viu_max_column = std::max(iter->iColumn, this->viu_max_column);
this->viu_index_info.idxNum |= (1L << iter.i_index);
this->viu_used_column_count += 1;
}
void allocate_args(int expected)
{
int n_arg = 0;
void column_used(const vtab_index_constraints::const_iterator& iter);
if (this->viu_used_column_count != expected) {
this->viu_index_info.estimatedCost = 2147483647;
this->viu_index_info.estimatedRows = 2147483647;
return;
}
for (int lpc = 0; lpc <= this->viu_max_column; lpc++) {
for (int cons_index = 0;
cons_index < this->viu_index_info.nConstraint;
cons_index++)
{
if (this->viu_index_info.aConstraint[cons_index].iColumn != lpc)
{
continue;
}
if (!(this->viu_index_info.idxNum & (1L << cons_index))) {
continue;
}
this->viu_index_info.aConstraintUsage[cons_index].argvIndex
= ++n_arg;
}
}
this->viu_index_info.estimatedCost = 1.0;
this->viu_index_info.estimatedRows = 1;
}
void allocate_args(int low, int high, int required);
private:
sqlite3_index_info& viu_index_info;
int viu_used_column_count{0};
int viu_min_column{INT_MAX};
int viu_max_column{0};
};

@ -111,10 +111,7 @@ CREATE TABLE xpath (
cursor(sqlite3_vtab* vt) : base({vt}) {}
~cursor()
{
this->reset();
}
~cursor() { this->reset(); }
int reset()
{
@ -131,9 +128,7 @@ CREATE TABLE xpath (
return SQLITE_OK;
}
int eof()
{
return this->c_rowid >= (int64_t) this->c_results.size(); }
int eof() { return this->c_rowid >= (int64_t) this->c_results.size(); }
int get_rowid(sqlite3_int64& rowid_out)
{
@ -288,7 +283,7 @@ rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
}
}
viu.allocate_args(2);
viu.allocate_args(XP_COL_XPATH, XP_COL_VALUE, 2);
return SQLITE_OK;
}

@ -567,9 +567,9 @@ public:
}
template<typename T>
yajlpp_gen_context& with_obj(T& obj)
yajlpp_gen_context& with_obj(const T& obj)
{
this->ygc_obj_stack.push(&obj);
this->ygc_obj_stack.push((void*) &obj);
return *this;
}

@ -1465,7 +1465,7 @@ struct typed_json_path_container : public json_path_container {
return yajlpp_parser<T>{src, this};
}
std::string to_string(T& obj) const
std::string to_string(const T& obj) const
{
yajlpp_gen gen;
yajlpp_gen_context ygc(gen, *this);

@ -760,6 +760,10 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_regexp.sh_b841a0c09601e2419eeb99e85f7e286c889e4801.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_bbd1128cf61a9af8f9dc937b46217443f42e1a7a.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_bbd1128cf61a9af8f9dc937b46217443f42e1a7a.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.out \
$(srcdir)/%reldir%/test_sql_search_table.sh_1a0d872ebc492fcecb2e79a0993170d5fc771a5b.err \
$(srcdir)/%reldir%/test_sql_search_table.sh_1a0d872ebc492fcecb2e79a0993170d5fc771a5b.out \
$(srcdir)/%reldir%/test_sql_search_table.sh_3f5f74863d065418bca5a000e6ad3d9344635164.err \
@ -866,6 +870,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_str_func.sh_d4bc869850f5b7e53353fc2506fea0c8e96f29c5.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_d4e805ff08d4ccf62865dbf8db8d526f7ce02f37.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_d4e805ff08d4ccf62865dbf8db8d526f7ce02f37.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_d54a759f5683a22ad289129b2096b80652b1cc0c.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_d54a759f5683a22ad289129b2096b80652b1cc0c.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_d8d4cde8bbc98175069be579ff5634de43880b8c.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_d8d4cde8bbc98175069be579ff5634de43880b8c.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_e68167bf5edc7a7b1defd06bdfb694ffa8b00df2.err \

@ -3384,7 +3384,7 @@ For support questions, email:
regexp_capture_into_json(string, pattern)
regexp_capture_into_json(string, pattern, [options])
══════════════════════════════════════════════════════════════════════
A table-valued function that executes a regular-expression over a
string and returns the captured values as a JSON object. If the
@ -3393,6 +3393,10 @@ For support questions, email:
Parameters
string The string to match against the given pattern.
pattern The regular expression to match.
options A JSON object with the following option:
convert-numbers - True (default) if text that looks like
numeric data should be converted to JSON numbers, false if
they should be captured as strings.
Results
match_index The match iteration. This value will
increase each time a new match is found in the input

@ -0,0 +1,9 @@
✘ error: unable to parse flags
reason: invalid JSON
 |  reason: lexical error: invalid char in json text.
 |   |  {abc
 |   |  (right here) ------^
 |   --> flags:1
 |   | {abc
 --> command-option:1
 | ;SELECT * from regexp_capture_into_json('foo=0x123e;', '(?<key>\w+)=(?<value>[^;]+)', '{abc')

@ -0,0 +1,2 @@
match_index  content 
 0 {"key":"foo","value":"0x123e"} 

@ -0,0 +1,47 @@
[
{
"log_body": "[VpxLRO] -- BEGIN lro-846063 -- SessionManager -- vim.SessionManager.sessionIsActive -- 528e6e0c-246d-58b5-3234-278c6e0c5d0d(52c289ac-2563-48d5-8a8e-f178da022c0d)",
"extract(log_body)": {
"col_0": [
"VpxLRO"
],
"col_1": "--",
"col_2": "BEGIN",
"col_3": "lro-846063",
"col_4": "--",
"col_5": "SessionManager",
"col_6": "--",
"col_7": "vim.SessionManager.sessionIsActive",
"col_8": "--",
"col_9": "528e6e0c-246d-58b5-3234-278c6e0c5d0d",
"col_10": [
"52c289ac-2563-48d5-8a8e-f178da022c0d"
]
}
},
{
"log_body": "[VpxLRO] -- FINISH lro-846063",
"extract(log_body)": {
"col_0": [
"VpxLRO"
],
"col_1": "--",
"col_2": "FINISH",
"col_3": "lro-846063"
}
},
{
"log_body": "Exception was thrown when call vsan-performance-manager for cluster [vim.ClusterComputeResource:domain-c109,Cluster-52] perf metrics: N3Vim5Fault8NotFound9ExceptionE(Fault cause: vim.fault.NotFound\n--> )",
"extract(log_body)": {
"Exception was thrown when call vsan-performance-manager for cluster [vim.ClusterComputeResource:domain-c109,Cluster-52] perf metrics": {
"N3Vim5Fault8NotFound9ExceptionE": [
"Fault cause",
"vim.fault.NotFound",
"\n",
"--",
">"
]
}
}
}
]

@ -0,0 +1,4 @@
2022-06-02T11:58:12.193Z info vpxd[45715] [Originator@6876 sub=vpxLro opID=7e1280cf] [VpxLRO] -- BEGIN lro-846063 -- SessionManager -- vim.SessionManager.sessionIsActive -- 528e6e0c-246d-58b5-3234-278c6e0c5d0d(52c289ac-2563-48d5-8a8e-f178da022c0d)
2022-06-02T11:58:12.194Z info vpxd[45715] [Originator@6876 sub=vpxLro opID=7e1280cf] [VpxLRO] -- FINISH lro-846063
2022-06-02T11:59:41.498Z warning vpxd[47756] [Originator@6876 sub=drmLogger opID=SWI-66b629ff] Exception was thrown when call vsan-performance-manager for cluster [vim.ClusterComputeResource:domain-c109,Cluster-52] perf metrics: N3Vim5Fault8NotFound9ExceptionE(Fault cause: vim.fault.NotFound
--> )

@ -11,6 +11,14 @@ run_cap_test ${lnav_test} -n \
-c ";SELECT * from regexp_capture_into_json('foo=0x123e;', '(?<key>\w+)=(?<value>[^;]+)')" \
${test_dir}/logfile_syslog.3
run_cap_test ${lnav_test} -n \
-c ";SELECT * from regexp_capture_into_json('foo=0x123e;', '(?<key>\w+)=(?<value>[^;]+)', json_object('convert-numbers', json('false')))" \
${test_dir}/logfile_syslog.3
run_cap_test ${lnav_test} -n \
-c ";SELECT * from regexp_capture_into_json('foo=0x123e;', '(?<key>\w+)=(?<value>[^;]+)', '{abc')" \
${test_dir}/logfile_syslog.3
run_cap_test ${lnav_test} -n \
-c ";SELECT * from regexp_capture_into_json('foo=123e;', '(?<key>\w+)=(?<value>[^;]+)')" \
${test_dir}/logfile_syslog.3

@ -107,3 +107,8 @@ run_cap_test ./drive_sql "SELECT encode('foo', null)"
run_cap_test ./drive_sql "SELECT encode(null, 'base64')"
run_cap_test ./drive_sql "SELECT gunzip(decode(encode(gzip('Hello, World!'), 'base64'), 'base64'))"
run_cap_test ${lnav_test} -n \
-c ';SELECT log_body, extract(log_body) from vmw_log' \
-c ':write-json-to -' \
${test_dir}/logfile_vmw_log.0

Loading…
Cancel
Save