lnav/src/xpath_vtab.cc

399 lines
13 KiB
C++
Raw Normal View History

/**
* Copyright (c) 2020, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sstream>
#include <unordered_map>
#include "base/lnav_log.hh"
2022-03-16 22:38:08 +00:00
#include "config.h"
2020-12-23 23:01:21 +00:00
#include "pugixml/pugixml.hpp"
2022-03-13 22:49:41 +00:00
#include "sql_help.hh"
2020-12-23 23:01:21 +00:00
#include "sql_util.hh"
#include "vtab_module.hh"
2022-03-16 22:38:08 +00:00
#include "xml_util.hh"
2020-12-23 23:01:21 +00:00
#include "yajlpp/yajlpp.hh"
/**
 * Column indexes passed to xpath_vtab::get_column().  The numbering follows
 * the order in which the columns are declared in xpath_vtab::CREATE_STMT.
 */
enum {
    XP_COL_RESULT = 0,     // "result"
    XP_COL_NODE_PATH = 1,  // "node_path"
    XP_COL_NODE_ATTR = 2,  // "node_attr"
    XP_COL_NODE_TEXT = 3,  // "node_text"
    XP_COL_XPATH = 4,      // "xpath" (hidden)
    XP_COL_VALUE = 5,      // "value" (hidden)
};
2022-03-16 22:38:08 +00:00
/**
 * Per-thread store of compiled XPath queries, keyed by the expression text.
 * Entries are removed by checkout_query() and put back by checkin_query().
 */
static thread_local std::unordered_map<std::string, pugi::xpath_query> QUERY_CACHE{};
2020-12-23 23:01:21 +00:00
2022-03-16 22:38:08 +00:00
/**
 * Take ownership of a compiled XPath query for the given expression.
 *
 * If a compiled copy is sitting in QUERY_CACHE it is moved out and its entry
 * is erased, so the caller holds the query until it is handed back with
 * checkin_query().  Otherwise the expression is compiled on the spot.
 *
 * @param query The XPath expression text.
 * @return The compiled query.  It tests false if the expression failed to
 *   compile; callers inspect result() on it for the error details.
 */
static pugi::xpath_query
checkout_query(const std::string& query)
{
    auto cached = QUERY_CACHE.find(query);
    if (cached == QUERY_CACHE.end()) {
        // Cache miss: hand the freshly compiled query (valid or not) straight
        // to the caller.  Inserting it into the map only to move it out and
        // erase it again would cost a key copy and a node allocation for
        // nothing.
        return pugi::xpath_query(query.c_str());
    }

    auto retval = std::move(cached->second);
    QUERY_CACHE.erase(cached);
    return retval;
}
2022-03-16 22:38:08 +00:00
static void
checkin_query(const std::string& query_str, pugi::xpath_query query)
2020-12-23 23:01:21 +00:00
{
if (!query) {
return;
}
QUERY_CACHE[query_str] = std::move(query);
}
struct xpath_vtab {
2022-03-16 22:38:08 +00:00
static constexpr const char* NAME = "xpath";
static constexpr const char* CREATE_STMT = R"(
2020-12-23 23:01:21 +00:00
-- The xpath() table-valued function allows you to execute an xpath expression
CREATE TABLE xpath (
result text, -- The result of the xpath expression
node_path text, -- The absolute path to the node selected by the expression
node_attr text, -- The node attributes stored in a JSON object
node_text text, -- The text portion of the node selected by the expression
xpath text HIDDEN,
value text HIDDEN
);
)";
struct cursor {
sqlite3_vtab_cursor base;
sqlite3_int64 c_rowid{0};
2022-03-31 15:59:19 +00:00
std::string c_xpath;
std::string c_value;
2021-03-06 00:17:28 +00:00
bool c_value_as_blob{false};
2020-12-23 23:01:21 +00:00
pugi::xpath_query c_query;
pugi::xml_document c_doc;
pugi::xpath_node_set c_results;
2022-07-07 04:22:14 +00:00
cursor(sqlite3_vtab* vt) : base({vt}) {}
2020-12-23 23:01:21 +00:00
2022-03-16 22:38:08 +00:00
~cursor()
{
2020-12-23 23:01:21 +00:00
this->reset();
}
2022-03-16 22:38:08 +00:00
int reset()
{
2020-12-23 23:01:21 +00:00
this->c_rowid = 0;
checkin_query(this->c_xpath, std::move(this->c_query));
return SQLITE_OK;
2022-07-07 04:22:14 +00:00
}
2020-12-23 23:01:21 +00:00
2022-03-16 22:38:08 +00:00
int next()
{
2020-12-23 23:01:21 +00:00
this->c_rowid += 1;
return SQLITE_OK;
2022-07-07 04:22:14 +00:00
}
2020-12-23 23:01:21 +00:00
2022-03-16 22:38:08 +00:00
int eof()
{
2022-07-07 04:22:14 +00:00
return this->c_rowid >= (int64_t) this->c_results.size(); }
2020-12-23 23:01:21 +00:00
2022-03-16 22:38:08 +00:00
int get_rowid(sqlite3_int64& rowid_out)
{
2020-12-23 23:01:21 +00:00
rowid_out = this->c_rowid;
return SQLITE_OK;
2022-07-07 04:22:14 +00:00
}
2020-12-23 23:01:21 +00:00
};
2022-03-16 22:38:08 +00:00
int get_column(const cursor& vc, sqlite3_context* ctx, int col)
{
2020-12-23 23:01:21 +00:00
switch (col) {
case XP_COL_RESULT: {
auto& xpath_node = vc.c_results[vc.c_rowid];
if (xpath_node.node()) {
2022-03-31 15:59:19 +00:00
std::ostringstream oss;
2020-12-23 23:01:21 +00:00
// XXX avoid the extra allocs
xpath_node.node().print(oss);
auto node_xml = oss.str();
sqlite3_result_text(ctx,
node_xml.c_str(),
node_xml.length(),
SQLITE_TRANSIENT);
} else if (xpath_node.attribute()) {
sqlite3_result_text(ctx,
xpath_node.attribute().value(),
-1,
SQLITE_TRANSIENT);
} else {
sqlite3_result_null(ctx);
}
break;
}
case XP_COL_NODE_PATH: {
auto& xpath_node = vc.c_results[vc.c_rowid];
auto x_node = xpath_node.node();
auto x_attr = xpath_node.attribute();
if (x_node || x_attr) {
if (!x_node) {
x_node = xpath_node.parent();
}
auto node_path = lnav::pugixml::get_actual_path(x_node);
2020-12-23 23:01:21 +00:00
if (x_attr) {
node_path += "/@" + std::string(x_attr.name());
}
sqlite3_result_text(ctx,
node_path.c_str(),
node_path.length(),
SQLITE_TRANSIENT);
} else {
sqlite3_result_null(ctx);
}
break;
}
case XP_COL_NODE_ATTR: {
auto& xpath_node = vc.c_results[vc.c_rowid];
auto x_node = xpath_node.node();
auto x_attr = xpath_node.attribute();
if (x_node || x_attr) {
if (!x_node) {
x_node = xpath_node.parent();
}
yajlpp_gen gen;
yajl_gen_config(gen, yajl_gen_beautify, false);
{
yajlpp_map attrs(gen);
for (const auto& attr : x_node.attributes()) {
attrs.gen(attr.name());
attrs.gen(attr.value());
}
}
auto sf = gen.to_string_fragment();
2022-03-16 22:38:08 +00:00
sqlite3_result_text(
ctx, sf.data(), sf.length(), SQLITE_TRANSIENT);
2020-12-23 23:01:21 +00:00
sqlite3_result_subtype(ctx, 'J');
} else {
sqlite3_result_null(ctx);
}
break;
}
case XP_COL_NODE_TEXT: {
auto& xpath_node = vc.c_results[vc.c_rowid];
auto x_node = xpath_node.node();
auto x_attr = xpath_node.attribute();
if (x_node || x_attr) {
if (!x_node) {
x_node = xpath_node.parent();
}
auto node_text = x_node.text();
2022-03-16 22:38:08 +00:00
sqlite3_result_text(
ctx, node_text.get(), -1, SQLITE_TRANSIENT);
2020-12-23 23:01:21 +00:00
} else {
sqlite3_result_null(ctx);
}
break;
}
case XP_COL_XPATH:
sqlite3_result_text(ctx,
vc.c_xpath.c_str(),
vc.c_xpath.length(),
SQLITE_STATIC);
break;
case XP_COL_VALUE:
2021-03-06 00:17:28 +00:00
if (vc.c_value_as_blob) {
sqlite3_result_blob64(ctx,
vc.c_value.c_str(),
vc.c_value.length(),
SQLITE_STATIC);
} else {
sqlite3_result_text(ctx,
vc.c_value.c_str(),
vc.c_value.length(),
SQLITE_STATIC);
}
2020-12-23 23:01:21 +00:00
break;
}
return SQLITE_OK;
}
};
2022-03-16 22:38:08 +00:00
/**
 * xBestIndex callback for the xpath() table.  Marks every equality
 * constraint on the hidden "xpath" and "value" columns as used and then
 * asks the usage helper to allocate two arguments.
 *
 * @param tab The virtual table (not consulted).
 * @param pIdxInfo The index information supplied by SQLite.
 * @return Always SQLITE_OK.
 */
static int
rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
{
    vtab_index_constraints constraints(pIdxInfo);
    vtab_index_usage usage(pIdxInfo);

    for (auto cons = constraints.begin(); cons != constraints.end(); ++cons) {
        const bool is_equality = (cons->op == SQLITE_INDEX_CONSTRAINT_EQ);
        const bool is_argument_column = (cons->iColumn == XP_COL_VALUE
                                         || cons->iColumn == XP_COL_XPATH);

        if (is_equality && is_argument_column) {
            usage.column_used(cons);
        }
    }

    // NOTE(review): presumably this tells the helper that both function
    // arguments are expected -- confirm against vtab_index_usage.
    usage.allocate_args(2);

    return SQLITE_OK;
}
2022-03-16 22:38:08 +00:00
/**
 * xFilter callback for the xpath() table: parse the XML document, compile
 * (or fetch from the cache) the XPath expression and evaluate it, leaving
 * the selected nodes in the cursor.
 *
 * @param pVtabCursor The cursor, which is really an xpath_vtab::cursor.
 * @param idxNum Unused.
 * @param idxStr Unused.
 * @param argc Number of values in argv; anything other than 2 yields an
 *   empty result.
 * @param argv argv[0] is read as the XPath expression and argv[1] as the
 *   XML document.  NOTE(review): this ordering relies on how rcBestIndex
 *   and vtab_index_usage assign argument slots -- confirm there.
 * @return SQLITE_OK on success (including the "no rows" cases) or
 *   SQLITE_ERROR, with the table's zErrMsg set, if the document or the
 *   expression is invalid.
 */
static int
rcFilter(sqlite3_vtab_cursor* pVtabCursor,
         int idxNum,
         const char* idxStr,
         int argc,
         sqlite3_value** argv)
{
    auto* pCur = reinterpret_cast<xpath_vtab::cursor*>(pVtabCursor);
    auto* vtab = pVtabCursor->pVtab;

    // A cursor can be filtered more than once (e.g. as the inner loop of a
    // join).  Return the previously checked-out query to the cache under
    // the expression it was compiled from, *before* c_xpath is changed, and
    // drop the old results so that the early-return paths below cannot
    // replay rows from the previous scan.  reset() also rewinds c_rowid.
    pCur->reset();
    pCur->c_results = pugi::xpath_node_set();

    if (argc != 2) {
        pCur->c_xpath.clear();
        pCur->c_value.clear();

        return SQLITE_OK;
    }

    pCur->c_value_as_blob = (sqlite3_value_type(argv[1]) == SQLITE_BLOB);
    const auto byte_count = sqlite3_value_bytes(argv[1]);
    if (byte_count == 0) {
        // An empty or NULL document selects nothing.
        return SQLITE_OK;
    }

    const auto* blob = static_cast<const char*>(sqlite3_value_blob(argv[1]));
    pCur->c_value.assign(blob, byte_count);

    const auto parse_res = pCur->c_doc.load_string(pCur->c_value.c_str());
    if (!parse_res) {
        // Any earlier message must be released before it is replaced.
        sqlite3_free(vtab->zErrMsg);
        // The offset is a ptrdiff_t, so widen it explicitly and use a
        // 64-bit conversion instead of passing it to "%d".
        vtab->zErrMsg
            = sqlite3_mprintf("Invalid XML document at offset %lld: %s",
                              static_cast<long long>(parse_res.offset),
                              parse_res.description());

        return SQLITE_ERROR;
    }

    // sqlite3_value_text() returns NULL for a SQL NULL, which must not be
    // assigned to a std::string.  Treat it like an empty document: no rows.
    const auto* xpath_text
        = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
    if (xpath_text == nullptr) {
        pCur->c_xpath.clear();

        return SQLITE_OK;
    }

    pCur->c_xpath = xpath_text;
    pCur->c_query = checkout_query(pCur->c_xpath);
    if (!pCur->c_query) {
        const auto& res = pCur->c_query.result();

        sqlite3_free(vtab->zErrMsg);
        vtab->zErrMsg
            = sqlite3_mprintf("Invalid XPATH expression at offset %lld: %s",
                              static_cast<long long>(res.offset),
                              res.description());

        return SQLITE_ERROR;
    }

    pCur->c_rowid = 0;
    pCur->c_results = pCur->c_doc.select_nodes(pCur->c_query);

    return SQLITE_OK;
}
2022-03-16 22:38:08 +00:00
/**
 * Create the "xpath" virtual table module on the given database and
 * publish its help text.
 *
 * @param db The database connection to register the module with.
 * @return The result of creating the module, which is also checked with
 *   ensure() to be SQLITE_OK.
 */
int
register_xpath_vtab(sqlite3* db)
{
    static vtab_module<tvt_no_update<xpath_vtab>> XPATH_MODULE;
    static help_text xpath_help
        = help_text("xpath",
                    "A table-valued function that executes an xpath "
                    "expression over an XML "
                    "string and returns the selected values.")
              .sql_table_valued_function()
              .with_parameter({
                  "xpath",
                  "The XPATH expression to evaluate over the XML document.",
              })
              .with_parameter({
                  "xmldoc",
                  "The XML document as a string.",
              })
              .with_result({
                  "result",
                  "The result of the XPATH expression.",
              })
              .with_result({
                  "node_path",
                  "The absolute path to the node containing the result.",
              })
              .with_result({
                  "node_attr",
                  "The node's attributes stored in JSON object.",
              })
              .with_result({
                  "node_text",
                  "The node's text value.",
              })
              .with_tags({"string", "xml"})
              .with_example({
                  "To select the XML nodes on the path '/abc/def'",
                  "SELECT * FROM xpath('/abc/def', '<abc><def "
                  "a=\"b\">Hello</def><def>Bye</def></abc>')",
              })
              .with_example({
                  "To select all 'a' attributes on the path '/abc/def'",
                  "SELECT * FROM xpath('/abc/def/@a', '<abc><def "
                  "a=\"b\">Hello</def><def>Bye</def></abc>')",
              })
              .with_example({
                  "To select the text nodes on the path '/abc/def'",
                  "SELECT * FROM xpath('/abc/def/text()', '<abc><def "
                  "a=\"b\">Hello &#x2605;</def></abc>')",
              });

    // Install the callbacks defined in this file before the module is
    // created.
    auto& module_def = XPATH_MODULE.vm_module;
    module_def.xBestIndex = rcBestIndex;
    module_def.xFilter = rcFilter;

    const int rc = XPATH_MODULE.create(db, "xpath");

    sqlite_function_help.insert(std::make_pair("xpath", &xpath_help));
    xpath_help.index_tags();

    ensure(rc == SQLITE_OK);

    return rc;
}