2020-12-23 23:01:21 +00:00
|
|
|
/**
|
|
|
|
* Copyright (c) 2020, Timothy Stack
|
|
|
|
*
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
|
|
* list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
2022-03-16 22:38:08 +00:00
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
2020-12-23 23:01:21 +00:00
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sstream>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
#include "base/lnav_log.hh"
|
2022-03-16 22:38:08 +00:00
|
|
|
#include "config.h"
|
2020-12-23 23:01:21 +00:00
|
|
|
#include "pugixml/pugixml.hpp"
|
2022-03-13 22:49:41 +00:00
|
|
|
#include "sql_help.hh"
|
2020-12-23 23:01:21 +00:00
|
|
|
#include "sql_util.hh"
|
|
|
|
#include "vtab_module.hh"
|
2022-03-16 22:38:08 +00:00
|
|
|
#include "xml_util.hh"
|
2020-12-23 23:01:21 +00:00
|
|
|
#include "yajlpp/yajlpp.hh"
|
|
|
|
|
|
|
|
// Column indexes of the xpath() virtual table.  The numbering follows the
// order in which the columns are declared in xpath_vtab::CREATE_STMT.
enum {
    XP_COL_RESULT = 0,  // "result"
    XP_COL_NODE_PATH = 1,  // "node_path"
    XP_COL_NODE_ATTR = 2,  // "node_attr"
    XP_COL_NODE_TEXT = 3,  // "node_text"
    XP_COL_XPATH = 4,  // hidden "xpath" argument column
    XP_COL_VALUE = 5,  // hidden "value" argument column
};
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
// Per-thread store of compiled XPath queries, keyed by the text of the
// expression.  Entries are taken out with checkout_query() and put back
// with checkin_query().
static thread_local std::unordered_map<std::string, pugi::xpath_query>
    QUERY_CACHE;
|
2020-12-23 23:01:21 +00:00
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
/**
 * Take a compiled XPath query for the given expression out of the
 * thread-local QUERY_CACHE, compiling a new one if none is cached.
 *
 * After this call the cache holds no entry for the expression; the caller
 * owns the returned object and can hand it back with checkin_query().
 *
 * @param query The XPath expression text, which is also the cache key.
 * @return The compiled query.  When the expression does not compile, the
 * query object that tests false is returned as-is so that the caller can
 * read the failure details from it.
 */
static pugi::xpath_query
checkout_query(const std::string& query)
{
    auto iter = QUERY_CACHE.find(query);
    if (iter == QUERY_CACHE.end()) {
        // Cache miss: hand the freshly compiled query straight to the
        // caller.  Inserting it into the map only to move it out and erase
        // the entry again would leave the cache in exactly the same state.
        return pugi::xpath_query(query.c_str());
    }

    // Cache hit: move the query out and drop the now-empty entry.
    auto retval = std::move(iter->second);

    QUERY_CACHE.erase(iter);

    return retval;
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
static void
|
|
|
|
checkin_query(const std::string& query_str, pugi::xpath_query query)
|
2020-12-23 23:01:21 +00:00
|
|
|
{
|
|
|
|
if (!query) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
QUERY_CACHE[query_str] = std::move(query);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct xpath_vtab {
|
2022-03-16 22:38:08 +00:00
|
|
|
static constexpr const char* NAME = "xpath";
|
|
|
|
static constexpr const char* CREATE_STMT = R"(
|
2020-12-23 23:01:21 +00:00
|
|
|
-- The xpath() table-valued function allows you to execute an xpath expression
|
|
|
|
CREATE TABLE xpath (
|
|
|
|
result text, -- The result of the xpath expression
|
|
|
|
node_path text, -- The absolute path to the node selected by the expression
|
|
|
|
node_attr text, -- The node attributes stored in a JSON object
|
|
|
|
node_text text, -- The text portion of the node selected by the expression
|
|
|
|
|
|
|
|
xpath text HIDDEN,
|
|
|
|
value text HIDDEN
|
|
|
|
);
|
|
|
|
)";
|
|
|
|
|
|
|
|
struct cursor {
|
|
|
|
sqlite3_vtab_cursor base;
|
|
|
|
sqlite3_int64 c_rowid{0};
|
2022-03-31 15:59:19 +00:00
|
|
|
std::string c_xpath;
|
|
|
|
std::string c_value;
|
2021-03-06 00:17:28 +00:00
|
|
|
bool c_value_as_blob{false};
|
2020-12-23 23:01:21 +00:00
|
|
|
pugi::xpath_query c_query;
|
|
|
|
pugi::xml_document c_doc;
|
|
|
|
pugi::xpath_node_set c_results;
|
|
|
|
|
2022-07-07 04:22:14 +00:00
|
|
|
cursor(sqlite3_vtab* vt) : base({vt}) {}
|
2020-12-23 23:01:21 +00:00
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
~cursor()
|
|
|
|
{
|
2020-12-23 23:01:21 +00:00
|
|
|
this->reset();
|
|
|
|
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int reset()
|
|
|
|
{
|
2020-12-23 23:01:21 +00:00
|
|
|
this->c_rowid = 0;
|
|
|
|
checkin_query(this->c_xpath, std::move(this->c_query));
|
|
|
|
|
|
|
|
return SQLITE_OK;
|
2022-07-07 04:22:14 +00:00
|
|
|
}
|
2020-12-23 23:01:21 +00:00
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int next()
|
|
|
|
{
|
2020-12-23 23:01:21 +00:00
|
|
|
this->c_rowid += 1;
|
|
|
|
|
|
|
|
return SQLITE_OK;
|
2022-07-07 04:22:14 +00:00
|
|
|
}
|
2020-12-23 23:01:21 +00:00
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int eof()
|
|
|
|
{
|
2022-07-07 04:22:14 +00:00
|
|
|
return this->c_rowid >= (int64_t) this->c_results.size(); }
|
2020-12-23 23:01:21 +00:00
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int get_rowid(sqlite3_int64& rowid_out)
|
|
|
|
{
|
2020-12-23 23:01:21 +00:00
|
|
|
rowid_out = this->c_rowid;
|
|
|
|
|
|
|
|
return SQLITE_OK;
|
2022-07-07 04:22:14 +00:00
|
|
|
}
|
2020-12-23 23:01:21 +00:00
|
|
|
};
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int get_column(const cursor& vc, sqlite3_context* ctx, int col)
|
|
|
|
{
|
2020-12-23 23:01:21 +00:00
|
|
|
switch (col) {
|
|
|
|
case XP_COL_RESULT: {
|
|
|
|
auto& xpath_node = vc.c_results[vc.c_rowid];
|
|
|
|
|
|
|
|
if (xpath_node.node()) {
|
2022-03-31 15:59:19 +00:00
|
|
|
std::ostringstream oss;
|
2020-12-23 23:01:21 +00:00
|
|
|
|
|
|
|
// XXX avoid the extra allocs
|
|
|
|
xpath_node.node().print(oss);
|
|
|
|
auto node_xml = oss.str();
|
|
|
|
sqlite3_result_text(ctx,
|
|
|
|
node_xml.c_str(),
|
|
|
|
node_xml.length(),
|
|
|
|
SQLITE_TRANSIENT);
|
|
|
|
} else if (xpath_node.attribute()) {
|
|
|
|
sqlite3_result_text(ctx,
|
|
|
|
xpath_node.attribute().value(),
|
|
|
|
-1,
|
|
|
|
SQLITE_TRANSIENT);
|
|
|
|
} else {
|
|
|
|
sqlite3_result_null(ctx);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XP_COL_NODE_PATH: {
|
|
|
|
auto& xpath_node = vc.c_results[vc.c_rowid];
|
|
|
|
auto x_node = xpath_node.node();
|
|
|
|
auto x_attr = xpath_node.attribute();
|
|
|
|
|
|
|
|
if (x_node || x_attr) {
|
|
|
|
if (!x_node) {
|
|
|
|
x_node = xpath_node.parent();
|
|
|
|
}
|
|
|
|
|
2021-02-07 06:13:08 +00:00
|
|
|
auto node_path = lnav::pugixml::get_actual_path(x_node);
|
2020-12-23 23:01:21 +00:00
|
|
|
if (x_attr) {
|
|
|
|
node_path += "/@" + std::string(x_attr.name());
|
|
|
|
}
|
|
|
|
sqlite3_result_text(ctx,
|
|
|
|
node_path.c_str(),
|
|
|
|
node_path.length(),
|
|
|
|
SQLITE_TRANSIENT);
|
|
|
|
} else {
|
|
|
|
sqlite3_result_null(ctx);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XP_COL_NODE_ATTR: {
|
|
|
|
auto& xpath_node = vc.c_results[vc.c_rowid];
|
|
|
|
auto x_node = xpath_node.node();
|
|
|
|
auto x_attr = xpath_node.attribute();
|
|
|
|
|
|
|
|
if (x_node || x_attr) {
|
|
|
|
if (!x_node) {
|
|
|
|
x_node = xpath_node.parent();
|
|
|
|
}
|
|
|
|
|
|
|
|
yajlpp_gen gen;
|
|
|
|
|
|
|
|
yajl_gen_config(gen, yajl_gen_beautify, false);
|
|
|
|
|
|
|
|
{
|
|
|
|
yajlpp_map attrs(gen);
|
|
|
|
|
|
|
|
for (const auto& attr : x_node.attributes()) {
|
|
|
|
attrs.gen(attr.name());
|
|
|
|
attrs.gen(attr.value());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto sf = gen.to_string_fragment();
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
sqlite3_result_text(
|
|
|
|
ctx, sf.data(), sf.length(), SQLITE_TRANSIENT);
|
2020-12-23 23:01:21 +00:00
|
|
|
sqlite3_result_subtype(ctx, 'J');
|
|
|
|
} else {
|
|
|
|
sqlite3_result_null(ctx);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XP_COL_NODE_TEXT: {
|
|
|
|
auto& xpath_node = vc.c_results[vc.c_rowid];
|
|
|
|
auto x_node = xpath_node.node();
|
|
|
|
auto x_attr = xpath_node.attribute();
|
|
|
|
|
|
|
|
if (x_node || x_attr) {
|
|
|
|
if (!x_node) {
|
|
|
|
x_node = xpath_node.parent();
|
|
|
|
}
|
|
|
|
|
|
|
|
auto node_text = x_node.text();
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
sqlite3_result_text(
|
|
|
|
ctx, node_text.get(), -1, SQLITE_TRANSIENT);
|
2020-12-23 23:01:21 +00:00
|
|
|
} else {
|
|
|
|
sqlite3_result_null(ctx);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XP_COL_XPATH:
|
|
|
|
sqlite3_result_text(ctx,
|
|
|
|
vc.c_xpath.c_str(),
|
|
|
|
vc.c_xpath.length(),
|
|
|
|
SQLITE_STATIC);
|
|
|
|
break;
|
|
|
|
case XP_COL_VALUE:
|
2021-03-06 00:17:28 +00:00
|
|
|
if (vc.c_value_as_blob) {
|
|
|
|
sqlite3_result_blob64(ctx,
|
|
|
|
vc.c_value.c_str(),
|
|
|
|
vc.c_value.length(),
|
|
|
|
SQLITE_STATIC);
|
|
|
|
} else {
|
|
|
|
sqlite3_result_text(ctx,
|
|
|
|
vc.c_value.c_str(),
|
|
|
|
vc.c_value.length(),
|
|
|
|
SQLITE_STATIC);
|
|
|
|
}
|
2020-12-23 23:01:21 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return SQLITE_OK;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
static int
|
|
|
|
rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
|
2020-12-23 23:01:21 +00:00
|
|
|
{
|
|
|
|
vtab_index_constraints vic(pIdxInfo);
|
|
|
|
vtab_index_usage viu(pIdxInfo);
|
|
|
|
|
|
|
|
for (auto iter = vic.begin(); iter != vic.end(); ++iter) {
|
|
|
|
if (iter->op != SQLITE_INDEX_CONSTRAINT_EQ) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (iter->iColumn) {
|
|
|
|
case XP_COL_VALUE:
|
|
|
|
case XP_COL_XPATH:
|
|
|
|
viu.column_used(iter);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
viu.allocate_args(2);
|
|
|
|
return SQLITE_OK;
|
|
|
|
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
static int
|
|
|
|
rcFilter(sqlite3_vtab_cursor* pVtabCursor,
|
|
|
|
int idxNum,
|
|
|
|
const char* idxStr,
|
|
|
|
int argc,
|
|
|
|
sqlite3_value** argv)
|
2020-12-23 23:01:21 +00:00
|
|
|
{
|
2022-03-16 22:38:08 +00:00
|
|
|
auto* pCur = (xpath_vtab::cursor*) pVtabCursor;
|
2020-12-23 23:01:21 +00:00
|
|
|
|
|
|
|
if (argc != 2) {
|
|
|
|
pCur->c_xpath.clear();
|
|
|
|
pCur->c_value.clear();
|
|
|
|
return SQLITE_OK;
|
|
|
|
}
|
|
|
|
|
2021-03-06 00:17:28 +00:00
|
|
|
pCur->c_value_as_blob = (sqlite3_value_type(argv[1]) == SQLITE_BLOB);
|
|
|
|
auto byte_count = sqlite3_value_bytes(argv[1]);
|
2021-06-21 09:44:48 +00:00
|
|
|
|
|
|
|
if (byte_count == 0) {
|
|
|
|
pCur->c_rowid = 0;
|
|
|
|
return SQLITE_OK;
|
|
|
|
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
auto blob = (const char*) sqlite3_value_blob(argv[1]);
|
2021-03-06 00:17:28 +00:00
|
|
|
pCur->c_value.assign(blob, byte_count);
|
2020-12-23 23:01:21 +00:00
|
|
|
auto parse_res = pCur->c_doc.load_string(pCur->c_value.c_str());
|
|
|
|
if (!parse_res) {
|
2022-03-16 22:38:08 +00:00
|
|
|
pVtabCursor->pVtab->zErrMsg
|
|
|
|
= sqlite3_mprintf("Invalid XML document at offset %d: %s",
|
|
|
|
parse_res.offset,
|
|
|
|
parse_res.description());
|
2020-12-23 23:01:21 +00:00
|
|
|
return SQLITE_ERROR;
|
|
|
|
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
pCur->c_xpath = (const char*) sqlite3_value_text(argv[0]);
|
2020-12-23 23:01:21 +00:00
|
|
|
pCur->c_query = checkout_query(pCur->c_xpath);
|
|
|
|
if (!pCur->c_query) {
|
|
|
|
auto& res = pCur->c_query.result();
|
2022-03-16 22:38:08 +00:00
|
|
|
pVtabCursor->pVtab->zErrMsg
|
|
|
|
= sqlite3_mprintf("Invalid XPATH expression at offset %d: %s",
|
|
|
|
res.offset,
|
|
|
|
res.description());
|
2020-12-23 23:01:21 +00:00
|
|
|
return SQLITE_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
pCur->c_rowid = 0;
|
|
|
|
pCur->c_results = pCur->c_doc.select_nodes(pCur->c_query);
|
|
|
|
|
|
|
|
return SQLITE_OK;
|
|
|
|
}
|
|
|
|
|
2022-03-16 22:38:08 +00:00
|
|
|
int
|
|
|
|
register_xpath_vtab(sqlite3* db)
|
2020-12-23 23:01:21 +00:00
|
|
|
{
|
|
|
|
static vtab_module<tvt_no_update<xpath_vtab>> XPATH_MODULE;
|
2022-03-16 22:38:08 +00:00
|
|
|
static help_text xpath_help
|
|
|
|
= help_text("xpath",
|
|
|
|
"A table-valued function that executes an xpath expression "
|
|
|
|
"over an XML "
|
|
|
|
"string and returns the selected values.")
|
|
|
|
.sql_table_valued_function()
|
2022-03-31 15:59:19 +00:00
|
|
|
.with_parameter({
|
|
|
|
"xpath",
|
|
|
|
"The XPATH expression to evaluate over the XML document.",
|
|
|
|
})
|
2022-03-16 22:38:08 +00:00
|
|
|
.with_parameter({"xmldoc", "The XML document as a string."})
|
|
|
|
.with_result({"result", "The result of the XPATH expression."})
|
2022-03-31 15:59:19 +00:00
|
|
|
.with_result({
|
|
|
|
"node_path",
|
|
|
|
"The absolute path to the node containing the result.",
|
|
|
|
})
|
2022-03-16 22:38:08 +00:00
|
|
|
.with_result(
|
|
|
|
{"node_attr", "The node's attributes stored in JSON object."})
|
|
|
|
.with_result({"node_text", "The node's text value."})
|
|
|
|
.with_tags({"string", "xml"})
|
2022-03-31 15:59:19 +00:00
|
|
|
.with_example({
|
|
|
|
"To select the XML nodes on the path '/abc/def'",
|
|
|
|
"SELECT * FROM xpath('/abc/def', '<abc><def "
|
|
|
|
"a=\"b\">Hello</def><def>Bye</def></abc>')",
|
|
|
|
})
|
|
|
|
.with_example({
|
|
|
|
"To select all 'a' attributes on the path '/abc/def'",
|
|
|
|
"SELECT * FROM xpath('/abc/def/@a', '<abc><def "
|
|
|
|
"a=\"b\">Hello</def><def>Bye</def></abc>')",
|
|
|
|
})
|
|
|
|
.with_example({
|
|
|
|
"To select the text nodes on the path '/abc/def'",
|
|
|
|
"SELECT * FROM xpath('/abc/def/text()', '<abc><def "
|
|
|
|
"a=\"b\">Hello ★</def></abc>')",
|
|
|
|
});
|
2020-12-23 23:01:21 +00:00
|
|
|
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
XPATH_MODULE.vm_module.xBestIndex = rcBestIndex;
|
|
|
|
XPATH_MODULE.vm_module.xFilter = rcFilter;
|
|
|
|
|
|
|
|
rc = XPATH_MODULE.create(db, "xpath");
|
2022-03-31 15:59:19 +00:00
|
|
|
sqlite_function_help.insert(std::make_pair("xpath", &xpath_help));
|
2020-12-23 23:01:21 +00:00
|
|
|
xpath_help.index_tags();
|
|
|
|
|
|
|
|
ensure(rc == SQLITE_OK);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|