From 351ad7a8fcd758c253b5a7ffd374a450e04bb7f3 Mon Sep 17 00:00:00 2001 From: Timothy Stack Date: Sat, 6 Feb 2021 22:13:08 -0800 Subject: [PATCH] [xml/xpath] suggest xpath() calls for xml fields --- src/CMakeLists.txt | 3 + src/Makefile.am | 3 + src/field_overlay_source.cc | 17 + src/internals/format-v1.schema.json | 3 +- src/log_data_helper.cc | 199 ++++++++++ src/log_data_helper.hh | 123 +----- src/log_format.cc | 2 + src/log_format.hh | 1 + src/log_format_loader.cc | 1 + src/log_vtab_impl.cc | 2 + src/xml_util.cc | 81 ++++ src/xml_util.hh | 45 +++ src/xpath_vtab.cc | 45 +-- test/Makefile.am | 1 + test/formats/xmlmsg/format.json | 2 +- test/logfile_xml_msg.0 | 4 +- test/test_format_loader.sh | 1 + test/test_tui.sh | 7 + test/xpath_tui.0 | 571 ++++++++++++++++++++++++++++ 19 files changed, 947 insertions(+), 164 deletions(-) create mode 100644 src/log_data_helper.cc create mode 100644 src/xml_util.cc create mode 100644 src/xml_util.hh create mode 100644 test/xpath_tui.0 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 63993e37..6ae6412c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -279,6 +279,7 @@ add_library(diag STATIC lnav_util.cc log_accel.cc log_actions.cc + log_data_helper.cc log_data_table.cc log_format.cc log_format_loader.cc @@ -333,6 +334,7 @@ add_library(diag STATIC vt52_curses.cc vtab_module.cc log_vtab_impl.cc + xml_util.cc xpath_vtab.cc xterm_mouse.cc yajlpp/yajlpp.cc @@ -454,6 +456,7 @@ add_library(diag STATIC vtab_module_json.hh yajlpp/yajlpp.hh yajlpp/yajlpp_def.hh + xml_util.hh xpath_vtab.hh mapbox/recursive_wrapper.hpp diff --git a/src/Makefile.am b/src/Makefile.am index 8551f5b2..c93dfbff 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -378,6 +378,7 @@ noinst_HEADERS = \ vtab_module_json.hh \ log_vtab_impl.hh \ log_format_impls.cc \ + xml_util.hh \ xpath_vtab.hh \ xterm_mouse.hh \ spookyhash/SpookyV2.h \ @@ -434,6 +435,7 @@ libdiag_a_SOURCES = \ lnav_util.cc \ log_accel.cc \ log_actions.cc \ + log_data_helper.cc \ log_data_table.cc \ log_format.cc \ log_format_loader.cc \ @@ -482,6 +484,7 @@ libdiag_a_SOURCES = \ vt52_curses.cc \ vtab_module.cc \ log_vtab_impl.cc \ + xml_util.cc \ xpath_vtab.cc \ xterm_mouse.cc \ spookyhash/SpookyV2.cpp diff --git a/src/field_overlay_source.cc b/src/field_overlay_source.cc index b1241ade..252ad19d 100644 --- a/src/field_overlay_source.cc +++ b/src/field_overlay_source.cc @@ -486,6 +486,23 @@ void field_overlay_source::build_field_lines(const listview_curses &lv) } } + if (!this->fos_log_helper.ldh_xml_pairs.empty()) { + this->fos_lines.emplace_back(" XML fields:"); + } + + for (const auto& xml_pair : this->fos_log_helper.ldh_xml_pairs) { + auto_mem qname; + auto_mem xp_call; + + qname = sql_quote_ident(xml_pair.first.first.get()); + xp_call = sqlite3_mprintf("xpath(%Q, %s)", + xml_pair.first.second.c_str(), + qname.in()); + this->fos_lines.emplace_back(fmt::format( + " {} = {}", xp_call, xml_pair.second)); + this->add_key_line_attrs(0); + } + if (this->fos_log_helper.ldh_parser->dp_pairs.empty()) { this->fos_lines.emplace_back(" No discovered message fields"); } diff --git a/src/internals/format-v1.schema.json b/src/internals/format-v1.schema.json index 31d4afb7..881a4f84 100644 --- a/src/internals/format-v1.schema.json +++ b/src/internals/format-v1.schema.json @@ -170,7 +170,8 @@ "boolean", "json", "struct", - "quoted" + "quoted", + "xml" ] }, "collate": { diff --git a/src/log_data_helper.cc b/src/log_data_helper.cc new file mode 100644 index 00000000..b56909d5 --- /dev/null +++ b/src/log_data_helper.cc @@ -0,0 +1,199 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @file log_data_helper.cc + */ + +#include "config.h" + +#include "log_data_helper.hh" + +void log_data_helper::clear() +{ + this->ldh_file = nullptr; + this->ldh_msg.disown(); + this->ldh_parser.reset(); + this->ldh_scanner.reset(); + this->ldh_namer.reset(); + this->ldh_json_pairs.clear(); + this->ldh_xml_pairs.clear(); + this->ldh_line_attrs.clear(); +} + +bool log_data_helper::parse_line(content_line_t line, bool allow_middle) +{ + logfile::iterator ll; + bool retval = false; + + this->ldh_source_line = this->ldh_line_index = line; + + this->ldh_file = this->ldh_log_source.find(this->ldh_line_index); + ll = this->ldh_file->begin() + this->ldh_line_index; + this->ldh_y_offset = 0; + while (allow_middle && ll->is_continued()) { + --ll; + this->ldh_y_offset += 1; + } + this->ldh_line = ll; + if (!ll->is_message()) { + this->ldh_parser.reset(); + this->ldh_scanner.reset(); + this->ldh_namer.reset(); + this->ldh_json_pairs.clear(); + this->ldh_xml_pairs.clear(); + this->ldh_line_attrs.clear(); + } + else { + auto format = this->ldh_file->get_format(); + struct line_range body; + auto& sa = this->ldh_line_attrs; + + this->ldh_line_attrs.clear(); + this->ldh_line_values.clear(); + this->ldh_file->read_full_message(ll, this->ldh_msg); + format->annotate(this->ldh_line_index, this->ldh_msg, sa, this->ldh_line_values); + + body = find_string_attr_range(sa, &SA_BODY); + if (body.lr_start == -1) { + body.lr_start = this->ldh_msg.length(); + body.lr_end = this->ldh_msg.length(); + } + this->ldh_scanner = std::make_unique( + this->ldh_msg, body.lr_start, body.lr_end); + this->ldh_parser = std::make_unique(this->ldh_scanner.get()); + this->ldh_msg_format.clear(); + this->ldh_parser->dp_msg_format = &this->ldh_msg_format; + this->ldh_parser->parse(); + this->ldh_namer = std::make_unique(); + this->ldh_json_pairs.clear(); + this->ldh_xml_pairs.clear(); + + for (const auto& lv : this->ldh_line_values) { + this->ldh_namer->cn_builtin_names.emplace_back(lv.lv_meta.lvm_name.get()); + } + + for (auto & ldh_line_value : this->ldh_line_values) { + switch (ldh_line_value.lv_meta.lvm_kind) { + case value_kind_t::VALUE_JSON: { + json_ptr_walk jpw; + + if (jpw.parse(ldh_line_value.lv_sbr.get_data(), ldh_line_value.lv_sbr.length()) == yajl_status_ok && + jpw.complete_parse() == yajl_status_ok) { + this->ldh_json_pairs[ldh_line_value.lv_meta.lvm_name] = jpw.jpw_values; + } + break; + } + case value_kind_t::VALUE_XML: { + auto col_name = ldh_line_value.lv_meta.lvm_name; + pugi::xml_document doc; + + auto parse_res = doc.load_buffer( + ldh_line_value.lv_sbr.get_data(), + ldh_line_value.lv_sbr.length()); + + if (parse_res) { + pugi::xpath_query query("//*"); + auto node_set = doc.select_nodes(query); + + for (auto& xpath_node : node_set) { + auto node_path = lnav::pugixml::get_actual_path(xpath_node.node()); + for (auto& attr : xpath_node.node().attributes()) { + auto attr_path = fmt::format( + "{}/@{}", node_path, attr.name()); + + this->ldh_xml_pairs[ + std::make_pair(col_name, attr_path)] = + attr.value(); + } + + if (xpath_node.node().text().empty()) { + continue; + } + + auto text_path = fmt::format( + "{}/text()", node_path); + this->ldh_xml_pairs[std::make_pair(col_name, text_path)] = + trim(xpath_node.node().text().get()); + } + } + break; + } + default: + break; + } + } + + retval = true; + } + + return retval; +} + +int log_data_helper::get_line_bounds(size_t &line_index_out, + size_t &line_end_index_out) const +{ + int retval = 0; + + line_end_index_out = 0; + do { + const char *line_end; + + line_index_out = line_end_index_out; + line_end = (const char *)memchr( + this->ldh_msg.get_data() + line_index_out + 1, + '\n', + this->ldh_msg.length() - line_index_out - 1); + if (line_end != nullptr) { + line_end_index_out = line_end - this->ldh_msg.get_data(); + } else { + line_end_index_out = std::string::npos; + } + retval += 1; + } while (retval <= this->ldh_y_offset); + + if (line_end_index_out == std::string::npos) { + line_end_index_out = this->ldh_msg.length(); + } + + return retval; +} + +std::string +log_data_helper::format_json_getter(const intern_string_t field, int index) +{ + auto_mem qname; + auto_mem jget; + std::string retval; + + qname = sql_quote_ident(field.get()); + jget = sqlite3_mprintf("jget(%s,%Q)", qname.in(), + this->ldh_json_pairs[field][index].wt_ptr.c_str()); + retval = std::string(jget); + + return retval; +} diff --git a/src/log_data_helper.hh b/src/log_data_helper.hh index dcffdf30..df32d0fe 100644 --- a/src/log_data_helper.hh +++ b/src/log_data_helper.hh @@ -44,6 +44,7 @@ #include "yajlpp/json_ptr.hh" #include "base/lnav_log.hh" #include "sql_util.hh" +#include "xml_util.hh" class log_data_helper { @@ -54,117 +55,15 @@ public: }; - void clear() { - this->ldh_file = nullptr; - this->ldh_msg.disown(); - this->ldh_parser.reset(); - this->ldh_scanner.reset(); - this->ldh_namer.reset(); - this->ldh_json_pairs.clear(); - this->ldh_line_attrs.clear(); - }; + void clear(); bool parse_line(vis_line_t line, bool allow_middle = false) { return this->parse_line(this->ldh_log_source.at(line), allow_middle); } - bool parse_line(content_line_t line, bool allow_middle = false) { - logfile::iterator ll; - bool retval = false; - - this->ldh_source_line = this->ldh_line_index = line; - - this->ldh_file = this->ldh_log_source.find(this->ldh_line_index); - ll = this->ldh_file->begin() + this->ldh_line_index; - this->ldh_y_offset = 0; - while (allow_middle && ll->is_continued()) { - --ll; - this->ldh_y_offset += 1; - } - this->ldh_line = ll; - if (!ll->is_message()) { - this->ldh_parser.reset(); - this->ldh_scanner.reset(); - this->ldh_namer.reset(); - this->ldh_json_pairs.clear(); - this->ldh_line_attrs.clear(); - } - else { - auto format = this->ldh_file->get_format(); - struct line_range body; - auto& sa = this->ldh_line_attrs; - - this->ldh_line_attrs.clear(); - this->ldh_line_values.clear(); - this->ldh_file->read_full_message(ll, this->ldh_msg); - format->annotate(this->ldh_line_index, this->ldh_msg, sa, this->ldh_line_values); - - body = find_string_attr_range(sa, &SA_BODY); - if (body.lr_start == -1) { - body.lr_start = this->ldh_msg.length(); - body.lr_end = this->ldh_msg.length(); - } - this->ldh_scanner = std::make_unique( - this->ldh_msg, body.lr_start, body.lr_end); - this->ldh_parser = std::make_unique(this->ldh_scanner.get()); - this->ldh_msg_format.clear(); - this->ldh_parser->dp_msg_format = &this->ldh_msg_format; - this->ldh_parser->parse(); - this->ldh_namer = std::make_unique(); - this->ldh_json_pairs.clear(); + bool parse_line(content_line_t line, bool allow_middle = false); - for (const auto& lv : this->ldh_line_values) { - this->ldh_namer->cn_builtin_names.emplace_back(lv.lv_meta.lvm_name.get()); - } - - for (auto & ldh_line_value : this->ldh_line_values) { - switch (ldh_line_value.lv_meta.lvm_kind) { - case value_kind_t::VALUE_JSON: { - json_ptr_walk jpw; - - if (jpw.parse(ldh_line_value.lv_sbr.get_data(), ldh_line_value.lv_sbr.length()) == yajl_status_ok && - jpw.complete_parse() == yajl_status_ok) { - this->ldh_json_pairs[ldh_line_value.lv_meta.lvm_name] = jpw.jpw_values; - } - break; - } - default: - break; - } - } - - retval = true; - } - - return retval; - }; - - int get_line_bounds(size_t &line_index_out, size_t &line_end_index_out) const { - int retval = 0; - - line_end_index_out = 0; - do { - const char *line_end; - - line_index_out = line_end_index_out; - line_end = (const char *)memchr( - this->ldh_msg.get_data() + line_index_out + 1, - '\n', - this->ldh_msg.length() - line_index_out - 1); - if (line_end != nullptr) { - line_end_index_out = line_end - this->ldh_msg.get_data(); - } else { - line_end_index_out = std::string::npos; - } - retval += 1; - } while (retval <= this->ldh_y_offset); - - if (line_end_index_out == std::string::npos) { - line_end_index_out = this->ldh_msg.length(); - } - - return retval; - }; + int get_line_bounds(size_t &line_index_out, size_t &line_end_index_out) const; int get_value_line(const logline_value &lv) const { return std::count(this->ldh_msg.get_data(), @@ -172,18 +71,7 @@ public: '\n'); }; - std::string format_json_getter(const intern_string_t field, int index) { - auto_mem qname; - auto_mem jget; - std::string retval; - - qname = sql_quote_ident(field.get()); - jget = sqlite3_mprintf("jget(%s,%Q)", qname.in(), - this->ldh_json_pairs[field][index].wt_ptr.c_str()); - retval = std::string(jget); - - return retval; - }; + std::string format_json_getter(const intern_string_t field, int index); logfile_sub_source &ldh_log_source; content_line_t ldh_source_line; @@ -198,6 +86,7 @@ public: string_attrs_t ldh_line_attrs; std::vector ldh_line_values; std::map ldh_json_pairs; + std::map, std::string> ldh_xml_pairs; std::string ldh_msg_format; }; diff --git a/src/log_format.cc b/src/log_format.cc index 8d39381c..362d9f4b 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -111,6 +111,7 @@ logline_value::logline_value(logline_value_meta lvm, shared_buffer_ref &sbr, switch (this->lv_meta.lvm_kind) { case value_kind_t::VALUE_JSON: + case value_kind_t::VALUE_XML: case value_kind_t::VALUE_STRUCT: case value_kind_t::VALUE_TEXT: case value_kind_t::VALUE_QUOTED: @@ -163,6 +164,7 @@ std::string logline_value::to_string() const return "null"; case value_kind_t::VALUE_JSON: + case value_kind_t::VALUE_XML: case value_kind_t::VALUE_STRUCT: case value_kind_t::VALUE_TEXT: case value_kind_t::VALUE_TIMESTAMP: diff --git a/src/log_format.hh b/src/log_format.hh index c1e00811..bb8fb552 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -105,6 +105,7 @@ enum class value_kind_t : int { VALUE_QUOTED, VALUE_W3C_QUOTED, VALUE_TIMESTAMP, + VALUE_XML, VALUE__MAX }; diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index 13ea1cb0..68da8891 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -441,6 +441,7 @@ static const json_path_handler_base::enum_value_t KIND_ENUM[] = { {"json", value_kind_t::VALUE_JSON}, {"struct", value_kind_t::VALUE_STRUCT}, {"quoted", value_kind_t::VALUE_QUOTED}, + {"xml", value_kind_t::VALUE_XML}, json_path_handler_base::ENUM_TERMINATOR }; diff --git a/src/log_vtab_impl.cc b/src/log_vtab_impl.cc index 2ad45da2..154adf7c 100644 --- a/src/log_vtab_impl.cc +++ b/src/log_vtab_impl.cc @@ -143,6 +143,7 @@ pair log_vtab_impl::logline_value_to_sqlite_type(value_kind_t case value_kind_t::VALUE_QUOTED: case value_kind_t::VALUE_W3C_QUOTED: case value_kind_t::VALUE_TIMESTAMP: + case value_kind_t::VALUE_XML: type = SQLITE3_TEXT; break; case value_kind_t::VALUE_FLOAT: @@ -627,6 +628,7 @@ static int vt_column(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int col) } case value_kind_t::VALUE_STRUCT: case value_kind_t::VALUE_TEXT: + case value_kind_t::VALUE_XML: case value_kind_t::VALUE_TIMESTAMP: { sqlite3_result_text(ctx, lv_iter->text_value(), diff --git a/src/xml_util.cc b/src/xml_util.cc new file mode 100644 index 00000000..4e6608dd --- /dev/null +++ b/src/xml_util.cc @@ -0,0 +1,81 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "fmtlib/fmt/format.h" + +#include "xml_util.hh" + +namespace lnav { +namespace pugixml { + +std::string get_actual_path(const pugi::xml_node &node) +{ + std::string retval; + auto curr = node; + + while (curr) { + switch (curr.type()) { + case pugi::node_null: + break; + case pugi::node_pcdata: + retval += "text()"; + break; + default: { + auto name = std::string(curr.name()); + + if (curr.previous_sibling(curr.name()) || + curr.next_sibling(curr.name())) { + auto sibling = curr; + int index = 0; + + while (sibling) { + index += 1; + sibling = sibling.previous_sibling(curr.name()); + } + + name = fmt::format("{}[{}]", name, index); + } + if (retval.empty()) { + retval = name; + } else { + retval = fmt::format("{}/{}", name, retval); + } + break; + } + } + curr = curr.parent(); + } + + return retval; +} + +} +} diff --git a/src/xml_util.hh b/src/xml_util.hh new file mode 100644 index 00000000..355bedbe --- /dev/null +++ b/src/xml_util.hh @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef lnav_xml_util_hh +#define lnav_xml_util_hh + +#include + +#include "pugixml/pugixml.hpp" + +namespace lnav { +namespace pugixml { + +std::string get_actual_path(const pugi::xml_node &node); + +} +} + +#endif diff --git a/src/xpath_vtab.cc b/src/xpath_vtab.cc index c06b7fe5..bf552314 100644 --- a/src/xpath_vtab.cc +++ b/src/xpath_vtab.cc @@ -35,6 +35,7 @@ #include "base/lnav_log.hh" #include "pugixml/pugixml.hpp" #include "sql_util.hh" +#include "xml_util.hh" #include "vtab_module.hh" #include "yajlpp/yajlpp.hh" @@ -85,48 +86,6 @@ void checkin_query(const std::string& query_str, pugi::xpath_query query) QUERY_CACHE[query_str] = std::move(query); } -static -std::string get_actual_path(const pugi::xml_node& node) -{ - std::string retval; - auto curr = node; - - while (curr) { - switch (curr.type()) { - case pugi::node_null: - break; - case pugi::node_pcdata: - retval += "text()"; - break; - default: { - auto name = std::string(curr.name()); - - if (curr.previous_sibling(curr.name()) || - curr.next_sibling(curr.name())) { - auto sibling = curr; - int index = 0; - - while (sibling) { - index += 1; - sibling = sibling.previous_sibling(curr.name()); - } - - name += "[" + std::to_string(index) + "]"; - } - if (retval.empty()) { - retval = name; - } else { - retval = name + std::string("/") + retval; - } - break; - } - } - curr = curr.parent(); - } - - return retval; -} - struct xpath_vtab { static constexpr const char *NAME = "xpath"; static constexpr const char *CREATE_STMT = R"( @@ -218,7 +177,7 @@ CREATE TABLE xpath ( x_node = xpath_node.parent(); } - auto node_path = get_actual_path(x_node); + auto node_path = lnav::pugixml::get_actual_path(x_node); if (x_attr) { node_path += "/@" + std::string(x_attr.name()); } diff --git a/test/Makefile.am b/test/Makefile.am index bb01f758..776619af 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -292,6 +292,7 @@ dist_noinst_DATA = \ vt52_curses_input.1 \ vt52_curses_output.0 \ vt52_curses_output.1 \ + xpath_tui.0 \ formats/collision/format.json \ formats/customlevel/format.json \ formats/jsontest/format.json \ diff --git a/test/formats/xmlmsg/format.json b/test/formats/xmlmsg/format.json index 3640df59..9ee1eabe 100644 --- a/test/formats/xmlmsg/format.json +++ b/test/formats/xmlmsg/format.json @@ -35,7 +35,7 @@ "hidden": true }, "msg_data": { - "kind": "string" + "kind": "xml" } }, "highlights": { diff --git a/test/logfile_xml_msg.0 b/test/logfile_xml_msg.0 index fe1632bc..010dac64 100644 --- a/test/logfile_xml_msg.0 +++ b/test/logfile_xml_msg.0 @@ -9,7 +9,7 @@ x - + x @@ -22,7 +22,7 @@ x - + OK diff --git a/test/test_format_loader.sh b/test/test_format_loader.sh index 9694b118..43f8c185 100644 --- a/test/test_format_loader.sh +++ b/test/test_format_loader.sh @@ -50,6 +50,7 @@ error:format.json:line 18 json struct quoted + xml error:bad_regex_log.regex[std]:missing ) error:bad_regex_log.regex[std]:^(?\d+: (?.*)$ error:bad_regex_log.regex[std]: ^ diff --git a/test/test_tui.sh b/test/test_tui.sh index 2fcc85b4..56386b33 100644 --- a/test/test_tui.sh +++ b/test/test_tui.sh @@ -18,3 +18,10 @@ for fn in ${srcdir}/tui-captures/*; do ;; esac done + +run_test ./scripty -e ${srcdir}/xpath_tui.0 -- \ + ${lnav_test} -I ${test_dir} \ + -c ':goto 2' \ + ${srcdir}/logfile_xml_msg.0 + +on_error_fail_with "xpath() fields are not working?" diff --git a/test/xpath_tui.0 b/test/xpath_tui.0 new file mode 100644 index 00000000..76f18322 --- /dev/null +++ b/test/xpath_tui.0 @@ -0,0 +1,571 @@ +CSI Don't Send Mouse X & Y +CSI Don’t Use Cell Motion Mouse Tracking +CSI Don't ... +CTRL Use alt charset +CTRL save cursor +CSI Use alternate screen buffer +CSI set scrolling region 1-24 +S -1 ┋ ┋ +A └ normal +CSI Reset Replace mode +CSI Application cursor keys +CTRL = +OSC Set window title: LOG +S -1 ┋ ┋ +A └ normal, normal, normal +CSI Erase all +S 1 ┋ Thu Jun 06 1 :: :: LOG ┋ +A └ fg(#c0c0c0), bg(#008080) +S 2 ┋ x┋ +A ···············································································├ normal +A └┛ alt +A ················································································└ normal +S 3 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 4 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 5 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 6 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 7 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 8 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 9 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 10 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 11 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 12 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 13 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 14 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 15 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 16 ┋ x┋ +A └┛ alt +A ················································································└ normal +S 17 ┋ lqqqq No log messages; Log Files: 0; Text Files: 1; Error rate: 0.00/min; Tix┋ +A └----┛ alt │ │ │ │ │ │ │ │ ││ +A ···································└ bold │ │ │ │ │ │ ││ +A ·····································└ normal │ │ │ │ │ │ ││ +A ···················································└ bold │ │ │ ││ +A ·····················································└ normal │ │ │ ││ +A ·······················································└ fg(#800000), bold ││ +A ·································································└ normal ││ +A ···································································└ bold ││ +A ·······································································└ normal││ +A └┛ alt +A ················································································└ normal +S 18 ┋ Files :: Text Filters :: Press q to exit ┋ +A └ fg(#c0c0c0), bg(#000080), bold ││ +A ·└ fg(#008080), bg(#000080), underline ││ +A ··└ normal, fg(#c0c0c0), bg(#000080), bold ││ +A ·······└ normal, fg(#c0c0c0), bg(#000080) ││ +A ········└ fg(#000080), bg(#c0c0c0) ││ +A ·········└ fg(#000000), bg(#c0c0c0), bold ││ +A ··········└ fg(#800080), bg(#c0c0c0), underline ││ +A ···········└ normal, fg(#000000), bg(#c0c0c0), bold ││ +A ·······················└ normal, fg(#000000), bg(#c0c0c0) ││ +A ······································································└ bold +A ·······································································└ normal, fg(#000000), bg(#c0c0c0) +S 19 ┋ ┋ +S 20 ┋→ ` logfile_xml_msg.0 0.0 B — x┋ +A ··├ fg(#008000), bg(#c0c0c0) │ │ │ +A └┛ alt │ │ │ │ +A ···└ fg(#000000), bg(#c0c0c0) │ │ │ +A ························└ fg(#c0c0c0), bg(#c0c0c0) │ +A ······························└ fg(#000000), bg(#c0c0c0), bold │ +A ································└ normal, fg(#000000), bg(#c0c0c0) │ +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 21 ┋ x┋ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 22 ┋ x┋ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 23 ┋ L0 0% ?:View Help ┋ +A └ fg(#000000), bg(#c0c0c0) +S 22 ┋ ┋ +A └ normal, normal +S 19 ┋ SPC: Hide ENTER: Jump To ┋ +A ··└ fg(#000000), bg(#c0c0c0), bold +A ·····└ normal, fg(#000000), bg(#c0c0c0) +A ·············└ bold +A ··················└ normal, fg(#000000), bg(#c0c0c0) +S 20 ┋ 64.0 B 2020-12-10 06:56:41.061 — 2020-12-10 06:56:41. ┋ +A ···························└ backspace │ +A ··························└ fg(#c0c0c0), bg(#c0c0c0) │ +A ······························└ fg(#000000), bg(#c0c0c0), bold │ +A ································└ normal, fg(#000000), bg(#c0c0c0) │ +A ···············································································└ carriage-return +S 22 ┋ ┋ +A └ normal, normal +S 20 ┋ 628 ┋ +A ·························└ fg(#c0c0c0), bg(#c0c0c0) +A ····························└ carriage-return +S 22 ┋ ┋ +A └ normal, normal +OSC Set window title: /Users/stackt/github/lnav/test/logfile_xml_msg.0 +S 1 ┋ logfile_xml_msg.0 ┋ +A ·······························└ fg(#c0c0c0), bg(#008080) +S 1 ┋ xml_msg_log ┋ +A ······································································└ carriage-return +S 2 ┋x x ┋ +A ├ normal, bold +A └┛ alt│ +A ·└ normal +A ······└ carriage-return +S 3 ┋x ┋ +A ├ bold ││ +A └┛ alt ││ +A ·└ normal││ +A ·····└ bold +A ·········└ normal +A ··········└ carriage-return +S 4 ┋x ┋ +A ├ bold ││ ││ ││ +A └┛ alt ││ ││ ││ +A ·└ normal││ ││ ││ +A ····└ bold│ ││ ││ +A ·········└ normal│ +A ··········└ fg(#008080) +A ············└ normal +A ·············└ fg(#008000), bold +A ················└ normal +A ·················└ carriage-return +S 5 ┋x ┋ +A ├ bold│ ││ +A └┛ alt│ ││ +A ·└ normal ││ +A ······└ bold││ +A ············└ normal +A ·············└ carriage-return +S 6 ┋x OK ┋ +A ├ bold ││ │ │ ││ +A └┛ alt ││ │ │ ││ +A ·└ normal │ │ ││ +A ·······└ bold, normal ││ +A ········└ bold│ │ ││ +A ··············└ normal ││ +A ···················└ bold││ +A ·························└ normal +A ··························└ carriage-return +S 7 ┋x ┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal ││ +A ·······└ bold││ +A ·············└ normal +A ··············└ carriage-return +S 8 ┋x ┋ +A ├ bold│ ││ +A └┛ alt│ ││ +A ·└ normal ││ +A ······└ bold +A ··········└ normal +A ···········└ carriage-return +S 9 ┋x x ┋ +A ├ bold ││ +A └┛ alt ││ +A ·└ normal +A ·······└ bold, normal +A ········└ carriage-return +S 10 ┋x ┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal ││ +A ·······└ bold +A ···········└ normal +A ············└ carriage-return +S 11 ┋x x┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal │ ││ +A ·····└ bold ││ +A ··········└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 12 ┋x x┋ +A ├ fg(#000000), bg(#c0c0c0), normal, bold ││ +A └┛ alt │ ││ +A ·└ normal │ ││ +A ····└ bold │ ││ +A ···················└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 13 ┋x x x┋ +A ├ fg(#000000), bg(#c0c0c0), normal, bold ││ +A └┛ alt ││ +A ·└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 14 ┋x x┋ +A ├ fg(#000000), bg(#c0c0c0), normal, bold ││ +A └┛ alt │ ││ +A ·└ normal │ ││ +A ·····└ bold │ ││ +A ····················└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 15 ┋x x┋ +A ├ fg(#000000), bg(#c0c0c0), normal, bold ││ +A └┛ alt │ ││ +A ·└ normal │ ││ +A ···└ bold │ ││ +A ··········└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 16 ┋ x┋ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 17 ┋ Last message: in the future; Files: 1; Error rate: 0.00/min; Time span:x┋ +A ·······└ fg(#000000), bg(#c0c0c0), normal │ │ │ │ │ │ ││ +A ·····················└ bold │ │ │ │ │ │ │ ││ +A ··································└ normal │ │ │ │ │ │ ││ +A ···········································└ bold │ │ │ ││ +A ·············································└ normal │ │ │ ││ +A ···············································└ fg(#800000), bold ││ +A ·························································└ normal ││ +A ···························································└ bold ││ +A ·······························································└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 23 ┋ 22 10 ┋ +A ··└ fg(#000000), bg(#c0c0c0) +A ·············└ carriage-return +S 22 ┋ ┋ +A └ normal, normal +CSI Erase Below +S 1 ┋ Thu Jun 06 1 logfile_xml_msg.0:: xml_msg_log:: LOG ┋ +A └ fg(#000000), bg(#c0c0c0) │││ │││ +A ················································└ fg(#008080), bg(#c0c0c0) +A ·················································└ fg(#c0c0c0), bg(#008080) +A ··················································└ fg(#000000), bg(#008080) +A ······································································└ fg(#000080), bg(#008080) +A ·······································································└ fg(#008080), bg(#000080) +A ········································································└ fg(#c0c0c0), bg(#000080), bold +S 2 ┋ [2020-12-10 06:56:41,092] DEBUG [connect.client:69] Full request text: ┋ +A ·└ normal │ │ +A ··································└ bold │ +A ················································└ normal +S 3 ┋ x┋ +A ··└ bold ││ ││ ││ │ ││ +A ······└ normal││ ││ ││ │ ││ +A ·······└ fg(#008080)││ ││ │ ││ +A ··············└ normal ││ │ ││ +A ···············└ fg(#008000), bold │ ││ +A ····················└ normal ││ │ ││ +A ·····················└ fg(#008080) │ ││ +A ·····························└ normal │ ││ +A ······························└ fg(#008000), bold ││ +A ··········································└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 4 ┋ x┋ +A ·└ fg(#000000), bg(#c0c0c0), normal ││ +A ··└ bold │ ││ +A ···········└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 5 ┋ x┋ +A ···└ fg(#000000), bg(#c0c0c0), normal ││ +A ····└ bold ││ +A ········└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 6 ┋ x x┋ +A ·····└ fg(#000000), bg(#c0c0c0), normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 7 ┋ x┋ +A ···└ fg(#000000), bg(#c0c0c0), normal ││ +A ·····└ bold ││ +A ·········└ normal ││ +A ···············································································└ fg(#000000), bg(#c0c0c0) +A ················································································└ normal +A └┛ alt +A ················································································└ normal +S 8 ┋ ┋ +A ····└ fg(#000000), bg(#c0c0c0), normal, bold +A ···········└ normal +A ············└ fg(#008080) +A ··············└ normal +A ···············└ fg(#008000), bold +A ··················└ normal +S 12 ┋ ┋ +A ······└ bold +A ··········└ normal +S 13 ┋ x ┋ +S 14 ┋ ┋ +A ·······└ bold +A ···········└ normal +S 15 ┋ x┋ +A ·····└ bold │ ││ +A ············└ normal ││ +A └┛ alt +A ················································································└ normal +S 16 ┋x x┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal │ ││ +A ···└ bold │ ││ +A ············└ normal ││ +A └┛ alt +A ················································································└ normal +S 17 ┋x x┋ +A ├ bold ││ +A └┛ alt ││ +A ·└ normal ││ +A └┛ alt +A ················································································└ normal +S 18 ┋x[2020-12-10 06:56:41,099] DEBUG [m:85] Full reply text: x┋ +A ├ bold ││ ││ +A └┛ alt ││ ││ +A ·└ normal ││ ││ +A ··································└ bold ││ +A ···································└ normal ││ +A └┛ alt +A ················································································└ normal +S 19 ┋x x┋ +A ├ bold││ ││ ││ ││ │ ││ +A └┛ alt││ ││ ││ ││ │ ││ +A ·└ normal ││ ││ ││ │ ││ +A ··└ bold ││ ││ ││ │ ││ +A ······└ normal││ ││ ││ │ ││ +A ·······└ fg(#008080)││ ││ │ ││ +A ··············└ normal ││ │ ││ +A ···············└ fg(#008000), bold │ ││ +A ····················└ normal ││ │ ││ +A ·····················└ fg(#008080) │ ││ +A ·····························└ normal │ ││ +A ······························└ fg(#008000), bold ││ +A ··········································└ normal ││ +A └┛ alt +A ················································································└ normal +S 20 ┋x x┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal│ ││ +A ··└ bold │ ││ +A ·········└ normal ││ +A └┛ alt +A ················································································└ normal +S 21 ┋x x┋ +A ├ bold │ ││ +A └┛ alt │ ││ +A ·└ normal ││ +A ····└ bold ││ +A ········└ normal ││ +A └┛ alt +A ················································································└ normal +S 22 ┋ Files :: Text Filters :: Press TAB to edit ┋ +A └ fg(#c0c0c0), bg(#008080) │ │ +A ····································································└ bold +A ·······································································└ normal, fg(#c0c0c0), bg(#008080) +S 23 ┋ 61 ┋ +A ···└ fg(#000000), bg(#c0c0c0) +A ··············└ carriage-return +S 24 ┋ ┋ +A └ normal, normal +K 70 +S 3 ┋ Received Time: 2020-12-10T06:56:41.092 -- in the future ┋ +A ················└ bold │ │ │ +A ·······································└ normal │ +A ···········································└ bold │ +A ························································└ carriage-return +S 4 ┋ Pattern: xml_msg_log/regex/std = ^\[(?\d{4}-\d{2}-\d{2} \d{2}:\d{2} ┋ +A └ normal ││ │ │ │ │││││ │││││ │││││ │││││ ││││ +A ··································└ fg(#008080), bold││││ │││││ │││││ │││││ ││││ +A ···································└ normal │ │││││ │││││ │││││ │││││ ││││ +A ·····································└ fg(#008000), bold│ │││││ │││││ │││││ ││││ +A ········································└ normal │ │││││ │││││ │││││ │││││ ││││ +A ··················································└ fg(#000080), bold │││││ ││││ +A ····················································└ fg(#008000)││││ │││││ ││││ +A ·····················································└ normal││ │││││ │││││ ││││ +A ······················································└ fg(#008000), bold││ ││││ +A ·······················································└ normal │││││ │││││ ││││ +A ························································└ fg(#000080), bold ││││ +A ··························································└ fg(#008000)││││ ││││ +A ···························································└ normal││ │││││ ││││ +A ····························································└ fg(#008000), bold│ +A ·····························································└ normal │││││ ││││ +A ······························································└ fg(#000080), bold +A ································································└ fg(#008000)│││ +A ·································································└ normal││ ││││ +A ··································································└ fg(#008000), bold +A ···································································└ normal ││││ +A ····································································└ fg(#000080), bold +A ······································································└ fg(#008000) +A ·······································································└ normal│ +A ········································································└ fg(#008000), bold +A ·········································································└ normal +A ··········································································└ fg(#000080), bold +A ············································································└ fg(#008000) +A ·············································································└ normal +A ··············································································└ fg(#008000), bold +A ···············································································└ carriage-return +S 5 ┋ Known message fields for table xml_msg_log: ┋ +A └ normal │ ││ +A ································└ bold ││ +A ···········································└ normal +A ············································└ carriage-return +S 6 ┋ t timestamp = 2020-12-10 06:56:41,092 ┋ +A └┛ alt │ │ │ +A ···└ bold │ │ │ +A ············└ normal │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 7 ┋ t level = DEBUG ┋ +A └ normal│ │ │ +A └┛ alt │ │ │ +A ···└ bold │ │ +A ········└ normal │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 8 ┋ t module = connect.client ┋ +A └ normal │ │ │ +A └┛ alt │ │ │ +A ···└ bold│ │ │ +A ·········└ normal │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 9 ┋ t line = 69 ┋ +A └ normal │ │ │ +A └┛ alt │ │ │ +A ···└ bold │ │ │ +A ·············└ normal │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 10 ┋ t body = Full request text: ┋ +A └ normal │ │ +A └┛ alt│ │ │ +A ···└ bold │ │ +A ·······└ normal│ │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 11 ┋ t msg_data = ┋ +A └ normal │ │ │ +A └┛ alt │ │ │ +A ···└ bold │ │ │ +A ···········└ normal │ +A ···············└ bold │ +A ···············································································└ carriage-return +S 12 ┋ XML fields: ┋ +A └ normal │ +A ············└ carriage-return +S 13 ┋ t xpath('/a-request/head/text()', msg_data) = x ┋ +A └┛ alt │ +A ······└ bold │ +A ···············································································└ carriage-return +S 14 ┋ t xpath('/a-request/request/@id', msg_data) = 1 ┋ +A └ normal │ +A └┛ alt │ +A ······└ bold │ +A ···············································································└ carriage-return +S 15 ┋ t xpath('/a-request/request/name/text()', msg_data) = x ┋ +A └ normal │ +A └┛ alt │ +A ······└ bold │ +A ···············································································└ carriage-return +S 16 ┋ t xpath('/a-request/source/text()', msg_data) = x ┋ +A └ normal │ +A └┛ alt │ +A ······└ bold │ +A ···············································································└ carriage-return +S 17 ┋ No discovered message fields ┋ +A └ normal +S 18 ┋ ┋ +A ··└ bold ││ ││ ││ │ +A ······└ normal││ ││ ││ │ +A ·······└ fg(#008080)││ ││ │ +A ··············└ normal ││ │ +A ···············└ fg(#008000), bold │ +A ····················└ normal ││ │ +A ·····················└ fg(#008080) │ +A ·····························└ normal │ +A ······························└ fg(#008000), bold +A ··········································└ normal +S 19 ┋ a-request> ┋ +A ··└ bold │ +A ···········└ normal +S 20 ┋ ┋ +A ····└ bold +A ········└ normal +S 21 ┋ x ┋ +A ·········└ carriage-return +S 24 ┋ ┋ +A └ normal +K 71 +CSI Erase all +CSI Use normal screen buffer +CTRL restore cursor +S 24 ┋ ┋ +A └ carriage-return +CSI Normal cursor keys +CTRL Normal keypad