diff --git a/NEWS.md b/NEWS.md index 219b039e..be5e1610 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,15 @@ Features: internally, they will not interfere with timestamps that are in the commands output. * Added a `:cd` command to change **lnav**'s current directory. +* Added support for automatically converting files that are + in a format not natively supported by **lnav**. The + `/tuning/file-format` section allows you to define a format + that is detected by examining the file header. The format + must also provide a converter script that can be run to + convert the file. You can then define a log format that + matches the MIME type defined by the file format and can + consume the converted form of the file. The built-in PCAP + support in **lnav** is implemented using this mechanism. Bug Fixes: * When piping data into **lnav**'s stdin, the input used to diff --git a/docs/schemas/config-v1.schema.json b/docs/schemas/config-v1.schema.json index 1019772d..0c7001b8 100644 --- a/docs/schemas/config-v1.schema.json +++ b/docs/schemas/config-v1.schema.json @@ -73,6 +73,59 @@ }, "additionalProperties": false }, + "file-format": { + "description": "Settings related to file formats", + "title": "/tuning/file-format", + "type": "object", + "patternProperties": { + "(\\w+~1[\\w\\.\\-]+)": { + "description": "File format definitions, keyed by their MIME type", + "title": "/tuning/file-format/", + "type": "object", + "properties": { + "title": { + "title": "/tuning/file-format//title", + "description": "The display name for this file format", + "type": "string" + }, + "header": { + "description": "File header detection definitions", + "title": "/tuning/file-format//header", + "type": "object", + "properties": { + "expr": { + "description": "The expressions used to check if a file header matches this file format", + "title": "/tuning/file-format//header/expr", + "type": "object", + "patternProperties": { + "(\\w+)": { + "title": "/tuning/file-format//header/expr/", + "description": "SQLite 
expression", + "type": "string" + } + }, + "additionalProperties": false + }, + "size": { + "title": "/tuning/file-format//header/size", + "description": "The minimum size required for this header type", + "type": "integer" + } + }, + "additionalProperties": false + }, + "converter": { + "title": "/tuning/file-format//converter", + "description": "The script used to convert the file", + "type": "string", + "pattern": "[\\w\\.\\-]+" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, "logfile": { "description": "Settings related to log files", "title": "/tuning/logfile", diff --git a/docs/schemas/format-v1.schema.json b/docs/schemas/format-v1.schema.json index fe781f95..ab449515 100644 --- a/docs/schemas/format-v1.schema.json +++ b/docs/schemas/format-v1.schema.json @@ -81,9 +81,7 @@ "type": "array", "items": { "type": "string", - "enum": [ - "application/vnd.tcpdump.pcap" - ] + "pattern": "^\\w+/[\\w\\.\\-]+" } }, "level-field": { diff --git a/docs/source/config.rst b/docs/source/config.rst index 771c5f36..8c0a0027 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -260,6 +260,10 @@ command. .. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/clipboard +.. _tuning_file_format: + +.. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/file-format + .. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/piper .. jsonschema:: ../schemas/config-v1.schema.json#/definitions/clip-commands diff --git a/docs/source/formats.rst b/docs/source/formats.rst index b30713df..c069c91c 100644 --- a/docs/source/formats.rst +++ b/docs/source/formats.rst @@ -133,6 +133,12 @@ object with the following fields: :json: True if each log line is JSON-encoded. +:mime-types: An array of MIME types that this log format should only be + used with. These MIME types refer to file formats that are defined + using the `Automatic File Conversion`_ feature. 
This property should + not be defined for log files that do not require conversion and can be + naturally parsed using regexes or are JSON-lines. + :line-format: An array that specifies the text format for JSON-encoded log messages. Log files that are JSON-encoded will have each message converted from the raw JSON encoding into this format. Each element @@ -559,3 +565,29 @@ will win. .. [#] The maximum number of lines to check can be configured. See the :ref:`tuning` section for more details. + +Automatic File Conversion +------------------------- + +File formats that are not naturally understood by **lnav** can be +automatically detected and converted to a usable form using the +:ref:`tuning_file_format` configuration options. For example, +PCAP files can be detected and converted to a JSON-lines form +using :code:`tshark`. The conversion process works as follows: + +#. The first 1024 bytes of the file are read, if available. +#. This header is converted into a hex string. +#. For each file-format, every "header expression" is evaluated + to see if there is a match. The header expressions are + SQLite expressions where the following variables are defined: + + :\:header: A string containing the header as a hex string. + :\:filepath: The path to the file. +#. If a match is found, the converter script defined in the + file format will be invoked and passed the format MIME type + and path to the file as arguments. The script should write + the converted form of the input file on its standard output. + Any errors should be written to the standard error. +#. The MIME type will be associated with the original file and + only log formats that have the corresponding type will be + used to interpret the file. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 79c6e06a..83aafb5a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -351,6 +351,7 @@ add_library( extension-functions.cc field_overlay_source.cc file_collection.cc + file_converter_manager.cc file_format.cc file_vtab.cc files_sub_source.cc @@ -391,7 +392,7 @@ add_library( data_scanner.cc data_scanner_re.cc data_parser.cc - pcap_manager.cc + file_converter_manager.cc plain_text_source.cc pretty_printer.cc pugixml/pugixml.cpp @@ -462,7 +463,9 @@ add_library( elem_to_json.hh field_overlay_source.hh file_collection.hh + file_converter_manager.hh file_format.hh + file_format.cfg.hh files_sub_source.hh filter_observer.hh filter_status_source.hh @@ -502,7 +505,7 @@ add_library( md2attr_line.hh md4cpp.hh optional.hpp - pcap_manager.hh + file_converter_manager.hh piper.looper.hh piper.looper.cfg.hh plain_text_source.hh diff --git a/src/Makefile.am b/src/Makefile.am index a6750404..a97ac930 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -201,7 +201,9 @@ noinst_HEADERS = \ environ_vtab.hh \ field_overlay_source.hh \ file_collection.hh \ + file_converter_manager.hh \ file_format.hh \ + file_format.cfg.hh \ file_vtab.cfg.hh \ files_sub_source.hh \ filter_observer.hh \ @@ -258,7 +260,6 @@ noinst_HEADERS = \ md2attr_line.hh \ md4cpp.hh \ optional.hpp \ - pcap_manager.hh \ piper.looper.hh \ piper.looper.cfg.hh \ plain_text_source.hh \ @@ -391,6 +392,7 @@ libdiag_a_SOURCES = \ extension-functions.cc \ field_overlay_source.cc \ file_collection.cc \ + file_converter_manager.cc \ file_format.cc \ files_sub_source.cc \ filter_observer.cc \ @@ -428,7 +430,6 @@ libdiag_a_SOURCES = \ md4cpp.cc \ network-extension-functions.cc \ data_parser.cc \ - pcap_manager.cc \ piper.looper.cc \ plain_text_source.cc \ pollable.cc \ diff --git a/src/column_namer.cc b/src/column_namer.cc index ba2d8729..8c263c26 100644 --- a/src/column_namer.cc +++ b/src/column_namer.cc @@ -50,7 +50,8 @@ column_namer::existing_name(const 
string_fragment& in_name) const auto upped = toupper(in_name.to_string()); if (std::binary_search( - std::begin(sql_keywords), std::end(sql_keywords), upped)) { + std::begin(sql_keywords), std::end(sql_keywords), upped)) + { return true; } break; @@ -99,10 +100,13 @@ column_namer::add_column(const string_fragment& in_name) this->cn_name_counters[counter_name] = num; } - log_debug( - "column name already exists: %.*s", retval.length(), retval.data()); fmt::format_to( std::back_inserter(buf), FMT_STRING("{}_{}"), base_name, num); + log_debug("column name already exists (%.*s), trying (%.*s)", + retval.length(), + retval.data(), + buf.size(), + buf.data()); retval = string_fragment::from_memory_buffer(buf); num += 1; } diff --git a/src/file_collection.cc b/src/file_collection.cc index a54b0b22..34745457 100644 --- a/src/file_collection.cc +++ b/src/file_collection.cc @@ -41,9 +41,9 @@ #include "base/opt_util.hh" #include "base/string_util.hh" #include "config.h" +#include "file_converter_manager.hh" #include "lnav_util.hh" #include "logfile.hh" -#include "pcap_manager.hh" #include "service_tags.hh" #include "tailer/tailer.looper.hh" @@ -134,7 +134,6 @@ file_collection::regenerate_unique_file_names() switch (pair.second.ofd_format) { case file_format_t::UNKNOWN: case file_format_t::ARCHIVE: - case file_format_t::PCAP: case file_format_t::SQLITE_DB: { auto bn = ghc::filesystem::path(pair.first).filename().string(); if (bn.length() > this->fc_largest_path_length) { @@ -336,63 +335,6 @@ file_collection::watch_logfile(const std::string& filename, retval.fc_other_files[filename].ofd_format = ff; break; - case file_format_t::PCAP: { - auto res = pcap_manager::convert(filename); - - if (res.isOk()) { - auto convert_res = res.unwrap(); - retval.fc_child_pollers.emplace_back(child_poller{ - std::move(convert_res.cr_child), - [filename, - st, - error_queue = convert_res.cr_error_queue]( - auto& fc, auto& child) { - if (child.was_normal_exit() - && child.exit_status() == 
EXIT_SUCCESS) - { - log_info("pcap[%d] exited normally", - child.in()); - return; - } - log_error("pcap[%d] exited with %d", - child.in(), - child.status()); - fc.fc_name_to_errors.emplace( - filename, - file_error_info{ - st.st_mtime, - fmt::format( - FMT_STRING("{}"), - fmt::join(*error_queue, "\n")), - }); - }, - }); - loo.with_stat_for_temp(st); - auto open_res - = logfile::open(convert_res.cr_destination, loo); - if (open_res.isOk()) { - retval.fc_files.push_back(open_res.unwrap()); - } else { - log_error("failed to open: %s -- %s", - filename.c_str(), - open_res.unwrapErr().c_str()); - retval.fc_name_to_errors.emplace( - filename, - file_error_info{ - st.st_mtime, - open_res.unwrapErr(), - }); - } - } else { - retval.fc_name_to_errors.emplace(filename, - file_error_info{ - st.st_mtime, - res.unwrapErr(), - }); - } - break; - } - case file_format_t::ARCHIVE: { nonstd::optional< std::list::iterator> @@ -462,10 +404,64 @@ file_collection::watch_logfile(const std::string& filename, break; } - default: + default: { + auto filename_to_open = filename; + + if (!loo.loo_temp_file) { + auto eff = detect_mime_type(filename); + + if (eff) { + auto cr = file_converter_manager::convert( + eff.value(), filename); + + if (cr.isErr()) { + retval.fc_name_to_errors.emplace( + filename, + file_error_info{ + st.st_mtime, + cr.unwrapErr(), + }); + break; + } + + auto convert_res = cr.unwrap(); + retval.fc_child_pollers.emplace_back(child_poller{ + std::move(convert_res.cr_child), + [filename, + st, + error_queue = convert_res.cr_error_queue]( + auto& fc, auto& child) { + if (child.was_normal_exit() + && child.exit_status() == EXIT_SUCCESS) + { + log_info( + "converter[%d] exited normally", + child.in()); + return; + } + log_error("converter[%d] exited with %d", + child.in(), + child.status()); + fc.fc_name_to_errors.emplace( + filename, + file_error_info{ + st.st_mtime, + fmt::format( + FMT_STRING("{}"), + fmt::join(*error_queue, "\n")), + }); + }, + }); + 
loo.with_filename(filename); + loo.with_stat_for_temp(st); + loo.loo_mime_type = eff->eff_mime_type; + filename_to_open = convert_res.cr_destination; + } + } + log_info("loading new file: filename=%s", filename.c_str()); - auto open_res = logfile::open(filename, loo); + auto open_res = logfile::open(filename_to_open, loo); if (open_res.isOk()) { retval.fc_files.push_back(open_res.unwrap()); } else { @@ -477,6 +473,7 @@ file_collection::watch_logfile(const std::string& filename, }); } break; + } } return retval; diff --git a/src/pcap_manager.cc b/src/file_converter_manager.cc similarity index 78% rename from src/pcap_manager.cc rename to src/file_converter_manager.cc index d6ea2aea..d34de0ce 100644 --- a/src/pcap_manager.cc +++ b/src/file_converter_manager.cc @@ -25,15 +25,13 @@ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * @file pcap_manager.cc */ #include #include #include -#include "pcap_manager.hh" +#include "file_converter_manager.hh" #include @@ -42,16 +40,16 @@ #include "config.h" #include "line_buffer.hh" -namespace pcap_manager { +namespace file_converter_manager { Result -convert(const std::string& filename) +convert(const external_file_format& eff, const std::string& filename) { - log_info("attempting to convert pcap file -- %s", filename.c_str()); + log_info("attempting to convert file -- %s", filename.c_str()); ghc::filesystem::create_directories(lnav::paths::workdir()); auto outfile = TRY(lnav::filesystem::open_temp_file(lnav::paths::workdir() - / "pcap.XXXXXX")); + / "conversion.XXXXXX")); auto err_pipe = TRY(auto_pipe::for_child_fd(STDERR_FILENO)); auto child = TRY(lnav::pid::from_fork()); @@ -62,28 +60,35 @@ convert(const std::string& filename) dup2(dev_null, STDIN_FILENO); dup2(outfile.second.get(), STDOUT_FILENO); outfile.second.reset(); - setenv("TZ", "UTC", 1); + + auto new_path = lnav::filesystem::build_path({ + eff.eff_source_path.parent_path(), + lnav::paths::dotlnav() / "formats/default", + }); + setenv("PATH", new_path.c_str(), 1); + log_info("invoking converter: %s (PATH=%s)", + eff.eff_converter.c_str(), + new_path.c_str()); + auto mime_str = eff.eff_mime_type.to_string(); const char* args[] = { - "tshark", - "-T", - "ek", - "-P", - "-V", - "-t", - "ad", - "-r", + eff.eff_converter.c_str(), + mime_str.c_str(), filename.c_str(), nullptr, }; - execvp("tshark", (char**) args); + setenv("TZ", "UTC", 1); + execvp(eff.eff_converter.c_str(), (char**) args); if (errno == ENOENT) { fprintf(stderr, - "pcap support requires 'tshark' v3+ to be installed\n"); + "cannot find converter: %s\n", + eff.eff_converter.c_str()); } else { - fprintf( - stderr, "failed to execute 'tshark' -- %s\n", strerror(errno)); + fprintf(stderr, + "failed to execute converter: %s -- %s\n", + eff.eff_converter.c_str(), + strerror(errno)); } _exit(EXIT_FAILURE); } @@ -138,4 +143,4 @@ 
convert(const std::string& filename) }); } -} // namespace pcap_manager +} // namespace file_converter_manager diff --git a/src/pcap_manager.hh b/src/file_converter_manager.hh similarity index 85% rename from src/pcap_manager.hh rename to src/file_converter_manager.hh index 2415ab96..dd8c5eee 100644 --- a/src/pcap_manager.hh +++ b/src/file_converter_manager.hh @@ -25,12 +25,10 @@ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * @file pcap_manager.hh */ -#ifndef lnav_pcap_manager_hh -#define lnav_pcap_manager_hh +#ifndef lnav_file_converter_manager_hh +#define lnav_file_converter_manager_hh #include #include @@ -38,9 +36,10 @@ #include "base/auto_fd.hh" #include "base/auto_pid.hh" #include "base/result.h" +#include "file_format.hh" #include "ghc/filesystem.hpp" -namespace pcap_manager { +namespace file_converter_manager { struct convert_result { auto_pid cr_child; @@ -48,8 +47,9 @@ struct convert_result { std::shared_ptr> cr_error_queue; }; -Result convert(const std::string& filename); +Result convert(const external_file_format& eff, + const std::string& filename); -} // namespace pcap_manager +} // namespace file_converter_manager #endif diff --git a/src/file_format.cc b/src/file_format.cc index d82b78cd..3fa72130 100644 --- a/src/file_format.cc +++ b/src/file_format.cc @@ -29,91 +29,274 @@ * @file file_format.hh */ -#include - #include "file_format.hh" +#include + #include "archive_manager.hh" #include "base/auto_fd.hh" #include "base/fs_util.hh" +#include "base/injector.hh" #include "base/intern_string.hh" #include "base/lnav_log.hh" #include "config.h" +#include "lnav_config.hh" +#include "readline_highlighters.hh" +#include "safe/safe.h" +#include "sql_util.hh" +#include "sqlite-extension-func.hh" -static bool -is_pcap_header(uint8_t* buffer) +file_format_t +detect_file_format(const 
ghc::filesystem::path& filename) { - size_t offset = 0; - if (buffer[0] == 0x0a && buffer[1] == 0x0d && buffer[2] == 0x0d - && buffer[3] == 0x0a) - { - offset += sizeof(uint32_t) * 2; - if (buffer[offset + 0] == 0x1a && buffer[offset + 1] == 0x2b - && buffer[offset + 2] == 0x3c && buffer[offset + 3] == 0x4d) - { - return true; - } + if (archive_manager::is_archive(filename)) { + return file_format_t::ARCHIVE; + } + + file_format_t retval = file_format_t::UNKNOWN; + auto_fd fd; + + if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) != -1) { + uint8_t buffer[32]; + ssize_t rc; - if (buffer[offset + 0] == 0x4d && buffer[offset + 1] == 0x3c - && buffer[offset + 2] == 0x2b && buffer[offset + 3] == 0x1a) - { - return true; + if ((rc = read(fd, buffer, sizeof(buffer))) > 0) { + static auto SQLITE3_HEADER = "SQLite format 3"; + auto header_frag = string_fragment(buffer, 0, rc); + + if (header_frag.startswith(SQLITE3_HEADER)) { + retval = file_format_t::SQLITE_DB; + } } - return false; } - if (buffer[0] == 0xa1 && buffer[1] == 0xb2 && buffer[2] == 0xc3 - && buffer[3] == 0xd4) - { - return true; + return retval; +} + +mime_type +mime_type::from_str(const std::string& str) +{ + auto slash_index = str.find('/'); + + if (slash_index == std::string::npos) { + return {"application", str}; } - if (buffer[0] == 0xd4 && buffer[1] == 0xc3 && buffer[2] == 0xb2 - && buffer[3] == 0xa1) + return {str.substr(0, slash_index), str.substr(slash_index + 1)}; +} + +struct compiled_header_expr { + auto_mem che_stmt{sqlite3_finalize}; + bool che_enabled{true}; +}; + +struct file_format_expressions : public lnav_config_listener { + void reload_config(error_reporter& reporter) override { - return true; + log_debug("reloading file-format header expressions"); + + safe::WriteAccess> in(instance); + + if (in->e_db.in() == nullptr) { + if (sqlite3_open(":memory:", in->e_db.out()) != SQLITE_OK) { + log_error("unable to open memory DB"); + return; + } + register_sqlite_funcs(in->e_db.in(), 
sqlite_registration_funcs); + } + + in->e_header_exprs.clear(); + const auto& cfg = injector::get(); + for (const auto& fpair : cfg.c_defs) { + for (const auto& hpair : fpair.second.fd_header.h_exprs.he_exprs) { + auto stmt_str = fmt::format(FMT_STRING("SELECT 1 WHERE {}"), + hpair.second); + compiled_header_expr che; + + log_info("preparing file-format header expression: %s", + stmt_str.c_str()); + auto retcode = sqlite3_prepare_v2(in->e_db.in(), + stmt_str.c_str(), + stmt_str.size(), + che.che_stmt.out(), + nullptr); + if (retcode != SQLITE_OK) { + auto sql_al + = attr_line_t(hpair.second) + .with_attr_for_all(SA_PREFORMATTED.value()) + .with_attr_for_all( + VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + readline_sqlite_highlighter(sql_al, -1); + intern_string_t watch_expr_path + = intern_string::lookup(fmt::format( + FMT_STRING( + "/tuning/file-formats/{}/header/expr/{}"), + json_ptr::encode_str(fpair.first.c_str()), + hpair.first)); + auto snippet = lnav::console::snippet::from( + source_location(watch_expr_path), sql_al); + + auto um = lnav::console::user_message::error( + "SQL expression is invalid") + .with_reason(sqlite3_errmsg(in->e_db.in())) + .with_snippet(snippet); + + reporter(&hpair.second, um); + continue; + } + + in->e_header_exprs[fpair.first][hpair.first] = std::move(che); + } + + if (fpair.second.fd_header.h_exprs.he_exprs.empty()) { + auto um + = lnav::console::user_message::error( + "At least one header expression is required for " + "a file format") + .with_reason( + "Header expressions are used to detect a format"); + reporter(&fpair.second.fd_header.h_exprs, um); + } + if (fpair.second.fd_converter.pp_value.empty()) { + auto um = lnav::console::user_message::error( + "A converter is required for a file format") + .with_reason( + "The converter script transforms the file " + "into a format that can be consumed by lnav"); + reporter(&fpair.second.fd_converter, um); + } + } } - if (buffer[0] == 0xa1 && buffer[1] == 0xb2 && buffer[2] == 0x3c - && 
buffer[3] == 0x4d) + void unload_config() override { - return true; + safe::WriteAccess> in(instance); + + in->e_header_exprs.clear(); } - if (buffer[0] == 0x4d && buffer[1] == 0x3c && buffer[2] == 0xb2 - && buffer[3] == 0xa1) + struct inner { + auto_sqlite3 e_db; + std::map> + e_header_exprs; + }; + + safe::Safe instance; +}; + +static file_format_expressions format_exprs; + +nonstd::optional +detect_mime_type(const ghc::filesystem::path& filename) +{ + uint8_t buffer[1024]; + size_t buffer_size = 0; + { - return true; + auto_fd fd; + + if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) == -1) { + return nonstd::nullopt; + } + + ssize_t rc; + + if ((rc = read(fd, buffer, sizeof(buffer))) == -1) { + return nonstd::nullopt; + } + buffer_size = rc; } - return false; -} + auto hexbuf = auto_buffer::alloc(buffer_size * 2); -file_format_t -detect_file_format(const ghc::filesystem::path& filename) -{ - if (archive_manager::is_archive(filename)) { - return file_format_t::ARCHIVE; + for (int lpc = 0; lpc < buffer_size; lpc++) { + fmt::format_to( + std::back_inserter(hexbuf), FMT_STRING("{:02x}"), buffer[lpc]); } - file_format_t retval = file_format_t::UNKNOWN; - auto_fd fd; + safe::WriteAccess> in( + format_exprs.instance); - if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) != -1) { - uint8_t buffer[32]; - ssize_t rc; + const auto& cfg = injector::get(); + for (const auto& fpair : cfg.c_defs) { + if (buffer_size < fpair.second.fd_header.h_size) { + log_debug("file content too small (%d) for header detection: %s", + buffer_size, + fpair.first.c_str()); + continue; + } + for (const auto& hpair : fpair.second.fd_header.h_exprs.he_exprs) { + auto& he = in->e_header_exprs[fpair.first][hpair.first]; - if ((rc = read(fd, buffer, sizeof(buffer))) > 0) { - static auto SQLITE3_HEADER = "SQLite format 3"; - auto header_frag = string_fragment(buffer, 0, rc); + if (!he.che_enabled) { + continue; + } - if (header_frag.startswith(SQLITE3_HEADER)) { - retval = 
file_format_t::SQLITE_DB; - } else if (rc > 24 && is_pcap_header(buffer)) { - retval = file_format_t::PCAP; + auto* stmt = he.che_stmt.in(); + + if (stmt == nullptr) { + continue; + } + sqlite3_reset(stmt); + auto count = sqlite3_bind_parameter_count(stmt); + for (int lpc = 0; lpc < count; lpc++) { + const auto* name = sqlite3_bind_parameter_name(stmt, lpc + 1); + + if (name[0] == '$') { + const char* env_value; + + if ((env_value = getenv(&name[1])) != nullptr) { + sqlite3_bind_text( + stmt, lpc + 1, env_value, -1, SQLITE_STATIC); + } + continue; + } + if (strcmp(name, ":header") == 0) { + sqlite3_bind_text(stmt, + lpc + 1, + hexbuf.in(), + hexbuf.size(), + SQLITE_STATIC); + continue; + } + if (strcmp(name, ":filepath") == 0) { + sqlite3_bind_text( + stmt, lpc + 1, filename.c_str(), -1, SQLITE_STATIC); + continue; + } } + + auto step_res = sqlite3_step(stmt); + + switch (step_res) { + case SQLITE_OK: + case SQLITE_DONE: + continue; + case SQLITE_ROW: + break; + default: { + log_error( + "failed to execute file-format header expression: " + "%s:%s -- %s", + fpair.first.c_str(), + hpair.first.c_str(), + sqlite3_errmsg(in->e_db)); + he.che_enabled = false; + continue; + } + } + + log_info("detected MIME type for: %s -- %s (header-expr: %s)", + filename.c_str(), + fpair.first.c_str(), + hpair.first.c_str()); + return external_file_format{ + mime_type::from_str(fpair.first), + fpair.second.fd_converter.pp_value, + fpair.second.fd_converter.pp_location.sl_source.to_string(), + }; } } - return retval; + return nonstd::nullopt; } diff --git a/src/file_format.cfg.hh b/src/file_format.cfg.hh new file mode 100644 index 00000000..2f15e28e --- /dev/null +++ b/src/file_format.cfg.hh @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2023, Timothy Stack + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef lnav_file_format_cfg_hh +#define lnav_file_format_cfg_hh + +#include +#include + +#include "yajlpp/yajlpp.hh" + +namespace lnav { +namespace file_formats { + +struct header_exprs { + std::map he_exprs; +}; + +struct header { + header_exprs h_exprs; + size_t h_size{32}; +}; + +struct format_def { + std::string fd_title; + header fd_header; + positioned_property fd_converter; +}; + +struct config { + std::map c_defs; +}; + +} // namespace file_formats +} // namespace lnav + +#endif diff --git a/src/file_format.hh b/src/file_format.hh index a8eb3e47..283a7fa6 100644 --- a/src/file_format.hh +++ b/src/file_format.hh @@ -34,17 +34,47 @@ #include "fmt/format.h" #include "ghc/filesystem.hpp" +#include "optional.hpp" enum class file_format_t : int { UNKNOWN, SQLITE_DB, ARCHIVE, - PCAP, REMOTE, }; +struct mime_type { + static mime_type from_str(const std::string& str); + + std::string mt_type; + std::string mt_subtype; + + // Lexicographic ordering on (type, subtype). This must be a strict + // weak ordering since mime_type is used as a std::set key. + bool operator<(const mime_type& other) const + { + const auto cmp = this->mt_type.compare(other.mt_type); + return cmp < 0 || (cmp == 0 && this->mt_subtype < other.mt_subtype); + } + + std::string to_string() const + { + return fmt::format( + FMT_STRING("{}/{}"), this->mt_type, this->mt_subtype); + } +}; + +struct external_file_format { + mime_type eff_mime_type; + std::string eff_converter; + ghc::filesystem::path eff_source_path; +}; + file_format_t detect_file_format(const ghc::filesystem::path& filename); +nonstd::optional detect_mime_type( + const ghc::filesystem::path& filename); + namespace fmt { template<> struct formatter : formatter { @@ -59,9 +89,6 @@ struct formatter : formatter { case file_format_t::ARCHIVE: name = "\U0001F5C4 Archive"; break; - case file_format_t::PCAP: - name = "\U0001F5A5 Pcap"; - break; case file_format_t::REMOTE: name = "\U0001F5A5 Remote"; break; diff --git a/src/listview_curses.cc b/src/listview_curses.cc index eb731eaa..2ddd378f 100644 --- a/src/listview_curses.cc +++ b/src/listview_curses.cc @@ -348,8 +348,9 @@ listview_curses::do_update() } } - 
if (this->lv_selectable && this->lv_selection >= 0 - && (row > this->lv_tail_space) && (blank_rows < this->lv_tail_space) + if (this->lv_selectable && !this->lv_sync_selection_and_top + && this->lv_selection >= 0 && (row > this->lv_tail_space) + && (blank_rows < this->lv_tail_space) && ((row - this->lv_tail_space) < this->lv_selection)) { this->shift_top(this->lv_selection - row + this->lv_tail_space); diff --git a/src/lnav.cc b/src/lnav.cc index 675dc7b8..9f4fc542 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -2449,6 +2449,7 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' } load_config(lnav_data.ld_config_paths, config_errors); + if (!config_errors.empty()) { if (print_user_msgs(config_errors, mode_flags) != EXIT_SUCCESS) { return EXIT_FAILURE; diff --git a/src/lnav_config.cc b/src/lnav_config.cc index 6be265f2..d72b5ee8 100644 --- a/src/lnav_config.cc +++ b/src/lnav_config.cc @@ -81,6 +81,9 @@ lnav_config_listener* lnav_config_listener::LISTENER_LIST; static auto a = injector::bind::to_instance( +[]() { return &lnav_config.lc_archive_manager; }); +static auto ff = injector::bind::to_instance( + +[]() { return &lnav_config.lc_file_formats; }); + static auto fvc = injector::bind::to_instance( +[]() { return &lnav_config.lc_file_vtab; }); @@ -1040,6 +1043,55 @@ static const struct json_path_container ui_handlers = { .with_children(keymap_defs_handlers), }; +static const struct json_path_container header_expr_handlers = { + yajlpp::pattern_property_handler("(?\\w+)") + .with_description("SQLite expression") + .for_field(&lnav::file_formats::header_exprs::he_exprs), +}; + +static const struct json_path_container header_handlers = { + yajlpp::property_handler("expr") + .with_description("The expressions used to check if a file header " + "matches this file format") + .for_child(&lnav::file_formats::header::h_exprs) + .with_children(header_expr_handlers), + yajlpp::property_handler("size") + .with_description("The minimum size required for 
this header type") + .for_field(&lnav::file_formats::header::h_size), +}; + +static const struct json_path_container format_def_handlers = { + yajlpp::property_handler("title") + .with_description("The display name for this file format") + .for_field(&lnav::file_formats::format_def::fd_title), + yajlpp::property_handler("header") + .with_description("File header detection definitions") + .for_child(&lnav::file_formats::format_def::fd_header) + .with_children(header_handlers), + yajlpp::property_handler("converter") + .with_description("The script used to convert the file") + .with_pattern(R"([\w\.\-]+)") + .for_field(&lnav::file_formats::format_def::fd_converter), +}; + +static const struct json_path_container format_defs_handlers = { + yajlpp::pattern_property_handler(R"((?\w+~1[\w\.\-]+))") + .with_description("File format definitions, keyed by their MIME type") + .with_obj_provider( + [](const yajlpp_provider_context& ypc, _lnav_config* root) { + auto& retval + = root->lc_file_formats.c_defs[ypc.get_substr("mime_type")]; + return &retval; + }) + .with_path_provider<_lnav_config>( + [](struct _lnav_config* cfg, std::vector& paths_out) { + for (const auto& iter : cfg->lc_file_formats.c_defs) { + paths_out.emplace_back(iter.first); + } + }) + .with_children(format_def_handlers), +}; + +static const struct json_path_container archive_handlers = { yajlpp::property_handler("min-free-space") .with_synopsis("") @@ -1267,6 +1319,9 @@ static const struct json_path_container tuning_handlers = { yajlpp::property_handler("file-vtab") .with_description("Settings related to the lnav_file virtual-table") .with_children(file_vtab_handlers), + yajlpp::property_handler("file-format") + .with_description("Settings related to file formats") + .with_children(format_defs_handlers), yajlpp::property_handler("logfile") .with_description("Settings related to log files") .with_children(logfile_handlers), @@ -1654,28 +1709,31 @@ reload_config(std::vector& errors) auto cb = [&cfg_value, &errors, &errmsg]( const 
json_path_handler_base& jph, const std::string& path, - void* mem) { + const void* mem) { if (mem != cfg_value) { return; } auto loc_iter = lnav_config_locations.find(intern_string::lookup(path)); - if (loc_iter == lnav_config_locations.end()) { - return; + auto has_loc = loc_iter != lnav_config_locations.end(); + auto um + = lnav::console::user_message::error( + attr_line_t() + .append(has_loc ? "invalid value for property " + : "missing value for property ") + .append_quoted(lnav::roles::symbol(path))) + .with_reason(errmsg) + .with_help(jph.get_help_text(path)); + + if (has_loc) { + um.with_snippet( + lnav::console::snippet::from(loc_iter->second.sl_source, + "") + .with_line(loc_iter->second.sl_line_number)); } - errors.emplace_back( - lnav::console::user_message::error( - attr_line_t() - .append("invalid value for property ") - .append_quoted(lnav::roles::symbol(path))) - .with_reason(errmsg) - .with_snippet( - lnav::console::snippet::from( - loc_iter->second.sl_source, "") - .with_line(loc_iter->second.sl_line_number)) - .with_help(jph.get_help_text(path))); + errors.emplace_back(um); }; for (const auto& jph : lnav_config_handlers.jpc_children) { diff --git a/src/lnav_config.hh b/src/lnav_config.hh index e38f8483..714bc53e 100644 --- a/src/lnav_config.hh +++ b/src/lnav_config.hh @@ -43,6 +43,7 @@ #include "base/file_range.hh" #include "base/lnav.console.hh" #include "base/result.h" +#include "file_format.cfg.hh" #include "file_vtab.cfg.hh" #include "ghc/filesystem.hpp" #include "lnav_config_fwd.hh" @@ -111,6 +112,7 @@ struct _lnav_config { archive_manager::config lc_archive_manager; lnav::piper::config lc_piper; + lnav::file_formats::config lc_file_formats; file_vtab::config lc_file_vtab; lnav::logfile::config lc_logfile; tailer::config lc_tailer; diff --git a/src/log_format.cc b/src/log_format.cc index dd18f60b..81db82ff 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -2950,13 +2950,15 @@ external_log_format::match_name(const std::string& filename) 
} bool -external_log_format::match_mime_type(const file_format_t ff) const +external_log_format::match_mime_type(const mime_type& mt) const { - if (ff == file_format_t::UNKNOWN && this->elf_mime_types.empty()) { + if (mt.mt_type == "text" && mt.mt_subtype == "plain" + && this->elf_mime_types.empty()) + { return true; } - return this->elf_mime_types.count(ff) == 1; + return this->elf_mime_types.count(mt) == 1; } auto diff --git a/src/log_format.hh b/src/log_format.hh index fdca056c..d379ca7f 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -352,9 +352,9 @@ public: virtual bool match_name(const std::string& filename) { return true; } - virtual bool match_mime_type(const file_format_t ff) const + virtual bool match_mime_type(const mime_type& mt) const { - if (ff == file_format_t::UNKNOWN) { + if (mt.mt_type == "text" && mt.mt_subtype == "plain") { return true; } return false; diff --git a/src/log_format_ext.hh b/src/log_format_ext.hh index f1022b75..e0803e01 100644 --- a/src/log_format_ext.hh +++ b/src/log_format_ext.hh @@ -141,7 +141,7 @@ public: bool match_name(const std::string& filename) override; - bool match_mime_type(const file_format_t ff) const override; + bool match_mime_type(const mime_type& mt) const override; scan_result_t scan(logfile& lf, std::vector& dst, @@ -313,7 +313,7 @@ public: std::vector elf_format_source_order; std::map elf_format_sources; std::list elf_collision; - std::set elf_mime_types; + std::set elf_mime_types; factory_container elf_filename_pcre; std::map> elf_patterns; std::vector> elf_pattern_order; diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index 4e75b0bb..d35c2173 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -282,10 +282,7 @@ read_format_field(yajlpp_parse_context* ypc, elf->elf_module_id_field = intern_string::lookup(value); elf->elf_container = true; } else if (field_name == "mime-types") { - auto value_opt = ypc->ypc_current_handler->to_enum_value(value); - if (value_opt) { 
- elf->elf_mime_types.insert((file_format_t) *value_opt); - } + elf->elf_mime_types.insert(mime_type::from_str(value)); } return 1; @@ -820,15 +817,6 @@ static const struct json_path_container search_table_handlers = { .with_children(search_table_def_handlers), }; -static const json_path_handler_base::enum_value_t MIME_TYPE_ENUM[] = { - { - "application/vnd.tcpdump.pcap", - file_format_t::PCAP, - }, - - json_path_handler_base::ENUM_TERMINATOR, -}; - const struct json_path_container format_handlers = { yajlpp::property_handler("regex") .with_description( @@ -858,8 +846,9 @@ const struct json_path_container format_handlers = { "log files with a matching name") .for_field(&external_log_format::elf_filename_pcre), json_path_handler("mime-types#", read_format_field) - .with_description("A list of mime-types this format should be used for") - .with_enum_values(MIME_TYPE_ENUM), + .with_pattern(R"(^\w/[\w\.]+)") + .with_description( + "A list of mime-types this format should be used for"), json_path_handler("level-field") .with_description( "The name of the level field in the log message pattern") diff --git a/src/logfile.cc b/src/logfile.cc index 33632250..e327bea1 100644 --- a/src/logfile.cc +++ b/src/logfile.cc @@ -259,7 +259,10 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_mismatched_formats.insert(curr->get_name()); continue; } - if (!curr->match_mime_type(this->lf_options.loo_file_format)) { + if (this->lf_options.loo_mime_type + && !curr->match_mime_type( + this->lf_options.loo_mime_type.value())) + { if (li.li_file_range.fr_offset == 0) { log_debug("(%s) does not match file format: %s", curr->get_name().get(), diff --git a/src/logfile_fwd.hh b/src/logfile_fwd.hh index 22368b39..5ffa0071 100644 --- a/src/logfile_fwd.hh +++ b/src/logfile_fwd.hh @@ -66,6 +66,7 @@ struct logfile_open_options_base { ssize_t loo_visible_size_limit{-1}; bool loo_tail{true}; file_format_t loo_file_format{file_format_t::UNKNOWN}; + nonstd::optional loo_mime_type; 
nonstd::optional loo_piper; }; diff --git a/src/readline_possibilities.cc b/src/readline_possibilities.cc index f0b8f175..0525db9d 100644 --- a/src/readline_possibilities.cc +++ b/src/readline_possibilities.cc @@ -420,7 +420,7 @@ add_config_possibilities() std::set visited; auto cb = [rc, &visited](const json_path_handler_base& jph, const std::string& path, - void* mem) { + const void* mem) { if (jph.jph_children) { const auto named_caps = jph.jph_regex->get_named_captures(); diff --git a/src/root-config.json b/src/root-config.json index b8a2ea6c..577e337d 100644 --- a/src/root-config.json +++ b/src/root-config.json @@ -26,6 +26,19 @@ "transfer-command": "cat > {0:} && chmod ugo+rx ./{0:}" } }, + "file-format": { + "application/vnd.tcpdump.pcap": { + "title": "\ud83d\udda5 Pcap", + "header": { + "expr": { + "pcapng": ":header REGEXP '^0a0d0d0a.{8}(?:1a2b3c4d|4d3c2b1a).*'", + "pcap": ":header REGEXP '^(?:a1b2c3d4|d4c3b2a1|a1b23c4d|4d3cb2a1).*'" + }, + "size": 24 + }, + "converter": "pcap-converter.sh" + } + }, "piper": { "max-size": 10485760, "rotations": 4 diff --git a/src/scripts/pcap-converter.sh b/src/scripts/pcap-converter.sh new file mode 100755 index 00000000..5b9642b2 --- /dev/null +++ b/src/scripts/pcap-converter.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Check that tshark is installed and return a nice message. +if ! 
command -v tshark; then + echo "pcap support requires 'tshark' v3+ to be installed" > /dev/stderr + exit 1 +fi + +# Use tshark to convert the pcap file into a JSON-lines log file +exec tshark -T ek -P -V -t ad -r $2 diff --git a/src/scripts/scripts.am b/src/scripts/scripts.am index 50e10fd1..23908402 100644 --- a/src/scripts/scripts.am +++ b/src/scripts/scripts.am @@ -9,4 +9,5 @@ BUILTIN_LNAVSCRIPTS = \ BUILTIN_SHSCRIPTS = \ $(srcdir)/scripts/dump-pid.sh \ + $(srcdir)/scripts/pcap-converter.sh \ $() diff --git a/src/sqlite-extension-func.cc b/src/sqlite-extension-func.cc index 3a02f149..9d9f2078 100644 --- a/src/sqlite-extension-func.cc +++ b/src/sqlite-extension-func.cc @@ -62,6 +62,7 @@ sqlite_registration_func_t sqlite_registration_funcs[] = { int register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) { + static bool help_registration_done = false; int lpc; require(db != nullptr); @@ -94,7 +95,9 @@ register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) nullptr, nullptr); - if (fd.fd_help.ht_context != help_context_t::HC_NONE) { + if (!help_registration_done + && fd.fd_help.ht_context != help_context_t::HC_NONE) + { help_text& ht = fd.fd_help; sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); @@ -115,7 +118,9 @@ register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) agg_funcs[i].xStep, agg_funcs[i].xFinalize); - if (fda.fda_help.ht_context != help_context_t::HC_NONE) { + if (!help_registration_done + && fda.fda_help.ht_context != help_context_t::HC_NONE) + { help_text& ht = fda.fda_help; sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); @@ -746,9 +751,11 @@ register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) .with_example({"To count down from five to 1", "SELECT value FROM generate_series(1, 5, -1)"})}; - for (auto& ht : builtin_funcs) { - sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); - ht.index_tags(); + if (!help_registration_done) { + for 
(auto& ht : builtin_funcs) { + sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); + ht.index_tags(); + } } static help_text builtin_win_funcs[] = { @@ -847,9 +854,11 @@ register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) .with_tags({"window"}), }; - for (auto& ht : builtin_win_funcs) { - sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); - ht.index_tags(); + if (!help_registration_done) { + for (auto& ht : builtin_win_funcs) { + sqlite_function_help.insert(std::make_pair(ht.ht_name, &ht)); + ht.index_tags(); + } } static help_text idents[] = { @@ -1152,16 +1161,20 @@ register_sqlite_funcs(sqlite3* db, sqlite_registration_func_t* reg_funcs) .optional()), }; - for (auto& ht : idents) { - sqlite_function_help.insert(make_pair(toupper(ht.ht_name), &ht)); - for (const auto& param : ht.ht_parameters) { - if (!param.ht_flag_name) { - continue; + if (!help_registration_done) { + for (auto& ht : idents) { + sqlite_function_help.insert(make_pair(toupper(ht.ht_name), &ht)); + for (const auto& param : ht.ht_parameters) { + if (!param.ht_flag_name) { + continue; + } + sqlite_function_help.insert( + make_pair(toupper(param.ht_flag_name), &ht)); } - sqlite_function_help.insert( - make_pair(toupper(param.ht_flag_name), &ht)); } } + help_registration_done = true; + return 0; } diff --git a/src/sqlitepp.client.hh b/src/sqlitepp.client.hh index 5a827569..46c4a646 100644 --- a/src/sqlitepp.client.hh +++ b/src/sqlitepp.client.hh @@ -103,7 +103,10 @@ bind_values(sqlite3_stmt* stmt, Args... args) } struct prepared_stmt { - prepared_stmt(auto_mem stmt) : ps_stmt(std::move(stmt)) {} + explicit prepared_stmt(auto_mem stmt) + : ps_stmt(std::move(stmt)) + { + } Result execute() { @@ -195,10 +198,12 @@ prepare_stmt(sqlite3* db, const char* sql, Args... args) sqlite3_errmsg(db))); } - if (bind_values(retval.in(), args...) 
!= SQLITE_OK) { - return Err( - fmt::format(FMT_STRING("unable to prepare SQL statement: {}"), - sqlite3_errmsg(db))); + if (sizeof...(args) > 0) { + if (bind_values(retval.in(), args...) != SQLITE_OK) { + return Err( + fmt::format(FMT_STRING("unable to prepare SQL statement: {}"), + sqlite3_errmsg(db))); + } } return Ok(prepared_stmt{ diff --git a/src/string-extension-functions.cc b/src/string-extension-functions.cc index fccc7a94..29a02301 100644 --- a/src/string-extension-functions.cc +++ b/src/string-extension-functions.cc @@ -549,7 +549,7 @@ sql_encode(sqlite3_value* value, encode_algo algo) for (int lpc = 0; lpc < text_len; lpc++) { fmt::format_to(std::back_inserter(buf), - FMT_STRING("{:x}"), + FMT_STRING("{:02x}"), text[lpc]); } diff --git a/src/themes/default-theme.json b/src/themes/default-theme.json index 5448ed10..09e09c3f 100644 --- a/src/themes/default-theme.json +++ b/src/themes/default-theme.json @@ -36,7 +36,7 @@ "bold": true }, "cursor-line": { - "color": "Cyan", + "color": "Cyan1", "background-color": "Red", "bold": true, "underline": true diff --git a/src/yajlpp/json_ptr.cc b/src/yajlpp/json_ptr.cc index 4cc02737..fe7cf1fb 100644 --- a/src/yajlpp/json_ptr.cc +++ b/src/yajlpp/json_ptr.cc @@ -226,6 +226,19 @@ json_ptr::encode(char* dst, size_t dst_len, const char* src, size_t src_len) return retval; } +std::string +json_ptr::encode_str(const char* src, size_t src_len) +{ + if (src_len == (size_t) -1) { + src_len = strlen(src); + } + + char retval[src_len + 1]; + auto rc = encode(retval, sizeof(retval), src, src_len); + + return std::string(retval, rc); +} + size_t json_ptr::decode(char* dst, const char* src, ssize_t src_len) { diff --git a/src/yajlpp/json_ptr.hh b/src/yajlpp/json_ptr.hh index f7822ab9..e9266c49 100644 --- a/src/yajlpp/json_ptr.hh +++ b/src/yajlpp/json_ptr.hh @@ -64,7 +64,8 @@ public: void inc_array_index() { if (!this->jpw_array_indexes.empty() - && this->jpw_array_indexes.back() != -1) { + && this->jpw_array_indexes.back() != -1) 
+ { this->jpw_array_indexes.back() += 1; } } @@ -108,6 +109,12 @@ public: const char* src, size_t src_len = -1); + static std::string encode_str(const char* src, size_t src_len = -1); + static std::string encode_str(const std::string& src) + { + return encode_str(src.c_str(), src.size()); + } + static size_t decode(char* dst, const char* src, ssize_t src_len = -1); json_ptr(const char* value) : jp_value(value), jp_pos(value) {} diff --git a/src/yajlpp/yajlpp.cc b/src/yajlpp/yajlpp.cc index b1362abb..b2e9121d 100644 --- a/src/yajlpp/yajlpp.cc +++ b/src/yajlpp/yajlpp.cc @@ -213,7 +213,7 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const if (this->jph_children) { for (const auto& lpath : local_paths) { - std::string full_path = lpath; + std::string full_path = json_ptr::encode_str(lpath); if (this->jph_path_provider) { full_path += "/"; } @@ -455,8 +455,8 @@ json_path_handler_base::gen_schema_type(yajlpp_gen_context& ygc) const void json_path_handler_base::walk( - const std::function< - void(const json_path_handler_base&, const std::string&, void*)>& cb, + const std::function& cb, void* root, const std::string& base) const { @@ -465,11 +465,11 @@ json_path_handler_base::walk( if (this->jph_path_provider) { this->jph_path_provider(root, local_paths); - for (auto& lpath : local_paths) { + for (const auto& lpath : local_paths) { cb(*this, fmt::format(FMT_STRING("{}{}{}"), base, - lpath, + json_ptr::encode_str(lpath), this->jph_children ? 
"/" : ""), nullptr); } @@ -477,6 +477,12 @@ json_path_handler_base::walk( local_paths.clear(); this->jph_path_provider(root, local_paths); } + if (this->jph_field_getter) { + const auto* field = this->jph_field_getter(root, nonstd::nullopt); + if (field != nullptr) { + cb(*this, base, field); + } + } } else { local_paths.emplace_back(this->jph_property); @@ -484,6 +490,7 @@ json_path_handler_base::walk( if (this->jph_children) { full_path += "/"; } + cb(*this, full_path, nullptr); } @@ -493,7 +500,7 @@ json_path_handler_base::walk( static const intern_string_t POSS_SRC = intern_string::lookup("possibilities"); - std::string full_path = base + lpath; + std::string full_path = base + json_ptr::encode_str(lpath); if (this->jph_children) { full_path += "/"; } @@ -509,13 +516,18 @@ json_path_handler_base::walk( static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); - std::string full_path = lpath + "/"; + const auto short_path = json_ptr::encode_str(lpath) + "/"; - if (!this->jph_regex->capture_from(full_path) + if (!this->jph_regex->capture_from(short_path) .into(md) .matches() .ignore_error()) { + log_error( + "path-handler regex (%s) does not match path: " + "%s", + this->jph_regex->get_pattern().c_str(), + full_path.c_str()); ensure(false); } child_root = this->jph_obj_provider( @@ -527,7 +539,7 @@ json_path_handler_base::walk( } } else { for (auto& lpath : local_paths) { - void* field = nullptr; + const void* field = nullptr; if (this->jph_field_getter) { field = this->jph_field_getter(root, lpath); diff --git a/src/yajlpp/yajlpp.hh b/src/yajlpp/yajlpp.hh index 76323293..497afe17 100644 --- a/src/yajlpp/yajlpp.hh +++ b/src/yajlpp/yajlpp.hh @@ -228,11 +228,11 @@ struct json_path_handler_base { yajl_gen_status gen(yajlpp_gen_context& ygc, yajl_gen handle) const; yajl_gen_status gen_schema(yajlpp_gen_context& ygc) const; yajl_gen_status gen_schema_type(yajlpp_gen_context& ygc) const; - void walk( - const std::function< - void(const 
json_path_handler_base&, const std::string&, void*)>& cb, - void* root = nullptr, - const std::string& base = "/") const; + void walk(const std::function& cb, + void* root = nullptr, + const std::string& base = "/") const; enum class schema_type_t : std::uint32_t { ANY, @@ -255,7 +255,7 @@ struct json_path_handler_base { std::function jph_validator; - std::function name)> + std::function name)> jph_field_getter; std::function jph_obj_provider; @@ -452,6 +452,16 @@ public: return obj->*MEM; } + void fill_in_source() + { + if (this->ypc_locations != nullptr) { + (*this->ypc_locations)[this->get_full_path()] = source_location{ + this->ypc_source, + this->get_line_number(), + }; + } + } + const intern_string_t ypc_source; int ypc_line_number{1}; const struct json_path_container* ypc_handlers; diff --git a/src/yajlpp/yajlpp_def.hh b/src/yajlpp/yajlpp_def.hh index 22454b1c..6a67e590 100644 --- a/src/yajlpp/yajlpp_def.hh +++ b/src/yajlpp/yajlpp_def.hh @@ -241,11 +241,13 @@ struct json_path_handler : public json_path_handler_base { static int null_field_cb(yajlpp_parse_context* ypc) { + ypc->fill_in_source(); return ypc->ypc_current_handler->jph_null_cb(ypc); } static int bool_field_cb(yajlpp_parse_context* ypc, int val) { + ypc->fill_in_source(); return ypc->ypc_current_handler->jph_bool_cb(ypc, val); } @@ -253,16 +255,19 @@ struct json_path_handler : public json_path_handler_base { const unsigned char* str, size_t len) { + ypc->fill_in_source(); return ypc->ypc_current_handler->jph_str_cb(ypc, str, len); } static int int_field_cb(yajlpp_parse_context* ypc, long long val) { + ypc->fill_in_source(); return ypc->ypc_current_handler->jph_integer_cb(ypc, val); } static int dbl_field_cb(yajlpp_parse_context* ypc, double val) { + ypc->fill_in_source(); return ypc->ypc_current_handler->jph_double_cb(ypc, val); } @@ -525,6 +530,28 @@ struct json_path_handler : public json_path_handler_base { return 1; }; + this->jph_path_provider = + [args...](void* root, std::vector& paths_out) 
{ + const auto& field = json_path_handler::get_field(root, args...); + + for (const auto& pair : field) { + paths_out.emplace_back(pair.first); + } + }; + this->jph_field_getter + = [args...](void* root, + nonstd::optional name) -> const void* { + const auto& field = json_path_handler::get_field(root, args...); + if (!name) { + return &field; + } + + auto iter = field.find(name.value()); + if (iter == field.end()) { + return nullptr; + } + return (void*) &iter->second; + }; this->jph_gen_callback = [args...](yajlpp_gen_context& ygc, const json_path_handler_base& jph, yajl_gen handle) { @@ -897,6 +924,10 @@ struct json_path_handler : public json_path_handler_base { return gen(field.pp_value); }; + this->jph_field_getter + = [args...](void* root, nonstd::optional name) { + return (void*) &json_path_handler::get_field(root, args...); + }; return *this; } diff --git a/test/Makefile.am b/test/Makefile.am index a92ceb89..f1383b4c 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -235,6 +235,7 @@ dist_noinst_DATA = \ bad-config/formats/invalid-sql/init2.sql \ bad-config/formats/no-regexes/format.json \ bad-config/formats/no-samples/format.json \ + bad-config2/configs/invalid-file-format/config.json \ bad-config2/formats/invalid-config/config.json \ bad-config2/formats/invalid-config/config.bad-schema.json \ bad-config2/formats/invalid-config/config.malformed.json \ diff --git a/test/bad-config2/configs/invalid-file-format/config.json b/test/bad-config2/configs/invalid-file-format/config.json new file mode 100644 index 00000000..dd003ab7 --- /dev/null +++ b/test/bad-config2/configs/invalid-file-format/config.json @@ -0,0 +1,19 @@ +{ + "$schema": "https://lnav.org/schemas/config-v1.schema.json", + "tuning": { + "file-format": { + "application/vnd.example.com": { + "title": "example", + "header": { + "expr": { + "default": ":header REGEXP 'foobar" + }, + "size": 8 + } + }, + "application/vnd.example2.com": { + "title": "example" + } + } + } +} \ No newline at end of 
file diff --git a/test/expected/expected.am b/test/expected/expected.am index e12cbe80..b92c5612 100644 --- a/test/expected/expected.am +++ b/test/expected/expected.am @@ -202,8 +202,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_cmds.sh_d3b69abdfb39e4bfa5828c2f9593e2b2b7ed4d5d.out \ $(srcdir)/%reldir%/test_cmds.sh_d76d77ad95b9f120825417a6a8220c13df9541fc.err \ $(srcdir)/%reldir%/test_cmds.sh_d76d77ad95b9f120825417a6a8220c13df9541fc.out \ - $(srcdir)/%reldir%/test_cmds.sh_d7eebacdcf2cb194f25fa4ef97b7b5376b442467.err \ - $(srcdir)/%reldir%/test_cmds.sh_d7eebacdcf2cb194f25fa4ef97b7b5376b442467.out \ $(srcdir)/%reldir%/test_cmds.sh_d836c84398c831c976df46f46fe3bf5983c44c37.err \ $(srcdir)/%reldir%/test_cmds.sh_d836c84398c831c976df46f46fe3bf5983c44c37.out \ $(srcdir)/%reldir%/test_cmds.sh_d8eeef53a58bdeddbc1028d7c525413e3ca1c8df.err \ @@ -322,8 +320,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out \ $(srcdir)/%reldir%/test_logfile.sh_09bd16e044302f6b121092534708594bdad11b5a.err \ $(srcdir)/%reldir%/test_logfile.sh_09bd16e044302f6b121092534708594bdad11b5a.out \ - $(srcdir)/%reldir%/test_logfile.sh_1c6eee38f66356fcd9a9f0faedaea6dbcc901060.err \ - $(srcdir)/%reldir%/test_logfile.sh_1c6eee38f66356fcd9a9f0faedaea6dbcc901060.out \ $(srcdir)/%reldir%/test_logfile.sh_218ecb88b4753010c4264b3ac351260b4811612f.err \ $(srcdir)/%reldir%/test_logfile.sh_218ecb88b4753010c4264b3ac351260b4811612f.out \ $(srcdir)/%reldir%/test_logfile.sh_290a3c49e53c2229a7400c107338fa0bb38375e2.err \ @@ -804,8 +800,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_str_func.sh_352434d199f7b493668c9f2774472eb69ef0d9f0.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_36fc9005464f1106f969559e640d9fa36d5fadad.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_36fc9005464f1106f969559e640d9fa36d5fadad.out \ - $(srcdir)/%reldir%/test_sql_str_func.sh_3855d2cc0ab29171cae8e722f130adec25eae36e.err \ - 
$(srcdir)/%reldir%/test_sql_str_func.sh_3855d2cc0ab29171cae8e722f130adec25eae36e.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_3de72fe5c1751dd212a1cd45cf2caa7f3b52bced.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_3de72fe5c1751dd212a1cd45cf2caa7f3b52bced.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_4b402274da152135c6c99456b693e1ecabca0256.err \ @@ -830,6 +824,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_str_func.sh_6607c0dd8baff16930eb3e0daf6354af5b50052b.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_69f5d49e62da48e188bd9d6af4bd3adeb21eb7d1.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_69f5d49e62da48e188bd9d6af4bd3adeb21eb7d1.out \ + $(srcdir)/%reldir%/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.err \ + $(srcdir)/%reldir%/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_6ff984d8ed3e5099376d19f0dd20d5fd1ed42494.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_6ff984d8ed3e5099376d19f0dd20d5fd1ed42494.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_71f37db33504b2c08a7a3323c482556f53d88100.err \ @@ -1010,8 +1006,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_xml_func.sh_b036c73528a446cba46625767517cdac868aba72.out \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.err \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \ - $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \ - $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.out \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \ diff --git a/test/expected/test_config.sh_a0907769aba112d628e7ebe39c4ec252e5e0bc69.err b/test/expected/test_config.sh_a0907769aba112d628e7ebe39c4ec252e5e0bc69.err index 
ec11ba51..6b1b18f7 100644 --- a/test/expected/test_config.sh_a0907769aba112d628e7ebe39c4ec252e5e0bc69.err +++ b/test/expected/test_config.sh_a0907769aba112d628e7ebe39c4ec252e5e0bc69.err @@ -36,3 +36,34 @@ ✘ error: invalid JSON reason: parse error: premature EOF  --> {test_dir}/bad-config2/formats/invalid-config/config.truncated.json:3 +✘ error: invalid value for property “/tuning/file-format/application~1vnd.example.com/header/expr/default” + reason: SQL expression is invalid + |  reason: unrecognized token: "'foobar" + |   --> /tuning/file-formats/application~1vnd.example.com/header/expr/default + |   | :header REGEXP 'foobar  + --> {test_dir}/bad-config2/configs/invalid-file-format/config.json:9 + = help: Property Synopsis + /tuning/file-format/application~1vnd.example.com/header/expr/default + Description + SQLite expression +✘ error: missing value for property “/tuning/file-format/application~1vnd.example.com/converter” + reason: A converter is required for a file format + |  reason: The converter script transforms the file into a format that can be consumed by lnav + = help: Property Synopsis + /tuning/file-format/application~1vnd.example.com/converter + Description + The script used to convert the file +✘ error: missing value for property “/tuning/file-format/application~1vnd.example2.com/header/expr/” + reason: At least one header expression is required for a file format + |  reason: Header expressions are used to detect a format + = help: Property Synopsis + /tuning/file-format/application~1vnd.example2.com/header/expr/ + Description + SQLite expression +✘ error: missing value for property “/tuning/file-format/application~1vnd.example2.com/converter” + reason: A converter is required for a file format + |  reason: The converter script transforms the file into a format that can be consumed by lnav + = help: Property Synopsis + /tuning/file-format/application~1vnd.example2.com/converter + Description + The script used to convert the file diff --git 
a/test/expected/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.err b/test/expected/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.out b/test/expected/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.out new file mode 100644 index 00000000..a8735448 --- /dev/null +++ b/test/expected/test_sql_str_func.sh_6ac7ab1f90c064944ff66bef5974f050c8227d4b.out @@ -0,0 +1,2 @@ +Row 0: + Column encode('hi' || char(10), 'hex'): 68690a diff --git a/test/test_sql_str_func.sh b/test/test_sql_str_func.sh index e4cb96b0..a455a1b7 100644 --- a/test/test_sql_str_func.sh +++ b/test/test_sql_str_func.sh @@ -109,6 +109,8 @@ run_cap_test ./drive_sql "SELECT encode('foo', null)" run_cap_test ./drive_sql "SELECT encode(null, 'base64')" +run_cap_test ./drive_sql "SELECT encode('hi' || char(10), 'hex')" + run_cap_test ./drive_sql "SELECT gunzip(decode(encode(gzip('Hello, World!'), 'base64'), 'base64'))" #run_cap_test env TEST_COMMENT=invalid_url ./drive_sql <<'EOF' diff --git a/tools/bin2c.c b/tools/bin2c.c index 2141bf01..b4286b00 100644 --- a/tools/bin2c.c +++ b/tools/bin2c.c @@ -11,6 +11,7 @@ # include #endif +#include #include #include #include @@ -67,7 +68,7 @@ process(struct file_meta* fm, FILE* ofile) } unsigned char* buf = malloc(st.st_size); - unsigned char* dest = malloc(st.st_size); + unsigned char* dest = malloc(st.st_size + 1024); int fd = open(fm->fm_name, O_RDONLY); if (fd == -1) { @@ -80,8 +81,9 @@ process(struct file_meta* fm, FILE* ofile) fm->fm_size += rc; } - uLongf destLen = st.st_size; - compress(dest, &destLen, buf, st.st_size); + uLongf destLen = st.st_size + 1024; + int cres = compress(dest, &destLen, buf, st.st_size); + assert(cres == Z_OK); fm->fm_compressed_size = destLen; int c, col = 1;