[config] simplify file format detection by moving it into the log format definition

pull/1179/head
Tim Stack 10 months ago
parent b1c7fad50d
commit 8df83d562a

1
.gitignore vendored

@ -1,5 +1,6 @@
.deps
.lnav
.DS_Store
*.dat
*.diff
test/*.err

@ -73,59 +73,6 @@
},
"additionalProperties": false
},
"file-format": {
"description": "sdfjdls",
"title": "/tuning/file-format",
"type": "object",
"patternProperties": {
"(\\w+~1[\\w\\.\\-]+)": {
"description": "File format definitions, keyed by their MIME type",
"title": "/tuning/file-format/<mime_type>",
"type": "object",
"properties": {
"title": {
"title": "/tuning/file-format/<mime_type>/title",
"description": "The display name for this file format",
"type": "string"
},
"header": {
"description": "File header detection definitions",
"title": "/tuning/file-format/<mime_type>/header",
"type": "object",
"properties": {
"expr": {
"description": "The expressions used to check if a file header matches this file format",
"title": "/tuning/file-format/<mime_type>/header/expr",
"type": "object",
"patternProperties": {
"(\\w+)": {
"title": "/tuning/file-format/<mime_type>/header/expr/<header_expr_name>",
"description": "SQLite expression",
"type": "string"
}
},
"additionalProperties": false
},
"size": {
"title": "/tuning/file-format/<mime_type>/header/size",
"description": "The minimum size required for this header type",
"type": "integer"
}
},
"additionalProperties": false
},
"converter": {
"title": "/tuning/file-format/<mime_type>/converter",
"description": "The script used to convert the file",
"type": "string",
"pattern": "[\\w\\.\\-]+"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"logfile": {
"description": "Settings related to log files",
"title": "/tuning/logfile",

@ -75,14 +75,50 @@
"description": "A regular expression that restricts this format to log files with a matching name",
"type": "string"
},
"mime-types": {
"title": "/<format_name>/mime-types",
"description": "A list of mime-types this format should be used for",
"type": "array",
"items": {
"type": "string",
"pattern": "^\\w/[\\w\\.]+"
}
"converter": {
"description": "Describes how the file format can be detected and converted to a log that can be understood by lnav",
"title": "/<format_name>/converter",
"type": "object",
"properties": {
"type": {
"title": "/<format_name>/converter/type",
"description": "The MIME type",
"type": "string"
},
"header": {
"description": "File header detection definitions",
"title": "/<format_name>/converter/header",
"type": "object",
"properties": {
"expr": {
"description": "The expressions used to check if a file header matches this file format",
"title": "/<format_name>/converter/header/expr",
"type": "object",
"patternProperties": {
"(\\w+)": {
"title": "/<format_name>/converter/header/expr/<header_expr_name>",
"description": "SQLite expression",
"type": "string"
}
},
"additionalProperties": false
},
"size": {
"title": "/<format_name>/converter/header/size",
"description": "The minimum size required for this header type",
"type": "integer"
}
},
"additionalProperties": false
},
"command": {
"title": "/<format_name>/converter/command",
"description": "The script used to convert the file",
"type": "string",
"pattern": "[\\w\\.\\-]+"
}
},
"additionalProperties": false
},
"level-field": {
"title": "/<format_name>/level-field",

@ -260,10 +260,6 @@ command.
.. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/clipboard
.. _tuning_file_format:
.. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/file-format
.. jsonschema:: ../schemas/config-v1.schema.json#/properties/tuning/properties/piper
.. jsonschema:: ../schemas/config-v1.schema.json#/definitions/clip-commands

@ -133,11 +133,29 @@ object with the following fields:
:json: True if each log line is JSON-encoded.
:mime-types: An array of MIME types that this log format should only be
used with. These MIME types refer to file formats that are defined
using the `Automatic File Conversion`_ feature. This property should
not be defined for log files that do not require conversion and can be
naturally parsed using regexes or are JSON-lines.
:converter: An object that describes how an input file can be detected and
then converted to a form that can be interpreted by **lnav**. For
example, a PCAP file is in a binary format that cannot be handled natively
by **lnav**. However, a PCAP file can be converted by :file:`tshark`
into JSON-lines that can be handled by **lnav**. So, this configuration
describes how the input file format can be detected and converted. See
`Automatic File Conversion`_ for more information.
:header: An object that describes how to match the header of the input
file.
:expr: An object that contains SQLite expressions that can be used to
check if the input file's header is of this type. The property
name is the name of the expression and the value is the expression.
The expression is evaluated with the following variables:
:\:header: The hex-encoded version of the header content.
:\:filepath: The path to the input file.
:size: The minimum size of header that is needed to do the match.
:command: The command to execute to convert the input file.
:line-format: An array that specifies the text format for JSON-encoded
log messages. Log files that are JSON-encoded will have each message
@ -571,23 +589,25 @@ Automatic File Conversion
File formats that are not naturally understood by **lnav** can be
automatically detected and converted to a usable form using the
:ref:`tuning_file_format` configuration options. For example,
PCAP files can be detected and converted to a JSON-lines form
using :code:`tshark`. The conversion process works as follows:
:code:`converter` property. For example, PCAP files can be
detected and converted to a JSON-lines form using :code:`tshark`.
The conversion process works as follows:
#. The first 1024 bytes of the file are read, if available.
#. This header is converted into a hex string.
#. For each file-format, every "header expression" is evaluated
to see if there is a match. The header expressions are
SQLite expressions where the following variables are defined:
#. For each log format that has defined a :code:`converter`,
every "header expression" is evaluated to see if there is a
match. The header expressions are SQLite expressions where
the following variables are defined:
:\:header: A string containing the header as a hex string.
:\:filepath: The path to the file.
#. If a match is found, the converter script defined in the
file format will be invoked and passed the format MIME type
and path to the file as arguments. The script should write
log format will be invoked and passed the format name and
path to the file as arguments. The script should write
the converted form of the input file on its standard output.
Any errors should be written to the standard error.
#. The MIME type will be associated with the original file and
only log formats that have the corresponding type will be
used to interpret the file.
#. The log format will be associated with the original file and will
be used to interpret the converted file.

@ -204,8 +204,7 @@ add_custom_command(
DEPENDS bin2c ${BUILTIN_LNAV_SCRIPTS})
list(APPEND GEN_SRCS builtin-scripts.h builtin-scripts.cc)
set(BUILTIN_SH_SCRIPTS scripts/dhclient-summary.lnav scripts/lnav-pop-view.lnav
scripts/partition-by-boot.lnav scripts/search-for.lnav)
set(BUILTIN_SH_SCRIPTS scripts/dump-pid.sh scripts/pcap_log-converter.sh)
set(BUILTIN_SH_SCRIPT_PATHS ${BUILTIN_SH_SCRIPTS})
@ -465,7 +464,6 @@ add_library(
file_collection.hh
file_converter_manager.hh
file_format.hh
file_format.cfg.hh
files_sub_source.hh
filter_observer.hh
filter_status_source.hh

@ -203,7 +203,6 @@ noinst_HEADERS = \
file_collection.hh \
file_converter_manager.hh \
file_format.hh \
file_format.cfg.hh \
file_vtab.cfg.hh \
files_sub_source.hh \
filter_observer.hh \

@ -454,7 +454,7 @@ file_collection::watch_logfile(const std::string& filename,
});
loo.with_filename(filename);
loo.with_stat_for_temp(st);
loo.loo_mime_type = eff->eff_mime_type;
loo.loo_format_name = eff->eff_format_name;
filename_to_open = convert_res.cr_destination;
}
}

@ -69,11 +69,11 @@ convert(const external_file_format& eff, const std::string& filename)
log_info("invoking converter: %s (PATH=%s)",
eff.eff_converter.c_str(),
new_path.c_str());
auto mime_str = eff.eff_mime_type.to_string();
auto format_str = eff.eff_format_name;
const char* args[] = {
eff.eff_converter.c_str(),
mime_str.c_str(),
format_str.c_str(),
filename.c_str(),
nullptr,
};
@ -95,6 +95,7 @@ convert(const external_file_format& eff, const std::string& filename)
auto error_queue = std::make_shared<std::vector<std::string>>();
std::thread err_reader([err = std::move(err_pipe.read_end()),
converter = eff.eff_converter,
error_queue,
child_pid = child.in()]() mutable {
line_buffer lb;
@ -115,7 +116,7 @@ convert(const external_file_format& eff, const std::string& filename)
done = true;
} else {
lb.read_range(li.li_file_range)
.then([error_queue, child_pid](auto sbr) {
.then([converter, error_queue, child_pid](auto sbr) {
auto line_str = string_fragment(
sbr.get_data(), 0, sbr.length())
.trim("\n");
@ -123,7 +124,8 @@ convert(const external_file_format& eff, const std::string& filename)
error_queue->emplace_back(line_str.to_string());
}
log_debug("pcap[%d]: %.*s",
log_debug("%s[%d]: %.*s",
converter.c_str(),
child_pid,
line_str.length(),
line_str.data());

@ -31,20 +31,11 @@
#include "file_format.hh"
#include <sqlite3.h>
#include "archive_manager.hh"
#include "base/auto_fd.hh"
#include "base/fs_util.hh"
#include "base/injector.hh"
#include "base/intern_string.hh"
#include "base/lnav_log.hh"
#include "config.h"
#include "lnav_config.hh"
#include "readline_highlighters.hh"
#include "safe/safe.h"
#include "sql_util.hh"
#include "sqlite-extension-func.hh"
file_format_t
detect_file_format(const ghc::filesystem::path& filename)
@ -62,7 +53,7 @@ detect_file_format(const ghc::filesystem::path& filename)
if ((rc = read(fd, buffer, sizeof(buffer))) > 0) {
static auto SQLITE3_HEADER = "SQLite format 3";
auto header_frag = string_fragment(buffer, 0, rc);
auto header_frag = string_fragment::from_bytes(buffer, rc);
if (header_frag.startswith(SQLITE3_HEADER)) {
retval = file_format_t::SQLITE_DB;
@ -72,231 +63,3 @@ detect_file_format(const ghc::filesystem::path& filename)
return retval;
}
/**
 * Build a mime_type from its textual "type/subtype" form.
 *
 * The string is split at the first '/'.  When there is no '/', the whole
 * string is used as the subtype and the type is "application".
 *
 * @param str The MIME type string to split.
 * @return The type/subtype pair.
 */
mime_type
mime_type::from_str(const std::string& str)
{
    const auto sep = str.find('/');

    return sep != std::string::npos
        ? mime_type{str.substr(0, sep), str.substr(sep + 1)}
        : mime_type{"application", str};
}
// A single file-format header expression after it has been prepared as a
// SQLite statement.
struct compiled_header_expr {
// The prepared "SELECT 1 WHERE <expr>" statement.  Constructed with
// sqlite3_finalize, presumably as the cleanup function -- confirm against
// auto_mem.
auto_mem<sqlite3_stmt> che_stmt{sqlite3_finalize};
// Starts out true; detect_mime_type() sets it to false when stepping the
// statement fails and skips expressions where it is false.
bool che_enabled{true};
};
struct file_format_expressions : public lnav_config_listener {
void reload_config(error_reporter& reporter) override
{
log_debug("reloading file-format header expressions");
safe::WriteAccess<safe::Safe<inner>> in(instance);
if (in->e_db.in() == nullptr) {
if (sqlite3_open(":memory:", in->e_db.out()) != SQLITE_OK) {
log_error("unable to open memory DB");
return;
}
register_sqlite_funcs(in->e_db.in(), sqlite_registration_funcs);
}
in->e_header_exprs.clear();
const auto& cfg = injector::get<const lnav::file_formats::config&>();
for (const auto& fpair : cfg.c_defs) {
for (const auto& hpair : fpair.second.fd_header.h_exprs.he_exprs) {
auto stmt_str = fmt::format(FMT_STRING("SELECT 1 WHERE {}"),
hpair.second);
compiled_header_expr che;
log_info("preparing file-format header expression: %s",
stmt_str.c_str());
auto retcode = sqlite3_prepare_v2(in->e_db.in(),
stmt_str.c_str(),
stmt_str.size(),
che.che_stmt.out(),
nullptr);
if (retcode != SQLITE_OK) {
auto sql_al
= attr_line_t(hpair.second)
.with_attr_for_all(SA_PREFORMATTED.value())
.with_attr_for_all(
VC_ROLE.value(role_t::VCR_QUOTED_CODE));
readline_sqlite_highlighter(sql_al, -1);
intern_string_t watch_expr_path
= intern_string::lookup(fmt::format(
FMT_STRING(
"/tuning/file-formats/{}/header/expr/{}"),
json_ptr::encode_str(fpair.first.c_str()),
hpair.first));
auto snippet = lnav::console::snippet::from(
source_location(watch_expr_path), sql_al);
auto um = lnav::console::user_message::error(
"SQL expression is invalid")
.with_reason(sqlite3_errmsg(in->e_db.in()))
.with_snippet(snippet);
reporter(&hpair.second, um);
continue;
}
in->e_header_exprs[fpair.first][hpair.first] = std::move(che);
}
if (fpair.second.fd_header.h_exprs.he_exprs.empty()) {
auto um
= lnav::console::user_message::error(
"At least one header expression is required for "
"a file format")
.with_reason(
"Header expressions are used to detect a format");
reporter(&fpair.second.fd_header.h_exprs, um);
}
if (fpair.second.fd_converter.pp_value.empty()) {
auto um = lnav::console::user_message::error(
"A converter is required for a file format")
.with_reason(
"The converter script transforms the file "
"into a format that can be consumed by lnav");
reporter(&fpair.second.fd_converter, um);
}
}
}
void unload_config() override
{
safe::WriteAccess<safe::Safe<inner>> in(instance);
in->e_header_exprs.clear();
}
struct inner {
auto_sqlite3 e_db;
std::map<std::string, std::map<std::string, compiled_header_expr>>
e_header_exprs;
};
safe::Safe<inner> instance;
};
static file_format_expressions format_exprs;
/**
 * Check the start of a file against the header expressions of every
 * configured file format.
 *
 * Up to 1024 bytes are read from the file and hex-encoded.  Each compiled
 * header expression is then evaluated with these parameters bound:
 *   - `:header`   the hex-encoded header,
 *   - `:filepath` the path of the file,
 *   - `$NAME`     the value of the environment variable NAME, if it is set.
 * The first expression that produces a row selects the format.
 *
 * @param filename The file to examine.
 * @return The MIME type, converter and converter source location of the
 *   first matching file format, or nullopt when the file cannot be opened
 *   or read, or nothing matches.
 */
nonstd::optional<external_file_format>
detect_mime_type(const ghc::filesystem::path& filename)
{
    uint8_t buffer[1024];
    size_t buffer_size = 0;

    {
        auto_fd fd;

        if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) == -1) {
            return nonstd::nullopt;
        }

        ssize_t rc;

        if ((rc = read(fd, buffer, sizeof(buffer))) == -1) {
            return nonstd::nullopt;
        }
        buffer_size = rc;
    }

    auto hexbuf = auto_buffer::alloc(buffer_size * 2);

    for (size_t lpc = 0; lpc < buffer_size; lpc++) {
        fmt::format_to(
            std::back_inserter(hexbuf), FMT_STRING("{:02x}"), buffer[lpc]);
    }

    safe::WriteAccess<safe::Safe<file_format_expressions::inner>> in(
        format_exprs.instance);

    const auto& cfg = injector::get<const lnav::file_formats::config&>();
    for (const auto& fpair : cfg.c_defs) {
        if (buffer_size < fpair.second.fd_header.h_size) {
            // buffer_size never exceeds sizeof(buffer), so narrowing it for
            // the "%d" conversion is lossless.
            log_debug("file content too small (%d) for header detection: %s",
                      static_cast<int>(buffer_size),
                      fpair.first.c_str());
            continue;
        }
        for (const auto& hpair : fpair.second.fd_header.h_exprs.he_exprs) {
            auto& he = in->e_header_exprs[fpair.first][hpair.first];

            if (!he.che_enabled) {
                continue;
            }

            auto* stmt = he.che_stmt.in();

            if (stmt == nullptr) {
                continue;
            }
            sqlite3_reset(stmt);
            auto count = sqlite3_bind_parameter_count(stmt);
            for (int lpc = 0; lpc < count; lpc++) {
                const auto* name = sqlite3_bind_parameter_name(stmt, lpc + 1);

                if (name == nullptr) {
                    // Nameless parameters ("?") have no name to look up;
                    // leave them unbound instead of dereferencing NULL.
                    continue;
                }
                if (name[0] == '$') {
                    const char* env_value;

                    if ((env_value = getenv(&name[1])) != nullptr) {
                        sqlite3_bind_text(
                            stmt, lpc + 1, env_value, -1, SQLITE_STATIC);
                    }
                    continue;
                }
                if (strcmp(name, ":header") == 0) {
                    sqlite3_bind_text(stmt,
                                      lpc + 1,
                                      hexbuf.in(),
                                      hexbuf.size(),
                                      SQLITE_STATIC);
                    continue;
                }
                if (strcmp(name, ":filepath") == 0) {
                    sqlite3_bind_text(
                        stmt, lpc + 1, filename.c_str(), -1, SQLITE_STATIC);
                    continue;
                }
            }

            auto step_res = sqlite3_step(stmt);

            switch (step_res) {
                case SQLITE_OK:
                case SQLITE_DONE:
                    // No row: the expression did not match.
                    continue;
                case SQLITE_ROW:
                    break;
                default: {
                    log_error(
                        "failed to execute file-format header expression: "
                        "%s:%s -- %s",
                        fpair.first.c_str(),
                        hpair.first.c_str(),
                        sqlite3_errmsg(in->e_db.in()));
                    // Do not retry an expression that fails at runtime.
                    he.che_enabled = false;
                    continue;
                }
            }

            log_info("detected MIME type for: %s -- %s (header-expr: %s)",
                     filename.c_str(),
                     fpair.first.c_str(),
                     hpair.first.c_str());
            return external_file_format{
                mime_type::from_str(fpair.first),
                fpair.second.fd_converter.pp_value,
                fpair.second.fd_converter.pp_location.sl_source.to_string(),
            };
        }
    }

    return nonstd::nullopt;
}

@ -1,63 +0,0 @@
/**
* Copyright (c) 2023, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lnav_file_format_cfg_hh
#define lnav_file_format_cfg_hh
#include <map>
#include <string>
#include "yajlpp/yajlpp.hh"
// Configuration structures for the "/tuning/file-format" settings.
namespace lnav {
namespace file_formats {
// The SQLite expressions used to check whether a file header matches a
// file format.
struct header_exprs {
// Expression name -> SQLite expression text.
std::map<std::string, std::string> he_exprs;
};
// File header detection definitions for one file format.
struct header {
header_exprs h_exprs;
// The minimum size required for this header type; defaults to 32.
size_t h_size{32};
};
// One file format definition; config::c_defs keys these by MIME type.
struct format_def {
// The display name for this file format.
std::string fd_title;
header fd_header;
// The script used to convert the file.
positioned_property<std::string> fd_converter;
};
// All file format definitions, keyed by their MIME type.
struct config {
std::map<std::string, format_def> c_defs;
};
} // namespace file_formats
} // namespace lnav
#endif

@ -43,27 +43,8 @@ enum class file_format_t : int {
REMOTE,
};
/**
 * A MIME type split into its type and subtype, e.g. "text" and "plain".
 */
struct mime_type {
    /** Parse a "type/subtype" string. */
    static mime_type from_str(const std::string& str);

    std::string mt_type;
    std::string mt_subtype;

    /**
     * Lexicographic ordering on (type, subtype).
     *
     * Ordered containers need a strict weak ordering.  Requiring both
     * members to be smaller at the same time is not one: values such as
     * {"a", "z"} and {"b", "c"} would each compare "not less" than the
     * other and be treated as equivalent by std::set/std::map.
     */
    bool operator<(const mime_type& other) const
    {
        if (this->mt_type != other.mt_type) {
            return this->mt_type < other.mt_type;
        }
        return this->mt_subtype < other.mt_subtype;
    }

    /** @return The "type/subtype" form of this MIME type. */
    std::string to_string() const
    {
        return fmt::format(
            FMT_STRING("{}/{}"), this->mt_type, this->mt_subtype);
    }
};
struct external_file_format {
mime_type eff_mime_type;
std::string eff_format_name;
std::string eff_converter;
ghc::filesystem::path eff_source_path;
};

@ -4,11 +4,18 @@
"json": true,
"title": "Packet Capture",
"description": "Internal format for pcap files",
"mime-types": [
"application/vnd.tcpdump.pcap"
],
"multiline": false,
"convert-to-local-time": true,
"converter": {
"header": {
"expr": {
"pcapng": ":header REGEXP '^0a0d0d0a.{8}(?:1a2b3c4d|4d3c2b1a).*'",
"pcap": ":header REGEXP '^(?:a1b2c3d4|d4c3b2a1|a1b23c4d|4d3cb2a1).*'"
},
"size": 24
},
"command": "pcap_log-converter.sh"
},
"line-format": [
{
"field": "time"

@ -1002,7 +1002,13 @@ wait_for_pipers()
log_debug("all pipers finished");
break;
}
std::this_thread::sleep_for(sleep_time);
// Use usleep() since it is defined to be interruptable by a signal.
auto urc = usleep(
std::chrono::duration_cast<std::chrono::microseconds>(sleep_time)
.count());
if (urc == -1 && errno == EINTR) {
log_trace("wait_for_pipers(): sleep interrupted");
}
rebuild_indexes();
log_debug("%d pipers and %d children are still active",
@ -1201,7 +1207,6 @@ looper()
(void) signal(SIGINT, sigint);
(void) signal(SIGTERM, sigint);
(void) signal(SIGWINCH, sigwinch);
(void) signal(SIGCHLD, sigchld);
auto create_screen_res = screen_curses::create();
@ -2115,6 +2120,7 @@ main(int argc, char* argv[])
}
(void) signal(SIGPIPE, SIG_IGN);
(void) signal(SIGCHLD, sigchld);
setlocale(LC_ALL, "");
try {
std::locale::global(std::locale(""));

@ -81,9 +81,6 @@ lnav_config_listener* lnav_config_listener::LISTENER_LIST;
static auto a = injector::bind<archive_manager::config>::to_instance(
+[]() { return &lnav_config.lc_archive_manager; });
static auto ff = injector::bind<lnav::file_formats::config>::to_instance(
+[]() { return &lnav_config.lc_file_formats; });
static auto fvc = injector::bind<file_vtab::config>::to_instance(
+[]() { return &lnav_config.lc_file_vtab; });
@ -1043,55 +1040,6 @@ static const struct json_path_container ui_handlers = {
.with_children(keymap_defs_handlers),
};
// Handlers for the properties of a file format's "header/expr" object:
// a property whose name matches \w+ is tied to header_exprs::he_exprs.
static const struct json_path_container header_expr_handlers = {
yajlpp::pattern_property_handler("(?<header_expr_name>\\w+)")
.with_description("SQLite expression")
.for_field(&lnav::file_formats::header_exprs::he_exprs),
};
// Handlers for a file format's "header" object: the "expr" child object
// and the "size" field.
static const struct json_path_container header_handlers = {
yajlpp::property_handler("expr")
.with_description("The expressions used to check if a file header "
"matches this file format")
.for_child(&lnav::file_formats::header::h_exprs)
.with_children(header_expr_handlers),
yajlpp::property_handler("size")
.with_description("The minimum size required for this header type")
.for_field(&lnav::file_formats::header::h_size),
};
// Handlers for one file format definition: "title", "header" and
// "converter".
static const struct json_path_container format_def_handlers = {
yajlpp::property_handler("title")
.with_description("The display name for this file format")
.for_field(&lnav::file_formats::format_def::fd_title),
yajlpp::property_handler("header")
.with_description("File header detection definitions")
.for_child(&lnav::file_formats::format_def::fd_header)
.with_children(header_handlers),
// NOTE(review): the pattern has no ^/$ anchors; whether with_pattern()
// requires the whole value to match is not visible here -- confirm.
yajlpp::property_handler("converter")
.with_description("The script used to convert the file")
.with_pattern(R"([\w\.\-]+)")
.for_field(&lnav::file_formats::format_def::fd_converter),
};
// Handlers for the file format definitions, which are keyed by MIME type.
// The "~1" in the property pattern is the JSON-pointer escape for '/'.
static const struct json_path_container format_defs_handlers = {
yajlpp::pattern_property_handler(R"((?<mime_type>\w+~1[\w\.\-]+))")
.with_description("File format definitions, keyed by their MIME type")
// Looks up the definition for the captured MIME type in
// lc_file_formats.c_defs; operator[] creates the entry if it is missing.
.with_obj_provider<lnav::file_formats::format_def, _lnav_config>(
[](const yajlpp_provider_context& ypc, _lnav_config* root) {
auto& retval
= root->lc_file_formats.c_defs[ypc.get_substr("mime_type")];
return &retval;
})
// Reports the key of every definition currently in c_defs.
.with_path_provider<_lnav_config>(
[](struct _lnav_config* cfg, std::vector<std::string>& paths_out) {
for (const auto& iter : cfg->lc_file_formats.c_defs) {
paths_out.emplace_back(iter.first);
}
})
.with_children(format_def_handlers),
};
static const struct json_path_container archive_handlers = {
yajlpp::property_handler("min-free-space")
.with_synopsis("<bytes>")
@ -1319,9 +1267,6 @@ static const struct json_path_container tuning_handlers = {
yajlpp::property_handler("file-vtab")
.with_description("Settings related to the lnav_file virtual-table")
.with_children(file_vtab_handlers),
yajlpp::property_handler("file-format")
.with_description("sdfjdls")
.with_children(format_defs_handlers),
yajlpp::property_handler("logfile")
.with_description("Settings related to log files")
.with_children(logfile_handlers),
@ -1706,6 +1651,8 @@ reload_config(std::vector<lnav::console::user_message>& errors)
while (curr != nullptr) {
auto reporter = [&errors](const void* cfg_value,
const lnav::console::user_message& errmsg) {
log_error("configuration error: %s",
errmsg.to_attr_line().get_string().c_str());
auto cb = [&cfg_value, &errors, &errmsg](
const json_path_handler_base& jph,
const std::string& path,

@ -43,7 +43,6 @@
#include "base/file_range.hh"
#include "base/lnav.console.hh"
#include "base/result.h"
#include "file_format.cfg.hh"
#include "file_vtab.cfg.hh"
#include "ghc/filesystem.hpp"
#include "lnav_config_fwd.hh"
@ -112,7 +111,6 @@ struct _lnav_config {
archive_manager::config lc_archive_manager;
lnav::piper::config lc_piper;
lnav::file_formats::config lc_file_formats;
file_vtab::config lc_file_vtab;
lnav::logfile::config lc_logfile;
tailer::config lc_tailer;

@ -33,6 +33,7 @@
#include <stdio.h>
#include <string.h>
#include "base/fs_util.hh"
#include "base/is_utf8.hh"
#include "base/snippet_highlighters.hh"
#include "base/string_util.hh"
@ -44,8 +45,10 @@
#include "log_search_table.hh"
#include "log_vtab_impl.hh"
#include "ptimec.hh"
#include "readline_highlighters.hh"
#include "scn/scn.h"
#include "sql_util.hh"
#include "sqlite-extension-func.hh"
#include "yajlpp/yajlpp.hh"
#include "yajlpp/yajlpp_def.hh"
@ -1836,6 +1839,139 @@ external_log_format::get_subline(const logline& ll,
this->jlf_line_values.lvv_sbr = sbr;
}
// A single converter header expression after it has been prepared as a
// SQLite statement.
struct compiled_header_expr {
// The prepared "SELECT 1 WHERE <expr>" statement.  Constructed with
// sqlite3_finalize, presumably as the cleanup function -- confirm against
// auto_mem.
auto_mem<sqlite3_stmt> che_stmt{sqlite3_finalize};
// Starts out true; detect_mime_type() sets it to false when stepping the
// statement fails and skips expressions where it is false.
bool che_enabled{true};
};
// State shared by the code that compiles header expressions and the code
// that evaluates them.
// NOTE(review): derives from lnav_config_listener but overrides nothing in
// the code visible here -- confirm whether the base class is still needed.
struct format_header_expressions : public lnav_config_listener {
// The database the header expressions are prepared against.
auto_sqlite3 e_db;
// Compiled expressions keyed by log format name, then by expression name.
std::map<intern_string_t, std::map<std::string, compiled_header_expr>>
e_header_exprs;
};
// The single shared instance; it is accessed through safe::WriteAccess.
using safe_format_header_expressions = safe::Safe<format_header_expressions>;
static safe_format_header_expressions format_header_exprs;
/**
 * Check the start of a file against the converter header expressions of
 * every external log format.
 *
 * Up to 1024 bytes are read from the file and hex-encoded.  Each compiled
 * header expression is then evaluated with these parameters bound:
 *   - `:header`   the hex-encoded header,
 *   - `:filepath` the path of the file,
 *   - `$NAME`     the value of the environment variable NAME, if it is set.
 * The first expression that produces a row selects the format.
 *
 * @param filename The file to examine.
 * @return The name, converter command and command source location of the
 *   first matching log format, or nullopt when the file cannot be opened
 *   or read, or nothing matches.
 */
nonstd::optional<external_file_format>
detect_mime_type(const ghc::filesystem::path& filename)
{
    uint8_t buffer[1024];
    size_t buffer_size = 0;

    {
        auto_fd fd;

        if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) == -1) {
            return nonstd::nullopt;
        }

        ssize_t rc;

        if ((rc = read(fd, buffer, sizeof(buffer))) == -1) {
            return nonstd::nullopt;
        }
        buffer_size = rc;
    }

    auto hexbuf = auto_buffer::alloc(buffer_size * 2);

    for (size_t lpc = 0; lpc < buffer_size; lpc++) {
        fmt::format_to(
            std::back_inserter(hexbuf), FMT_STRING("{:02x}"), buffer[lpc]);
    }

    safe::WriteAccess<safe_format_header_expressions> in(format_header_exprs);

    for (const auto& format : log_format::get_root_formats()) {
        auto elf = std::dynamic_pointer_cast<external_log_format>(format);
        if (elf == nullptr) {
            continue;
        }

        const auto& exprs = elf->elf_converter.c_header.h_exprs.he_exprs;

        if (exprs.empty()) {
            // Nothing to evaluate for this format, so the header size
            // check (and its log message) does not apply.
            continue;
        }
        if (buffer_size < elf->elf_converter.c_header.h_size) {
            // buffer_size never exceeds sizeof(buffer), so narrowing it for
            // the "%d" conversion is lossless.
            log_debug("file content too small (%d) for header detection: %s",
                      static_cast<int>(buffer_size),
                      elf->get_name().get());
            continue;
        }
        for (const auto& hpair : exprs) {
            auto& he = in->e_header_exprs[elf->get_name()][hpair.first];

            if (!he.che_enabled) {
                continue;
            }

            auto* stmt = he.che_stmt.in();

            if (stmt == nullptr) {
                continue;
            }
            sqlite3_reset(stmt);
            auto count = sqlite3_bind_parameter_count(stmt);
            for (int lpc = 0; lpc < count; lpc++) {
                const auto* name = sqlite3_bind_parameter_name(stmt, lpc + 1);

                if (name == nullptr) {
                    // Nameless parameters ("?") have no name to look up;
                    // leave them unbound instead of dereferencing NULL.
                    continue;
                }
                if (name[0] == '$') {
                    const char* env_value;

                    if ((env_value = getenv(&name[1])) != nullptr) {
                        sqlite3_bind_text(
                            stmt, lpc + 1, env_value, -1, SQLITE_STATIC);
                    }
                    continue;
                }
                if (strcmp(name, ":header") == 0) {
                    sqlite3_bind_text(stmt,
                                      lpc + 1,
                                      hexbuf.in(),
                                      hexbuf.size(),
                                      SQLITE_STATIC);
                    continue;
                }
                if (strcmp(name, ":filepath") == 0) {
                    sqlite3_bind_text(
                        stmt, lpc + 1, filename.c_str(), -1, SQLITE_STATIC);
                    continue;
                }
            }

            auto step_res = sqlite3_step(stmt);

            switch (step_res) {
                case SQLITE_OK:
                case SQLITE_DONE:
                    // No row: the expression did not match.
                    continue;
                case SQLITE_ROW:
                    break;
                default: {
                    log_error(
                        "failed to execute file-format header expression: "
                        "%s:%s -- %s",
                        elf->get_name().get(),
                        hpair.first.c_str(),
                        sqlite3_errmsg(in->e_db.in()));
                    // Do not retry an expression that fails at runtime.
                    he.che_enabled = false;
                    continue;
                }
            }

            log_info("detected format for: %s -- %s (header-expr: %s)",
                     filename.c_str(),
                     elf->get_name().get(),
                     hpair.first.c_str());
            return external_file_format{
                elf->get_name().to_string(),
                elf->elf_converter.c_command.pp_value,
                elf->elf_converter.c_command.pp_location.sl_source.to_string(),
            };
        }
    }

    return nonstd::nullopt;
}
void
external_log_format::build(std::vector<lnav::console::user_message>& errors)
{
@ -2013,6 +2149,70 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
stable_sort(this->elf_level_pairs.begin(), this->elf_level_pairs.end());
{
safe::WriteAccess<safe_format_header_expressions> hexprs(
format_header_exprs);
if (hexprs->e_db.in() == nullptr) {
if (sqlite3_open(":memory:", hexprs->e_db.out()) != SQLITE_OK) {
log_error("unable to open memory DB");
return;
}
register_sqlite_funcs(hexprs->e_db.in(), sqlite_registration_funcs);
}
for (const auto& hpair : this->elf_converter.c_header.h_exprs.he_exprs)
{
auto stmt_str
= fmt::format(FMT_STRING("SELECT 1 WHERE {}"), hpair.second);
compiled_header_expr che;
log_info("preparing file-format header expression: %s",
stmt_str.c_str());
auto retcode = sqlite3_prepare_v2(hexprs->e_db.in(),
stmt_str.c_str(),
stmt_str.size(),
che.che_stmt.out(),
nullptr);
if (retcode != SQLITE_OK) {
auto sql_al = attr_line_t(hpair.second)
.with_attr_for_all(SA_PREFORMATTED.value())
.with_attr_for_all(
VC_ROLE.value(role_t::VCR_QUOTED_CODE));
readline_sqlite_highlighter(sql_al, -1);
intern_string_t watch_expr_path = intern_string::lookup(
fmt::format(FMT_STRING("/{}/converter/header/expr/{}"),
this->elf_name,
hpair.first));
auto snippet = lnav::console::snippet::from(
source_location(watch_expr_path), sql_al);
auto um = lnav::console::user_message::error(
"SQL expression is invalid")
.with_reason(sqlite3_errmsg(hexprs->e_db.in()))
.with_snippet(snippet);
errors.emplace_back(um);
continue;
}
hexprs->e_header_exprs[this->elf_name][hpair.first]
= std::move(che);
}
if (!this->elf_converter.c_header.h_exprs.he_exprs.empty()
&& this->elf_converter.c_command.pp_value.empty())
{
auto um = lnav::console::user_message::error(
"A command is required when a converter is defined")
.with_help(
"The converter command transforms the file "
"into a format that can be consumed by lnav")
.with_snippets(this->get_snippets());
errors.emplace_back(um);
}
}
for (auto& vd : this->elf_value_def_order) {
std::vector<std::string>::iterator act_iter;
@ -2949,18 +3149,6 @@ external_log_format::match_name(const std::string& filename)
.has_value();
}
/**
 * Check whether this format may be used for a file of the given MIME type.
 *
 * A format that lists no MIME types accepts only "text/plain"; otherwise
 * the type has to be one of the listed ones.
 *
 * @param mt The MIME type associated with the file.
 * @return True if the format applies to that MIME type.
 */
bool
external_log_format::match_mime_type(const mime_type& mt) const
{
    const bool plain_text = mt.mt_type == "text" && mt.mt_subtype == "plain";

    return (plain_text && this->elf_mime_types.empty())
        || this->elf_mime_types.count(mt) == 1;
}
auto
external_log_format::value_line_count(const intern_string_t ist,
bool top_level,

@ -352,14 +352,6 @@ public:
virtual bool match_name(const std::string& filename) { return true; }
/**
 * Default MIME type check: only "text/plain" is accepted.
 *
 * @param mt The MIME type associated with the file.
 * @return True if `mt` is "text/plain".
 */
virtual bool match_mime_type(const mime_type& mt) const
{
    return mt.mt_type == "text" && mt.mt_subtype == "plain";
}
struct scan_match {
uint32_t sm_quality;
};

@ -141,8 +141,6 @@ public:
bool match_name(const std::string& filename) override;
bool match_mime_type(const mime_type& mt) const override;
scan_result_t scan(logfile& lf,
std::vector<logline>& dst,
const line_info& offset,
@ -313,7 +311,6 @@ public:
std::vector<ghc::filesystem::path> elf_format_source_order;
std::map<intern_string_t, int> elf_format_sources;
std::list<intern_string_t> elf_collision;
std::set<mime_type> elf_mime_types;
factory_container<lnav::pcre2pp::code> elf_filename_pcre;
std::map<std::string, std::shared_ptr<pattern>> elf_patterns;
std::vector<std::shared_ptr<pattern>> elf_pattern_order;
@ -344,6 +341,23 @@ public:
bool elf_has_module_format{false};
bool elf_builtin_format{false};
// The SQLite expressions used to check whether a file header matches this
// format.
struct header_exprs {
// Expression name -> SQLite expression text.
std::map<std::string, std::string> he_exprs;
};
// File header detection definitions.
struct header {
header_exprs h_exprs;
// The minimum size required for this header type; defaults to 32.
size_t h_size{32};
};
// The "converter" property of a log format definition.
struct converter {
// The MIME type.
std::string c_type;
header c_header;
// The script used to convert the file.
positioned_property<std::string> c_command;
};
// The converter settings of this format.
converter elf_converter;
using search_table_pcre2pp
= factory_container<lnav::pcre2pp::code, int>::with_default_args<
log_search_table_ns::PATTERN_OPTIONS>;

@ -281,8 +281,6 @@ read_format_field(yajlpp_parse_context* ypc,
} else if (field_name == "module-field") {
elf->elf_module_id_field = intern_string::lookup(value);
elf->elf_container = true;
} else if (field_name == "mime-types") {
elf->elf_mime_types.insert(mime_type::from_str(value));
}
return 1;
@ -817,6 +815,37 @@ static const struct json_path_container search_table_handlers = {
.with_children(search_table_def_handlers),
};
// Handlers for the properties of a format's "converter/header/expr"
// object: a property whose name matches \w+ is tied to
// header_exprs::he_exprs.
static const struct json_path_container header_expr_handlers = {
yajlpp::pattern_property_handler(R"((?<header_expr_name>\w+))")
.with_description("SQLite expression")
.for_field(&external_log_format::header_exprs::he_exprs),
};
// Handlers for a format's "converter/header" object: the "expr" child
// object and the "size" field.
static const struct json_path_container header_handlers = {
yajlpp::property_handler("expr")
.with_description("The expressions used to check if a file header "
"matches this file format")
.for_child(&external_log_format::header::h_exprs)
.with_children(header_expr_handlers),
yajlpp::property_handler("size")
.with_description("The minimum size required for this header type")
.for_field(&external_log_format::header::h_size),
};
/**
 * Handlers for the "converter" object of a format definition: "type" is
 * bound to converter::c_type, "header" to converter::c_header (children
 * handled by header_handlers) and "command" to converter::c_command.
 */
static const struct json_path_container converter_handlers = {
yajlpp::property_handler("type")
.with_description("The MIME type")
.for_field(&external_log_format::converter::c_type),
yajlpp::property_handler("header")
.with_description("File header detection definitions")
.for_child(&external_log_format::converter::c_header)
.with_children(header_handlers),
yajlpp::property_handler("command")
.with_description("The script used to convert the file")
// NOTE(review): the pattern has no ^/$ anchors; whether with_pattern()
// requires a full match or accepts a partial one is not visible here
// -- confirm
.with_pattern(R"([\w\.\-]+)")
.for_field(&external_log_format::converter::c_command),
};
const struct json_path_container format_handlers = {
yajlpp::property_handler("regex")
.with_description(
@ -826,14 +855,16 @@ const struct json_path_container format_handlers = {
json_path_handler("json", read_format_bool)
.with_description(
R"(Indicates that log files are JSON-encoded (deprecated, use "file-type": "json"))"),
json_path_handler("convert-to-local-time", read_format_bool)
json_path_handler("convert-to-local-time")
.with_description("Indicates that displayed timestamps should "
"automatically be converted to local time"),
json_path_handler("hide-extra", read_format_bool)
"automatically be converted to local time")
.for_field(&external_log_format::lf_date_time,
&date_time_scanner::dts_local_time),
json_path_handler("hide-extra")
.with_description(
"Specifies whether extra values in JSON logs should be displayed")
.for_field(&external_log_format::jlf_hide_extra),
json_path_handler("multiline", read_format_bool)
json_path_handler("multiline")
.with_description("Indicates that log messages can span multiple lines")
.for_field(&log_format::lf_multiline),
json_path_handler("timestamp-divisor", read_format_double)
@ -845,10 +876,11 @@ const struct json_path_container format_handlers = {
.with_description("A regular expression that restricts this format to "
"log files with a matching name")
.for_field(&external_log_format::elf_filename_pcre),
json_path_handler("mime-types#", read_format_field)
.with_pattern(R"(^\w/[\w\.]+)")
.with_description(
"A list of mime-types this format should be used for"),
json_path_handler("converter")
.with_description("Describes how the file format can be detected and "
"converted to a log that can be understood by lnav")
.for_child(&external_log_format::elf_converter)
.with_children(converter_handlers),
json_path_handler("level-field")
.with_description(
"The name of the level field in the log message pattern")

@ -259,9 +259,9 @@ logfile::process_prefix(shared_buffer_ref& sbr,
this->lf_mismatched_formats.insert(curr->get_name());
continue;
}
if (this->lf_options.loo_mime_type
&& !curr->match_mime_type(
this->lf_options.loo_mime_type.value()))
if (this->lf_options.loo_format_name
&& !(curr->get_name()
== this->lf_options.loo_format_name.value()))
{
if (li.li_file_range.fr_offset == 0) {
log_debug("(%s) does not match file format: %s",

@ -66,7 +66,7 @@ struct logfile_open_options_base {
ssize_t loo_visible_size_limit{-1};
bool loo_tail{true};
file_format_t loo_file_format{file_format_t::UNKNOWN};
nonstd::optional<mime_type> loo_mime_type;
nonstd::optional<std::string> loo_format_name;
nonstd::optional<lnav::piper::running_handle> loo_piper;
};

@ -26,19 +26,6 @@
"transfer-command": "cat > {0:} && chmod ugo+rx ./{0:}"
}
},
"file-format": {
"application/vnd.tcpdump.pcap": {
"title": "\ud83d\udda5 Pcap",
"header": {
"expr": {
"pcapng": ":header REGEXP '^0a0d0d0a.{8}(?:1a2b3c4d|4d3c2b1a).*'",
"pcap": ":header REGEXP '^(?:a1b2c3d4|d4c3b2a1|a1b23c4d|4d3cb2a1).*'"
},
"size": 24
},
"converter": "pcap-converter.sh"
}
},
"piper": {
"max-size": 10485760,
"rotations": 4

@ -9,5 +9,5 @@ BUILTIN_LNAVSCRIPTS = \
BUILTIN_SHSCRIPTS = \
$(srcdir)/scripts/dump-pid.sh \
$(srcdir)/scripts/pcap-converter.sh \
$(srcdir)/scripts/pcap_log-converter.sh \
$()

@ -501,7 +501,7 @@ public:
void reload_config(error_reporter& reporter) override
{
if (!view_colors::initialized) {
return;
view_colors::vc_active_palette = ansi_colors();
}
auto& vc = view_colors::singleton();
@ -556,9 +556,8 @@ view_colors::init(bool headless)
initialized = true;
{
auto reporter = [](const void*, const lnav::console::user_message&) {
};
auto reporter
= [](const void*, const lnav::console::user_message& um) {};
_COLOR_LISTENER.reload_config(reporter);
}

@ -278,10 +278,9 @@ public:
std::unordered_map<std::string, string_attr_pair> vc_class_to_role;
static bool initialized;
private:
static term_color_palette* vc_active_palette;
private:
/** Private constructor that initializes the member fields. */
view_colors();

@ -866,8 +866,8 @@ execute_examples()
EXAMPLE_RESULTS[ex.he_cmd] = result;
log_debug("example: %s", ex.he_cmd);
log_debug("example result: %s",
log_trace("example: %s", ex.he_cmd);
log_trace("example result: %s",
result.get_string().c_str());
break;
}

@ -226,6 +226,7 @@ dist_noinst_DATA = \
expected/test_tailer.sh_12f539e535df04364316699f9edeac461aa9f9de.err \
expected/test_tailer.sh_12f539e535df04364316699f9edeac461aa9f9de.out \
ansi-colors.0.in \
bad-config/formats/invalid-file-format/format.json \
bad-config/formats/invalid-json-format/format.json \
bad-config/formats/invalid-properties/format.json \
bad-config/formats/invalid-regex/format.json \
@ -235,7 +236,7 @@ dist_noinst_DATA = \
bad-config/formats/invalid-sql/init2.sql \
bad-config/formats/no-regexes/format.json \
bad-config/formats/no-samples/format.json \
bad-config2/configs/invalid-file-format/config.json \
bad-config2/configs/invalid-theme/config.json \
bad-config2/formats/invalid-config/config.json \
bad-config2/formats/invalid-config/config.bad-schema.json \
bad-config2/formats/invalid-config/config.malformed.json \

@ -0,0 +1,27 @@
{
"$schema": "https://lnav.org/schemas/format-v1.schema.json",
"bad_file_format1": {
"title": "bad file format",
"regex": {
"std": {
"pattern": "(?<timestamp>\\d+): (?<body>.*)$"
}
},
"converter": {
"header": {
"expr": {
"default": ":header REGEXP 'foobar"
},
"size": 8
}
},
"timestamp-format": [
"%i"
],
"sample": [
{
"line": "1234: abcd"
}
]
}
}

@ -1,19 +0,0 @@
{
"$schema": "https://lnav.org/schemas/config-v1.schema.json",
"tuning": {
"file-format": {
"application/vnd.example.com": {
"title": "example",
"header": {
"expr": {
"default": ":header REGEXP 'foobar"
},
"size": 8
}
},
"application/vnd.example2.com": {
"title": "example"
}
}
}
}

@ -0,0 +1,15 @@
{
"$schema": "https://lnav.org/schemas/config-v1.schema.json",
"ui": {
"theme-defs": {
"invalid-theme": {
"styles": {
"text": {
"color": "InvalidColor",
"bad-property": "abc"
}
}
}
}
}
}

@ -1,3 +1,11 @@
⚠ warning: unexpected value for property “/ui/theme-defs/invalid-theme/styles/text/bad-property”
 --> {test_dir}/bad-config2/configs/invalid-theme/config.json:9
 |  "bad-property": "abc"
 = help: Available Properties
color #hex|color_name
background-color #hex|color_name
underline
bold
✘ error: 'bad' is not a supported configuration $schema version
 --> {test_dir}/bad-config2/formats/invalid-config/config.bad-schema.json:2
 |  "$schema": "bad" 
@ -36,34 +44,15 @@
✘ error: invalid JSON
reason: parse error: premature EOF
 --> {test_dir}/bad-config2/formats/invalid-config/config.truncated.json:3
✘ error: invalid value for property “/tuning/file-format/application~1vnd.example.com/header/expr/default”
reason: SQL expression is invalid
 |  reason: unrecognized token: "'foobar"
 |   --> /tuning/file-formats/application~1vnd.example.com/header/expr/default
 |   | :header REGEXP 'foobar 
 --> {test_dir}/bad-config2/configs/invalid-file-format/config.json:9
 = help: Property Synopsis
/tuning/file-format/application~1vnd.example.com/header/expr/default
Description
SQLite expression
✘ error: missing value for property “/tuning/file-format/application~1vnd.example.com/converter”
reason: A converter is required for a file format
 |  reason: The converter script transforms the file into a format that can be consumed by lnav
 = help: Property Synopsis
/tuning/file-format/application~1vnd.example.com/converter
Description
The script used to convert the file
✘ error: missing value for property “/tuning/file-format/application~1vnd.example2.com/header/expr/”
reason: At least one header expression is required for a file format
 |  reason: Header expressions are used to detect a format
 = help: Property Synopsis
/tuning/file-format/application~1vnd.example2.com/header/expr/
Description
SQLite expression
✘ error: missing value for property “/tuning/file-format/application~1vnd.example2.com/converter”
reason: A converter is required for a file format
 |  reason: The converter script transforms the file into a format that can be consumed by lnav
✘ error: invalid value for property “/ui/theme-defs/invalid-theme/styles/text/color”
reason: invalid color -- “InvalidColor”
 |  reason: Unknown color: 'InvalidColor'. See https://jonasjacek.github.io/colors/ for a list of supported color names
 --> {test_dir}/bad-config2/configs/invalid-theme/config.json:8
 = help: Property Synopsis
/tuning/file-format/application~1vnd.example2.com/converter
/ui/theme-defs/invalid-theme/styles/text/color #hex|color_name
Description
The script used to convert the file
The foreground color value for this style. The value can be the name of an xterm color, the hexadecimal value, or a theme variable reference.
Examples
#fff
Green
$black

@ -104,6 +104,13 @@
/$schema The URI of the schema for this file
Description
Specifies the type of this file
✘ error: SQL expression is invalid
reason: unrecognized token: "'foobar"
 --> /bad_file_format1/converter/header/expr/default
 | :header REGEXP 'foobar 
✘ error: A command is required when a converter is defined
 --> {test_dir}/bad-config/formats/invalid-file-format/format.json:4
 = help: The converter command transforms the file into a format that can be consumed by lnav
✘ error: invalid line format element “/bad_json_log/line-format/0/field”
reason: “” is not a defined value
 --> {test_dir}/bad-config/formats/invalid-json-format/format.json:7

Loading…
Cancel
Save