[json-log] handle invalid json log lines

Header from folded patch 'foldme1.patch':

[local] foldme
pull/692/head
Timothy Stack 5 years ago
parent cd3b9bbb71
commit 73e902f7d8

@ -203,7 +203,7 @@ add_library(diag STATIC
hist_source.cc
hotkeys.cc
input_dispatcher.cc
intern_string.cc
base/intern_string.cc
base/is_utf8.cc
json-extension-functions.cc
yajlpp/json_op.cc
@ -308,7 +308,7 @@ add_library(diag STATIC
highlighter.hh
hotkeys.hh
input_dispatcher.hh
intern_string.hh
base/intern_string.hh
base/is_utf8.hh
k_merge_tree.h
log_actions.hh

@ -234,7 +234,6 @@ noinst_HEADERS = \
init.sql \
init-sql.h \
input_dispatcher.hh \
intern_string.hh \
k_merge_tree.h \
line_buffer.hh \
listview_curses.hh \
@ -347,7 +346,6 @@ libdiag_a_SOURCES = \
hist_source.cc \
hotkeys.cc \
input_dispatcher.cc \
intern_string.cc \
json-extension-functions.cc \
line_buffer.cc \
listview_curses.cc \

@ -39,7 +39,7 @@
#include <string>
#include <vector>
#include "intern_string.hh"
#include "base/intern_string.hh"
/**
* Encapsulates a range in a string.

@ -8,6 +8,7 @@ noinst_LIBRARIES = libbase.a
noinst_HEADERS = \
enum_util.hh \
file_range.hh \
intern_string.hh \
is_utf8.hh \
lnav_log.hh \
opt_util.hh \
@ -16,6 +17,7 @@ noinst_HEADERS = \
string_util.hh
libbase_a_SOURCES = \
intern_string.cc \
is_utf8.cc \
lnav_log.cc \
string_util.cc

@ -256,8 +256,9 @@ public:
return strcmp(this->get(), rhs) == 0;
}
void operator=(const intern_string_t &rhs) {
intern_string_t &operator=(const intern_string_t &rhs) {
this->ist_interned_string = rhs.ist_interned_string;
return *this;
}
private:

@ -34,6 +34,7 @@
#include <string.h>
#include <strings.h>
#include "fmt/format.h"
#include "yajlpp/yajlpp.hh"
#include "yajlpp/yajlpp_def.hh"
#include "sql_util.hh"
@ -495,32 +496,21 @@ bool external_log_format::scan_for_partial(shared_buffer_ref &sbr, size_t &len_o
log_format::scan_result_t external_log_format::scan(logfile &lf,
std::vector<logline> &dst,
off_t offset,
const line_info &li,
shared_buffer_ref &sbr)
{
if (this->elf_type == ELF_TYPE_JSON) {
yajlpp_parse_context &ypc = *(this->jlf_parse_context);
logline ll(offset, 0, 0, LEVEL_INFO);
logline ll(li.li_file_range.fr_offset, 0, 0, LEVEL_INFO);
yajl_handle handle = this->jlf_yajl_handle.in();
json_log_userdata jlu(sbr);
if (sbr.empty()) {
if (li.li_partial) {
log_debug("skipping partial line at offset %d", li.li_file_range.fr_offset);
return log_format::SCAN_INCOMPLETE;
}
const auto *line_data = (const unsigned char *) sbr.get_data();
size_t line_end = sbr.length();
if (line_end > 0) {
line_end -= 1;
}
while (line_end > 0 && isspace(line_data[line_end])) {
line_end -= 1;
}
if (line_end == 0 || line_data[line_end] != '}') {
return log_format::SCAN_INCOMPLETE;
}
yajl_reset(handle);
ypc.set_static_handler(json_log_handlers[0]);
@ -528,8 +518,8 @@ log_format::scan_result_t external_log_format::scan(logfile &lf,
ypc.ypc_ignore_unused = true;
ypc.ypc_alt_callbacks.yajl_start_array = json_array_start;
ypc.ypc_alt_callbacks.yajl_start_map = json_array_start;
ypc.ypc_alt_callbacks.yajl_end_array = NULL;
ypc.ypc_alt_callbacks.yajl_end_map = NULL;
ypc.ypc_alt_callbacks.yajl_end_array = nullptr;
ypc.ypc_alt_callbacks.yajl_end_map = nullptr;
jlu.jlu_format = this;
jlu.jlu_base_line = &ll;
jlu.jlu_line_value = sbr.get_data();
@ -548,18 +538,28 @@ log_format::scan_result_t external_log_format::scan(logfile &lf,
ll.set_level((log_level_t) (ll.get_level_and_flags() |
LEVEL_CONTINUED));
}
dst.push_back(ll);
dst.emplace_back(ll);
}
}
else {
unsigned char *msg;
int line_count = 1;
msg = yajl_get_error(handle, 1, (const unsigned char *)sbr.get_data(), sbr.length());
if (msg != NULL) {
log_debug("Unable to parse line at offset %d: %s", offset, msg);
if (msg != nullptr) {
log_debug("Unable to parse line at offset %d: %s", li.li_file_range.fr_offset, msg);
line_count = count(msg, msg + strlen((char *) msg), '\n');
yajl_free_error(handle, msg);
}
return log_format::SCAN_INCOMPLETE;
if (!this->lf_specialized) {
return log_format::SCAN_NO_MATCH;
}
for (int lpc = 0; lpc < line_count; lpc++) {
ll.set_time(dst.back().get_time());
ll.set_level(log_level_t::LEVEL_ERROR);
ll.set_sub_offset(lpc);
dst.emplace_back(ll);
}
}
return log_format::SCAN_MATCH;
@ -678,7 +678,7 @@ log_format::scan_result_t external_log_format::scan(logfile &lf,
}
}
dst.emplace_back(offset, log_tv, level, mod_index, opid);
dst.emplace_back(li.li_file_range.fr_offset, log_tv, level, mod_index, opid);
if (orig_lock != curr_fmt) {
uint32_t lock_line;
@ -1032,8 +1032,20 @@ void external_log_format::get_subline(const logline &ll, shared_buffer_ref &sbr,
yajl_status parse_status = yajl_parse(handle,
(const unsigned char *)sbr.get_data(), sbr.length());
if (parse_status == yajl_status_ok &&
yajl_complete_parse(handle) == yajl_status_ok) {
if (parse_status != yajl_status_ok ||
yajl_complete_parse(handle) != yajl_status_ok) {
unsigned char *msg;
string full_msg;
msg = yajl_get_error(handle, 1, (const unsigned char *)sbr.get_data(), sbr.length());
if (msg != nullptr) {
full_msg = fmt::format("lnav: unable to parse line at offset {}: {}", ll.get_offset(), msg);
yajl_free_error(handle, msg);
}
this->jlf_cached_line.resize(full_msg.size());
memcpy(this->jlf_cached_line.data(), full_msg.data(), full_msg.size());
} else {
std::vector<logline_value>::iterator lv_iter;
bool used_values[this->jlf_line_values.size()];
struct line_range lr;
@ -1235,15 +1247,15 @@ void external_log_format::get_subline(const logline &ll, shared_buffer_ref &sbr,
nl_pos < str.size());
}
this->jlf_line_offsets.push_back(0);
for (size_t lpc = 0; lpc < this->jlf_cached_line.size(); lpc++) {
if (this->jlf_cached_line[lpc] == '\n') {
this->jlf_line_offsets.push_back(lpc + 1);
}
}
this->jlf_line_offsets.push_back(this->jlf_cached_line.size());
}
this->jlf_line_offsets.push_back(0);
for (size_t lpc = 0; lpc < this->jlf_cached_line.size(); lpc++) {
if (this->jlf_cached_line[lpc] == '\n') {
this->jlf_line_offsets.push_back(lpc + 1);
}
}
this->jlf_line_offsets.push_back(this->jlf_cached_line.size());
this->jlf_cached_offset = ll.get_offset();
this->jlf_cached_full = full_message;
}
@ -1272,7 +1284,7 @@ void external_log_format::get_subline(const logline &ll, shared_buffer_ref &sbr,
}
else {
sbr.share(this->jlf_share_manager,
&this->jlf_cached_line[0] + this_off,
this->jlf_cached_line.data() + this_off,
next_off - this_off);
}
}

@ -54,10 +54,11 @@
#include "lnav_util.hh"
#include "byte_array.hh"
#include "view_curses.hh"
#include "intern_string.hh"
#include "base/intern_string.hh"
#include "shared_buffer.hh"
#include "highlighter.hh"
#include "log_level.hh"
#include "line_buffer.hh"
struct sqlite3;
class logfile;
@ -640,10 +641,7 @@ public:
static log_format *find_root_format(const char *name) {
std::vector<log_format *> &fmts = get_root_formats();
for (std::vector<log_format *>::iterator iter = fmts.begin();
iter != fmts.end();
++iter) {
log_format *lf = *iter;
for (auto lf : fmts) {
if (lf->get_name() == name) {
return lf;
}
@ -705,7 +703,7 @@ public:
*/
virtual scan_result_t scan(logfile &lf,
std::vector<logline> &dst,
off_t offset,
const line_info &li,
shared_buffer_ref &sbr) = 0;
virtual bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out) {
@ -813,6 +811,7 @@ public:
std::vector<highlighter> lf_highlighters;
bool lf_is_self_describing;
bool lf_time_ordered;
bool lf_specialized{false};
protected:
static std::vector<log_format *> lf_root_formats;
@ -963,7 +962,7 @@ public:
scan_result_t scan(logfile &lf,
std::vector<logline> &dst,
off_t offset,
const line_info &offset,
shared_buffer_ref &sbr);
bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out);
@ -998,6 +997,7 @@ public:
external_log_format *elf = new external_log_format(*this);
std::unique_ptr<log_format> retval(elf);
elf->lf_specialized = true;
this->lf_pattern_locks.clear();
if (fmt_lock != -1) {
elf->lf_pattern_locks.emplace_back(0, fmt_lock);

@ -139,7 +139,7 @@ class generic_log_format : public log_format {
scan_result_t scan(logfile &lf,
vector<logline> &dst,
off_t offset,
const line_info &li,
shared_buffer_ref &sbr)
{
struct exttm log_time;
@ -167,7 +167,7 @@ class generic_log_format : public log_format {
this->check_for_new_year(dst, log_time, log_tv);
}
dst.emplace_back(offset, log_tv, level_val);
dst.emplace_back(li.li_file_range.fr_offset, log_tv, level_val);
return SCAN_MATCH;
}
@ -399,7 +399,7 @@ public:
};
scan_result_t scan_int(std::vector<logline> &dst,
off_t offset,
const line_info &li,
shared_buffer_ref &sbr) {
static const intern_string_t STATUS_CODE = intern_string::lookup("bro_status_code");
static const intern_string_t TS = intern_string::lookup("bro_ts");
@ -464,7 +464,7 @@ public:
}
if (found_ts) {
dst.emplace_back(offset, tv, level, 0, opid);
dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0, opid);
return SCAN_MATCH;
} else {
return SCAN_NO_MATCH;
@ -473,12 +473,12 @@ public:
scan_result_t scan(logfile &lf,
std::vector<logline> &dst,
off_t offset,
const line_info &li,
shared_buffer_ref &sbr) {
static pcrepp SEP_RE(R"(^#separator\s+(.+))");
if (!this->blf_format_name.empty()) {
return this->scan_int(dst, offset, sbr);
return this->scan_int(dst, li, sbr);
}
if (dst.empty() || dst.size() > 20 || sbr.empty() || sbr.get_data()[0] == '#') {
@ -606,7 +606,7 @@ public:
!this->blf_field_defs.empty()) {
this->blf_header_size = dst.size() - 1;
dst.clear();
return this->scan_int(dst, offset, sbr);
return this->scan_int(dst, li, sbr);
}
this->blf_format_name.clear();

@ -147,7 +147,7 @@ bool logfile::process_prefix(shared_buffer_ref &sbr, const line_info &li)
prescan_time = this->lf_index[0].get_time();
}
/* We've locked onto a format, just use that scanner. */
found = this->lf_format->scan(*this, this->lf_index, li.li_file_range.fr_offset, sbr);
found = this->lf_format->scan(*this, this->lf_index, li, sbr);
}
else if (this->lf_options.loo_detect_format &&
this->lf_index.size() < MAX_UNRECOGNIZED_LINES) {
@ -168,7 +168,7 @@ bool logfile::process_prefix(shared_buffer_ref &sbr, const line_info &li)
(*iter)->clear();
this->set_format_base_time(*iter);
found = (*iter)->scan(*this, this->lf_index, li.li_file_range.fr_offset, sbr);
found = (*iter)->scan(*this, this->lf_index, li, sbr);
if (found == log_format::SCAN_MATCH) {
#if 0
require(this->lf_index.size() == 1 ||

@ -57,7 +57,7 @@
#include "base/lnav_log.hh"
#include "auto_mem.hh"
#include "intern_string.hh"
#include "base/intern_string.hh"
#include <stdio.h>

@ -35,7 +35,7 @@
#include <vector>
#include "log_level.hh"
#include "intern_string.hh"
#include "base/intern_string.hh"
struct rgb_color {
static bool from_str(const string_fragment &color,

@ -542,6 +542,26 @@ yajl_status yajlpp_parse_context::complete_parse()
return retval;
}
const intern_string_t yajlpp_parse_context::get_path() const
{
if (this->ypc_path.size() <= 1) {
return intern_string_t();
}
return intern_string::lookup(&this->ypc_path[1],
this->ypc_path.size() - 2);
}
const intern_string_t yajlpp_parse_context::get_full_path() const
{
if (this->ypc_path.size() <= 1) {
static intern_string_t SLASH = intern_string::lookup("/");
return SLASH;
}
return intern_string::lookup(&this->ypc_path[0],
this->ypc_path.size() - 1);
}
void yajlpp_gen_context::gen()
{
yajlpp_map root(this->ygc_handle);
@ -559,7 +579,7 @@ yajlpp_gen_context &yajlpp_gen_context::with_context(yajlpp_parse_context &ypc)
if (ypc.ypc_current_handler == nullptr &&
!ypc.ypc_handler_stack.empty() &&
ypc.ypc_handler_stack.back() != nullptr) {
this->ygc_handlers = static_cast<json_path_handler *>(ypc.ypc_handler_stack.back()->jph_children);
this->ygc_handlers = dynamic_cast<json_path_handler *>(ypc.ypc_handler_stack.back()->jph_children);
this->ygc_depth += 1;
}
return *this;

@ -49,7 +49,7 @@
#include "optional.hpp"
#include "pcrepp/pcrepp.hh"
#include "json_ptr.hh"
#include "intern_string.hh"
#include "base/intern_string.hh"
#include "yajl/api/yajl_parse.h"
#include "yajl/api/yajl_gen.h"
@ -262,15 +262,9 @@ public:
return std::string(frag, len);
};
const intern_string_t get_path() const {
return intern_string::lookup(&this->ypc_path[1],
this->ypc_path.size() - 2);
};
const intern_string_t get_path() const;
const intern_string_t get_full_path() const {
return intern_string::lookup(&this->ypc_path[0],
this->ypc_path.size() - 1);
};
const intern_string_t get_full_path() const;
bool is_level(size_t level) const {
return this->ypc_path_index_stack.size() == level;

@ -154,10 +154,11 @@ int main(int argc, char *argv[])
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter) {
line_info li = { {13}};
logfile *lf = nullptr; // XXX
(*iter)->clear();
if ((*iter)->scan(*lf, index, 13, sbr) == log_format::SCAN_MATCH) {
if ((*iter)->scan(*lf, index, li, sbr) == log_format::SCAN_MATCH) {
format = (*iter)->specialized();
found = true;
}

@ -2,7 +2,7 @@
"ntest_log" : {
"title" : "Test JSON Log",
"json" : true,
"file-pattern" : "logfile_nested_json\\.json",
"file-pattern" : "logfile_(nested|invalid)_json\\.json",
"description" : "Test config",
"line-format" : [
{ "field" : "ts" },

@ -0,0 +1,5 @@
{"ts": "2013-09-06T20:00:48.124817Z", "@fields": { "lvl": "TRACE", "msg": "trace test"}}
{"ts": "2013-09-06T20:00:49.124817Z", "@fields": { "lvl": "INFO", "msg": "Starting up service"}}
{"ts": "2013-09-06T22:00:49.124817Z", "@fields": { "lvl": "INFO", ..."msg": "Shutting down service", "user": "steve@example.com"}}
{"ts": "2013-09-06T22:00:59.124817Z", "@fields": { "lvl": "DEBUG5", "msg": "Details..."}}
{"ts": "2013-09-06T22:00:59.124817Z", "@fields": { "lvl": "DEBUG4", "msg": "Details..."}}

@ -1,2 +1,2 @@
{ "__CURSOR" : "s=e991dd5775894620914f2f7126aae6c1;i=372a;b=0dc874d3fbd44bdbb6fbcfaa27481492;m=24ea327;t=573f280a663fd;x=9ceeb77ef4331563", "__REALTIME_TIMESTAMP" : "1534860261221373", "__MONOTONIC_TIMESTAMP" : "38708007", "_BOOT_ID" : "0dc874d3fbd44bdbb6fbcfaa27481492", "PRIORITY" : "1", "_MACHINE_ID" : "1234567890abcdef1234567890abcdef", "_HOSTNAME" : "imx6ul-medusa", "SYSLOG_FACILITY" : "3", "_UID" : "0", "_GID" : "0", "_SYSTEMD_SLICE" : "system.slice", "_CAP_EFFECTIVE" : "3fffffffff", "_TRANSPORT" : "stdout", "_STREAM_ID" : "d6422e98dfb24d128e274f917f04362f", "SYSLOG_IDENTIFIER" : "python", "_PID" : "184", "_COMM" : "python", "_EXE" : "/usr/bin/python2.7", "_CMDLINE" : "/usr/bin/python -u /usr/bin/medusa/GpsLocator/GpsLocator.py", "_SYSTEMD_CGROUP" : "/system.slice/medusa-GpsLocator.service", "_SYSTEMD_UNIT" : "medusa-GpsLocator.service", "_SYSTEMD_INVOCATION_ID" : "e8fb03c1b8bf47b8ac2fc82f9870fd57", "MESSAGE" : "GPS Reference longitude: 7.358143333" }
{ "__CURSOR" : "s=e991dd5775894620914f2f7126aae6c1;i=372b;b=0dc874d3fbd44bdbb6fbcfaa27481492;m=24ea327;t=573f280a663fd;x=26df66509c108970", "__REALTIME_TIMESTAMP" : "1534860261221373", "__MONOTONIC_TIMESTAMP" : "38708007", "_BOOT_ID" : "0dc874d3fbd44bdbb6fbcfaa27481492", "PRIORITY" : "6", "_MACHINE_ID" : "1234567890abcdef1234567890abcdef", "_HOSTNAME" : "imx6ul-medusa", "SYSLOG_FACILITY" : "3", "_UID" : "0", "_GID" : "0", "_SYSTEMD_SLICE" : "system.slice", "_CAP_EFFECTIVE" : "3fffffffff", "_TRANSPORT" : "stdout", "_STREAM_ID" : "d6422e98dfb24d128e274f917f04362f", "SYSLOG_IDENTIFIER" : "python", "_PID" : "184", "_COMM" : "python", "_EXE" : "/usr/bin/python2.7", "_CMDLINE" : "/usr/bin/python -u /usr/bin/medusa/GpsLocator/GpsLocator.py", "_SYSTEMD_CGROUP" : "/system.slice/medusa-GpsLocator.service", "_SYSTEMD_UNIT" : "medusa-GpsLocator.service", "_SYSTEMD_INVOCATION_ID" : "e8fb03c1b8bf47b8ac2fc82f9870fd57", "MESSAGE" : "GPS Reference latitude: 46.908706667" }
{ "__CURSOR" : "s=e991dd5775894620914f2f7126aae6c1;i=372b;b=0dc874d3fbd44bdbb6fbcfaa27481492;m=24ea327;t=573f280a663fd;x=26df66509c108970", "__REALTIME_TIMESTAMP" : "1534860261221373", "__MONOTONIC_TIMESTAMP" : "38708007", "_BOOT_ID" : "0dc874d3fbd44bdbb6fbcfaa27481492", "PRIORITY" : "6", "_MACHINE_ID" : "1234567890abcdef1234567890abcdef", "_HOSTNAME" : "imx6ul-medusa", "SYSLOG_FACILITY" : "3", "_UID" : "0", "_GID" : "0", "_SYSTEMD_SLICE" : "system.slice", "_CAP_EFFECTIVE" : "3fffffffff", "_TRANSPORT" : "stdout", "_STREAM_ID" : "d6422e98dfb24d128e274f917f04362f", "SYSLOG_IDENTIFIER" : "python", "_PID" : "184", "_COMM" : "python", "_EXE" : "/usr/bin/python2.7", "_CMDLINE" : "/usr/bin/python -u /usr/bin/medusa/GpsLocator/GpsLocator.py", "_SYSTEMD_CGROUP" : "/system.slice/medusa-GpsLocator.service", "_SYSTEMD_UNIT" : "medusa-GpsLocator.service", "_SYSTEMD_INVOCATION_ID" : "e8fb03c1b8bf47b8ac2fc82f9870fd57", "MESSAGE" : "GPS Reference latitude: 46.908706667" }

@ -517,3 +517,22 @@ log_line,log_part,log_time,log_idle_msecs,log_level,log_mark,log_comment,log_tag
1,<NULL>,2017-03-24 20:12:47.764,381524,critical,0,<NULL>,<NULL>,[],1.1.1.1,<NULL>,<NULL>,<NULL>,GET,166,/example/uri/5,500
2,<NULL>,2017-03-24 20:15:31.694,163930,warning,0,<NULL>,<NULL>,[],1.1.1.1,"{""foo"": ""bar""}","{""foo"": ""bar""}","{""foo"": ""bar""}",GET,166,/example/uri/5,400
EOF
run_test ${lnav_test} -n \
-d /tmp/lnav.err \
-I ${test_dir} \
${test_dir}/logfile_invalid_json.json
check_output "json log format is not working" <<EOF
2013-09-06T20:00:48.124 TRACE trace test
@fields: { "lvl": "TRACE", "msg": "trace test"}
2013-09-06T20:00:49.124 INFO Starting up service
@fields: { "lvl": "INFO", "msg": "Starting up service"}
lnav: unable to parse line at offset 186: lexical error: invalid char in json text.
"@fields": { "lvl": "INFO", ..."msg": "Shutting down servic
(right here) ------^
2013-09-06T22:00:59.124 DEBUG5 Details...
@fields: { "lvl": "DEBUG5", "msg": "Details..."}
2013-09-06T22:00:59.124 DEBUG4 Details...
@fields: { "lvl": "DEBUG4", "msg": "Details..."}
EOF

@ -31,7 +31,7 @@
#include <sys/time.h>
#include "intern_string.hh"
#include "base/intern_string.hh"
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.hh"

Loading…
Cancel
Save