[format] try to validate formats against a given file

pull/254/head
Timothy Stack 9 years ago
parent f392822aa8
commit 7ef92dab79

@ -6,6 +6,8 @@ lnav v0.7.4:
Shift+Left/Shift+Right). Shift+Left/Shift+Right).
* A color-coded bar has been added to the left side to show where * A color-coded bar has been added to the left side to show where
messages from one file stop and messages from another file start. messages from one file stop and messages from another file start.
* The '-C' option will now try to check any specified log files to
make sure the format(s) match all of the lines.
Fixes: Fixes:
* Nested fields in JSON logs are now supported for levels, bodies, etc... * Nested fields in JSON logs are now supported for levels, bodies, etc...

@ -82,6 +82,10 @@ check_output() {
test_num=`expr ${test_num} \+ 1` test_num=`expr ${test_num} \+ 1`
} }
# Print the name of the file holding the captured stderr of the current test,
# built from ${test_file_base} and ${test_num}.
#
# The expansion is quoted so that a base path containing whitespace or glob
# characters is printed verbatim instead of being split or expanded.
test_err_filename() {
    echo "${test_file_base}_${test_num}.err"
}
check_error_output() { check_error_output() {
diff -w -u - ${test_file_base}_${test_num}.err \ diff -w -u - ${test_file_base}_${test_num}.err \
> ${test_file_base}_${test_num}.err.diff > ${test_file_base}_${test_num}.err.diff

@ -57,7 +57,9 @@ Install the given format files in the $HOME/.lnav/formats/installed directory
and exit. and exit.
.TP .TP
\fB\-C\fR \fB\-C\fR
Check the configuration and exit. Check the configuration and exit. The log format files will be loaded and
checked. Any files given on the command-line will be loaded and checked to make
sure they match a log format.
.TP .TP
\fB\-d\fR file \fB\-d\fR file
Write debug messages to the given file. Write debug messages to the given file.

@ -3,6 +3,7 @@
"title" : "Common Access Log", "title" : "Common Access Log",
"description" : "The default web access log format for servers like Apache.", "description" : "The default web access log format for servers like Apache.",
"url" : "http://en.wikipedia.org/wiki/Common_Log_Format", "url" : "http://en.wikipedia.org/wiki/Common_Log_Format",
"multiline" : false,
"regex" : { "regex" : {
"ts-first-noquotes" : { "ts-first-noquotes" : {
"pattern" : "^(?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?<c_ip>[^ ]+) (?<cs_username>[^ ]+) (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? (?:-1|\\d+) (?<sc_status>\\d+) \\d+" "pattern" : "^(?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?<c_ip>[^ ]+) (?<cs_username>[^ ]+) (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? (?:-1|\\d+) (?<sc_status>\\d+) \\d+"
@ -567,6 +568,7 @@
"title" : "CUPS Page Log", "title" : "CUPS Page Log",
"description" : "The CUPS server log of printed pages.", "description" : "The CUPS server log of printed pages.",
"url" : "http://www.cups.org/documentation.php/doc-1.7/ref-page_log.html", "url" : "http://www.cups.org/documentation.php/doc-1.7/ref-page_log.html",
"multiline" : false,
"regex" : { "regex" : {
"pre-1.7" : { "pre-1.7" : {
"pattern" : "^(?<printer>[\\w_\\-\\.]+) (?<username>[\\w\\.\\-]+) (?<job_id>\\d+) \\[(?<timestamp>[^\\]]+)\\] (?<page_number>total|\\d+) (?<num_copies>\\d+) (?<job_billing>[^ ]+) (?<job_originating_hostname>[\\w\\.:\\-]+)$" "pattern" : "^(?<printer>[\\w_\\-\\.]+) (?<username>[\\w\\.\\-]+) (?<job_id>\\d+) \\[(?<timestamp>[^\\]]+)\\] (?<page_number>total|\\d+) (?<num_copies>\\d+) (?<job_billing>[^ ]+) (?<job_originating_hostname>[\\w\\.:\\-]+)$"
@ -747,6 +749,7 @@
"title" : "Strace", "title" : "Strace",
"description" : "The strace output format.", "description" : "The strace output format.",
"url" : "http://en.wikipedia.org/wiki/Strace", "url" : "http://en.wikipedia.org/wiki/Strace",
"multiline" : false,
"regex" : { "regex" : {
"std" : { "std" : {
"pattern" : "^(?<timestamp>\\d{2}:\\d{2}:\\d{2}\\.\\d{6}) (?<syscall>\\w+)\\((?<body>.*)\\)\\s+=\\s+(?<rc>[-\\w]+)(?: (?<errno>\\w+) \\([^\\)]+\\))?(?: <(?<duration>\\d+\\.\\d+)>)?$" "pattern" : "^(?<timestamp>\\d{2}:\\d{2}:\\d{2}\\.\\d{6}) (?<syscall>\\w+)\\((?<body>.*)\\)\\s+=\\s+(?<rc>[-\\w]+)(?: (?<errno>\\w+) \\([^\\)]+\\))?(?: <(?<duration>\\d+\\.\\d+)>)?$"
@ -897,6 +900,7 @@
"uwsgi_log" : { "uwsgi_log" : {
"title" : "Uwsgi Log", "title" : "Uwsgi Log",
"description" : "The uwsgi log format.", "description" : "The uwsgi log format.",
"multiline" : false,
"regex" : { "regex" : {
"std" : { "std" : {
"pattern" : "^\\[pid: (?<s_pid>\\d+)\\|app: (?<s_app>[\\-\\d]+)\\|req: (?<s_req>[\\-\\d]+)/(?<s_worker_reqs>\\d+)\\] (?<c_ip>[^ ]+) \\((?<cs_username>[^\\)]*)\\) \\{(?<cs_vars>\\d+) vars in (?<cs_bytes>\\d+) bytes\\} \\[(?<timestamp>[^\\]]+)\\] (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? => generated (?<sc_bytes>\\d+) bytes in (?<s_runtime>\\d+) (?<rt_unit>\\w+) \\((?<cs_version>[^ ]+) (?<sc_status>\\d+)\\) (?<sc_headers>\\d+) headers in (?<sc_header_bytes>\\d+) bytes \\((?<s_switches>\\d+) switches on core (?<s_core>\\d+)\\)" "pattern" : "^\\[pid: (?<s_pid>\\d+)\\|app: (?<s_app>[\\-\\d]+)\\|req: (?<s_req>[\\-\\d]+)/(?<s_worker_reqs>\\d+)\\] (?<c_ip>[^ ]+) \\((?<cs_username>[^\\)]*)\\) \\{(?<cs_vars>\\d+) vars in (?<cs_bytes>\\d+) bytes\\} \\[(?<timestamp>[^\\]]+)\\] (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? => generated (?<sc_bytes>\\d+) bytes in (?<s_runtime>\\d+) (?<rt_unit>\\w+) \\((?<cs_version>[^ ]+) (?<sc_status>\\d+)\\) (?<sc_headers>\\d+) headers in (?<sc_header_bytes>\\d+) bytes \\((?<s_switches>\\d+) switches on core (?<s_core>\\d+)\\)"

@ -343,7 +343,7 @@ public:
{ {
static sig_atomic_t index_counter = 0; static sig_atomic_t index_counter = 0;
if (lnav_data.ld_flags & LNF_HEADLESS) { if (lnav_data.ld_flags & (LNF_HEADLESS|LNF_CHECK_CONFIG)) {
return; return;
} }
@ -2371,10 +2371,6 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
if (lnav_data.ld_flags & LNF_CHECK_CONFIG) {
return EXIT_SUCCESS;
}
/* If we statically linked against an ncurses library that had a non- /* If we statically linked against an ncurses library that had a non-
* standard path to the terminfo database, we need to set this variable * standard path to the terminfo database, we need to set this variable
* so that it will try the default path. * so that it will try the default path.
@ -2431,10 +2427,14 @@ int main(int argc, char *argv[])
} }
} }
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/messages"))); if (!(lnav_data.ld_flags & LNF_CHECK_CONFIG)) {
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/system.log"))); DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/messages")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog"))); DEFAULT_FILES.insert(
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog.log"))); make_pair(LNF_SYSLOG, string("var/log/system.log")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog")));
DEFAULT_FILES.insert(
make_pair(LNF_SYSLOG, string("var/log/syslog.log")));
}
init_lnav_commands(lnav_commands); init_lnav_commands(lnav_commands);
@ -2577,7 +2577,47 @@ int main(int argc, char *argv[])
} }
} }
if (!(lnav_data.ld_flags & LNF_HEADLESS) && !isatty(STDOUT_FILENO)) { if (lnav_data.ld_flags & LNF_CHECK_CONFIG) {
rescan_files(true);
for (list<logfile *>::iterator file_iter = lnav_data.ld_files.begin();
file_iter != lnav_data.ld_files.end();
++file_iter) {
logfile *lf = (*file_iter);
lf->rebuild_index();
lf->rebuild_index();
log_format *fmt = lf->get_format();
if (fmt == NULL) {
fprintf(stderr, "error:%s:no format found for file\n",
lf->get_filename().c_str());
retval = EXIT_FAILURE;
continue;
}
for (logfile::iterator line_iter = lf->begin();
line_iter != lf->end();
++line_iter) {
if (!line_iter->is_continued()) {
continue;
}
shared_buffer_ref sbr;
lf->read_line(line_iter, sbr);
if (fmt->scan_for_partial(sbr)) {
long line_number = distance(lf->begin(), line_iter);
fprintf(stderr,
"error:%s:%ld:line did not match format %s\n",
lf->get_filename().c_str(), line_number,
fmt->get_pattern_name().c_str());
retval = EXIT_FAILURE;
}
}
}
return retval;
}
if (!(lnav_data.ld_flags & (LNF_HEADLESS|LNF_CHECK_CONFIG)) && !isatty(STDOUT_FILENO)) {
fprintf(stderr, "error: stdout is not a tty.\n"); fprintf(stderr, "error: stdout is not a tty.\n");
retval = EXIT_FAILURE; retval = EXIT_FAILURE;
} }

@ -575,6 +575,28 @@ static struct json_path_handler json_log_rewrite_handlers[] = {
json_path_handler() json_path_handler()
}; };
/**
 * Test whether a line partially matches the pattern this format is locked to.
 *
 * @param sbr The contents of the line to test.
 * @return
 *   - false for JSON formats;
 *   - true for formats whose "multiline" flag is false;
 *   - false when no pattern is locked yet, or when the locked pattern has no
 *     recorded timestamp end (-1) or that offset is past the end of the line;
 *   - otherwise the result of a PCRE_PARTIAL match of the locked pattern
 *     against a pcre_input built from the line data with bounds 0 and
 *     p_timestamp_end.
 */
bool external_log_format::scan_for_partial(shared_buffer_ref &sbr)
{
    if (this->jlf_json) {
        return false;
    }

    if (!this->elf_multiline) {
        return true;
    }

    // lf_fmt_lock starts out as -1 (see the log_format constructor), so
    // indexing elf_pattern_order before a pattern has been locked would read
    // out of bounds.  Treat "nothing locked" as "no partial match".
    if (this->lf_fmt_lock < 0) {
        return false;
    }

    pattern *pat = this->elf_pattern_order[this->lf_fmt_lock];

    // A pattern only records p_timestamp_end when its timestamp capture
    // starts at offset zero; -1 means there is nothing to bound the match by.
    if (pat->p_timestamp_end == -1 || pat->p_timestamp_end > sbr.length()) {
        return false;
    }

    pcre_input pi(sbr.get_data(), 0, pat->p_timestamp_end);
    pcre_context_static<128> pc;

    return pat->p_pcre->match(pc, pi, PCRE_PARTIAL);
}
bool external_log_format::scan(std::vector<logline> &dst, bool external_log_format::scan(std::vector<logline> &dst,
off_t offset, off_t offset,
shared_buffer_ref &sbr) shared_buffer_ref &sbr)
@ -1200,14 +1222,18 @@ void external_log_format::build(std::vector<std::string> &errors)
} }
if (pat.p_pcre->match(pc, pi)) { if (pat.p_pcre->match(pc, pi)) {
const char *ts = pi.get_substr_start( pcre_context::capture_t *ts_cap =
pc[this->lf_timestamp_field.get()]); pc[this->lf_timestamp_field.get()];
const char *ts = pi.get_substr_start(ts_cap);
ssize_t ts_len = pc[this->lf_timestamp_field.get()]->length(); ssize_t ts_len = pc[this->lf_timestamp_field.get()]->length();
const char *const *custom_formats = this->get_timestamp_formats(); const char *const *custom_formats = this->get_timestamp_formats();
date_time_scanner dts; date_time_scanner dts;
struct timeval tv; struct timeval tv;
struct exttm tm; struct exttm tm;
if (ts_cap->c_begin == 0) {
pat.p_timestamp_end = ts_cap->c_end;
}
found = true; found = true;
if (ts_len == -1 || dts.scan(ts, ts_len, custom_formats, &tm, tv) == NULL) { if (ts_len == -1 || dts.scan(ts, ts_len, custom_formats, &tm, tv) == NULL) {
errors.push_back("error:" + errors.push_back("error:" +

@ -537,6 +537,7 @@ public:
log_format() : lf_fmt_lock(-1), log_format() : lf_fmt_lock(-1),
lf_timestamp_field(intern_string::lookup("timestamp", -1)) { lf_timestamp_field(intern_string::lookup("timestamp", -1)) {
}; };
virtual ~log_format() { }; virtual ~log_format() { };
virtual void clear(void) virtual void clear(void)
@ -567,6 +568,10 @@ public:
off_t offset, off_t offset,
shared_buffer_ref &sbr) = 0; shared_buffer_ref &sbr) = 0;
/**
 * Default partial-match check.  This base implementation never reports a
 * partial match; it is virtual so that derived formats can override it.
 *
 * @param sbr The contents of the line to test (unused here).
 * @return Always false.
 */
virtual bool scan_for_partial(shared_buffer_ref &sbr) {
return false;
};
/** /**
* Remove redundant data from the log line string. * Remove redundant data from the log line string.
* *
@ -694,12 +699,13 @@ public:
}; };
struct pattern { struct pattern {
pattern() : p_pcre(NULL) { }; pattern() : p_pcre(NULL), p_timestamp_end(-1) { };
std::string p_config_path; std::string p_config_path;
std::string p_string; std::string p_string;
pcrepp *p_pcre; pcrepp *p_pcre;
std::vector<value_def> p_value_by_index; std::vector<value_def> p_value_by_index;
int p_timestamp_end;
}; };
struct level_pattern { struct level_pattern {
@ -715,6 +721,7 @@ public:
elf_column_count(0), elf_column_count(0),
elf_timestamp_divisor(1.0), elf_timestamp_divisor(1.0),
elf_body_field(intern_string::lookup("body", -1)), elf_body_field(intern_string::lookup("body", -1)),
elf_multiline(true),
jlf_json(false), jlf_json(false),
jlf_hide_extra(false), jlf_hide_extra(false),
jlf_cached_offset(-1), jlf_cached_offset(-1),
@ -737,7 +744,9 @@ public:
bool scan(std::vector<logline> &dst, bool scan(std::vector<logline> &dst,
off_t offset, off_t offset,
shared_buffer_ref &sbr); shared_buffer_ref &sbr);
bool scan_for_partial(shared_buffer_ref &sbr);
void annotate(shared_buffer_ref &line, void annotate(shared_buffer_ref &line,
string_attrs_t &sa, string_attrs_t &sa,
std::vector<logline_value> &values) const; std::vector<logline_value> &values) const;
@ -837,6 +846,7 @@ public:
intern_string_t elf_body_field; intern_string_t elf_body_field;
int elf_body_field_index; int elf_body_field_index;
std::map<logline::level_t, level_pattern> elf_level_patterns; std::map<logline::level_t, level_pattern> elf_level_patterns;
bool elf_multiline;
enum json_log_field { enum json_log_field {
JLF_CONSTANT, JLF_CONSTANT,

@ -99,6 +99,8 @@ static int read_format_bool(yajlpp_parse_context *ypc, int val)
elf->jlf_json = val; elf->jlf_json = val;
else if (field_name == "hide-extra") else if (field_name == "hide-extra")
elf->jlf_hide_extra = val; elf->jlf_hide_extra = val;
else if (field_name == "multiline")
elf->elf_multiline = val;
return 1; return 1;
} }
@ -378,7 +380,7 @@ static int read_json_variable_num(yajlpp_parse_context *ypc, long long val)
static struct json_path_handler format_handlers[] = { static struct json_path_handler format_handlers[] = {
json_path_handler("^/\\w+/regex/[^/]+/pattern$", read_format_regex), json_path_handler("^/\\w+/regex/[^/]+/pattern$", read_format_regex),
json_path_handler("^/\\w+/(json|convert-to-local-time|epoch-timestamp|hide-extra)$", read_format_bool), json_path_handler("^/\\w+/(json|convert-to-local-time|epoch-timestamp|hide-extra|multiline)$", read_format_bool),
json_path_handler("^/\\w+/timestamp-divisor$", read_format_double) json_path_handler("^/\\w+/timestamp-divisor$", read_format_double)
.add_cb(read_format_int), .add_cb(read_format_int),
json_path_handler("^/\\w+/(file-pattern|level-field|timestamp-field|" json_path_handler("^/\\w+/(file-pattern|level-field|timestamp-field|"

@ -324,7 +324,6 @@ void logfile::read_line(logfile::iterator ll, string &line_out)
if (this->lf_line_buffer.read_line(off, sbr)) { if (this->lf_line_buffer.read_line(off, sbr)) {
if (this->lf_format.get() != NULL) { if (this->lf_format.get() != NULL) {
this->lf_format->get_subline(*ll, sbr); this->lf_format->get_subline(*ll, sbr);
} }
line_out.append(sbr.get_data(), sbr.length()); line_out.append(sbr.get_data(), sbr.length());
} }

@ -214,6 +214,8 @@ dist_noinst_DATA = \
listview_output.6 \ listview_output.6 \
logfile_access_log.0 \ logfile_access_log.0 \
logfile_access_log.1 \ logfile_access_log.1 \
logfile_bad_access_log.0 \
logfile_bad_syslog.0 \
logfile_blued.0 \ logfile_blued.0 \
logfile_empty.0 \ logfile_empty.0 \
logfile_epoch.0 \ logfile_epoch.0 \

@ -0,0 +1,3 @@
192.168.202.254 - - [20/Jul/2009:22:59:26 +0000] "GET /vmw/cgi/tramp HTTP/1.0" 200 134 "-" "gPXE/0.9.7"
192.168.202.254 [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkboot.gz HTTP/1.0" 404 46210 "-" "gPXE/0.9.7"
192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkernel.gz HTTP/1.0" 200 78929 "-" "gPXE/0.9.7"

@ -0,0 +1,4 @@
Nov 3 09:23:38 veridian automount[7998]: lookup(file): lookup for foobar failed
Nov 3 09:23:38 veridian automount[16442]: attempting to mount entry /auto/opt
Nov 3 09:23:38 veridian lookup for opt failed
Nov 3 09:47:02 veridian sudo: timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages

@ -236,3 +236,19 @@ check_output "piping to stdin is not working?" <<EOF
2013-06-06T19:13:20.123 Hi 2013-06-06T19:13:20.123 Hi
2013-06-06T19:13:20.123 ---- END-OF-STDIN ---- 2013-06-06T19:13:20.123 ---- END-OF-STDIN ----
EOF EOF
run_test ${lnav_test} -C ${srcdir}/logfile_bad_syslog.0
sed -i "" -e "s|/.*/logfile_bad_syslog.0|logfile_bad_syslog.0|g" `test_err_filename`
check_error_output "bad syslog line not found?" <<EOF
error:logfile_bad_syslog.0:2:line did not match format syslog_log/regex/std/pattern
EOF
run_test ${lnav_test} -C ${srcdir}/logfile_bad_access_log.0
sed -i "" -e "s|/.*/logfile_bad_access_log.0|logfile_bad_access_log.0|g" `test_err_filename`
check_error_output "bad access_log line not found?" <<EOF
error:logfile_bad_access_log.0:1:line did not match format access_log/regex/std/pattern
EOF

Loading…
Cancel
Save