[format] try to validate formats against a given file

pull/254/head
Timothy Stack 9 years ago
parent f392822aa8
commit 7ef92dab79

@ -6,6 +6,8 @@ lnav v0.7.4:
Shift+Left/Shift+Right).
* A color-coded bar has been added to the left side to show where
messages from one file stop and messages from another file start.
* The '-C' option will now try to check any specified log files to
make sure the format(s) match all of the lines.
Fixes:
* Nested fields in JSON logs are now supported for levels, bodies, etc...

@ -82,6 +82,10 @@ check_output() {
test_num=`expr ${test_num} \+ 1`
}
# Print the name of the .err file for the current test, built from
# ${test_file_base} and ${test_num}.  The expansion is quoted so that a base
# path containing whitespace or glob characters is printed verbatim instead
# of being split or expanded by the shell.
test_err_filename() {
    echo "${test_file_base}_${test_num}.err"
}
check_error_output() {
diff -w -u - ${test_file_base}_${test_num}.err \
> ${test_file_base}_${test_num}.err.diff

@ -57,7 +57,9 @@ Install the given format files in the $HOME/.lnav/formats/installed directory
and exit.
.TP
\fB\-C\fR
Check the configuration and exit.
Check the configuration and exit. The log format files will be loaded and
checked. Any files given on the command-line will be loaded and checked to make
sure they match a log format.
.TP
\fB\-d\fR file
Write debug messages to the given file.

@ -3,6 +3,7 @@
"title" : "Common Access Log",
"description" : "The default web access log format for servers like Apache.",
"url" : "http://en.wikipedia.org/wiki/Common_Log_Format",
"multiline" : false,
"regex" : {
"ts-first-noquotes" : {
"pattern" : "^(?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?<c_ip>[^ ]+) (?<cs_username>[^ ]+) (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? (?:-1|\\d+) (?<sc_status>\\d+) \\d+"
@ -567,6 +568,7 @@
"title" : "CUPS Page Log",
"description" : "The CUPS server log of printed pages.",
"url" : "http://www.cups.org/documentation.php/doc-1.7/ref-page_log.html",
"multiline" : false,
"regex" : {
"pre-1.7" : {
"pattern" : "^(?<printer>[\\w_\\-\\.]+) (?<username>[\\w\\.\\-]+) (?<job_id>\\d+) \\[(?<timestamp>[^\\]]+)\\] (?<page_number>total|\\d+) (?<num_copies>\\d+) (?<job_billing>[^ ]+) (?<job_originating_hostname>[\\w\\.:\\-]+)$"
@ -747,6 +749,7 @@
"title" : "Strace",
"description" : "The strace output format.",
"url" : "http://en.wikipedia.org/wiki/Strace",
"multiline" : false,
"regex" : {
"std" : {
"pattern" : "^(?<timestamp>\\d{2}:\\d{2}:\\d{2}\\.\\d{6}) (?<syscall>\\w+)\\((?<body>.*)\\)\\s+=\\s+(?<rc>[-\\w]+)(?: (?<errno>\\w+) \\([^\\)]+\\))?(?: <(?<duration>\\d+\\.\\d+)>)?$"
@ -897,6 +900,7 @@
"uwsgi_log" : {
"title" : "Uwsgi Log",
"description" : "The uwsgi log format.",
"multiline" : false,
"regex" : {
"std" : {
"pattern" : "^\\[pid: (?<s_pid>\\d+)\\|app: (?<s_app>[\\-\\d]+)\\|req: (?<s_req>[\\-\\d]+)/(?<s_worker_reqs>\\d+)\\] (?<c_ip>[^ ]+) \\((?<cs_username>[^\\)]*)\\) \\{(?<cs_vars>\\d+) vars in (?<cs_bytes>\\d+) bytes\\} \\[(?<timestamp>[^\\]]+)\\] (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? => generated (?<sc_bytes>\\d+) bytes in (?<s_runtime>\\d+) (?<rt_unit>\\w+) \\((?<cs_version>[^ ]+) (?<sc_status>\\d+)\\) (?<sc_headers>\\d+) headers in (?<sc_header_bytes>\\d+) bytes \\((?<s_switches>\\d+) switches on core (?<s_core>\\d+)\\)"

@ -343,7 +343,7 @@ public:
{
static sig_atomic_t index_counter = 0;
if (lnav_data.ld_flags & LNF_HEADLESS) {
if (lnav_data.ld_flags & (LNF_HEADLESS|LNF_CHECK_CONFIG)) {
return;
}
@ -2371,10 +2371,6 @@ int main(int argc, char *argv[])
return EXIT_FAILURE;
}
if (lnav_data.ld_flags & LNF_CHECK_CONFIG) {
return EXIT_SUCCESS;
}
/* If we statically linked against an ncurses library that had a non-
* standard path to the terminfo database, we need to set this variable
* so that it will try the default path.
@ -2431,10 +2427,14 @@ int main(int argc, char *argv[])
}
}
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/messages")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/system.log")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog.log")));
if (!(lnav_data.ld_flags & LNF_CHECK_CONFIG)) {
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/messages")));
DEFAULT_FILES.insert(
make_pair(LNF_SYSLOG, string("var/log/system.log")));
DEFAULT_FILES.insert(make_pair(LNF_SYSLOG, string("var/log/syslog")));
DEFAULT_FILES.insert(
make_pair(LNF_SYSLOG, string("var/log/syslog.log")));
}
init_lnav_commands(lnav_commands);
@ -2577,7 +2577,47 @@ int main(int argc, char *argv[])
}
}
if (!(lnav_data.ld_flags & LNF_HEADLESS) && !isatty(STDOUT_FILENO)) {
if (lnav_data.ld_flags & LNF_CHECK_CONFIG) {
rescan_files(true);
for (list<logfile *>::iterator file_iter = lnav_data.ld_files.begin();
file_iter != lnav_data.ld_files.end();
++file_iter) {
logfile *lf = (*file_iter);
lf->rebuild_index();
lf->rebuild_index();
log_format *fmt = lf->get_format();
if (fmt == NULL) {
fprintf(stderr, "error:%s:no format found for file\n",
lf->get_filename().c_str());
retval = EXIT_FAILURE;
continue;
}
for (logfile::iterator line_iter = lf->begin();
line_iter != lf->end();
++line_iter) {
if (!line_iter->is_continued()) {
continue;
}
shared_buffer_ref sbr;
lf->read_line(line_iter, sbr);
if (fmt->scan_for_partial(sbr)) {
long line_number = distance(lf->begin(), line_iter);
fprintf(stderr,
"error:%s:%ld:line did not match format %s\n",
lf->get_filename().c_str(), line_number,
fmt->get_pattern_name().c_str());
retval = EXIT_FAILURE;
}
}
}
return retval;
}
if (!(lnav_data.ld_flags & (LNF_HEADLESS|LNF_CHECK_CONFIG)) && !isatty(STDOUT_FILENO)) {
fprintf(stderr, "error: stdout is not a tty.\n");
retval = EXIT_FAILURE;
}

@ -575,6 +575,28 @@ static struct json_path_handler json_log_rewrite_handlers[] = {
json_path_handler()
};
/**
 * Decide whether a line that was not recognized as the start of a log
 * message should be reported as failing to match this format.
 *
 * @param sbr The contents of the line to examine.
 * @return True if the line should be flagged, false if it can be treated
 * as an ordinary continuation line.
 */
bool external_log_format::scan_for_partial(shared_buffer_ref &sbr)
{
    // JSON-based formats are never flagged by this check.
    if (this->jlf_json) {
        return false;
    }

    // A format that is not multiline has no continuation lines, so any
    // line handed to this method is reported.
    if (!this->elf_multiline) {
        return true;
    }

    // lf_fmt_lock is initialized to -1 by the log_format constructor;
    // indexing elf_pattern_order with a negative value would read out of
    // bounds, so bail out when no pattern index is available.
    if (this->lf_fmt_lock < 0) {
        return false;
    }

    pattern *pat = this->elf_pattern_order[this->lf_fmt_lock];

    // Nothing to test if the timestamp extent is unknown (-1) or the line
    // is shorter than the recorded timestamp extent.
    if (pat->p_timestamp_end == -1 || pat->p_timestamp_end > sbr.length()) {
        return false;
    }

    // Only the first p_timestamp_end bytes of the line are given to the
    // regex, and the match is requested with PCRE_PARTIAL.
    // NOTE(review): the result relies on how pcrepp::match() maps a partial
    // match onto its boolean return value -- confirm against pcrepp.
    pcre_input pi(sbr.get_data(), 0, pat->p_timestamp_end);
    pcre_context_static<128> pc;

    return pat->p_pcre->match(pc, pi, PCRE_PARTIAL);
}
bool external_log_format::scan(std::vector<logline> &dst,
off_t offset,
shared_buffer_ref &sbr)
@ -1200,14 +1222,18 @@ void external_log_format::build(std::vector<std::string> &errors)
}
if (pat.p_pcre->match(pc, pi)) {
const char *ts = pi.get_substr_start(
pc[this->lf_timestamp_field.get()]);
pcre_context::capture_t *ts_cap =
pc[this->lf_timestamp_field.get()];
const char *ts = pi.get_substr_start(ts_cap);
ssize_t ts_len = pc[this->lf_timestamp_field.get()]->length();
const char *const *custom_formats = this->get_timestamp_formats();
date_time_scanner dts;
struct timeval tv;
struct exttm tm;
if (ts_cap->c_begin == 0) {
pat.p_timestamp_end = ts_cap->c_end;
}
found = true;
if (ts_len == -1 || dts.scan(ts, ts_len, custom_formats, &tm, tv) == NULL) {
errors.push_back("error:" +

@ -537,6 +537,7 @@ public:
log_format() : lf_fmt_lock(-1),
lf_timestamp_field(intern_string::lookup("timestamp", -1)) {
};
virtual ~log_format() { };
virtual void clear(void)
@ -567,6 +568,10 @@ public:
off_t offset,
shared_buffer_ref &sbr) = 0;
/**
 * Default implementation of the partial-match check; it always answers
 * false regardless of the line contents.
 *
 * @param sbr The contents of the line to examine (unused here).
 * @return Always false.
 */
virtual bool scan_for_partial(shared_buffer_ref &sbr)
{
    return false;
}
/**
* Remove redundant data from the log line string.
*
@ -694,12 +699,13 @@ public:
};
// One regular-expression entry of an external log format.
struct pattern {
pattern() : p_pcre(NULL) { };
pattern() : p_pcre(NULL), p_timestamp_end(-1) { };
std::string p_config_path;
std::string p_string;
// Matcher object that lines are tested against via p_pcre->match().
pcrepp *p_pcre;
std::vector<value_def> p_value_by_index;
// End offset of the timestamp capture.  external_log_format::build()
// assigns it only when the capture begins at offset zero; otherwise it
// keeps its initial value of -1.
int p_timestamp_end;
};
struct level_pattern {
@ -715,6 +721,7 @@ public:
elf_column_count(0),
elf_timestamp_divisor(1.0),
elf_body_field(intern_string::lookup("body", -1)),
elf_multiline(true),
jlf_json(false),
jlf_hide_extra(false),
jlf_cached_offset(-1),
@ -737,7 +744,9 @@ public:
bool scan(std::vector<logline> &dst,
off_t offset,
shared_buffer_ref &sbr);
bool scan_for_partial(shared_buffer_ref &sbr);
void annotate(shared_buffer_ref &line,
string_attrs_t &sa,
std::vector<logline_value> &values) const;
@ -837,6 +846,7 @@ public:
intern_string_t elf_body_field;
int elf_body_field_index;
std::map<logline::level_t, level_pattern> elf_level_patterns;
bool elf_multiline;
enum json_log_field {
JLF_CONSTANT,

@ -99,6 +99,8 @@ static int read_format_bool(yajlpp_parse_context *ypc, int val)
elf->jlf_json = val;
else if (field_name == "hide-extra")
elf->jlf_hide_extra = val;
else if (field_name == "multiline")
elf->elf_multiline = val;
return 1;
}
@ -378,7 +380,7 @@ static int read_json_variable_num(yajlpp_parse_context *ypc, long long val)
static struct json_path_handler format_handlers[] = {
json_path_handler("^/\\w+/regex/[^/]+/pattern$", read_format_regex),
json_path_handler("^/\\w+/(json|convert-to-local-time|epoch-timestamp|hide-extra)$", read_format_bool),
json_path_handler("^/\\w+/(json|convert-to-local-time|epoch-timestamp|hide-extra|multiline)$", read_format_bool),
json_path_handler("^/\\w+/timestamp-divisor$", read_format_double)
.add_cb(read_format_int),
json_path_handler("^/\\w+/(file-pattern|level-field|timestamp-field|"

@ -324,7 +324,6 @@ void logfile::read_line(logfile::iterator ll, string &line_out)
if (this->lf_line_buffer.read_line(off, sbr)) {
if (this->lf_format.get() != NULL) {
this->lf_format->get_subline(*ll, sbr);
}
line_out.append(sbr.get_data(), sbr.length());
}

@ -214,6 +214,8 @@ dist_noinst_DATA = \
listview_output.6 \
logfile_access_log.0 \
logfile_access_log.1 \
logfile_bad_access_log.0 \
logfile_bad_syslog.0 \
logfile_blued.0 \
logfile_empty.0 \
logfile_epoch.0 \

@ -0,0 +1,3 @@
192.168.202.254 - - [20/Jul/2009:22:59:26 +0000] "GET /vmw/cgi/tramp HTTP/1.0" 200 134 "-" "gPXE/0.9.7"
192.168.202.254 [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkboot.gz HTTP/1.0" 404 46210 "-" "gPXE/0.9.7"
192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkernel.gz HTTP/1.0" 200 78929 "-" "gPXE/0.9.7"

@ -0,0 +1,4 @@
Nov 3 09:23:38 veridian automount[7998]: lookup(file): lookup for foobar failed
Nov 3 09:23:38 veridian automount[16442]: attempting to mount entry /auto/opt
Nov 3 09:23:38 veridian lookup for opt failed
Nov 3 09:47:02 veridian sudo: timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages

@ -236,3 +236,19 @@ check_output "piping to stdin is not working?" <<EOF
2013-06-06T19:13:20.123 Hi
2013-06-06T19:13:20.123 ---- END-OF-STDIN ----
EOF
run_test ${lnav_test} -C ${srcdir}/logfile_bad_syslog.0
sed -i "" -e "s|/.*/logfile_bad_syslog.0|logfile_bad_syslog.0|g" `test_err_filename`
check_error_output "bad syslog line not found?" <<EOF
error:logfile_bad_syslog.0:2:line did not match format syslog_log/regex/std/pattern
EOF
run_test ${lnav_test} -C ${srcdir}/logfile_bad_access_log.0
sed -i "" -e "s|/.*/logfile_bad_access_log.0|logfile_bad_access_log.0|g" `test_err_filename`
check_error_output "bad access_log line not found?" <<EOF
error:logfile_bad_access_log.0:1:line did not match format access_log/regex/std/pattern
EOF

Loading…
Cancel
Save