[locale] some tweaks for recognizing non-English logs

Try to address some locale issues brought up in #533
pull/536/head
Timothy Stack 6 years ago
parent 069650153f
commit 26cccdafce

@ -131,7 +131,7 @@
"description" : "A generic format for logs, like cron, that have a date at the start of a block.",
"regex" : {
"std" : {
"pattern" : "^(?<timestamp>\\w{3} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\w+ \\d{4})(?<body>(?:.|\\n)*)$"
"pattern" : "^(?<timestamp>\\S{3,8} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\w+ \\d{4})(?<body>(?:.|\\n)*)$"
}
},
"sample" : [
@ -199,10 +199,10 @@
"description" : "Log format used by the Common Unix Printing System",
"regex" : {
"system" : {
"pattern" : "^(?<level>[IEW]) \\[(?<timestamp>\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?<section>\\w+): (?<body>.*)$"
"pattern" : "^(?<level>[IEW]) \\[(?<timestamp>\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?<section>\\w+): (?<body>.*)$"
},
"default" : {
"pattern" : "^(?<level>[IEW]) \\[(?<timestamp>\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?<body>.*)$"
"pattern" : "^(?<level>[IEW]) \\[(?<timestamp>\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?<body>.*)$"
}
},
"level" : {
@ -594,7 +594,7 @@
"description" : "Log for the fsck_hfs tool on Mac OS X.",
"regex" : {
"std" : {
"pattern" : "^(?<device>[^:]+): fsck_hfs (?:run|started) at (?<timestamp>\\w{3} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4})(?<body>(?:.|\\n)*)"
"pattern" : "^(?<device>[^:]+): fsck_hfs (?:run|started) at (?<timestamp>\\S{3,8} \\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4})(?<body>(?:.|\\n)*)"
}
},
"value" : {
@ -1070,10 +1070,10 @@
"url" : "http://fedorahosted.org/sssd",
"regex" : {
"core" : {
"pattern" : "^\\((?<timestamp>\\w{3} \\w{3} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd\\] \\[(?<function>\\w+)\\] \\((?<debug_level>0x[0-9a-fA-F]{4})\\): (?<body>.*)$"
"pattern" : "^\\((?<timestamp>\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd\\] \\[(?<function>\\w+)\\] \\((?<debug_level>0x[0-9a-fA-F]{4})\\): (?<body>.*)$"
},
"module" : {
"pattern" : "^\\((?<timestamp>\\w{3} \\w{3} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd(?<module>\\[.*?\\])\\] \\[(?<function>\\w+)\\] \\((?<debug_level>0x[0-9a-fA-F]{4})\\): (?<body>.*)$"
"pattern" : "^\\((?<timestamp>\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd(?<module>\\[.*?\\])\\] \\[(?<function>\\w+)\\] \\((?<debug_level>0x[0-9a-fA-F]{4})\\): (?<body>.*)$"
}
},
"value" : {
@ -1185,7 +1185,7 @@
"url" : "http://en.wikipedia.org/wiki/Syslog",
"regex" : {
"std" : {
"pattern" : "^(?<timestamp>\\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?<log_hostname>[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))?(?:(?: (?<log_procname>(?:[^\\[:]+|[^:]+))(?:\\[(?<log_pid>\\d+)\\])?:(?<body>(?:.|\\n)*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))"
"pattern" : "^(?<timestamp>\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?<log_hostname>[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))?(?:(?: (?<log_procname>(?:[^\\[:]+|[^:]+))(?:\\[(?<log_pid>\\d+)\\])?:(?<body>(?:.|\\n)*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))"
},
"rfc5424" : {
"pattern" : "^<(?<log_pri>\\d+)>(?<syslog_version>\\d+) (?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{6})?(?:[^ ]+)?) (?<log_hostname>[^ ]+|-) (?<log_procname>[^ ]+|-) (?<log_pid>[^ ]+|-) (?<log_msgid>[^ ]+|-) (?<log_struct>\\[(?:[^\\]\"]|\"(?:\\.|[^\"])+\")*\\]|-|)\\s+(?<body>(?:.|\\n)*)"

@ -374,7 +374,7 @@ public:
int eoff;
if ((this->p_code = pcre_compile(pattern,
options,
options | PCRE_UTF8,
&errptr,
&eoff,
NULL)) == NULL) {
@ -393,7 +393,7 @@ public:
int eoff;
if ((this->p_code = pcre_compile(pattern.c_str(),
options,
options | PCRE_UTF8,
&errptr,
&eoff,
NULL)) == NULL) {

@ -59,7 +59,7 @@ public:
: filename_wire(*this, &top_status_source::update_filename),
view_name_wire(*this, &top_status_source::update_view_name)
{
this->tss_fields[TSF_TIME].set_width(24);
this->tss_fields[TSF_TIME].set_width(28);
this->tss_fields[TSF_PARTITION_NAME].set_width(34);
this->tss_fields[TSF_PARTITION_NAME].set_left_pad(1);
this->tss_fields[TSF_VIEW_NAME].set_width(8);

@ -321,6 +321,7 @@ dist_noinst_DATA = \
logfile_syslog.0 \
logfile_syslog.1 \
logfile_syslog.2 \
logfile_syslog_fr.0 \
logfile_syslog_with_access_log.0 \
logfile_syslog_with_mixed_times.0 \
logfile_tai64n.0 \
@ -423,6 +424,7 @@ DISTCLEANFILES = \
test.log \
logfile_stdin.log \
logfile_syslog.0 \
logfile_syslog_fr.0 \
unreadable.log \
empty \
scripts-empty

@ -0,0 +1 @@
août 19 11:08:37 nlaptop symphorien[4961]: test

@ -117,6 +117,20 @@ int main(int argc, char *argv[])
}
}
{
const char *en_date = "Jan 1 12:00:00";
const char *fr_date = "août 19 11:08:37";
struct timeval en_tv, fr_tv;
struct exttm en_tm, fr_tm;
date_time_scanner dts;
if (setlocale(LC_TIME, "fr_FR.UTF-8") != NULL) {
assert(dts.scan(en_date, strlen(en_date), NULL, &en_tm, en_tv) != NULL);
dts.clear();
assert(dts.scan(fr_date, strlen(fr_date), NULL, &fr_tm, fr_tv) != NULL);
}
}
{
const char *epoch_str = "ts 1428721664 ]";
struct exttm tm;

@ -1,5 +1,17 @@
#! /bin/bash
cp ${srcdir}/logfile_syslog_fr.0 logfile_syslog_fr.0
touch -t 200711030923 logfile_syslog_fr.0
run_test env LC_TIME=fr_FR.UTF-8 ${lnav_test} -n \
-c ";SELECT log_time FROM syslog_log" \
-c ":write-csv-to -" \
logfile_syslog_fr.0
check_output "french locale is not recognized" <<EOF
log_time
2007-08-19 11:08:37.000
EOF
touch unreadable.log
chmod ugo-r unreadable.log

Loading…
Cancel
Save