From 26cccdafce524fe9807e40f5719b0dfd780c6918 Mon Sep 17 00:00:00 2001 From: Timothy Stack Date: Tue, 21 Aug 2018 21:43:32 -0700 Subject: [PATCH] [locale] some tweaks for recognizing non-english logs Try to address some locale issues brought up in #533 --- src/default-log-formats.json | 14 +++++++------- src/pcrepp.hh | 4 ++-- src/top_status_source.hh | 2 +- test/Makefile.am | 2 ++ test/logfile_syslog_fr.0 | 1 + test/test_date_time_scanner.cc | 14 ++++++++++++++ test/test_logfile.sh | 12 ++++++++++++ 7 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 test/logfile_syslog_fr.0 diff --git a/src/default-log-formats.json b/src/default-log-formats.json index 53e47558..ec8e1c77 100644 --- a/src/default-log-formats.json +++ b/src/default-log-formats.json @@ -131,7 +131,7 @@ "description" : "A generic format for logs, like cron, that have a date at the start of a block.", "regex" : { "std" : { - "pattern" : "^(?\\w{3} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\w+ \\d{4})(?(?:.|\\n)*)$" + "pattern" : "^(?\\S{3,8} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\w+ \\d{4})(?(?:.|\\n)*)$" } }, "sample" : [ @@ -199,10 +199,10 @@ "description" : "Log format used by the Common Unix Printing System", "regex" : { "system" : { - "pattern" : "^(?[IEW]) \\[(?\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?
\\w+): (?.*)$" + "pattern" : "^(?[IEW]) \\[(?\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?
\\w+): (?.*)$" }, "default" : { - "pattern" : "^(?[IEW]) \\[(?\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?.*)$" + "pattern" : "^(?[IEW]) \\[(?\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?.*)$" } }, "level" : { @@ -594,7 +594,7 @@ "description" : "Log for the fsck_hfs tool on Mac OS X.", "regex" : { "std" : { - "pattern" : "^(?[^:]+): fsck_hfs (?:run|started) at (?\\w{3} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4})(?(?:.|\\n)*)" + "pattern" : "^(?[^:]+): fsck_hfs (?:run|started) at (?\\S{3,8} \\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4})(?(?:.|\\n)*)" } }, "value" : { @@ -1070,10 +1070,10 @@ "url" : "http://fedorahosted.org/sssd", "regex" : { "core" : { - "pattern" : "^\\((?\\w{3} \\w{3} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd\\] \\[(?\\w+)\\] \\((?0x[0-9a-fA-F]{4})\\): (?.*)$" + "pattern" : "^\\((?\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd\\] \\[(?\\w+)\\] \\((?0x[0-9a-fA-F]{4})\\): (?.*)$" }, "module" : { - "pattern" : "^\\((?\\w{3} \\w{3} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd(?\\[.*?\\])\\] \\[(?\\w+)\\] \\((?0x[0-9a-fA-F]{4})\\): (?.*)$" + "pattern" : "^\\((?\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2} \\d{4})\\) \\[sssd(?\\[.*?\\])\\] \\[(?\\w+)\\] \\((?0x[0-9a-fA-F]{4})\\): (?.*)$" } }, "value" : { @@ -1185,7 +1185,7 @@ "url" : "http://en.wikipedia.org/wiki/Syslog", "regex" : { "std" : { - "pattern" : "^(?\\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))?(?:(?: (?(?:[^\\[:]+|[^:]+))(?:\\[(?\\d+)\\])?:(?(?:.|\\n)*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))" + "pattern" : "^(?\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))?(?:(?: (?(?:[^\\[:]+|[^:]+))(?:\\[(?\\d+)\\])?:(?(?:.|\\n)*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))" }, "rfc5424" : { "pattern" : "^<(?\\d+)>(?\\d+) (?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{6})?(?:[^ ]+)?) (?[^ ]+|-) (?[^ ]+|-) (?[^ ]+|-) (?[^ ]+|-) (?\\[(?:[^\\]\"]|\"(?:\\.|[^\"])+\")*\\]|-|)\\s+(?(?:.|\\n)*)" diff --git a/src/pcrepp.hh b/src/pcrepp.hh index 968c6966..650cf129 100644 --- a/src/pcrepp.hh +++ b/src/pcrepp.hh @@ -374,7 +374,7 @@ public: int eoff; if ((this->p_code = pcre_compile(pattern, - options, + options | PCRE_UTF8, &errptr, &eoff, NULL)) == NULL) { @@ -393,7 +393,7 @@ public: int eoff; if ((this->p_code = pcre_compile(pattern.c_str(), - options, + options | PCRE_UTF8, &errptr, &eoff, NULL)) == NULL) { diff --git a/src/top_status_source.hh b/src/top_status_source.hh index 70455996..af217932 100644 --- a/src/top_status_source.hh +++ b/src/top_status_source.hh @@ -59,7 +59,7 @@ public: : filename_wire(*this, &top_status_source::update_filename), view_name_wire(*this, &top_status_source::update_view_name) { - this->tss_fields[TSF_TIME].set_width(24); + this->tss_fields[TSF_TIME].set_width(28); this->tss_fields[TSF_PARTITION_NAME].set_width(34); this->tss_fields[TSF_PARTITION_NAME].set_left_pad(1); this->tss_fields[TSF_VIEW_NAME].set_width(8); diff --git a/test/Makefile.am b/test/Makefile.am index 52716cf0..41ce4955 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -321,6 +321,7 @@ dist_noinst_DATA = \ logfile_syslog.0 \ logfile_syslog.1 \ logfile_syslog.2 \ + logfile_syslog_fr.0 \ logfile_syslog_with_access_log.0 \ logfile_syslog_with_mixed_times.0 \ logfile_tai64n.0 \ @@ -423,6 +424,7 @@ DISTCLEANFILES = \ test.log \ logfile_stdin.log \ logfile_syslog.0 \ + logfile_syslog_fr.0 \ unreadable.log \ empty \ scripts-empty diff --git a/test/logfile_syslog_fr.0 b/test/logfile_syslog_fr.0 new file mode 100644 index 00000000..8b93e0ce --- /dev/null +++ b/test/logfile_syslog_fr.0 @@ -0,0 +1 @@ +août 19 11:08:37 nlaptop symphorien[4961]: test diff --git a/test/test_date_time_scanner.cc b/test/test_date_time_scanner.cc index 459a0b7c..c5371d2e 100644 --- a/test/test_date_time_scanner.cc +++ b/test/test_date_time_scanner.cc @@ -117,6 +117,20 @@ int main(int argc, char *argv[]) } } + { + const char *en_date = "Jan 1 12:00:00"; + const char *fr_date = "août 19 11:08:37"; + struct timeval en_tv, fr_tv; + struct exttm en_tm, fr_tm; + date_time_scanner dts; + + if (setlocale(LC_TIME, "fr_FR.UTF-8") != NULL) { + assert(dts.scan(en_date, strlen(en_date), NULL, &en_tm, en_tv) != NULL); + dts.clear(); + assert(dts.scan(fr_date, strlen(fr_date), NULL, &fr_tm, fr_tv) != NULL); + } + } + { const char *epoch_str = "ts 1428721664 ]"; struct exttm tm; diff --git a/test/test_logfile.sh b/test/test_logfile.sh index 6bbb5e76..59208e2b 100644 --- a/test/test_logfile.sh +++ b/test/test_logfile.sh @@ -1,5 +1,17 @@ #! /bin/bash +cp ${srcdir}/logfile_syslog_fr.0 logfile_syslog_fr.0 +touch -t 200711030923 logfile_syslog_fr.0 +run_test env LC_TIME=fr_FR.UTF-8 ${lnav_test} -n \ + -c ";SELECT log_time FROM syslog_log" \ + -c ":write-csv-to -" \ + logfile_syslog_fr.0 + +check_output "french locale is not recognized" <