From 2589345e5cce522eb9491a09227b315104a650e2 Mon Sep 17 00:00:00 2001 From: Timothy Stack Date: Wed, 17 Oct 2018 07:03:33 -0700 Subject: [PATCH] [perf] improve initial indexing times --- configure.ac | 19 +- src/CMakeLists.txt | 3 + src/Makefile.am | 12 +- src/hist_source.hh | 4 +- src/is_utf8.cc | 298 +++++++++++++++++++ src/is_utf8.hh | 36 +++ src/line_buffer.cc | 47 ++- src/line_buffer.hh | 1 + src/lnav.cc | 24 +- src/log_format.cc | 4 +- src/log_format.hh | 13 +- src/log_format_impls.cc | 2 +- src/log_level.cc | 30 -- src/log_level_re.cc | 590 ++++++++++++++++++++++++++++++++++++++ src/log_level_re.re | 105 +++++++ src/logfile.cc | 20 +- src/logfile.hh | 23 +- src/logfile_sub_source.cc | 8 +- src/pcrepp.cc | 2 +- src/pcrepp.hh | 14 +- src/simdutf8check.h | 237 +++++++++++++++ test/Makefile.am | 1 + test/UTF-8-test.txt | Bin 0 -> 22781 bytes test/drive_line_buffer.cc | 253 ++++++++-------- test/drive_logfile.cc | 4 +- test/test_line_buffer.sh | 6 + test/test_logfile.sh | 66 ++--- 27 files changed, 1558 insertions(+), 264 deletions(-) create mode 100644 src/is_utf8.cc create mode 100644 src/is_utf8.hh create mode 100644 src/log_level_re.cc create mode 100644 src/log_level_re.re create mode 100644 src/simdutf8check.h create mode 100644 test/UTF-8-test.txt diff --git a/configure.ac b/configure.ac index 6a488763..c13a1a0b 100644 --- a/configure.ac +++ b/configure.ac @@ -56,23 +56,6 @@ AS_VAR_IF([enable_profiling], [yes], AC_ARG_VAR(SFTP_TEST_URL) -AC_ARG_ENABLE([profiling], - AS_HELP_STRING([--enable-profiling], - [Compile with gprof(1) profiling support])) - -AC_MSG_CHECKING(gprof(4) profiling support) - -AS_VAR_IF([enable_profiling], [yes], - [CFLAGS="$CFLAGS -pg -gstabs" - CXXFLAGS="$CXXFLAGS -pg -gstabs" - LDFLAGS="$LDFLAGS -pg"], - [enable_profiling=no]dnl -) - -AC_MSG_RESULT($enable_profiling) - -AC_SUBST(CFLAGS_PG) - AC_PROG_INSTALL AC_PROG_RANLIB AM_PROG_AR @@ -134,7 +117,7 @@ AS_CASE(["$host_os"], ) ) -AC_CHECK_HEADERS(execinfo.h pty.h util.h zlib.h 
bzlib.h libutil.h sys/ttydefaults.h) +AC_CHECK_HEADERS(execinfo.h pty.h util.h zlib.h bzlib.h libutil.h sys/ttydefaults.h x86intrin.h) LNAV_WITH_JEMALLOC diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a64aa978..23d96e57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,6 +19,7 @@ set(diag_STAT_SRCS hist_source.cc hotkeys.cc intern_string.cc + is_utf8.cc json-extension-functions.cc json_op.cc json_ptr.cc @@ -116,6 +117,7 @@ set(diag_STAT_SRCS hotkeys.hh init-sql.hh intern_string.hh + is_utf8.hh k_merge_tree.h log_data_helper.hh log_data_table.hh @@ -136,6 +138,7 @@ set(diag_STAT_SRCS relative_time.hh sequence_sink.hh shlex.hh + simdutf8check.h spectro_source.hh strong_int.hh sysclip.hh diff --git a/src/Makefile.am b/src/Makefile.am index 129e8be1..09c53bd3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -96,7 +96,7 @@ time_fmts.cc: ptimec if HAVE_RE2C %.cc: %.re - $(RE2C_V)$(RE2C_CMD) -8 -o $@ $< + $(RE2C_V)$(RE2C_CMD) --tags -8 -o $@ $< $(REC2_V)test $@ -ef $(srcdir)/$*.cc || cp $@ $(srcdir)/$*.cc endif @@ -173,6 +173,7 @@ noinst_HEADERS = \ init.sql \ init-sql.hh \ intern_string.hh \ + is_utf8.hh \ json_op.hh \ json_ptr.hh \ k_merge_tree.h \ @@ -189,6 +190,7 @@ noinst_HEADERS = \ log_format.hh \ log_format_loader.hh \ log_level.hh \ + log_level_re.re \ log_search_table.hh \ logfile.hh \ logfile_sub_source.hh \ @@ -216,6 +218,7 @@ noinst_HEADERS = \ session_data.hh \ shared_buffer.hh \ shlex.hh \ + simdutf8check.h \ spectro_source.hh \ sql_util.hh \ sqlite-extension-func.hh \ @@ -284,6 +287,7 @@ libdiag_a_SOURCES = \ hist_source.cc \ hotkeys.cc \ intern_string.cc \ + is_utf8.cc \ json-extension-functions.cc \ json_op.cc \ json_ptr.cc \ @@ -297,6 +301,7 @@ libdiag_a_SOURCES = \ log_format.cc \ log_format_loader.cc \ log_level.cc \ + log_level_re.cc \ logfile.cc \ logfile_sub_source.cc \ network-extension-functions.cc \ @@ -389,11 +394,12 @@ ptimec_LDADD = DISTCLEANFILES = \ data_scanner_re.cc \ + default-config-json.c \ + 
default-log-formats-json.c \ dump-pid-sh.c \ help.c \ init-sql.c \ - default-log-formats-json.c \ - default-config-json.c \ + log_level_re.cc \ time_fmts.cc \ xterm-palette.c diff --git a/src/hist_source.hh b/src/hist_source.hh index 80b3ae35..cefeb252 100644 --- a/src/hist_source.hh +++ b/src/hist_source.hh @@ -433,7 +433,7 @@ private: hist_value b_values[HT__MAX]; }; - static const unsigned int BLOCK_SIZE = 100; + static const int64_t BLOCK_SIZE = 100; struct bucket_block { bucket_block() : bb_used(0) { @@ -445,7 +445,7 @@ private: }; bucket_t &find_bucket(int64_t index) { - struct bucket_block &bb = this->hs_blocks[index / this->BLOCK_SIZE]; + struct bucket_block &bb = this->hs_blocks[index / BLOCK_SIZE]; unsigned int intra_block_index = index % BLOCK_SIZE; bb.bb_used = std::max(intra_block_index, bb.bb_used); this->hs_line_count = std::max(this->hs_line_count, index + 1); diff --git a/src/is_utf8.cc b/src/is_utf8.cc new file mode 100644 index 00000000..6cc5a5df --- /dev/null +++ b/src/is_utf8.cc @@ -0,0 +1,298 @@ +/* + * is_utf8 is distributed under the following terms: + * + * Copyright (c) 2013 Palard Julien. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#include "is_utf8.hh" + +/* + Check if the given unsigned char * is a valid utf-8 sequence. + + Return value : + If the whole buffer is valid utf-8 and contains no newline, -1 is returned. + Else the zero-based offset of the first newline (`message` is set to NULL) or of the first erroneous byte (`message` describes the error) is returned. + + Source: + http://www.unicode.org/versions/Unicode7.0.0/UnicodeStandard-7.0.pdf + page 124, 3.9 "Unicode Encoding Forms", "UTF-8" + + + Table 3-7. Well-Formed UTF-8 Byte Sequences + ----------------------------------------------------------------------------- + | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte | + | U+0000..U+007F | 00..7F | | | | + | U+0080..U+07FF | C2..DF | 80..BF | | | + | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | + | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | + | U+D000..U+D7FF | ED | 80..9F | 80..BF | | + | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | + | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | + | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | + | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | + ----------------------------------------------------------------------------- + + Returns the first erroneous byte position, and gives in + `faulty_bytes` the number of actually existing bytes taking part in this error.
+*/ +ssize_t is_utf8(unsigned char *str, size_t len, const char **message, int *faulty_bytes) +{ + size_t i = 0; + + *message = nullptr; + *faulty_bytes = 0; + while (i < len) + { + if (str[i] == '\n') { + *message = nullptr; + return i; + } + + if (str[i] <= 0x7F) /* 00..7F */ + { + i += 1; + } + else if (str[i] >= 0xC2 && str[i] <= 0xDF) /* C2..DF 80..BF */ + { + if (i + 1 < len) /* Expect a 2nd byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) + { + *message = "After a first byte between C2 and DF, expecting a 2nd byte between 80 and BF"; + *faulty_bytes = 2; + return i; + } + } + else + { + *message = "After a first byte between C2 and DF, expecting a 2nd byte."; + *faulty_bytes = 1; + return i; + } + i += 2; + } + else if (str[i] == 0xE0) /* E0 A0..BF 80..BF */ + { + if (i + 2 < len) /* Expect a 2nd and 3rd byte */ + { + if (str[i + 1] < 0xA0 || str[i + 1] > 0xBF) + { + *message = "After a first byte of E0, expecting a 2nd byte between A0 and BF."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte of E0, expecting a 3nd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + } + else + { + *message = "After a first byte of E0, expecting two following bytes."; + *faulty_bytes = 1; + return i; + } + i += 3; + } + else if (str[i] >= 0xE1 && str[i] <= 0xEC) /* E1..EC 80..BF 80..BF */ + { + if (i + 2 < len) /* Expect a 2nd and 3rd byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) + { + *message = "After a first byte between E1 and EC, expecting the 2nd byte between 80 and BF."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte between E1 and EC, expecting the 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + } + else + { + *message = "After a first byte between E1 and EC, expecting two following bytes."; + *faulty_bytes = 1; + return i; + } + i += 3; + } + else if (str[i] == 0xED) /* 
ED 80..9F 80..BF */ + { + if (i + 2 < len) /* Expect a 2nd and 3rd byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0x9F) + { + *message = "After a first byte of ED, expecting 2nd byte between 80 and 9F."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte of ED, expecting 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + } + else + { + *message = "After a first byte of ED, expecting two following bytes."; + *faulty_bytes = 1; + return i; + } + i += 3; + } + else if (str[i] >= 0xEE && str[i] <= 0xEF) /* EE..EF 80..BF 80..BF */ + { + if (i + 2 < len) /* Expect a 2nd and 3rd byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) + { + *message = "After a first byte between EE and EF, expecting 2nd byte between 80 and BF."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte between EE and EF, expecting 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + } + else + { + *message = "After a first byte between EE and EF, two following bytes."; + *faulty_bytes = 1; + return i; + } + i += 3; + } + else if (str[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */ + { + if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ + { + if (str[i + 1] < 0x90 || str[i + 1] > 0xBF) + { + *message = "After a first byte of F0, expecting 2nd byte between 90 and BF."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte of F0, expecting 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) + { + *message = "After a first byte of F0, expecting 4th byte between 80 and BF."; + *faulty_bytes = 4; + return i; + } + } + else + { + *message = "After a first byte of F0, expecting three following bytes."; + *faulty_bytes = 1; + return i; + } + i += 4; + } + else if (str[i] >= 0xF1 && str[i] <= 0xF3) /* F1..F3 
80..BF 80..BF 80..BF */ + { + if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) + { + *message = "After a first byte of F1, F2, or F3, expecting a 2nd byte between 80 and BF."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte of F1, F2, or F3, expecting a 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) + { + *message = "After a first byte of F1, F2, or F3, expecting a 4th byte between 80 and BF."; + *faulty_bytes = 4; + return i; + } + } + else + { + *message = "After a first byte of F1, F2, or F3, expecting three following bytes."; + *faulty_bytes = 1; + return i; + } + i += 4; + } + else if (str[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */ + { + if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ + { + if (str[i + 1] < 0x80 || str[i + 1] > 0x8F) + { + *message = "After a first byte of F4, expecting 2nd byte between 80 and 8F."; + *faulty_bytes = 2; + return i; + } + if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) + { + *message = "After a first byte of F4, expecting 3rd byte between 80 and BF."; + *faulty_bytes = 3; + return i; + } + if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) + { + *message = "After a first byte of F4, expecting 4th byte between 80 and BF."; + *faulty_bytes = 4; + return i; + } + } + else + { + *message = "After a first byte of F4, expecting three following bytes."; + *faulty_bytes = 1; + return i; + } + i += 4; + } + else + { + *message = "Expecting bytes in the following ranges: 00..7F C2..F4."; + *faulty_bytes = 1; + return i; + } + } + return -1; +} diff --git a/src/is_utf8.hh b/src/is_utf8.hh new file mode 100644 index 00000000..dc0a00da --- /dev/null +++ b/src/is_utf8.hh @@ -0,0 +1,36 @@ +/* + * is_utf8 is distributed under the following terms: + * + * Copyright (c) 2013 Palard Julien. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _IS_UTF8_H +#define _IS_UTF8_H + +#include +#include + +ssize_t is_utf8(unsigned char *str, size_t len, const char **message, int *faulty_bytes); + +#endif /* _IS_UTF8_H */ diff --git a/src/line_buffer.cc b/src/line_buffer.cc index 66a44d03..a22ae756 100644 --- a/src/line_buffer.cc +++ b/src/line_buffer.cc @@ -43,6 +43,11 @@ #include +#ifdef HAVE_X86INTRIN_H +#include "simdutf8check.h" +#endif + +#include "is_utf8.hh" #include "lnav_util.hh" #include "line_buffer.hh" @@ -497,6 +502,7 @@ bool line_buffer::read_line(off_t &offset, line_value &lv, bool include_delim) lv.lv_len = 0; lv.lv_partial = false; + lv.lv_valid_utf = true; while (!retval) { char *line_start, *lf; @@ -505,7 +511,30 @@ bool line_buffer::read_line(off_t &offset, line_value &lv, bool include_delim) /* Find the data in the cache and */ line_start = this->get_range(offset, lv.lv_len); /* ... look for the end-of-line or end-of-file. */ - if (((lf = (char *)memchr(line_start, '\n', lv.lv_len)) != NULL) || + ssize_t utf8_end = -1; + +#ifdef HAVE_X86INTRIN_H + if (!validate_utf8_fast(line_start, lv.lv_len, &utf8_end)) { + lv.lv_valid_utf = false; + } +#else + { + const char *msg; + int faulty_bytes; + + utf8_end = is_utf8(line_start, lv.lv_len, &msg, &faulty_bytes); + if (msg != nullptr) { + lv.lv_valid_utf = false; + } + } +#endif + if (utf8_end >= 0) { + lf = line_start + utf8_end; + } else { + lf = nullptr; + } + + if (lf != nullptr || (lv.lv_len >= MAX_LINE_BUFFER_SIZE) || (request_size == MAX_LINE_BUFFER_SIZE) || ((request_size > lv.lv_len) && lv.lv_len > 0)) { @@ -604,6 +633,22 @@ bool line_buffer::read_line(off_t &offset_inout, shared_buffer_ref &sbr, line_va sbr.disown(); if ((retval = this->read_line(offset_inout, *lv))) { sbr.share(this->lb_share_manager, lv->lv_start, lv->lv_len); + if (!lv->lv_valid_utf) { + auto *bits = (unsigned char *) sbr.get_writable_data(); + const char *msg; + int faulty_bytes; + + while (true) { + ssize_t utf8_end = is_utf8(bits, sbr.length(), &msg, 
&faulty_bytes); + + if (msg == nullptr) { + break; + } + for (int lpc = 0; lpc < faulty_bytes; lpc++) { + bits[utf8_end + lpc] = '?'; + } + } + } } return retval; diff --git a/src/line_buffer.hh b/src/line_buffer.hh index be8474d6..d134b158 100644 --- a/src/line_buffer.hh +++ b/src/line_buffer.hh @@ -48,6 +48,7 @@ struct line_value { char *lv_start; size_t lv_len; bool lv_partial; + bool lv_valid_utf; void terminate() { this->lv_start[this->lv_len] = '\0'; diff --git a/src/lnav.cc b/src/lnav.cc index 359f95a2..2e182eae 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -365,15 +365,15 @@ bool setup_logline_table(exec_context &ec) iter.second->get_foreign_keys(db_key_names); } - db_key_names.push_back("device"); - db_key_names.push_back("inode"); - db_key_names.push_back("rowid"); - db_key_names.push_back("st_dev"); - db_key_names.push_back("st_ino"); - db_key_names.push_back("st_mode"); - db_key_names.push_back("st_rdev"); - db_key_names.push_back("st_uid"); - db_key_names.push_back("st_gid"); + db_key_names.emplace_back("device"); + db_key_names.emplace_back("inode"); + db_key_names.emplace_back("rowid"); + db_key_names.emplace_back("st_dev"); + db_key_names.emplace_back("st_ino"); + db_key_names.emplace_back("st_mode"); + db_key_names.emplace_back("st_rdev"); + db_key_names.emplace_back("st_uid"); + db_key_names.emplace_back("st_gid"); stable_sort(db_key_names.begin(), db_key_names.end()); @@ -490,7 +490,7 @@ class textfile_callback { public: textfile_callback() : force(false), front_file(NULL), front_top(-1) { }; - void closed_file(shared_ptr lf) { + void closed_file(const shared_ptr &lf) { log_info("closed text file: %s", lf->get_filename().c_str()); if (!lf->is_valid_filename()) { lnav_data.ld_file_names.erase(lf->get_filename()); @@ -503,7 +503,7 @@ public: regenerate_unique_file_names(); }; - void promote_file(shared_ptr lf) { + void promote_file(const shared_ptr &lf) { if (lnav_data.ld_log_source.insert_file(lf)) { force = true; @@ -523,7 +523,7 @@ public: } }; 
- void scanned_file(shared_ptr lf) { + void scanned_file(const shared_ptr &lf) { if (!lnav_data.ld_files_to_front.empty() && lnav_data.ld_files_to_front.front().first == lf->get_filename()) { diff --git a/src/log_format.cc b/src/log_format.cc index 4a8aeb8f..59c52fce 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -179,11 +179,11 @@ const char *log_format::log_scanf(const char *line, va_start(args, tv_out); pi.reset(line, 0, len); - if (!fmt[curr_fmt].pcre.match(pc, pi)) { + if (!fmt[curr_fmt].pcre.match(pc, pi, PCRE_NO_UTF8_CHECK)) { retval = NULL; } else { - pcre_context::capture_t *ts = pc["timestamp"]; + pcre_context::capture_t *ts = pc[fmt[curr_fmt].pf_timestamp_index]; for (auto &iter : pc) { pcre_context::capture_t *cap = va_arg( diff --git a/src/log_format.hh b/src/log_format.hh index 34208ca7..49fb2745 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -790,13 +790,14 @@ protected: struct pcre_format { pcre_format(const char *regex) : name(regex), pcre(regex) { - + this->pf_timestamp_index = this->pcre.name_index("timestamp"); }; pcre_format() : name(NULL), pcre("") { }; const char *name; pcrepp pcre; + int pf_timestamp_index{-1}; }; static bool next_format(pcre_format *fmt, int &index, int &locked_index); @@ -1140,7 +1141,7 @@ public: log_level_t convert_level(const pcre_input &pi, pcre_context::capture_t *level_cap) const { log_level_t retval = LEVEL_INFO; - if (level_cap != NULL && level_cap->is_valid()) { + if (level_cap != nullptr && level_cap->is_valid()) { pcre_context_static<128> pc_level; pcre_input pi_level(pi.get_substr_start(level_cap), 0, @@ -1149,11 +1150,9 @@ public: if (this->elf_level_patterns.empty()) { retval = string2level(pi_level.get_string(), level_cap->length()); } else { - for (auto iter = this->elf_level_patterns.begin(); - iter != this->elf_level_patterns.end(); - ++iter) { - if (iter->second.lp_pcre->match(pc_level, pi_level)) { - retval = iter->first; + for (const auto &elf_level_pattern : 
this->elf_level_patterns) { + if (elf_level_pattern.second.lp_pcre->match(pc_level, pi_level)) { + retval = elf_level_pattern.first; break; } } diff --git a/src/log_format_impls.cc b/src/log_format_impls.cc index f120eb02..0c28b5ec 100644 --- a/src/log_format_impls.cc +++ b/src/log_format_impls.cc @@ -165,7 +165,7 @@ class generic_log_format : public log_format { this->check_for_new_year(dst, log_time, log_tv); } - dst.push_back(logline(offset, log_tv, level_val)); + dst.emplace_back(offset, log_tv, level_val); return SCAN_MATCH; } diff --git a/src/log_level.cc b/src/log_level.cc index ceb53d32..e6d9c18a 100644 --- a/src/log_level.cc +++ b/src/log_level.cc @@ -50,36 +50,6 @@ const char *level_names[LEVEL__MAX + 1] = { NULL }; -static pcrepp LEVEL_RE( - "(?i)(TRACE|DEBUG\\d*|INFO|NOTICE|STATS|WARN(?:ING)?|ERR(?:OR)?|CRITICAL|SEVERE|FATAL)"); - -log_level_t string2level(const char *levelstr, ssize_t len, bool exact) -{ - log_level_t retval = LEVEL_UNKNOWN; - - if (len == (ssize_t)-1) { - len = strlen(levelstr); - } - - if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) && - (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) { - return retval; - } - - pcre_input pi(levelstr, 0, len); - pcre_context_static<10> pc; - - if (LEVEL_RE.match(pc, pi)) { - auto iter = pc.begin(); - if (!exact || pc[0]->c_begin == 0) { - retval = abbrev2level(pi.get_substr_start(iter), - pi.get_substr_len(iter)); - } - } - - return retval; -} - log_level_t abbrev2level(const char *levelstr, ssize_t len) { if (len == 0 || levelstr[0] == '\0') { diff --git a/src/log_level_re.cc b/src/log_level_re.cc new file mode 100644 index 00000000..8ce88c74 --- /dev/null +++ b/src/log_level_re.cc @@ -0,0 +1,590 @@ +/* Generated by re2c 1.1.1 on Tue Oct 16 06:58:50 2018 */ +#line 1 "../../lnav2/src/log_level_re.re" +/** + * Copyright (c) 2018, Timothy Stack + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include +#include +#include + +#include "log_level.hh" + +log_level_t string2level(const char *levelstr, ssize_t len, bool exact) +{ + log_level_t retval = LEVEL_UNKNOWN; + + if (len == (ssize_t)-1) { + len = strlen(levelstr); + } + + if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) && + (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) { + return retval; + } + +# define YYCTYPE unsigned char +# define RET(tok) { \ + return tok; \ + } + + const YYCTYPE *YYCURSOR = (const unsigned char *) levelstr; + const YYCTYPE *YYLIMIT = (const unsigned char *) levelstr + len; + const YYCTYPE *YYMARKER = YYCURSOR; + const YYCTYPE *debug_level = nullptr; + +# define YYPEEK() (YYCURSOR < YYLIMIT ? *YYCURSOR : 0) +# define YYSKIP() ++YYCURSOR +# define YYBACKUP() YYMARKER = YYCURSOR +# define YYRESTORE() YYCURSOR = YYMARKER +# define YYSTAGP(x) x = YYCURSOR - 1 + + const unsigned char *yyt1; + loop: + +#line 73 "log_level_re.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + yych = YYPEEK (); + switch (yych) { + case 0x00: goto yy2; + case 'C': + case 'c': goto yy6; + case 'D': + case 'd': goto yy7; + case 'E': + case 'e': goto yy8; + case 'F': + case 'f': goto yy9; + case 'I': + case 'i': goto yy10; + case 'N': + case 'n': goto yy11; + case 'S': + case 's': goto yy12; + case 'T': + case 't': goto yy13; + case 'W': + case 'w': goto yy14; + default: goto yy4; + } +yy2: + YYSKIP (); +#line 75 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_UNKNOWN); } +#line 104 "log_level_re.cc" +yy4: + YYSKIP (); +yy5: +#line 102 "../../lnav2/src/log_level_re.re" + { goto loop; } +#line 110 "log_level_re.cc" +yy6: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy15; + default: goto yy5; + } +yy7: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy17; + default: goto yy5; + } +yy8: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + 
yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy18; + default: goto yy5; + } +yy9: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy19; + default: goto yy5; + } +yy10: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'N': + case 'n': goto yy20; + default: goto yy5; + } +yy11: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'O': + case 'o': goto yy21; + default: goto yy5; + } +yy12: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy22; + case 'T': + case 't': goto yy23; + default: goto yy5; + } +yy13: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy24; + default: goto yy5; + } +yy14: + yyaccept = 0; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy25; + default: goto yy5; + } +yy15: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'I': + case 'i': goto yy26; + default: goto yy16; + } +yy16: + YYRESTORE (); + switch (yyaccept) { + case 0: goto yy5; + case 1: goto yy29; + default: goto yy48; + } +yy17: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'B': + case 'b': goto yy27; + default: goto yy16; + } +yy18: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy28; + default: goto yy16; + } +yy19: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'T': + case 't': goto yy30; + default: goto yy16; + } +yy20: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'F': + case 'f': goto yy31; + default: goto yy16; + } +yy21: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'T': + case 't': goto yy32; + default: goto yy16; + } +yy22: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'V': + case 'v': goto yy33; + default: goto yy16; + } +yy23: + YYSKIP (); + yych = 
YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy34; + default: goto yy16; + } +yy24: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy35; + default: goto yy16; + } +yy25: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy36; + default: goto yy16; + } +yy26: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'T': + case 't': goto yy37; + default: goto yy16; + } +yy27: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'U': + case 'u': goto yy38; + default: goto yy16; + } +yy28: + yyaccept = 1; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'O': + case 'o': goto yy39; + default: goto yy29; + } +yy29: +#line 98 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_ERROR); } +#line 319 "log_level_re.cc" +yy30: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy40; + default: goto yy16; + } +yy31: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'O': + case 'o': goto yy41; + default: goto yy16; + } +yy32: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'I': + case 'i': goto yy43; + default: goto yy16; + } +yy33: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy44; + default: goto yy16; + } +yy34: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'T': + case 't': goto yy45; + default: goto yy16; + } +yy35: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'C': + case 'c': goto yy46; + default: goto yy16; + } +yy36: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'N': + case 'n': goto yy47; + default: goto yy16; + } +yy37: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'I': + case 'i': goto yy49; + default: goto yy16; + } +yy38: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'G': + case 'g': goto yy50; + default: goto yy16; + } +yy39: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy52; + default: goto 
yy16; + } +yy40: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'L': + case 'l': goto yy53; + default: goto yy16; + } +yy41: + YYSKIP (); +#line 94 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_INFO); } +#line 412 "log_level_re.cc" +yy43: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'C': + case 'c': goto yy55; + default: goto yy16; + } +yy44: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'R': + case 'r': goto yy56; + default: goto yy16; + } +yy45: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'S': + case 's': goto yy57; + default: goto yy16; + } +yy46: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy59; + default: goto yy16; + } +yy47: + yyaccept = 2; + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case 'I': + case 'i': goto yy61; + default: goto yy48; + } +yy48: +#line 97 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_WARNING); } +#line 458 "log_level_re.cc" +yy49: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'C': + case 'c': goto yy62; + default: goto yy16; + } +yy50: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case '2': + case '3': + case '4': + case '5': goto yy63; + default: + YYSTAGP (yyt1); + goto yy51; + } +yy51: + debug_level = yyt1; +#line 77 "../../lnav2/src/log_level_re.re" + { + if (debug_level == nullptr) { + RET(LEVEL_DEBUG); + } + switch (*debug_level) { + case '2': + RET(LEVEL_DEBUG2); + case '3': + RET(LEVEL_DEBUG3); + case '4': + RET(LEVEL_DEBUG4); + case '5': + RET(LEVEL_DEBUG5); + default: + RET(LEVEL_DEBUG); + } + } +#line 499 "log_level_re.cc" +yy52: + YYSKIP (); + goto yy29; +yy53: + YYSKIP (); +#line 101 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_FATAL); } +#line 507 "log_level_re.cc" +yy55: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy64; + default: goto yy16; + } +yy56: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'E': + case 'e': goto yy66; + default: goto 
yy16; + } +yy57: + YYSKIP (); +#line 96 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_STATS); } +#line 528 "log_level_re.cc" +yy59: + YYSKIP (); +#line 76 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_TRACE); } +#line 533 "log_level_re.cc" +yy61: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'N': + case 'n': goto yy68; + default: goto yy16; + } +yy62: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'A': + case 'a': goto yy69; + default: goto yy16; + } +yy63: + YYSKIP (); + YYSTAGP (yyt1); + goto yy51; +yy64: + YYSKIP (); +#line 95 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_INFO); } +#line 558 "log_level_re.cc" +yy66: + YYSKIP (); +#line 100 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_CRITICAL); } +#line 563 "log_level_re.cc" +yy68: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'G': + case 'g': goto yy70; + default: goto yy16; + } +yy69: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case 'L': + case 'l': goto yy71; + default: goto yy16; + } +yy70: + YYSKIP (); + goto yy48; +yy71: + YYSKIP (); +#line 99 "../../lnav2/src/log_level_re.re" + { RET(LEVEL_CRITICAL); } +#line 587 "log_level_re.cc" +} +#line 104 "../../lnav2/src/log_level_re.re" + +} diff --git a/src/log_level_re.re b/src/log_level_re.re new file mode 100644 index 00000000..5b453447 --- /dev/null +++ b/src/log_level_re.re @@ -0,0 +1,105 @@ +/** + * Copyright (c) 2018, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include +#include +#include + +#include "log_level.hh" + +log_level_t string2level(const char *levelstr, ssize_t len, bool exact) +{ + log_level_t retval = LEVEL_UNKNOWN; + + if (len == (ssize_t)-1) { + len = strlen(levelstr); + } + + if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) && + (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) { + return retval; + } + +# define YYCTYPE unsigned char +# define RET(tok) { \ + return tok; \ + } + + const YYCTYPE *YYCURSOR = (const unsigned char *) levelstr; + const YYCTYPE *YYLIMIT = (const unsigned char *) levelstr + len; + const YYCTYPE *YYMARKER = YYCURSOR; + const YYCTYPE *debug_level = nullptr; + +# define YYPEEK() (YYCURSOR < YYLIMIT ? 
*YYCURSOR : 0) +# define YYSKIP() ++YYCURSOR +# define YYBACKUP() YYMARKER = YYCURSOR +# define YYRESTORE() YYCURSOR = YYMARKER +# define YYSTAGP(x) x = YYCURSOR - 1 + + /*!stags:re2c format = 'const unsigned char *@@;'; */ + loop: + /*!re2c + re2c:yyfill:enable = 0; + re2c:flags:input = custom; + + EOF = "\x00"; + + EOF { RET(LEVEL_UNKNOWN); } + 'trace' { RET(LEVEL_TRACE); } + 'debug' [2-5]? @debug_level { + if (debug_level == nullptr) { + RET(LEVEL_DEBUG); + } + switch (*debug_level) { + case '2': + RET(LEVEL_DEBUG2); + case '3': + RET(LEVEL_DEBUG3); + case '4': + RET(LEVEL_DEBUG4); + case '5': + RET(LEVEL_DEBUG5); + default: + RET(LEVEL_DEBUG); + } + } + 'info' { RET(LEVEL_INFO); } + 'notice' { RET(LEVEL_INFO); } + 'stats' { RET(LEVEL_STATS); } + 'warn'|'warning' { RET(LEVEL_WARNING); } + 'err'|'error' { RET(LEVEL_ERROR); } + 'critical' { RET(LEVEL_CRITICAL); } + 'severe' { RET(LEVEL_CRITICAL); } + 'fatal' { RET(LEVEL_FATAL); } + * { goto loop; } + + */ +} diff --git a/src/logfile.cc b/src/logfile.cc index 24437a35..f70908c6 100644 --- a/src/logfile.cc +++ b/src/logfile.cc @@ -56,7 +56,7 @@ static const size_t INDEX_RESERVE_INCREMENT = 1024; logfile::logfile(const string &filename, logfile_open_options &loo) : lf_filename(filename) { - require(filename.size() > 0); + require(!filename.empty()); memset(&this->lf_stat, 0, sizeof(this->lf_stat)); if (loo.loo_fd == -1) { @@ -107,7 +107,7 @@ logfile::~logfile() { } -bool logfile::exists(void) const +bool logfile::exists() const { struct stat st; @@ -210,14 +210,7 @@ bool logfile::process_prefix(off_t offset, shared_buffer_ref &sbr) if (latest < second_to_last) { if (this->lf_format->lf_time_ordered) { - log_debug( - "%s:%d: out-of-time-order line detected %d.%03d < %d.%03d", - this->lf_filename.c_str(), - prescan_size, - latest.get_time(), - latest.get_millis(), - second_to_last.get_time(), - second_to_last.get_millis()); + this->lf_out_of_time_order_count += 1; for (size_t lpc = prescan_size; lpc < 
this->lf_index.size(); lpc++) { logline &line_to_update = this->lf_index[lpc]; @@ -416,6 +409,13 @@ logfile::rebuild_result_t logfile::rebuild_index() this->lf_index_time = st.st_mtime; } + if (this->lf_out_of_time_order_count) { + log_info("Detected %d out-of-time-order lines in file: %s", + this->lf_out_of_time_order_count, + this->lf_filename.c_str()); + this->lf_out_of_time_order_count = 0; + } + return retval; } diff --git a/src/logfile.hh b/src/logfile.hh index f7bfed8b..1ef73f07 100644 --- a/src/logfile.hh +++ b/src/logfile.hh @@ -1,3 +1,5 @@ +#include + /** * Copyright (c) 2007-2012, Timothy Stack * @@ -109,8 +111,8 @@ public: class error { public: - error(const std::string &filename, int err) - : e_filename(filename), + error(std::string filename, int err) + : e_filename(std::move(filename)), e_err(err) { }; std::string e_filename; @@ -205,26 +207,24 @@ public: else { timeradd(&old_time, &tv, &this->lf_time_offset); } - for (iterator iter = this->begin(); - iter != this->end(); - ++iter) { + for (auto &iter : *this) { struct timeval curr, diff, new_time; - curr = iter->get_timeval(); + curr = iter.get_timeval(); timersub(&curr, &old_time, &diff); timeradd(&diff, &this->lf_time_offset, &new_time); - iter->set_time(new_time); + iter.set_time(new_time); } this->lf_sort_needed = true; }; - void clear_time_offset(void) { + void clear_time_offset() { struct timeval tv = { 0, 0 }; this->adjust_content_time(-1, tv); }; - bool is_time_adjusted(void) const { + bool is_time_adjusted() const { return (this->lf_time_offset.tv_sec != 0 || this->lf_time_offset.tv_usec != 0); } @@ -392,9 +392,9 @@ public: }; /** Check the invariants for this object. 
*/ - bool invariant(void) + bool invariant() { - require(this->lf_filename.size() > 0); + require(!this->lf_filename.empty()); return true; } @@ -435,6 +435,7 @@ protected: logfile_observer *lf_logfile_observer{nullptr}; size_t lf_longest_line{0}; text_format_t lf_text_format{TF_UNKNOWN}; + uint32_t lf_out_of_time_order_count{0}; }; class logline_observer { diff --git a/src/logfile_sub_source.cc b/src/logfile_sub_source.cc index d778c009..6e05fd32 100644 --- a/src/logfile_sub_source.cc +++ b/src/logfile_sub_source.cc @@ -541,7 +541,7 @@ bool logfile_sub_source::rebuild_index(bool force) { iterator iter; size_t total_lines = 0; - bool retval, full_sort = false; + bool retval, full_sort = false, new_order = false; int file_count = 0; force = force || this->lss_force_rebuild; @@ -584,6 +584,7 @@ bool logfile_sub_source::rebuild_index(bool force) case logfile::RR_NEW_ORDER: retval = true; force = true; + new_order = true; break; } file_count += 1; @@ -644,7 +645,10 @@ bool logfile_sub_source::rebuild_index(bool force) } } - sort(this->lss_index.begin(), this->lss_index.end(), line_cmper); + if (new_order || (this->lss_files.size() > 1)) { + sort(this->lss_index.begin(), this->lss_index.end(), + line_cmper); + } } else { kmerge_tree_c merge( file_count); diff --git a/src/pcrepp.cc b/src/pcrepp.cc index 2533d602..d3d967f5 100644 --- a/src/pcrepp.cc +++ b/src/pcrepp.cc @@ -85,7 +85,7 @@ void pcrepp::find_captures(const char *pattern) in_class = true; break; case '(': - cap_in_progress.push_back(pcre_context::capture(lpc, lpc)); + cap_in_progress.emplace_back(lpc, lpc); break; case ')': { if (!cap_in_progress.empty()) { diff --git a/src/pcrepp.hh b/src/pcrepp.hh index 650cf129..d6943ff1 100644 --- a/src/pcrepp.hh +++ b/src/pcrepp.hh @@ -373,8 +373,12 @@ public: const char *errptr; int eoff; + if (!(options & PCRE_NEVER_UTF)) { + options |= PCRE_UTF8; + } + if ((this->p_code = pcre_compile(pattern, - options | PCRE_UTF8, + options, &errptr, &eoff, NULL)) == NULL) { @@ 
-421,16 +425,15 @@ public: }; pcre_named_capture::iterator named_begin() const { - return pcre_named_capture::iterator(this->p_named_entries, - this->p_name_len); + return {this->p_named_entries, static_cast(this->p_name_len)}; }; pcre_named_capture::iterator named_end() const { char *ptr = (char *)this->p_named_entries; ptr += this->p_named_count * this->p_name_len; - return pcre_named_capture::iterator((pcre_named_capture *)ptr, - this->p_name_len); + return {(pcre_named_capture *)ptr, + static_cast(this->p_name_len)}; }; const std::vector &captures() const { @@ -565,6 +568,7 @@ public: return length; }; +// #undef PCRE_STUDY_JIT_COMPILE #ifdef PCRE_STUDY_JIT_COMPILE static pcre_jit_stack *jit_stack(void); diff --git a/src/simdutf8check.h b/src/simdutf8check.h new file mode 100644 index 00000000..3e24f4d9 --- /dev/null +++ b/src/simdutf8check.h @@ -0,0 +1,237 @@ +/** + * https://github.com/lemire/fastvalidate-utf-8 + */ + +#ifndef SIMDUTF8CHECK_H +#define SIMDUTF8CHECK_H +#include +#include +#include +#include + +#include "lnav_log.hh" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * legal utf-8 byte sequence + * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 + * + * Code Points 1st 2s 3s 4s + * U+0000..U+007F 00..7F + * U+0080..U+07FF C2..DF 80..BF + * U+0800..U+0FFF E0 A0..BF 80..BF + * U+1000..U+CFFF E1..EC 80..BF 80..BF + * U+D000..U+D7FF ED 80..9F 80..BF + * U+E000..U+FFFF EE..EF 80..BF 80..BF + * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + * + */ + +// all byte values must be no larger than 0xF4 +static inline void checkSmallerThan0xF4(__m128i current_bytes, + __m128i *has_error) +{ + // unsigned, saturates to 0 below max + *has_error = _mm_or_si128(*has_error, + _mm_subs_epu8(current_bytes, + _mm_set1_epi8(0xF4))); +} + +static inline __m128i continuationLengths(__m128i high_nibbles) +{ + return _mm_shuffle_epi8( + _mm_setr_epi8(1, 1, 1, 1, 
1, 1, 1, 1, // 0xxx (ASCII) + 0, 0, 0, 0, // 10xx (continuation) + 2, 2, // 110x + 3, // 1110 + 4), // 1111, next should be 0 (not checked here) + high_nibbles); +} + +static inline __m128i carryContinuations(__m128i initial_lengths, + __m128i previous_carries) +{ + + __m128i right1 = _mm_subs_epu8( + _mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1), + _mm_set1_epi8(1)); + __m128i sum = _mm_add_epi8(initial_lengths, right1); + + __m128i right2 = _mm_subs_epu8( + _mm_alignr_epi8(sum, previous_carries, 16 - 2), + _mm_set1_epi8(2)); + return _mm_add_epi8(sum, right2); +} + +static inline void checkContinuations(__m128i initial_lengths, + __m128i carries, + __m128i *has_error) +{ + + // overlap || underlap + // carry > length && length > 0 || !(carry > length) && !(length > 0) + // (carries > length) == (lengths > 0) + __m128i overunder = _mm_cmpeq_epi8( + _mm_cmpgt_epi8(carries, initial_lengths), + _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128())); + + *has_error = _mm_or_si128(*has_error, overunder); +} + +// when 0xED is found, next byte must be no larger than 0x9F +// when 0xF4 is found, next byte must be no larger than 0x8F +// next byte must be continuation, ie sign bit is set, so signed < is ok +static inline void checkFirstContinuationMax(__m128i current_bytes, + __m128i off1_current_bytes, + __m128i *has_error) +{ + __m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xED)); + __m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4)); + + __m128i badfollowED = _mm_and_si128( + _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9F)), + maskED); + __m128i badfollowF4 = _mm_and_si128( + _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8F)), + maskF4); + + *has_error = _mm_or_si128(*has_error, + _mm_or_si128(badfollowED, badfollowF4)); +} + +// map off1_hibits => error condition +// hibits off1 cur +// C => < C2 && true +// E => < E1 && < A0 +// F => < F1 && < 90 +// else false && false +static inline void checkOverlong(__m128i 
current_bytes, + __m128i off1_current_bytes, + __m128i hibits, + __m128i previous_hibits, + __m128i *has_error) +{ + __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1); + __m128i initial_mins = _mm_shuffle_epi8( + _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, // 10xx => false + 0xC2, -128, // 110x + 0xE1, // 1110 + 0xF1), + off1_hibits); + + __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes); + + __m128i second_mins = _mm_shuffle_epi8( + _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, // 10xx => false + 127, 127, // 110x => true + 0xA0, // 1110 + 0x90), + off1_hibits); + __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes); + *has_error = _mm_or_si128(*has_error, + _mm_and_si128(initial_under, second_under)); +} + +struct processed_utf_bytes { + __m128i rawbytes; + __m128i high_nibbles; + __m128i carried_continuations; +}; + +static inline void count_nibbles(__m128i bytes, + struct processed_utf_bytes *answer) +{ + answer->rawbytes = bytes; + answer->high_nibbles = _mm_and_si128(_mm_srli_epi16(bytes, 4), + _mm_set1_epi8(0x0F)); +} + +// check whether the current bytes are valid UTF-8 +// at the end of the function, previous gets updated +static struct processed_utf_bytes +checkUTF8Bytes(__m128i current_bytes, struct processed_utf_bytes *previous, + __m128i *has_error) +{ + struct processed_utf_bytes pb; + count_nibbles(current_bytes, &pb); + + checkSmallerThan0xF4(current_bytes, has_error); + + __m128i initial_lengths = continuationLengths(pb.high_nibbles); + + pb.carried_continuations = carryContinuations( + initial_lengths, + previous->carried_continuations); + + checkContinuations(initial_lengths, pb.carried_continuations, has_error); + + __m128i off1_current_bytes = + _mm_alignr_epi8(pb.rawbytes, previous->rawbytes, 16 - 1); + checkFirstContinuationMax(current_bytes, off1_current_bytes, + has_error); + + 
checkOverlong(current_bytes, off1_current_bytes, + pb.high_nibbles, previous->high_nibbles, has_error); + return pb; +} + +static bool validate_utf8_fast(const char *src, size_t len, ssize_t *len_out) +{ + size_t i = 0, orig_len = len; + __m128i has_error = _mm_setzero_si128(); + __m128i lfchars = _mm_set1_epi8('\n'); + __m128i lfresult = _mm_setzero_si128(); + struct processed_utf_bytes previous = {.rawbytes = _mm_setzero_si128(), + .high_nibbles = _mm_setzero_si128(), + .carried_continuations = _mm_setzero_si128()}; + if (len >= 16) { + for (; i <= len - 16; i += 16) { + __m128i current_bytes = _mm_loadu_si128( + (const __m128i *) (src + i)); + previous = checkUTF8Bytes(current_bytes, &previous, &has_error); + lfresult = _mm_cmpeq_epi8(current_bytes, lfchars); + if (_mm_movemask_epi8(lfresult)) { + for (; src[i] != '\n'; i++) { + } + len = i; + break; + } + } + } + + //last part + if (i < len) { + char buffer[16]; + memset(buffer, 0, 16); + memcpy(buffer, src + i, len - i); + __m128i current_bytes = _mm_loadu_si128((const __m128i *) (buffer)); + previous = checkUTF8Bytes(current_bytes, &previous, &has_error); + for (; i < len && src[i] != '\n'; i++) { + } + } else { + has_error = _mm_or_si128(_mm_cmpgt_epi8(previous.carried_continuations, + _mm_setr_epi8(9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, + 9, 9, 9, 1)), + has_error); + } + + if (i < orig_len && src[i] == '\n') { + *len_out = i; + } + + return _mm_testz_si128(has_error, has_error); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test/Makefile.am b/test/Makefile.am index 8a629b5c..49710e60 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -335,6 +335,7 @@ dist_noinst_DATA = \ textfile_json_one_line.0 \ textfile_quoted_json.0 \ toplevel.lnav \ + UTF-8-test.txt \ view_colors_output.0 \ vt52_curses_input.0 \ vt52_curses_input.1 \ diff --git a/test/UTF-8-test.txt b/test/UTF-8-test.txt new file mode 100644 index 0000000000000000000000000000000000000000..a5b5d50e6b61eb9a3b751b3954f83e61bb59db9b GIT 
binary patch literal 22781 zcmdU1X_Fh*b>CYjiIrHEbIyIeVd1bb#8j*y4MbNJE8ngr{T%> zH=?M0*NGF|Zns=*maBHFY*)*j-4j3B+Sy$_dEy6TNmiFvPA)BPEUj+fmviUj>}zDb zylhzyHeB;;sk==Fw0Y8Snr+$lJK|ijTdwCUO2hB+4}n}98At=QQVnO2tbk<@(9!SA*KTMb-()7zJ}9Yk)m)3ovMPE_?< z4&m`=AYfdi2gL9x(zW;T2&>e#!>~kZOLg0AmhhcCgBHcvb3FXf@9Z>qKS;P_9xq{M zX9r^v+X4}FC$KAEXd?8A)3Eozr9kXRLJ%VDrme$0ABAGOEs(JYzJ!rugIk~^6$2>n zcEzq>1dFOMR23y23`tPI^(hkaJx~+w1GHs>5#U|33BkDdJ8tNBov>p!@zi!F0^_$X zwVlAF6hNWW!L)ctp%pbicFV`;>TVE5OOcBa*d6d8P>GKyMu;y#v2jP!v2ly zk#^vNEf_7C=(s`3piW3-P;OK9WvZqTur#5BA;#K-8dm*1m=Jj3)$CxO%20DT9xyLe zwsDmL*Fxz!%_e`wy4l5ZQiI8()+YdS1n&OdNLR+dk#pITbEYjt^fc~L?miBLAnr&X@2<+Unk z1xxnas$;=~9Ct4@5j@+*5Y%*_V9+xuWkXKLj~OeW4FN%?jpW=YyARqX)rK9|RoI{z z6*L^m6}lKPRobrk?FftsJC)FhN~FyyrMJYKuwgkgxUNh6oN`}GFDwzt;8txQTCib0 zpaX3vNd^8-vO$_rm*s*-zPDPw2N~g;P^31r#;!I1WQ{2rZPN#c8k3rQ#uD?@lrel9 zc&guQIdCZHh-Bg7@$K#H&0~wwZMb+&)Ax7DrV9Ahba&}X@N9^jV}PH6M+Px?o*Hkbs)2c49sb4l;8pIYPlARu zoGPT(Z8+q>K>~n^Evy`)jv=&wc~r>Af(RF7VRiKHP%*^JY6M2Y=RqsH9fW|J=p5L9 znCI@cwPR{0b+~K4%VEB*<45q|vDS%Qr|sIF1&&Z+V?K-_EmasQ z8kByYLD(%L0K%By=V1!rzE^DozUTgkfsVl1(8MKiTZq~&WQ?&SPutJLna1mUxDM_8 zP)ZNpy(L@sZI)m$I*2Ai09`mdo-eM(DM*@X6gpHP8)DPgmBh+wV22I*AB0J!9+fy; z8{>>!u-$}rV^pxJHiRsQ=#+ES2`J?=&=eCklnU@e6AC)5L_(+U%sfB{lLwa<+D)c0 z@KC(hsyj~Ym{ISVi82sR3hY2obbeCmm0&TKs!kix5mfO0s0&XAKVcanR?92p6&3Aj z8SfK619~GH1bV)=)P}u8YO7}+ICuU$#}Wm8Gqf%qN3>YhoB8(iHYkdjXatJ~CgW3j zY>f4iQ(Nk~HBd)WUqlpu?vm2{m{U#~UP-4-nIT8$v^)WO76SEQ@N+P=V`Y;of&qRv z)cla%oCy!KxVqci&#Y-mCc=zBS!*Ss((AM;P-3Qwi3bb$e1=vyB2^fTxfonxrVgS2 zoKKuoB7(+ABNaY2vzm!32iYF<<33-#7gY~A0sMvCghhF zq-e|{8C-$Ugm&8k%c>);(`u^J2$kppjT%GKyf1X4j@nzOxnwR`JcchcJNi(NkUAXU zsBwD@$v}mutT zh$~}5e&&(OGeVveIhlv#Txx{K7vFe!R?Jf(r*fF<3Yq8RORop!41|tK>vn&mbRg_| zm`Ke*yX7d<8wxo^y-#oF)cePZYNK$x)71O)b{_BPK~X?uV0lu#G3m<#4hJ(Lj;r?y z0rQC89Lx$irrv9cn1}opAZH|VRK3AOy>gQIHZf-ayJ7t0~3rOy}M?jxyqUWGiivnIv=ZUUoW)yJ@%RT>A zZ|6_n7AGs61L#Y#T@(gG2L*alV=(oPCW|Yqyd_s}%50DMoQs02-}TDL_lo|U|9IgU zC=ru;a?(B`3|YRkv3!@5islZ`Ka<&Mu%8qz@G5oS74LhKL_mwUeB`g-0 
z885ud+RgBaa?Su6Jh+(ko8rwxp}|7{J$ZAyStvAkr{x|eJ@}@Vx<*jdf`#awGu6MuZz3+SfqlZB<_ZYm#KJdX0efT3E z{n*Dp@ySnp`tc{O+%Dc{KKqNm^vj?7mHw}O{tLhM#V4=MWD%p!p1(ZSbgF~$IZ9|@ zg!S8ed2saM1%5jc`snXjU|w4_tD$k&MOa#0(%M{7dBDVc`BIg7T&ttBKjlj4(;A;C z%OM8p(pcEW0&zC_v`#PuCjC>2m;UAU-Y8z_ntVyVEMJkY%CF0B$ZyJT$#2W=$nVPU z$?wZk^0gTrVd(n<`9t|5`D6JL`BV8b`E&UT`AhjL`D^(b`CIurd0L7ms`DQ&^p6@y z`L3KAm+#sX`8t%Z^NN=5b@{sdz5Ijxqx_Tnv;2$vtNfe%yZndzr`(Wl$ngJN9>_N+ zkBxD0tWOa~o#LpUo^Ebg{P?c6w`b&8c}|{}7v#U>zvY`V@PFJCkH$WbuTdWB&64GgT zysDZJJZ*k%+6;6}KSAQkmiF_oRc4|oB7=iG(#2q13uZ9^c@>u(QBsJ|L8XgMXgXOq zm#rG3WWd$cxU&sS8R<2D{;i>eh5TVo7);F63rof4=zI7naP@a<-sQ;!47FNLX26 z$t>m86H2;}Fo~I(<6PcQ(on~9X!b$vkUblwOT$G!=a?r8i#%}tK7u}$E(?52e5$%z z*gDHJG1+X$Lc>MgN8*O8*Ce66mo|5>-H8DsEtVFV6DByx&-srR$fax7FT;7z+SNRt z9uA?!e{cTuJqZ2D`v2HZW++5+qqcUo+YTB| z5%$vF(4+}4($33K3g=HB_{TU{ceuFp^0>{9KK<6e(WXyw-d)#qrSHrK4`C4W@n1a# ze);9^9145>{DTL(fKE5hGUG_Lmbq~a<|`Q&!6_2p=}QBWMm2ea0_}P(FGrd?Dc08U2^rTakV#X z(Xa@!XT)ZkA}VQUq(i@Jt;5YC4X0I>Gg1m|sD$YQ%kbjn(^BG&1Lw8KC-o-N)2xl) z*{Cy)XwN6?Gk^8aUTcTgmT;Fg4`YW4w{vK-y7n%!r@hIhc*HIwuXW+JhA~|rCz7kD zn_I9`2oX=0$j~ z30Q8Z3fn^)s82;V(zUh!)zHLRu!j}ZoYp3d(oJZC#sC`E$OSyN?QmdVhS@=Y=}{-( zrZe5oLhIafo$g}7J)};MR=h6_ACB8crtjR2W%^xdvJl5O|J8_AObq4NWVKM1)X&S53})42gHvR~Io zCU5;QAF6wAcnkNvJ|GL31u)a|b4=={0}XCp4!onNK0W z_(((4S{kxYI#EJ5wb`r%y)(juZ6z1&m-~8h>Gr6A7gfNC(|{@KZm0;o+q!9y_RIk$ z7vs^vE!Ql8fE`MnZe=scf6dnoFWAP%kJAe|{{-W{H~n<>H}jV-^bZ=y^2RC~gR0n} zkc;=A5NYAHD~0p8o<+db4A9Y@pgbl3uV0=OaVhN4#N5@hu%i?v24T zJ(F?mj%30|pjAJ?#2fmEhk#j-o4F^<;Vp>-Cod#(nAA=*Rwo7g`!3#-%}x${>ipI5 z!O2`BF>Lq(lPBm7t^@2UBe@rEL(U*LH;rLfodHTRS5xE6G8Hmm;>xXhvJ0b73K;#4*}HwR>dIZnepaCUAE6hTQO%}vGzUetHYUmh$4KVTMi zNfR2UFij8q9n>fC1Idl)ptnatpK)KC*w6+8W0gf`1F*Flj}by43 zC48);eEiH|X~d!}__&>n8g^^E;Ek(?MZ~f#5tC7yM2Vv=mLH8tX5`n~{)LiWmHTZs zfIPlSt;OW-$JJWE$W++IXh3%+^~WyXxV7MQ%qHF^n5l3w%v3jld22x#%-CXJgClgR zDUQ(j_@FSIvCSCFjLsA@r4yJ%=`>bD%O0UqReglcCyMja*iDR?(V1eVbON&|oyKzL zFGlE8s1Z7^6z3;;9sm|&W^|^QDV@M9Lg#Ip;0OTqXie_JvV5|DfU-W8I1Zch%s+aK 
zW;-RE7Lt;R(lwWfN<5k-*n@yrRL~=$aGDH@-g?Z^*m(16PpHNsaT!^{^P4=S4s$u4 zn4_g-PG6ET$^-GbU5eSWO3BPePsM`!RsujH>i4x$Ilc89&MIe9-DdQnA3T!WrWLlMIw zp{lN#KWL=_YYXWDsV)UqntpW`df>;3Z59c&tui(!b9f~fie^igAJ#+iN8yc(*PM%o zgQ+lQ#JPjq$ur$Tjq@=p!HGQLh0Y9+wa(;}L%?JP$Ax_^nZjg87@pla_1*mCaesQs zBt{CNc)%f4raCGP@*pJw5#o&C&kib4X2*-!d*E^m64IZa^19wsZbc|NX=Mqr?7V z!~O?`{SOZN9~$;QJnVmD*#GFT|FL2J|ZGz&1f`h z_Eg0#9_P<(SztEeVN5IK^2&0qE&E{3&!4{dd8KC_9bCn0@XQtbyn>$t{6x|G*@p*L z@ftjf&+!^Oi_h^IJd4lq8a#*3@fsXVxA`=q{yCuGHFyqacnzKd8eW6vfri)Md3=u7 p;CXzG*Wh`4j@JOEDqJmkvO@N>BDCH5J3T8tzwg#xE%c3^^?x0SNE-kE literal 0 HcmV?d00001 diff --git a/test/drive_line_buffer.cc b/test/drive_line_buffer.cc index 4e59fc91..47e36005 100644 --- a/test/drive_line_buffer.cc +++ b/test/drive_line_buffer.cc @@ -52,136 +52,139 @@ using namespace std; int main(int argc, char *argv[]) { - int c, rnd_iters = 5, retval = EXIT_SUCCESS; - vector > index; - auto_fd fd = STDIN_FILENO; + int c, rnd_iters = 5, retval = EXIT_SUCCESS; + vector > index; + auto_fd fd = STDIN_FILENO; int offseti = 0; off_t offset = 0; - struct stat st; - - while ((c = getopt(argc, argv, "o:i:n:")) != -1) { - switch (c) { - case 'o': - if (sscanf(optarg, "%d", &offseti) != 1) { - fprintf(stderr, - "error: offset is not an integer -- %s\n", - optarg); - retval = EXIT_FAILURE; - } else { - offset = offseti; + int count = 1000; + struct stat st; + + while ((c = getopt(argc, argv, "o:i:n:c:")) != -1) { + switch (c) { + case 'o': + if (sscanf(optarg, "%d", &offseti) != 1) { + fprintf(stderr, + "error: offset is not an integer -- %s\n", + optarg); + retval = EXIT_FAILURE; + } else { + offset = offseti; + } + break; + case 'n': + if (sscanf(optarg, "%d", &rnd_iters) != 1) { + fprintf(stderr, + "error: offset is not an integer -- %s\n", + optarg); + retval = EXIT_FAILURE; + } + break; + case 'c': + if (sscanf(optarg, "%d", &count) != 1) { + fprintf(stderr, + "error: count is not an integer -- %s\n", + optarg); + retval = EXIT_FAILURE; + } + break; + case 'i': { + FILE 
*file; + + if ((file = fopen(optarg, "r")) == NULL) { + perror("open"); + retval = EXIT_FAILURE; + } else { + int line_number = 1, line_offset; + + while (fscanf(file, "%d", &line_offset) == 1) { + index.push_back( + make_pair(line_number, line_offset)); + line_number += 1; + } + fclose(file); + file = NULL; + } + } + break; + default: + retval = EXIT_FAILURE; + break; } - break; - case 'n': - if (sscanf(optarg, "%d", &rnd_iters) != 1) { - fprintf(stderr, - "error: offset is not an integer -- %s\n", - optarg); + } + + argc -= optind; + argv += optind; + + if (retval != EXIT_SUCCESS) { + } else if ((argc == 0) && (index.size() > 0)) { + fprintf(stderr, "error: cannot randomize stdin\n"); retval = EXIT_FAILURE; - } - break; - case 'i': - { - FILE *file; - - if ((file = fopen(optarg, "r")) == NULL) { - perror("open"); - retval = EXIT_FAILURE; - } - else { - int line_number = 1, line_offset; - - while (fscanf(file, "%d", &line_offset) == 1) { - index.push_back( - make_pair(line_number, line_offset)); - line_number += 1; - } - fclose(file); - file = NULL; + } else if ((argc > 0) && (fd = open(argv[0], O_RDONLY)) == -1) { + perror("open"); + retval = EXIT_FAILURE; + } else if ((argc > 0) && (fstat(fd, &st) == -1)) { + perror("fstat"); + retval = EXIT_FAILURE; + } else { + try { + off_t last_offset = offset; + line_buffer lb; + line_value lv; + char *maddr; + + lb.set_fd(fd); + if (index.size() == 0) { + shared_buffer_ref sbr; + + while (count && lb.read_line(offset, sbr, &lv)) { + printf("%.*s", (int) sbr.length(), sbr.get_data()); + if ((off_t) (last_offset + lv.lv_len) < offset) + printf("\n"); + last_offset = offset; + count -= 1; + } + } else if ((maddr = (char *) mmap(NULL, + st.st_size, + PROT_READ, + MAP_FILE | MAP_PRIVATE, + lb.get_fd(), + 0)) == MAP_FAILED) { + perror("mmap"); + retval = EXIT_FAILURE; + } else { + off_t seq_offset = 0; + + while (lb.read_line(seq_offset, lv)) {} + do { + bool ret; + size_t lpc; + + random_shuffle(index.begin(), index.end()); + 
for (lpc = 0; lpc < index.size(); lpc++) { + + offset = index[lpc].second; + ret = lb.read_line(offset, lv); + + assert(ret); + assert(offset >= 0); + assert(offset <= st.st_size); + assert(memcmp(lv.lv_start, + &maddr[index[lpc].second], + lv.lv_len) == 0); + } + + rnd_iters -= 1; + } while (rnd_iters); + + printf("All done\n"); + } } - } - break; - default: - retval = EXIT_FAILURE; - break; - } - } - - argc -= optind; - argv += optind; - - if (retval != EXIT_SUCCESS) { - } - else if ((argc == 0) && (index.size() > 0)) { - fprintf(stderr, "error: cannot randomize stdin\n"); - retval = EXIT_FAILURE; - } - else if ((argc > 0) && (fd = open(argv[0], O_RDONLY)) == -1) { - perror("open"); - retval = EXIT_FAILURE; - } - else if ((argc > 0) && (fstat(fd, &st) == -1)) { - perror("fstat"); - retval = EXIT_FAILURE; - } - else { - try { - off_t last_offset = offset; - line_buffer lb; - line_value lv; - char *maddr; - - lb.set_fd(fd); - if (index.size() == 0) { - while (lb.read_line(offset, lv)) { - lv.terminate(); - printf("%s", lv.lv_start); - if ((off_t)(last_offset + lv.lv_len) < offset) - printf("\n"); - last_offset = offset; + catch (line_buffer::error &e) { + fprintf(stderr, "error: %s\n", strerror(e.e_err)); + retval = EXIT_FAILURE; } - } - else if ((maddr = (char *)mmap(NULL, - st.st_size, - PROT_READ, - MAP_FILE | MAP_PRIVATE, - lb.get_fd(), - 0)) == MAP_FAILED) { - perror("mmap"); - retval = EXIT_FAILURE; - } - else { - off_t seq_offset = 0; - - while (lb.read_line(seq_offset, lv)) { } - do { - bool ret; - size_t lpc; - - random_shuffle(index.begin(), index.end()); - for (lpc = 0; lpc < index.size(); lpc++) { - - offset = index[lpc].second; - ret = lb.read_line(offset, lv); - - assert(ret); - assert(offset >= 0); - assert(offset <= st.st_size); - assert(memcmp(lv.lv_start, - &maddr[index[lpc].second], - lv.lv_len) == 0); - } - - rnd_iters -= 1; - } while (rnd_iters); - - printf("All done\n"); - } } - catch (line_buffer::error &e) { - fprintf(stderr, "error: %s\n", 
strerror(e.e_err)); - retval = EXIT_FAILURE; - } - } - - return retval; + + return retval; } diff --git a/test/drive_logfile.cc b/test/drive_logfile.cc index ec6ece3f..83a2ae66 100644 --- a/test/drive_logfile.cc +++ b/test/drive_logfile.cc @@ -161,7 +161,9 @@ int main(int argc, char *argv[]) { break; case MODE_LEVELS: for (logfile::iterator iter = lf.begin(); iter != lf.end(); ++iter) { - printf("0x%02x\n", iter->get_level_and_flags()); + log_level_t level = iter->get_level_and_flags(); + printf("%s 0x%x\n", level_names[level & ~LEVEL__FLAGS], + level & LEVEL__FLAGS); } break; } diff --git a/test/test_line_buffer.sh b/test/test_line_buffer.sh index 014dac99..5d4982ec 100644 --- a/test/test_line_buffer.sh +++ b/test/test_line_buffer.sh @@ -42,6 +42,12 @@ check_output "Seeking in the line buffer doesn't work?" < lb-2.dat grep -b '$' lb-2.dat | cut -f 1 -d : > lb.index line_count=`wc -l lb-2.dat` diff --git a/test/test_logfile.sh b/test/test_logfile.sh index b4825138..4f6adc93 100644 --- a/test/test_logfile.sh +++ b/test/test_logfile.sh @@ -222,41 +222,41 @@ EOF run_test ./drive_logfile -v -f syslog_log ${srcdir}/logfile_syslog.0 check_output "Syslog level interpreted incorrectly?" <