From 2589345e5cce522eb9491a09227b315104a650e2 Mon Sep 17 00:00:00 2001
From: Timothy Stack <timothyshanestack@gmail.com>
Date: Wed, 17 Oct 2018 07:03:33 -0700
Subject: [PATCH] [perf] improve initial indexing times

---
 configure.ac              |  19 +-
 src/CMakeLists.txt        |   3 +
 src/Makefile.am           |  12 +-
 src/hist_source.hh        |   4 +-
 src/is_utf8.cc            | 298 +++++++++++++++++++
 src/is_utf8.hh            |  36 +++
 src/line_buffer.cc        |  47 ++-
 src/line_buffer.hh        |   1 +
 src/lnav.cc               |  24 +-
 src/log_format.cc         |   4 +-
 src/log_format.hh         |  13 +-
 src/log_format_impls.cc   |   2 +-
 src/log_level.cc          |  30 --
 src/log_level_re.cc       | 590 ++++++++++++++++++++++++++++++++++++++
 src/log_level_re.re       | 105 +++++++
 src/logfile.cc            |  20 +-
 src/logfile.hh            |  23 +-
 src/logfile_sub_source.cc |   8 +-
 src/pcrepp.cc             |   2 +-
 src/pcrepp.hh             |  14 +-
 src/simdutf8check.h       | 237 +++++++++++++++
 test/Makefile.am          |   1 +
 test/UTF-8-test.txt       | Bin 0 -> 22781 bytes
 test/drive_line_buffer.cc | 253 ++++++++--------
 test/drive_logfile.cc     |   4 +-
 test/test_line_buffer.sh  |   6 +
 test/test_logfile.sh      |  66 ++---
 27 files changed, 1558 insertions(+), 264 deletions(-)
 create mode 100644 src/is_utf8.cc
 create mode 100644 src/is_utf8.hh
 create mode 100644 src/log_level_re.cc
 create mode 100644 src/log_level_re.re
 create mode 100644 src/simdutf8check.h
 create mode 100644 test/UTF-8-test.txt

diff --git a/configure.ac b/configure.ac
index 6a488763..c13a1a0b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -56,23 +56,6 @@ AS_VAR_IF([enable_profiling], [yes],
 
 AC_ARG_VAR(SFTP_TEST_URL)
 
-AC_ARG_ENABLE([profiling],
-              AS_HELP_STRING([--enable-profiling],
-                             [Compile with gprof(1) profiling support]))
-
-AC_MSG_CHECKING(gprof(4) profiling support)
-
-AS_VAR_IF([enable_profiling], [yes],
-      [CFLAGS="$CFLAGS -pg -gstabs"
-       CXXFLAGS="$CXXFLAGS -pg -gstabs"
-       LDFLAGS="$LDFLAGS -pg"],
-      [enable_profiling=no]dnl
-)
-
-AC_MSG_RESULT($enable_profiling)
-
-AC_SUBST(CFLAGS_PG)
-
 AC_PROG_INSTALL
 AC_PROG_RANLIB
 AM_PROG_AR
@@ -134,7 +117,7 @@ AS_CASE(["$host_os"],
     )
 )
 
-AC_CHECK_HEADERS(execinfo.h pty.h util.h zlib.h bzlib.h libutil.h sys/ttydefaults.h)
+AC_CHECK_HEADERS(execinfo.h pty.h util.h zlib.h bzlib.h libutil.h sys/ttydefaults.h x86intrin.h)
 
 LNAV_WITH_JEMALLOC
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a64aa978..23d96e57 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -19,6 +19,7 @@ set(diag_STAT_SRCS
     hist_source.cc
     hotkeys.cc
     intern_string.cc
+    is_utf8.cc
     json-extension-functions.cc
     json_op.cc
     json_ptr.cc
@@ -116,6 +117,7 @@ set(diag_STAT_SRCS
     hotkeys.hh
     init-sql.hh
     intern_string.hh
+    is_utf8.hh
     k_merge_tree.h
     log_data_helper.hh
     log_data_table.hh
@@ -136,6 +138,7 @@ set(diag_STAT_SRCS
     relative_time.hh
     sequence_sink.hh
     shlex.hh
+    simdutf8check.h
     spectro_source.hh
     strong_int.hh
     sysclip.hh
diff --git a/src/Makefile.am b/src/Makefile.am
index 129e8be1..09c53bd3 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -96,7 +96,7 @@ time_fmts.cc: ptimec
 
 if HAVE_RE2C
 %.cc: %.re
-	$(RE2C_V)$(RE2C_CMD) -8 -o $@ $<
+	$(RE2C_V)$(RE2C_CMD) --tags -8 -o $@ $<
 	$(REC2_V)test $@ -ef $(srcdir)/$*.cc || cp $@ $(srcdir)/$*.cc
 endif
 
@@ -173,6 +173,7 @@ noinst_HEADERS = \
 	init.sql \
 	init-sql.hh \
 	intern_string.hh \
+	is_utf8.hh \
 	json_op.hh \
 	json_ptr.hh \
 	k_merge_tree.h \
@@ -189,6 +190,7 @@ noinst_HEADERS = \
 	log_format.hh \
 	log_format_loader.hh \
 	log_level.hh \
+	log_level_re.re \
 	log_search_table.hh \
 	logfile.hh \
 	logfile_sub_source.hh \
@@ -216,6 +218,7 @@ noinst_HEADERS = \
 	session_data.hh \
 	shared_buffer.hh \
 	shlex.hh \
+	simdutf8check.h \
 	spectro_source.hh \
 	sql_util.hh \
 	sqlite-extension-func.hh \
@@ -284,6 +287,7 @@ libdiag_a_SOURCES = \
 	hist_source.cc \
 	hotkeys.cc \
 	intern_string.cc \
+	is_utf8.cc \
 	json-extension-functions.cc \
 	json_op.cc \
 	json_ptr.cc \
@@ -297,6 +301,7 @@ libdiag_a_SOURCES = \
 	log_format.cc \
 	log_format_loader.cc \
 	log_level.cc \
+	log_level_re.cc \
 	logfile.cc \
 	logfile_sub_source.cc \
 	network-extension-functions.cc \
@@ -389,11 +394,12 @@ ptimec_LDADD =
 
 DISTCLEANFILES = \
 	data_scanner_re.cc \
+	default-config-json.c \
+	default-log-formats-json.c \
 	dump-pid-sh.c \
 	help.c \
 	init-sql.c \
-	default-log-formats-json.c \
-	default-config-json.c \
+	log_level_re.cc \
     time_fmts.cc \
     xterm-palette.c
 
diff --git a/src/hist_source.hh b/src/hist_source.hh
index 80b3ae35..cefeb252 100644
--- a/src/hist_source.hh
+++ b/src/hist_source.hh
@@ -433,7 +433,7 @@ private:
         hist_value b_values[HT__MAX];
     };
 
-    static const unsigned int BLOCK_SIZE = 100;
+    static const int64_t BLOCK_SIZE = 100;
 
     struct bucket_block {
         bucket_block() : bb_used(0) {
@@ -445,7 +445,7 @@ private:
     };
 
     bucket_t &find_bucket(int64_t index) {
-        struct bucket_block &bb = this->hs_blocks[index / this->BLOCK_SIZE];
+        struct bucket_block &bb = this->hs_blocks[index / BLOCK_SIZE];
         unsigned int intra_block_index = index % BLOCK_SIZE;
         bb.bb_used = std::max(intra_block_index, bb.bb_used);
         this->hs_line_count = std::max(this->hs_line_count, index + 1);
diff --git a/src/is_utf8.cc b/src/is_utf8.cc
new file mode 100644
index 00000000..6cc5a5df
--- /dev/null
+++ b/src/is_utf8.cc
@@ -0,0 +1,298 @@
+/*
+ * is_utf8 is distributed under the following terms:
+ *
+ * Copyright (c) 2013 Palard Julien. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "is_utf8.hh"
+
+/*
+  Scan up to `len` bytes of `str` for well-formed UTF-8, stopping at the first '\n'.
+
+  Return value :
+  -1 if all `len` bytes are well-formed and contain no '\n'.  Otherwise the
+  0-based index of the first '\n', or of the first byte of a bad sequence.
+
+  Source:
+   http://www.unicode.org/versions/Unicode7.0.0/UnicodeStandard-7.0.pdf
+   page 124, 3.9 "Unicode Encoding Forms", "UTF-8"
+
+
+  Table 3-7. Well-Formed UTF-8 Byte Sequences
+  -----------------------------------------------------------------------------
+  |  Code Points        | First Byte | Second Byte | Third Byte | Fourth Byte |
+  |  U+0000..U+007F     |     00..7F |             |            |             |
+  |  U+0080..U+07FF     |     C2..DF |      80..BF |            |             |
+  |  U+0800..U+0FFF     |         E0 |      A0..BF |     80..BF |             |
+  |  U+1000..U+CFFF     |     E1..EC |      80..BF |     80..BF |             |
+  |  U+D000..U+D7FF     |         ED |      80..9F |     80..BF |             |
+  |  U+E000..U+FFFF     |     EE..EF |      80..BF |     80..BF |             |
+  |  U+10000..U+3FFFF   |         F0 |      90..BF |     80..BF |      80..BF |
+  |  U+40000..U+FFFFF   |     F1..F3 |      80..BF |     80..BF |      80..BF |
+  |  U+100000..U+10FFFF |         F4 |      80..8F |     80..BF |      80..BF |
+  -----------------------------------------------------------------------------
+  `message` and `faulty_bytes` must be non-NULL.  On error `*message` is set (it
+  is NULL otherwise) and `*faulty_bytes` is the number of bytes, starting at
+  the returned index, that take part in the error (0 when there is no error).
+*/
+ssize_t is_utf8(unsigned char *str, size_t len, const char **message, int *faulty_bytes)
+{
+    size_t i = 0;
+
+    *message = nullptr;
+    *faulty_bytes = 0;
+    while (i < len)
+    {
+        if (str[i] == '\n') { /* end of line: stop here, this is not an error */
+            *message = nullptr;
+            return i;
+        }
+
+        if (str[i] <= 0x7F) /* 00..7F */
+        {
+            i += 1;
+        }
+        else if (str[i] >= 0xC2 && str[i] <= 0xDF) /* C2..DF 80..BF */
+        {
+            if (i + 1 < len) /* Expect a 2nd byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte between C2 and DF, expecting a 2nd byte between 80 and BF";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+            }
+            else /* the sequence is cut short by the end of the buffer */
+            {
+                *message = "After a first byte between C2 and DF, expecting a 2nd byte.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 2;
+        }
+        else if (str[i] == 0xE0) /* E0 A0..BF 80..BF */
+        {
+            if (i + 2 < len) /* Expect a 2nd and 3rd byte */
+            {
+                if (str[i + 1] < 0xA0 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte of E0, expecting a 2nd byte between A0 and BF.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte of E0, expecting a 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte of E0, expecting two following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 3;
+        }
+        else if (str[i] >= 0xE1 && str[i] <= 0xEC) /* E1..EC 80..BF 80..BF */
+        {
+            if (i + 2 < len) /* Expect a 2nd and 3rd byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte between E1 and EC, expecting the 2nd byte between 80 and BF.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte between E1 and EC, expecting the 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte between E1 and EC, expecting two following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 3;
+        }
+        else if (str[i] == 0xED) /* ED 80..9F 80..BF */
+        {
+            if (i + 2 < len) /* Expect a 2nd and 3rd byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0x9F)
+                {
+                    *message = "After a first byte of ED, expecting 2nd byte between 80 and 9F.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte of ED, expecting 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte of ED, expecting two following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 3;
+        }
+        else if (str[i] >= 0xEE && str[i] <= 0xEF) /* EE..EF 80..BF 80..BF */
+        {
+            if (i + 2 < len) /* Expect a 2nd and 3rd byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte between EE and EF, expecting 2nd byte between 80 and BF.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte between EE and EF, expecting 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte between EE and EF, expecting two following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 3;
+        }
+        else if (str[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */
+        {
+            if (i + 3 < len) /* Expect a 2nd, 3rd and 4th byte */
+            {
+                if (str[i + 1] < 0x90 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte of F0, expecting 2nd byte between 90 and BF.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte of F0, expecting 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+                if (str[i + 3] < 0x80 || str[i + 3] > 0xBF)
+                {
+                    *message = "After a first byte of F0, expecting 4th byte between 80 and BF.";
+                    *faulty_bytes = 4;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte of F0, expecting three following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 4;
+        }
+        else if (str[i] >= 0xF1 && str[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */
+        {
+            if (i + 3 < len) /* Expect a 2nd, 3rd and 4th byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0xBF)
+                {
+                    *message = "After a first byte of F1, F2, or F3, expecting a 2nd byte between 80 and BF.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte of F1, F2, or F3, expecting a 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+                if (str[i + 3] < 0x80 || str[i + 3] > 0xBF)
+                {
+                    *message = "After a first byte of F1, F2, or F3, expecting a 4th byte between 80 and BF.";
+                    *faulty_bytes = 4;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte of F1, F2, or F3, expecting three following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 4;
+        }
+        else if (str[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */
+        {
+            if (i + 3 < len) /* Expect a 2nd, 3rd and 4th byte */
+            {
+                if (str[i + 1] < 0x80 || str[i + 1] > 0x8F)
+                {
+                    *message = "After a first byte of F4, expecting 2nd byte between 80 and 8F.";
+                    *faulty_bytes = 2;
+                    return i;
+                }
+                if (str[i + 2] < 0x80 || str[i + 2] > 0xBF)
+                {
+                    *message = "After a first byte of F4, expecting 3rd byte between 80 and BF.";
+                    *faulty_bytes = 3;
+                    return i;
+                }
+                if (str[i + 3] < 0x80 || str[i + 3] > 0xBF)
+                {
+                    *message = "After a first byte of F4, expecting 4th byte between 80 and BF.";
+                    *faulty_bytes = 4;
+                    return i;
+                }
+            }
+            else
+            {
+                *message = "After a first byte of F4, expecting three following bytes.";
+                *faulty_bytes = 1;
+                return i;
+            }
+            i += 4;
+        }
+        else
+        {
+            *message = "Expecting bytes in the following ranges: 00..7F C2..F4.";
+            *faulty_bytes = 1;
+            return i;
+        }
+    }
+    return -1; /* consumed all len bytes: no '\n' found and no error */
+}
diff --git a/src/is_utf8.hh b/src/is_utf8.hh
new file mode 100644
index 00000000..dc0a00da
--- /dev/null
+++ b/src/is_utf8.hh
@@ -0,0 +1,36 @@
+/*
+ * is_utf8 is distributed under the following terms:
+ *
+ * Copyright (c) 2013 Palard Julien. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _IS_UTF8_H
+#define _IS_UTF8_H
+
+#include <sys/types.h>
+#include <stdlib.h>
+/* Returns the index of the first '\n' or bad byte (*message is set only for the latter), else -1. */
+ssize_t is_utf8(unsigned char *str, size_t len, const char **message, int *faulty_bytes);
+
+#endif /* _IS_UTF8_H */
diff --git a/src/line_buffer.cc b/src/line_buffer.cc
index 66a44d03..a22ae756 100644
--- a/src/line_buffer.cc
+++ b/src/line_buffer.cc
@@ -43,6 +43,11 @@
 
 #include <set>
 
+#ifdef HAVE_X86INTRIN_H
+#include "simdutf8check.h"
+#endif
+
+#include "is_utf8.hh"
 #include "lnav_util.hh"
 #include "line_buffer.hh"
 
@@ -497,6 +502,7 @@ bool line_buffer::read_line(off_t &offset, line_value &lv, bool include_delim)
 
     lv.lv_len = 0;
     lv.lv_partial = false;
+    lv.lv_valid_utf = true;
     while (!retval) {
         char *line_start, *lf;
 
@@ -505,7 +511,30 @@ bool line_buffer::read_line(off_t &offset, line_value &lv, bool include_delim)
         /* Find the data in the cache and */
         line_start = this->get_range(offset, lv.lv_len);
         /* ... look for the end-of-line or end-of-file. */
-        if (((lf = (char *)memchr(line_start, '\n', lv.lv_len)) != NULL) ||
+        ssize_t utf8_end = -1;
+
+#ifdef HAVE_X86INTRIN_H
+        if (!validate_utf8_fast(line_start, lv.lv_len, &utf8_end)) {
+            lv.lv_valid_utf = false;
+        }
+#else
+        {
+            const char *msg;
+            int faulty_bytes;
+
+            utf8_end = is_utf8(line_start, lv.lv_len, &msg, &faulty_bytes);
+            if (msg != nullptr) {
+                lv.lv_valid_utf = false;
+            }
+        }
+#endif
+        if (utf8_end >= 0) {
+            lf = line_start + utf8_end;
+        } else {
+            lf = nullptr;
+        }
+
+        if (lf != nullptr ||
             (lv.lv_len >= MAX_LINE_BUFFER_SIZE) ||
             (request_size == MAX_LINE_BUFFER_SIZE) ||
             ((request_size > lv.lv_len) && lv.lv_len > 0)) {
@@ -604,6 +633,22 @@ bool line_buffer::read_line(off_t &offset_inout, shared_buffer_ref &sbr, line_va
     sbr.disown();
     if ((retval = this->read_line(offset_inout, *lv))) {
         sbr.share(this->lb_share_manager, lv->lv_start, lv->lv_len);
+        if (!lv->lv_valid_utf) {
+            auto *bits = (unsigned char *) sbr.get_writable_data();
+            const char *msg;
+            int faulty_bytes;
+
+            while (true) {
+                ssize_t utf8_end = is_utf8(bits, sbr.length(), &msg, &faulty_bytes);
+
+                if (msg == nullptr) {
+                    break;
+                }
+                for (int lpc = 0; lpc < faulty_bytes; lpc++) {
+                    bits[utf8_end + lpc] = '?';
+                }
+            }
+        }
     }
 
     return retval;
diff --git a/src/line_buffer.hh b/src/line_buffer.hh
index be8474d6..d134b158 100644
--- a/src/line_buffer.hh
+++ b/src/line_buffer.hh
@@ -48,6 +48,7 @@ struct line_value {
     char *lv_start;
     size_t lv_len;
     bool lv_partial;
+    bool lv_valid_utf;
 
     void terminate() {
         this->lv_start[this->lv_len] = '\0';
diff --git a/src/lnav.cc b/src/lnav.cc
index 359f95a2..2e182eae 100644
--- a/src/lnav.cc
+++ b/src/lnav.cc
@@ -365,15 +365,15 @@ bool setup_logline_table(exec_context &ec)
         iter.second->get_foreign_keys(db_key_names);
     }
 
-    db_key_names.push_back("device");
-    db_key_names.push_back("inode");
-    db_key_names.push_back("rowid");
-    db_key_names.push_back("st_dev");
-    db_key_names.push_back("st_ino");
-    db_key_names.push_back("st_mode");
-    db_key_names.push_back("st_rdev");
-    db_key_names.push_back("st_uid");
-    db_key_names.push_back("st_gid");
+    db_key_names.emplace_back("device");
+    db_key_names.emplace_back("inode");
+    db_key_names.emplace_back("rowid");
+    db_key_names.emplace_back("st_dev");
+    db_key_names.emplace_back("st_ino");
+    db_key_names.emplace_back("st_mode");
+    db_key_names.emplace_back("st_rdev");
+    db_key_names.emplace_back("st_uid");
+    db_key_names.emplace_back("st_gid");
 
     stable_sort(db_key_names.begin(), db_key_names.end());
 
@@ -490,7 +490,7 @@ class textfile_callback {
 public:
     textfile_callback() : force(false), front_file(NULL), front_top(-1) { };
 
-    void closed_file(shared_ptr<logfile> lf) {
+    void closed_file(const shared_ptr<logfile> &lf) {
         log_info("closed text file: %s", lf->get_filename().c_str());
         if (!lf->is_valid_filename()) {
             lnav_data.ld_file_names.erase(lf->get_filename());
@@ -503,7 +503,7 @@ public:
         regenerate_unique_file_names();
     };
 
-    void promote_file(shared_ptr<logfile> lf) {
+    void promote_file(const shared_ptr<logfile> &lf) {
         if (lnav_data.ld_log_source.insert_file(lf)) {
             force = true;
 
@@ -523,7 +523,7 @@ public:
         }
     };
 
-    void scanned_file(shared_ptr<logfile> lf) {
+    void scanned_file(const shared_ptr<logfile> &lf) {
         if (!lnav_data.ld_files_to_front.empty() &&
                 lnav_data.ld_files_to_front.front().first ==
                         lf->get_filename()) {
diff --git a/src/log_format.cc b/src/log_format.cc
index 4a8aeb8f..59c52fce 100644
--- a/src/log_format.cc
+++ b/src/log_format.cc
@@ -179,11 +179,11 @@ const char *log_format::log_scanf(const char *line,
         va_start(args, tv_out);
 
         pi.reset(line, 0, len);
-        if (!fmt[curr_fmt].pcre.match(pc, pi)) {
+        if (!fmt[curr_fmt].pcre.match(pc, pi, PCRE_NO_UTF8_CHECK)) {
             retval = NULL;
         }
         else {
-            pcre_context::capture_t *ts = pc["timestamp"];
+            pcre_context::capture_t *ts = pc[fmt[curr_fmt].pf_timestamp_index];
 
             for (auto &iter : pc) {
                 pcre_context::capture_t *cap = va_arg(
diff --git a/src/log_format.hh b/src/log_format.hh
index 34208ca7..49fb2745 100644
--- a/src/log_format.hh
+++ b/src/log_format.hh
@@ -790,13 +790,14 @@ protected:
 
     struct pcre_format {
         pcre_format(const char *regex) : name(regex), pcre(regex) {
-
+            this->pf_timestamp_index = this->pcre.name_index("timestamp");
         };
 
         pcre_format() : name(NULL), pcre("") { };
 
         const char *name;
         pcrepp pcre;
+        int pf_timestamp_index{-1};
     };
 
     static bool next_format(pcre_format *fmt, int &index, int &locked_index);
@@ -1140,7 +1141,7 @@ public:
     log_level_t convert_level(const pcre_input &pi, pcre_context::capture_t *level_cap) const {
         log_level_t retval = LEVEL_INFO;
 
-        if (level_cap != NULL && level_cap->is_valid()) {
+        if (level_cap != nullptr && level_cap->is_valid()) {
             pcre_context_static<128> pc_level;
             pcre_input pi_level(pi.get_substr_start(level_cap),
                                 0,
@@ -1149,11 +1150,9 @@ public:
             if (this->elf_level_patterns.empty()) {
                 retval = string2level(pi_level.get_string(), level_cap->length());
             } else {
-                for (auto iter = this->elf_level_patterns.begin();
-                     iter != this->elf_level_patterns.end();
-                     ++iter) {
-                    if (iter->second.lp_pcre->match(pc_level, pi_level)) {
-                        retval = iter->first;
+                for (const auto &elf_level_pattern : this->elf_level_patterns) {
+                    if (elf_level_pattern.second.lp_pcre->match(pc_level, pi_level)) {
+                        retval = elf_level_pattern.first;
                         break;
                     }
                 }
diff --git a/src/log_format_impls.cc b/src/log_format_impls.cc
index f120eb02..0c28b5ec 100644
--- a/src/log_format_impls.cc
+++ b/src/log_format_impls.cc
@@ -165,7 +165,7 @@ class generic_log_format : public log_format {
                 this->check_for_new_year(dst, log_time, log_tv);
             }
 
-            dst.push_back(logline(offset, log_tv, level_val));
+            dst.emplace_back(offset, log_tv, level_val);
             return SCAN_MATCH;
         }
 
diff --git a/src/log_level.cc b/src/log_level.cc
index ceb53d32..e6d9c18a 100644
--- a/src/log_level.cc
+++ b/src/log_level.cc
@@ -50,36 +50,6 @@ const char *level_names[LEVEL__MAX + 1] = {
     NULL
 };
 
-static pcrepp LEVEL_RE(
-    "(?i)(TRACE|DEBUG\\d*|INFO|NOTICE|STATS|WARN(?:ING)?|ERR(?:OR)?|CRITICAL|SEVERE|FATAL)");
-
-log_level_t string2level(const char *levelstr, ssize_t len, bool exact)
-{
-    log_level_t retval = LEVEL_UNKNOWN;
-
-    if (len == (ssize_t)-1) {
-        len = strlen(levelstr);
-    }
-
-    if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) &&
-        (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) {
-        return retval;
-    }
-
-    pcre_input pi(levelstr, 0, len);
-    pcre_context_static<10> pc;
-
-    if (LEVEL_RE.match(pc, pi)) {
-        auto iter = pc.begin();
-        if (!exact || pc[0]->c_begin == 0) {
-            retval = abbrev2level(pi.get_substr_start(iter),
-                                  pi.get_substr_len(iter));
-        }
-    }
-
-    return retval;
-}
-
 log_level_t abbrev2level(const char *levelstr, ssize_t len)
 {
     if (len == 0 || levelstr[0] == '\0') {
diff --git a/src/log_level_re.cc b/src/log_level_re.cc
new file mode 100644
index 00000000..8ce88c74
--- /dev/null
+++ b/src/log_level_re.cc
@@ -0,0 +1,590 @@
+/* Generated by re2c 1.1.1 on Tue Oct 16 06:58:50 2018 */
+#line 1 "../../lnav2/src/log_level_re.re"
+/**
+ * Copyright (c) 2018, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "log_level.hh"
+
+log_level_t string2level(const char *levelstr, ssize_t len, bool exact)
+{
+    log_level_t retval = LEVEL_UNKNOWN;
+
+    if (len == (ssize_t)-1) {
+        len = strlen(levelstr);
+    }
+
+    if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) &&
+        (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) {
+        return retval;
+    }
+
+#   define YYCTYPE unsigned char
+#   define RET(tok) { \
+        return tok; \
+    }
+
+    const YYCTYPE *YYCURSOR = (const unsigned char *) levelstr;
+    const YYCTYPE *YYLIMIT = (const unsigned char *) levelstr + len;
+    const YYCTYPE *YYMARKER = YYCURSOR;
+    const YYCTYPE *debug_level = nullptr;
+
+#   define YYPEEK()    (YYCURSOR < YYLIMIT ? *YYCURSOR : 0)
+#   define YYSKIP()    ++YYCURSOR
+#   define YYBACKUP()  YYMARKER = YYCURSOR
+#   define YYRESTORE() YYCURSOR = YYMARKER
+#   define YYSTAGP(x)  x = YYCURSOR - 1
+
+    const unsigned char *yyt1;
+    loop:
+    
+#line 73 "log_level_re.cc"
+{
+	YYCTYPE yych;
+	unsigned int yyaccept = 0;
+	yych = YYPEEK ();
+	switch (yych) {
+	case 0x00:	goto yy2;
+	case 'C':
+	case 'c':	goto yy6;
+	case 'D':
+	case 'd':	goto yy7;
+	case 'E':
+	case 'e':	goto yy8;
+	case 'F':
+	case 'f':	goto yy9;
+	case 'I':
+	case 'i':	goto yy10;
+	case 'N':
+	case 'n':	goto yy11;
+	case 'S':
+	case 's':	goto yy12;
+	case 'T':
+	case 't':	goto yy13;
+	case 'W':
+	case 'w':	goto yy14;
+	default:	goto yy4;
+	}
+yy2:
+	YYSKIP ();
+#line 75 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_UNKNOWN); }
+#line 104 "log_level_re.cc"
+yy4:
+	YYSKIP ();
+yy5:
+#line 102 "../../lnav2/src/log_level_re.re"
+	{ goto loop; }
+#line 110 "log_level_re.cc"
+yy6:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy15;
+	default:	goto yy5;
+	}
+yy7:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy17;
+	default:	goto yy5;
+	}
+yy8:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy18;
+	default:	goto yy5;
+	}
+yy9:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy19;
+	default:	goto yy5;
+	}
+yy10:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'N':
+	case 'n':	goto yy20;
+	default:	goto yy5;
+	}
+yy11:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'O':
+	case 'o':	goto yy21;
+	default:	goto yy5;
+	}
+yy12:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy22;
+	case 'T':
+	case 't':	goto yy23;
+	default:	goto yy5;
+	}
+yy13:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy24;
+	default:	goto yy5;
+	}
+yy14:
+	yyaccept = 0;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy25;
+	default:	goto yy5;
+	}
+yy15:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'I':
+	case 'i':	goto yy26;
+	default:	goto yy16;
+	}
+yy16:
+	YYRESTORE ();
+	switch (yyaccept) {
+	case 0: 	goto yy5;
+	case 1: 	goto yy29;
+	default:	goto yy48;
+	}
+yy17:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'B':
+	case 'b':	goto yy27;
+	default:	goto yy16;
+	}
+yy18:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy28;
+	default:	goto yy16;
+	}
+yy19:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'T':
+	case 't':	goto yy30;
+	default:	goto yy16;
+	}
+yy20:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'F':
+	case 'f':	goto yy31;
+	default:	goto yy16;
+	}
+yy21:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'T':
+	case 't':	goto yy32;
+	default:	goto yy16;
+	}
+yy22:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'V':
+	case 'v':	goto yy33;
+	default:	goto yy16;
+	}
+yy23:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy34;
+	default:	goto yy16;
+	}
+yy24:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy35;
+	default:	goto yy16;
+	}
+yy25:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy36;
+	default:	goto yy16;
+	}
+yy26:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'T':
+	case 't':	goto yy37;
+	default:	goto yy16;
+	}
+yy27:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'U':
+	case 'u':	goto yy38;
+	default:	goto yy16;
+	}
+yy28:
+	yyaccept = 1;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'O':
+	case 'o':	goto yy39;
+	default:	goto yy29;
+	}
+yy29:
+#line 98 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_ERROR); }
+#line 319 "log_level_re.cc"
+yy30:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy40;
+	default:	goto yy16;
+	}
+yy31:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'O':
+	case 'o':	goto yy41;
+	default:	goto yy16;
+	}
+yy32:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'I':
+	case 'i':	goto yy43;
+	default:	goto yy16;
+	}
+yy33:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy44;
+	default:	goto yy16;
+	}
+yy34:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'T':
+	case 't':	goto yy45;
+	default:	goto yy16;
+	}
+yy35:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'C':
+	case 'c':	goto yy46;
+	default:	goto yy16;
+	}
+yy36:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'N':
+	case 'n':	goto yy47;
+	default:	goto yy16;
+	}
+yy37:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'I':
+	case 'i':	goto yy49;
+	default:	goto yy16;
+	}
+yy38:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'G':
+	case 'g':	goto yy50;
+	default:	goto yy16;
+	}
+yy39:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy52;
+	default:	goto yy16;
+	}
+yy40:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'L':
+	case 'l':	goto yy53;
+	default:	goto yy16;
+	}
+yy41:
+	YYSKIP ();
+#line 94 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_INFO); }
+#line 412 "log_level_re.cc"
+yy43:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'C':
+	case 'c':	goto yy55;
+	default:	goto yy16;
+	}
+yy44:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'R':
+	case 'r':	goto yy56;
+	default:	goto yy16;
+	}
+yy45:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'S':
+	case 's':	goto yy57;
+	default:	goto yy16;
+	}
+yy46:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy59;
+	default:	goto yy16;
+	}
+yy47:
+	yyaccept = 2;
+	YYSKIP ();
+	YYBACKUP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'I':
+	case 'i':	goto yy61;
+	default:	goto yy48;
+	}
+yy48:
+#line 97 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_WARNING); }
+#line 458 "log_level_re.cc"
+yy49:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'C':
+	case 'c':	goto yy62;
+	default:	goto yy16;
+	}
+yy50:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case '2':
+	case '3':
+	case '4':
+	case '5':	goto yy63;
+	default:
+		YYSTAGP (yyt1);
+		goto yy51;
+	}
+yy51:
+	debug_level = yyt1;
+#line 77 "../../lnav2/src/log_level_re.re"
+	{
+         if (debug_level == nullptr) {
+             RET(LEVEL_DEBUG);
+         }
+         switch (*debug_level) {
+         case '2':
+             RET(LEVEL_DEBUG2);
+         case '3':
+             RET(LEVEL_DEBUG3);
+         case '4':
+             RET(LEVEL_DEBUG4);
+         case '5':
+             RET(LEVEL_DEBUG5);
+         default:
+             RET(LEVEL_DEBUG);
+         }
+     }
+#line 499 "log_level_re.cc"
+yy52:
+	YYSKIP ();
+	goto yy29;
+yy53:
+	YYSKIP ();
+#line 101 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_FATAL); }
+#line 507 "log_level_re.cc"
+yy55:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy64;
+	default:	goto yy16;
+	}
+yy56:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'E':
+	case 'e':	goto yy66;
+	default:	goto yy16;
+	}
+yy57:
+	YYSKIP ();
+#line 96 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_STATS); }
+#line 528 "log_level_re.cc"
+yy59:
+	YYSKIP ();
+#line 76 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_TRACE); }
+#line 533 "log_level_re.cc"
+yy61:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'N':
+	case 'n':	goto yy68;
+	default:	goto yy16;
+	}
+yy62:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'A':
+	case 'a':	goto yy69;
+	default:	goto yy16;
+	}
+yy63:
+	YYSKIP ();
+	YYSTAGP (yyt1);
+	goto yy51;
+yy64:
+	YYSKIP ();
+#line 95 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_INFO); }
+#line 558 "log_level_re.cc"
+yy66:
+	YYSKIP ();
+#line 100 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_CRITICAL); }
+#line 563 "log_level_re.cc"
+yy68:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'G':
+	case 'g':	goto yy70;
+	default:	goto yy16;
+	}
+yy69:
+	YYSKIP ();
+	yych = YYPEEK ();
+	switch (yych) {
+	case 'L':
+	case 'l':	goto yy71;
+	default:	goto yy16;
+	}
+yy70:
+	YYSKIP ();
+	goto yy48;
+yy71:
+	YYSKIP ();
+#line 99 "../../lnav2/src/log_level_re.re"
+	{ RET(LEVEL_CRITICAL); }
+#line 587 "log_level_re.cc"
+}
+#line 104 "../../lnav2/src/log_level_re.re"
+
+}
diff --git a/src/log_level_re.re b/src/log_level_re.re
new file mode 100644
index 00000000..5b453447
--- /dev/null
+++ b/src/log_level_re.re
@@ -0,0 +1,105 @@
+/**
+ * Copyright (c) 2018, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "log_level.hh"
+
+log_level_t string2level(const char *levelstr, ssize_t len, bool exact)
+{
+    log_level_t retval = LEVEL_UNKNOWN;
+
+    if (len == (ssize_t)-1) {
+        len = strlen(levelstr);
+    }
+
+    if (((len == 1) || ((len > 1) && (levelstr[1] == ' '))) &&
+        (retval = abbrev2level(levelstr, 1)) != LEVEL_UNKNOWN) {
+        return retval;
+    }
+
+#   define YYCTYPE unsigned char
+#   define RET(tok) { \
+        return tok; \
+    }
+
+    const YYCTYPE *YYCURSOR = (const unsigned char *) levelstr;
+    const YYCTYPE *YYLIMIT = (const unsigned char *) levelstr + len;
+    const YYCTYPE *YYMARKER = YYCURSOR;
+    const YYCTYPE *debug_level = nullptr;
+
+#   define YYPEEK()    (YYCURSOR < YYLIMIT ? *YYCURSOR : 0)
+#   define YYSKIP()    ++YYCURSOR
+#   define YYBACKUP()  YYMARKER = YYCURSOR
+#   define YYRESTORE() YYCURSOR = YYMARKER
+#   define YYSTAGP(x)  x = YYCURSOR - 1
+
+    /*!stags:re2c format = 'const unsigned char *@@;'; */
+    loop:
+    /*!re2c
+     re2c:yyfill:enable = 0;
+     re2c:flags:input = custom;
+
+     EOF = "\x00";
+
+     EOF { RET(LEVEL_UNKNOWN); }
+     'trace' { RET(LEVEL_TRACE); }
+     'debug' [2-5]? @debug_level {
+         if (debug_level == nullptr) {
+             RET(LEVEL_DEBUG);
+         }
+         switch (*debug_level) {
+         case '2':
+             RET(LEVEL_DEBUG2);
+         case '3':
+             RET(LEVEL_DEBUG3);
+         case '4':
+             RET(LEVEL_DEBUG4);
+         case '5':
+             RET(LEVEL_DEBUG5);
+         default:
+             RET(LEVEL_DEBUG);
+         }
+     }
+     'info' { RET(LEVEL_INFO); }
+     'notice' { RET(LEVEL_INFO); }
+     'stats' { RET(LEVEL_STATS); }
+     'warn'|'warning' { RET(LEVEL_WARNING); }
+     'err'|'error' { RET(LEVEL_ERROR); }
+     'critical' { RET(LEVEL_CRITICAL); }
+     'severe' { RET(LEVEL_CRITICAL); }
+     'fatal' { RET(LEVEL_FATAL); }
+     * { goto loop; }
+
+     */
+}
diff --git a/src/logfile.cc b/src/logfile.cc
index 24437a35..f70908c6 100644
--- a/src/logfile.cc
+++ b/src/logfile.cc
@@ -56,7 +56,7 @@ static const size_t INDEX_RESERVE_INCREMENT = 1024;
 logfile::logfile(const string &filename, logfile_open_options &loo)
     : lf_filename(filename)
 {
-    require(filename.size() > 0);
+    require(!filename.empty());
 
     memset(&this->lf_stat, 0, sizeof(this->lf_stat));
     if (loo.loo_fd == -1) {
@@ -107,7 +107,7 @@ logfile::~logfile()
 {
 }
 
-bool logfile::exists(void) const
+bool logfile::exists() const
 {
     struct stat st;
 
@@ -210,14 +210,7 @@ bool logfile::process_prefix(off_t offset, shared_buffer_ref &sbr)
 
                 if (latest < second_to_last) {
                     if (this->lf_format->lf_time_ordered) {
-                        log_debug(
-                            "%s:%d: out-of-time-order line detected %d.%03d < %d.%03d",
-                            this->lf_filename.c_str(),
-                            prescan_size,
-                            latest.get_time(),
-                            latest.get_millis(),
-                            second_to_last.get_time(),
-                            second_to_last.get_millis());
+                        this->lf_out_of_time_order_count += 1;
                         for (size_t lpc = prescan_size;
                              lpc < this->lf_index.size(); lpc++) {
                             logline &line_to_update = this->lf_index[lpc];
@@ -416,6 +409,13 @@ logfile::rebuild_result_t logfile::rebuild_index()
         this->lf_index_time = st.st_mtime;
     }
 
+    if (this->lf_out_of_time_order_count) {
+        log_info("Detected %d out-of-time-order lines in file: %s",
+                 this->lf_out_of_time_order_count,
+                 this->lf_filename.c_str());
+        this->lf_out_of_time_order_count = 0;
+    }
+
     return retval;
 }
 
diff --git a/src/logfile.hh b/src/logfile.hh
index f7bfed8b..1ef73f07 100644
--- a/src/logfile.hh
+++ b/src/logfile.hh
@@ -1,3 +1,5 @@
+#include <utility>
+
 /**
  * Copyright (c) 2007-2012, Timothy Stack
  *
@@ -109,8 +111,8 @@ public:
 
     class error {
 public:
-        error(const std::string &filename, int err)
-            : e_filename(filename),
+        error(std::string filename, int err)
+            : e_filename(std::move(filename)),
               e_err(err) { };
 
         std::string e_filename;
@@ -205,26 +207,24 @@ public:
         else {
             timeradd(&old_time, &tv, &this->lf_time_offset);
         }
-        for (iterator iter = this->begin();
-             iter != this->end();
-             ++iter) {
+        for (auto &iter : *this) {
             struct timeval curr, diff, new_time;
 
-            curr = iter->get_timeval();
+            curr = iter.get_timeval();
             timersub(&curr, &old_time, &diff);
             timeradd(&diff, &this->lf_time_offset, &new_time);
-            iter->set_time(new_time);
+            iter.set_time(new_time);
         }
         this->lf_sort_needed = true;
     };
 
-    void clear_time_offset(void) {
+    void clear_time_offset() {
         struct timeval tv = { 0, 0 };
 
         this->adjust_content_time(-1, tv);
     };
 
-    bool is_time_adjusted(void) const {
+    bool is_time_adjusted() const {
         return (this->lf_time_offset.tv_sec != 0 ||
                 this->lf_time_offset.tv_usec != 0);
     }
@@ -392,9 +392,9 @@ public:
     };
 
     /** Check the invariants for this object. */
-    bool invariant(void)
+    bool invariant()
     {
-        require(this->lf_filename.size() > 0);
+        require(!this->lf_filename.empty());
 
         return true;
     }
@@ -435,6 +435,7 @@ protected:
     logfile_observer *lf_logfile_observer{nullptr};
     size_t lf_longest_line{0};
     text_format_t lf_text_format{TF_UNKNOWN};
+    uint32_t lf_out_of_time_order_count{0};
 };
 
 class logline_observer {
diff --git a/src/logfile_sub_source.cc b/src/logfile_sub_source.cc
index d778c009..6e05fd32 100644
--- a/src/logfile_sub_source.cc
+++ b/src/logfile_sub_source.cc
@@ -541,7 +541,7 @@ bool logfile_sub_source::rebuild_index(bool force)
 {
     iterator iter;
     size_t total_lines = 0;
-    bool retval, full_sort = false;
+    bool retval, full_sort = false, new_order = false;
     int file_count = 0;
 
     force = force || this->lss_force_rebuild;
@@ -584,6 +584,7 @@ bool logfile_sub_source::rebuild_index(bool force)
                 case logfile::RR_NEW_ORDER:
                     retval = true;
                     force = true;
+                    new_order = true;
                     break;
             }
             file_count += 1;
@@ -644,7 +645,10 @@ bool logfile_sub_source::rebuild_index(bool force)
                 }
             }
 
-            sort(this->lss_index.begin(), this->lss_index.end(), line_cmper);
+            if (new_order || (this->lss_files.size() > 1)) {
+                sort(this->lss_index.begin(), this->lss_index.end(),
+                     line_cmper);
+            }
         } else {
             kmerge_tree_c<logline, logfile_data, logfile::iterator> merge(
                 file_count);
diff --git a/src/pcrepp.cc b/src/pcrepp.cc
index 2533d602..d3d967f5 100644
--- a/src/pcrepp.cc
+++ b/src/pcrepp.cc
@@ -85,7 +85,7 @@ void pcrepp::find_captures(const char *pattern)
                     in_class = true;
                     break;
                 case '(':
-                    cap_in_progress.push_back(pcre_context::capture(lpc, lpc));
+                    cap_in_progress.emplace_back(lpc, lpc);
                     break;
                 case ')': {
                     if (!cap_in_progress.empty()) {
diff --git a/src/pcrepp.hh b/src/pcrepp.hh
index 650cf129..d6943ff1 100644
--- a/src/pcrepp.hh
+++ b/src/pcrepp.hh
@@ -373,8 +373,12 @@ public:
         const char *errptr;
         int         eoff;
 
+        if (!(options & PCRE_NEVER_UTF)) {
+            options |= PCRE_UTF8;
+        }
+
         if ((this->p_code = pcre_compile(pattern,
-                                         options | PCRE_UTF8,
+                                         options,
                                          &errptr,
                                          &eoff,
                                          NULL)) == NULL) {
@@ -421,16 +425,15 @@ public:
     };
 
     pcre_named_capture::iterator named_begin() const {
-        return pcre_named_capture::iterator(this->p_named_entries,
-                                            this->p_name_len);
+        return {this->p_named_entries, static_cast<size_t>(this->p_name_len)};
     };
 
     pcre_named_capture::iterator named_end() const {
         char *ptr = (char *)this->p_named_entries;
 
         ptr += this->p_named_count * this->p_name_len;
-        return pcre_named_capture::iterator((pcre_named_capture *)ptr,
-                                            this->p_name_len);
+        return {(pcre_named_capture *)ptr,
+                static_cast<size_t>(this->p_name_len)};
     };
 
     const std::vector<pcre_context::capture> &captures() const {
@@ -565,6 +568,7 @@ public:
         return length;
     };
 
+// #undef PCRE_STUDY_JIT_COMPILE  /* uncomment to skip the JIT path below */
 #ifdef PCRE_STUDY_JIT_COMPILE
     static pcre_jit_stack *jit_stack(void);
 
diff --git a/src/simdutf8check.h b/src/simdutf8check.h
new file mode 100644
index 00000000..3e24f4d9
--- /dev/null
+++ b/src/simdutf8check.h
@@ -0,0 +1,237 @@
+/**
+ * https://github.com/lemire/fastvalidate-utf-8
+ */
+
+#ifndef SIMDUTF8CHECK_H
+#define SIMDUTF8CHECK_H
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <x86intrin.h>
+
+#include "lnav_log.hh"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * legal utf-8 byte sequence
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
+ *
+ *  Code Points        1st       2nd      3rd      4th
+ * U+0000..U+007F     00..7F
+ * U+0080..U+07FF     C2..DF   80..BF
+ * U+0800..U+0FFF     E0       A0..BF   80..BF
+ * U+1000..U+CFFF     E1..EC   80..BF   80..BF
+ * U+D000..U+D7FF     ED       80..9F   80..BF
+ * U+E000..U+FFFF     EE..EF   80..BF   80..BF
+ * U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
+ * U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
+ * U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
+ *
+ */
+
+// all byte values must be no larger than 0xF4
+static inline void checkSmallerThan0xF4(__m128i current_bytes,
+                                        __m128i *has_error)
+{
+    // unsigned, saturates to 0 below max
+    *has_error = _mm_or_si128(*has_error,
+                              _mm_subs_epu8(current_bytes,
+                                            _mm_set1_epi8(0xF4)));
+}
+
+static inline __m128i continuationLengths(__m128i high_nibbles)
+{
+    return _mm_shuffle_epi8(
+        _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
+                      0, 0, 0, 0,             // 10xx (continuation)
+                      2, 2,                   // 110x
+                      3,                      // 1110
+                      4), // 1111, next should be 0 (not checked here)
+        high_nibbles);
+}
+
+static inline __m128i carryContinuations(__m128i initial_lengths,
+                                         __m128i previous_carries)
+{
+
+    __m128i right1 = _mm_subs_epu8(
+        _mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1),
+        _mm_set1_epi8(1));
+    __m128i sum = _mm_add_epi8(initial_lengths, right1);
+
+    __m128i right2 = _mm_subs_epu8(
+        _mm_alignr_epi8(sum, previous_carries, 16 - 2),
+        _mm_set1_epi8(2));
+    return _mm_add_epi8(sum, right2);
+}
+
+static inline void checkContinuations(__m128i initial_lengths,
+                                      __m128i carries,
+                                      __m128i *has_error)
+{
+
+    // overlap || underlap
+    // carry > length && length > 0 || !(carry > length) && !(length > 0)
+    // (carries > length) == (lengths > 0)
+    __m128i overunder = _mm_cmpeq_epi8(
+        _mm_cmpgt_epi8(carries, initial_lengths),
+        _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128()));
+
+    *has_error = _mm_or_si128(*has_error, overunder);
+}
+
+// when 0xED is found, next byte must be no larger than 0x9F
+// when 0xF4 is found, next byte must be no larger than 0x8F
+// next byte must be a continuation, i.e. sign bit set, so signed compare is ok
+static inline void checkFirstContinuationMax(__m128i current_bytes,
+                                             __m128i off1_current_bytes,
+                                             __m128i *has_error)
+{
+    __m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xED));
+    __m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4));
+
+    __m128i badfollowED = _mm_and_si128(
+        _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9F)),
+        maskED);
+    __m128i badfollowF4 = _mm_and_si128(
+        _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8F)),
+        maskF4);
+
+    *has_error = _mm_or_si128(*has_error,
+                              _mm_or_si128(badfollowED, badfollowF4));
+}
+
+// map off1_hibits => error condition
+// hibits     off1    cur
+// C       => < C2 && true  
+// E       => < E1 && < A0
+// F       => < F1 && < 90
+// else      false && false
+static inline void checkOverlong(__m128i current_bytes,
+                                 __m128i off1_current_bytes,
+                                 __m128i hibits,
+                                 __m128i previous_hibits,
+                                 __m128i *has_error)
+{
+    __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1);
+    __m128i initial_mins = _mm_shuffle_epi8(
+        _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128,
+                      -128, -128, -128, -128,  // 10xx => false
+                      0xC2, -128, // 110x
+                      0xE1, // 1110
+                      0xF1),
+        off1_hibits);
+
+    __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes);
+
+    __m128i second_mins = _mm_shuffle_epi8(
+        _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128,
+                      -128, -128, -128, -128,  // 10xx => false
+                      127, 127, // 110x => true
+                      0xA0, // 1110
+                      0x90),
+        off1_hibits);
+    __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes);
+    *has_error = _mm_or_si128(*has_error,
+                              _mm_and_si128(initial_under, second_under));
+}
+
+struct processed_utf_bytes {
+    __m128i rawbytes;
+    __m128i high_nibbles;
+    __m128i carried_continuations;
+};
+
+static inline void count_nibbles(__m128i bytes,
+                                 struct processed_utf_bytes *answer)
+{
+    answer->rawbytes = bytes;
+    answer->high_nibbles = _mm_and_si128(_mm_srli_epi16(bytes, 4),
+                                         _mm_set1_epi8(0x0F));
+}
+
+// check whether the current bytes are valid UTF-8; errors are OR-ed into
+// *has_error and the returned state must be passed as `previous` next time
+static struct processed_utf_bytes
+checkUTF8Bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
+               __m128i *has_error)
+{
+    struct processed_utf_bytes pb;
+    count_nibbles(current_bytes, &pb);
+
+    checkSmallerThan0xF4(current_bytes, has_error);
+
+    __m128i initial_lengths = continuationLengths(pb.high_nibbles);
+
+    pb.carried_continuations = carryContinuations(
+        initial_lengths,
+        previous->carried_continuations);
+
+    checkContinuations(initial_lengths, pb.carried_continuations, has_error);
+
+    __m128i off1_current_bytes =
+        _mm_alignr_epi8(pb.rawbytes, previous->rawbytes, 16 - 1);
+    checkFirstContinuationMax(current_bytes, off1_current_bytes,
+                              has_error);
+
+    checkOverlong(current_bytes, off1_current_bytes,
+                  pb.high_nibbles, previous->high_nibbles, has_error);
+    return pb;
+}
+
+static bool validate_utf8_fast(const char *src, size_t len, ssize_t *len_out)
+{
+    size_t i = 0, orig_len = len;
+    __m128i has_error = _mm_setzero_si128();
+    __m128i lfchars = _mm_set1_epi8('\n');
+    __m128i lfresult = _mm_setzero_si128();
+    struct processed_utf_bytes previous = {.rawbytes = _mm_setzero_si128(),
+        .high_nibbles = _mm_setzero_si128(),
+        .carried_continuations = _mm_setzero_si128()};
+    if (len >= 16) {
+        for (; i <= len - 16; i += 16) {
+            __m128i current_bytes = _mm_loadu_si128(
+                (const __m128i *) (src + i));
+            previous = checkUTF8Bytes(current_bytes, &previous, &has_error);
+            lfresult = _mm_cmpeq_epi8(current_bytes, lfchars);
+            if (_mm_movemask_epi8(lfresult)) {
+                for (; src[i] != '\n'; i++) {
+                }
+                len = i;
+                break;
+            }
+        }
+    }
+
+    // last part: check the remaining (< 16 byte) tail via a zero-padded copy
+    if (i < len) {
+        char buffer[16];
+        memset(buffer, 0, 16);
+        memcpy(buffer, src + i, len - i);
+        __m128i current_bytes = _mm_loadu_si128((const __m128i *) (buffer));
+        previous = checkUTF8Bytes(current_bytes, &previous, &has_error);
+        for (; i < len && src[i] != '\n'; i++) {
+        }
+    } else {
+        has_error = _mm_or_si128(_mm_cmpgt_epi8(previous.carried_continuations,
+                                                _mm_setr_epi8(9, 9, 9, 9, 9, 9,
+                                                              9, 9, 9, 9, 9, 9,
+                                                              9, 9, 9, 1)),
+                                 has_error);
+    }
+
+    if (i < orig_len && src[i] == '\n') {
+        *len_out = i;
+    }
+
+    return _mm_testz_si128(has_error, has_error);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/test/Makefile.am b/test/Makefile.am
index 8a629b5c..49710e60 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -335,6 +335,7 @@ dist_noinst_DATA = \
 	textfile_json_one_line.0 \
 	textfile_quoted_json.0 \
 	toplevel.lnav \
+	UTF-8-test.txt \
 	view_colors_output.0 \
 	vt52_curses_input.0 \
 	vt52_curses_input.1 \
diff --git a/test/UTF-8-test.txt b/test/UTF-8-test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a5b5d50e6b61eb9a3b751b3954f83e61bb59db9b
GIT binary patch
literal 22781
zcmdU1X_Fh*b<OAg6*tHS4b2&FHj*P%oGOwt6R{$Rs*+eKe{2AaL3@C1xVtgnq)-VH
zIkB@!-gishSBZ8K2eD%j^pZW1v+s%M>>CYjiIrHEbIyIeVd1bb#8j*<f`b{LPv3X<
zci(;Q;)B~u8&Y$se$5G_YPao*+jOISvAvpvQQ(B3L{1o4O9d~>y4MbNJE8ngr{T%>
zH=?M0*NGF|Zns=*maBHFY*)*j-4j3B+Sy$_dEy6TNmiFvPA)BPEUj+fmviUj>}zDb
zylhzyHeB;;sk==Fw0Y8Snr+$lJK|ijTdwCUO2hB+4}n}98At=<H=WShvjf)$G0}e8
z3B|8V)ei!v8ZAo8Zr1&v<<u6%55$iePSABjM_ey*FzU$lb_}K!I<M<EUeyW9q9iFb
zAkFh5X*sqRN#sk#5gXthF>QQVnO2tbk<@(9!SA*KTMb-()7zJ}9Yk)m)3ovMPE_?<
z4&m`=AYfdi2gL9x(zW;T2&>e#!>~kZOLg0AmhhcCgBHcvb3FXf@9Z>qKS;P_9xq{M
zX9r^v+X4}FC$KAEXd?8A)3Eozr9kXRLJ%VDrme$0ABAGOEs(JYzJ!rugIk~^6$2>n
zcEzq>1dFOMR23y23`tPI^(hkaJx~+w1GHs>5#U|33BkDdJ8tNBov>p!@zi!F0^_$X
zwVlAF6hNWW!L)ctp%pbicFV`;>TVE5OOcBa*d6d8P>GKyMu;y#v2jP!v2ly<Igqc>
zk#^vNEf_7C=(s`3piW3-P;OK9WvZqTur#5BA;#K-8dm*1m=Jj3)$CxO%20DT9xyLe
zwsDmL*Fxz!%_e`wy4l<p3>5ZQi<LT6Kyv1y=kharx#n1HAL<J2CJRCczF89wS^$-*
zRv}M}gi5IzA7jM>I8()+YdS1n&OdNLR+dk#pITbEYjt^fc~L?miBLAnr&X@2<+Unk
z1xxnas$;=~9Ct4@5j@+*5Y%*_V9+xuWkXKLj~OeW4FN%?jpW=YyARqX)rK9|RoI{z
z6*L^m6}lKPRobrk?FftsJC)FhN~FyyrMJYKuwgkgxUNh6oN`}GFDwzt;8txQTCib0
zpaX3vNd^8-vO$_rm*s*-zPDPw2N~g;P^31r#;!I1WQ{2rZPN#c8k3rQ#uD?@lrel9
zc&guQIdCZHh-Bg7@$K#H&0~wwZMb+&)Ax7DrV9Ahba&}X@N9^jV}PH6<AMdX;DXh!
z*%G=fmrf?!Z&m!JdZd=S(};}6+VTCGfVno!de5cJVQ4j1{a?oo8+g=B$*u=}ixM*m
z*4(#JwM}$%LYA-X!00F&dk-#5-B2tc=xczXTXi4fuXJFV5QlNe^$v+@Iy-iA)XC)(
zSKX|4M`XeK)*w&_(}mwS4-0^^t=b`csy^J^@%(_An++?G15_=rI$lOvDuz(umGIcd
zQ?}{jAsiR_-?DcdtL9YTZ1n+f<X~mJPB$O>M+Px?o*Hkbs)2c49sb4l;8pIYPlARu
zoGPT(Z8+q>K>~n^Evy`)jv=&wc~r>Af(RF7VRiKHP%*^JY6M2Y=RqsH9fW|J=p5L9
znCI@<O)6sumf1~cAX9>cwPR{0b+~K4%VEB*<45q|vDS%Qr|sIF1&&Z+V?K-_EmasQ
z8kByYLD(%L0K%By=V1!rzE^DozUTgkfsVl1(8MKiTZq~&WQ?&SPutJLna1mUxDM_8
zP)ZNpy(L@sZI)m$I*2Ai09`mdo-eM(DM*@X6gpHP8)DPgmBh+wV22I*AB0J!9+fy;
z8{>>!u-$}rV^pxJHiRsQ=#+ES2`J?=&=eCklnU@e6AC)5L_(+U%sfB{lLwa<+D)c0
z@KC(hsyj~Ym{ISVi82sR3hY2obbeCmm0&TKs!kix5mfO0s0&XAKVcanR?92p6&3Aj
z8SfK619~GH1bV)=)P}u8YO7}+ICuU$#}Wm8Gqf%qN3>YhoB8(iHYkdjXatJ~CgW3j
zY>f4iQ(Nk~HBd)WUqlpu?vm2{m{U#~UP-4-nIT8$v^)WO76SEQ@N+P=V`Y;of&qRv
z)cla%oCy!KxVqci&#Y-mCc=zBS!*Ss((AM;P-3Qwi3bb$e1=vyB2^fTxfonxrVgS2
zoKKuoB7(+ABNaY2vzm!32iYF<<33-#7gY~A0sMvCghhF<gFOh(L@9Vs8XO3me4+*>
zq-e|{8C-$Ugm&8k%c>);(`u^J2$kppjT%GKyf1X4j@nzOxnwR`JcchcJNi(NkUAXU
zsBwD@$v}<yT4f)^gRETCNbS+6B;)FoK$mKcaV$o0F#mv=@9g6VR^ALX!}Q<|)L*Jc
zr?tjfq)GxtT6Z&rW@c^l9{Ld8C5OIx)}e2mdSlO85qW^rC2D7~0?pc|-f{3B=TF~j
zFwu$)(YQlLelT#HUFjlslsk6)u;tv5aJ2N)7ry@F=fC#QQ(t`QbETV8I)5rxMb1v-
z+{XGD_QRXgGVPNipbzd<wmCJCwjYKrr(TJ7Fs6dQpuQH-b_Z_p!0SlpBZVu*c9C!{
z(W&z~mdI90-*bRSN9nZ~P#}|4#Y2LU;BDmbzI?eTU{3+pMApUtH%17&{@9F&>mutT
zh$~}5e&&(OGeVveIhlv#Txx{K7vFe!R?Jf(r*fF<3Yq8RORop!41|tK>vn&mbRg_|
zm`Ke*yX7d<8wxo^y-#oF)cePZYNK$x)71O)b{_BPK~X?uV0lu#G3m<#4hJ(Lj;r?y
z0rQC89Lx$irrv9cn1}opAZH|VRK3AOy>gQIHZf-ay<LSKy`R#7Y%U<Tm|EEx{r0VY
zI|}t)qk0=By9ox$s|E9uJL^)Z<8A52qlKBL<m-s3;0AFE{)UK-Y%KF15Wi8}+vE_z
zMS}<dZxT@ntkj4&96VXnTyluea3ZcOQ)K{CUzZI7ssn%F;o@o|@H-H685*vT8VU^L
zojPnY@)u@^2!y<~KFsoso&_G>J7t0~3rOy}M?jxyqUWGiivnIv=ZUUoW)yJ@%RT>A
zZ|6_n7AGs61L#Y#T@(gG2L*alV=(oPCW|Yqyd_s}%50DMoQs02-}TDL_lo|U|9IgU
zC=ru;a?(B`3|YRkv3!@5islZ`Ka<&Mu%8qz@G5oS74<hqGhW!p>LhKL_mwUeB`g-0
z885ud+RgBaa?Su6Jh+(ko8rwxp}|7{J$ZAyStvAkr<rd~=!)Ur<-sgGG<X{#rw_s_
zPT|q$%bl2m$E6ybjG=~hYCTJ3CpjStw3{U=dzPk9!t<vOpry+{{u2+q{wIIx4R8GE
zpZVFJ`}tpZ*yz$>{x|eJ@}@Vx<*jdf`#awGu6MuZz3+SfqlZB<_ZYm#KJdX0efT3E
z{n*Dp@ySnp`tc{O+%Dc{KKqNm^vj?7mHw}O{tLhM#V4=MWD%p!p1(ZSbgF~$IZ9|@
zg!S8ed2saM1%5jc`snXjU|w4_tD$k&MOa#0(%M{7dBDVc`BIg7T&ttBKjlj4(;A;C
z%OM8p(pcEW0&zC_v`#PuCjC>2m;UAU-Y8z_ntVyVEMJkY%CF0B$ZyJT$#2W=$nVPU
z$?wZk^0gTrVd(n<`9t|5`D6JL`BV8b`E&UT`AhjL`D^(b`CIurd0L7ms`DQ&^p6@y
z`L3KAm+#sX`8t%Z^NN=5b@{sdz5Ijxqx_Tnv;2$vtNfe%yZndzr`(Wl$ngJN9>_N+
zkBxD0tWOa~o#LpUo^Ebg{P?c6w`b&8c}|{}7v#U>zvY`V@PFJCkH$WbuTdWB<MKE;
zO&%MRN97eQj~C^C<bUN`w;_i_E=MB|<hzu|L^wJ%O&(RsqxOoH$4m0<Imm)CISP5e
zTP4{pRxY_htV`e%S+2BPtW;px`G9Nw^iffmq-fig_O)Q~4vTzPQcTj8{0v>&64GgT
zysDZJJZ*k%+6;6}KSAQkmiF_oRc4|oB7=iG(#2q13uZ9^c@>u(QBsJ|L8XgMXgXOq
zm#rG3WWd$cxU&sS8R<2D{;i>eh5T<U)62<OGr&Z&=q$=DPn52WXEvO_X~sL2Sro*(
zet9||RB$Xd76g1|vhHX|gv%fE?gbHFoU(9YAmO#f98^Kbm!>Vo7);F63rof<!I?FM
z($mu!iJ5v~$tWJ{q?odz+&Uc)Q}x1<Vd0f{3W%kI>4=zI7naP@a<-sQ;!47FNLX26
z$t>m86H2;}Fo~I(<6PcQ(on~9X!b$vkUblwOT$G!=a?r8i#%}tK7u}$E(?52e5$%z
z*gDHJG1+X$Lc>MgN8*O8*Ce66mo|5>-H8DsEtVFV6DByx&-srR$fax7FT;7z+SNRt
z9uA<fRI#Z#k<e1~=lp5bg^2U5bUoF?#G>?!e{cTuJqZ2D`v2HZW++5+qqcUo+YTB|
z5%$vF(4+}4($33K3g=HB_{TU{ceuFp^0>{9KK<6e(WXyw-d)#qrSHrK4`C4W@n1a#
ze);9^9145>{DTL(fKE5<JItKKtvNOQ8}p}cRR1yxlfLiFPodE*YjdaDLo&(sROD^x
z)nf-@+E%|>hGUG_Lmbq~a<|`Q&!6_2p=}QBWMm2ea0_}P(FGrd?Dc08U2^rTakV#X
z(Xa@!XT)ZkA}VQUq(i@Jt;5YC4X0I>Gg1m|sD$YQ%kbjn(^BG&1Lw8KC-o-N)2xl)
z*{Cy)XwN6?Gk^8aUTcTgmT;Fg4`YW4w{vK-y7n%!r@hIhc*HIwuXW+JhA~|rCz7kD
zn_I9`2oX=0$j~<I*0LVK7ziv+^Jnacs5quKRvSbO0Wl!t;Y--o4KunSBgF0wJTGqL
zM0$-ZdWwF6muN!F=*sBJH&CXni_^){uFk4%qyRtNDmM?I+KUCj^y*pK`41v1``nR(
zN&_1-*+Gh37}~sTCKP0T+%TKY2g<UIzm8lzi(6~BXNVgt;uJCIq-U=wyaX7#aL@KJ
zG~VeJ5AX&Qfq~k`5B19f_O!7V!MwwrOfgmb1#zDH7M3q9WBUN7S&#zy?9n4?VA1o>
z30Q8Z3fn^)s82;V(zUh!)zHLRu!j}ZoYp3d(oJZC#sC`E$OSyN?QmdVhS@=Y=}{-(
zrZe5oLhIafo$g}7J)};MR=h6_ACB8crtjR2W%^xdvJl5O|J8<HMLxWW{nCjY<_Ast
z$olCqM1x^M9D*7L6xi?r<RmM4r|al6_pxUu>_AObq4NWVKM1)X&S53})42gHvR~Io
zCU5;QAF<y{$B_dheQN)yU5@T1fkHyXOhY>6wAcnkNvJ|GL31u)a|b4=={0}<yw2#m
zg&lgByC-w`l-Mzeq-5Om&HU;6VHQ5Xn#|Y6+%`S*kP~H`WjJ+qUZ(>XCp4!onNK0W
z_(((4S{kxYI#EJ5wb`r%y)(juZ6z1&m-~8h>Gr6A7gfNC(|{@KZm0;o+q!9y_RIk$
z7vs^vE!Ql8fE`MnZe=scf6dnoFWAP%kJAe|{{-W{H~n<>H}jV-^bZ=y^2RC~gR0n}
zkc;=A5NYAHD~0p8o<+db4A9Y@pgbl3uV0=OaV<lvCtXG<C=fAcnH6$92e}cqY@v6_
zpaPO}pjk0b<}j-nrhx^f&Z%Yron1u^x`Wa|a(=IU$!&F7nVDum;h?)GKbyZgp4Hu<
zI-Q20(K9r#d4;=ElD(YBm^%Cf0Sh^ziNn|fg<WIx*qlgtDWN?p^+d@KnWW`omYP4G
zvjfM4G&SALqA@Jq7L@=Z{a{y9x@}dr%`k#YS~;Qy%LnQXvs^CmWiepS$~(2O%yiU|
z<)ctS&cx68%LDqt0A<<{xoe3&0`2b;KH^IgHK+;d@CQ{us$dKl_h2r^J(%e&^by6H
zF%2SShz5~mHjqdkk@PnjQ2L01U5*A46Fcp1B%(sXN1W!s%|D>hN4#N5@hu%i?v24T
zJ(F?mj%30|pjAJ?#2fmEhk#j-o4F^<;Vp>-Cod#(nAA=*Rwo7g`!3#-%}x${>ipI5
z!O2`BF>Lq(lPBm7t^@2UBe@rEL(U*LH;rLfodHTRS5xE6G8Hmm;>xXh<Z{M|uND5o
z{P_p@syUmzh#Kw@oAMVF@Y=*`ME(o;_o^^z&IzCi_jybJUY}fz$fM-n8$_COf|!Q;
z6cmUE_cIbnb50?1;ohJElHq<vOqz3wIU4Q(3rr37Gl1qfar^Ik76?2a?=04H5KtvG
zrz{)_eE#Y<YR+Q;FEPYIrpy853heosiL)P{^N_kTF;$mzOM2$c5Z$xt6ek0Qrevb7
zS%=vWs+(Y2So>vJ0b73K;#4*}HwR>dIZnepaCUAE6hTQO%}vGzUetHYUmh$4KVTMi
zNfR2UFij8q9n>fC1Idl)ptnatpK)KC*w6+8W0gf`1F*Flj}<nwlxSFpY16)VeDkd4
zHbg|<o(AF@j~9x<DH`7xMD<^rXVnQ1QFnxp#wQL-qq?unvzmzzQBwvYUO6m{>by43
zC48);eEiH|X~d!}__&>n8g^^E;Ek(?MZ~f#5tC7yM2Vv=mLH8tX5`n~{)LiWmHTZs
zfIPlSt;OW-$JJWE$W++IXh3%+^~WyXxV7MQ%qHF^n5l3w%v3jld22x#%-CXJgClgR
zDUQ(j_@FSIvCSCFjLsA@r4yJ%=`>bD%O0UqReglcCyMja*iDR?(V1eVbON&|oyKzL
zFGlE8s1Z7^6z3;;9sm|&W^|^QDV@M9Lg#Ip;0OTqXie_JvV5|DfU-W8I1Zch%s+aK
zW;-RE7Lt;R(lwWfN<5k-*n@yrRL~=$aGDH@-g?Z^*m(16PpHNsaT!^{^P4=S4s$u4
zn4_g-PG6ET$^-GbU5eSWO3BPePsM`!RsujH>i4x$Ilc89&M<O!aBd1pNZ)s-)B+!i
z*jS<idhSAL*4Fhyj0sEGdRkdo?jtt7iDEaFRPY8zhnWQgtn8t9qs<>I<iv@W9VOLN
zWKXE<1ULar7t68ch4Xe?a3@W<0yo4_KsBcg(g=Zek}k_cTr)u3re#ALP_K?JUCP8K
zR1(Y`c}5mbA;FO!JV7n1$A=v~wFBz{u_$iRtTBIm!3lqQ<b@u2bLRf@aTnqWdU{Z|
z&U6PO(-XE-0+bmST}_#63ALMaym<3!STHPio9n5j`-J5(@N3DhFX?j@wk}}p4BF=)
z@zbMt%#k@0N$HV{_k4V~N*qtksbbLJ#l)5|JTru?IQ52V;kZ7Y{4{nceJtWFg<ZQX
zJS)kRF%~d(bnj3PF~N_z4$6M!dG~Co6`5boZsE+J+%^C4>e9-DdQnA3T!WrWLlMIw
zp{lN#KWL=_YYXWDsV)UqntpW`df>;3Z59c&tui(!b9f~fie^igAJ#+iN8yc(*PM%o
zgQ+lQ#JPjq$ur$Tjq@=p!HGQLh0Y9+wa(;}L%?JP$Ax_^nZjg87@pla_1*mCaesQs
zBt{CNc)%f4raCGP@*p<ByaS%_3iGEA@y*PBV8C0P{b<02Rv;rD4z3nnqe<AQ5z}OI
z>Jw5#o&C&kib4X2*-!d*E^m64IZa^19ws<l{896#k22BRb$fGjS;`2K$G11PST8OO
z`;QF!ZyNUBJnX+^*njJ=|F&WO?Zf^%hW&RA`|ld|-#zTVXV`!5u>Zbc|NX=Mqr?7V
z!~O?`{SOZN9~$;QJnVmD*#GFT|FL2J<HP<ZhW$?t`=1*2KRxU}KI}g+>|ZGz&1f`h
z_Eg0#9_P<(SztEeVN5IK^2&0qE&E{3&!4{dd8KC_9bCn0@XQtbyn>$t{6x|G*@p*L
z@ftjf&+!^Oi_h^IJd4lq8a#*3@fsXVxA`=q{yCuGHFyqacnzKd8eW6vfri)Md3=u7
p;CXzG*Wh`4j@JOEDqJmkvO@N>BDCH5J3T8tzwg#xE%c3^^?x0SNE-kE

literal 0
HcmV?d00001

diff --git a/test/drive_line_buffer.cc b/test/drive_line_buffer.cc
index 4e59fc91..47e36005 100644
--- a/test/drive_line_buffer.cc
+++ b/test/drive_line_buffer.cc
@@ -52,136 +52,139 @@ using namespace std;
 
 int main(int argc, char *argv[])
 {
-    int c, rnd_iters = 5, retval = EXIT_SUCCESS;
-    vector<pair<int, int> > index;
-    auto_fd fd = STDIN_FILENO;
+	int c, rnd_iters = 5, retval = EXIT_SUCCESS;
+	vector<pair<int, int> > index;
+	auto_fd fd = STDIN_FILENO;
 	int offseti = 0;
 	off_t offset = 0;
-    struct stat st;
-    
-    while ((c = getopt(argc, argv, "o:i:n:")) != -1) {
-	switch (c) {
-	case 'o':
-	    if (sscanf(optarg, "%d", &offseti) != 1) {
-		fprintf(stderr,
-			"error: offset is not an integer -- %s\n",
-			optarg);
-		retval = EXIT_FAILURE;
-	    } else {
-			offset = offseti;
+	int count = 1000;
+	struct stat st;
+
+	while ((c = getopt(argc, argv, "o:i:n:c:")) != -1) {
+		switch (c) {
+			case 'o':
+				if (sscanf(optarg, "%d", &offseti) != 1) {
+					fprintf(stderr,
+							"error: offset is not an integer -- %s\n",
+							optarg);
+					retval = EXIT_FAILURE;
+				} else {
+					offset = offseti;
+				}
+				break;
+			case 'n':
+				if (sscanf(optarg, "%d", &rnd_iters) != 1) {
+					fprintf(stderr,
+							"error: offset is not an integer -- %s\n",
+							optarg);
+					retval = EXIT_FAILURE;
+				}
+				break;
+			case 'c':
+				if (sscanf(optarg, "%d", &count) != 1) {
+					fprintf(stderr,
+							"error: count is not an integer -- %s\n",
+							optarg);
+					retval = EXIT_FAILURE;
+				}
+				break;
+			case 'i': {
+				FILE *file;
+
+				if ((file = fopen(optarg, "r")) == NULL) {
+					perror("open");
+					retval = EXIT_FAILURE;
+				} else {
+					int line_number = 1, line_offset;
+
+					while (fscanf(file, "%d", &line_offset) == 1) {
+						index.push_back(
+							make_pair(line_number, line_offset));
+						line_number += 1;
+					}
+					fclose(file);
+					file = NULL;
+				}
+			}
+				break;
+			default:
+				retval = EXIT_FAILURE;
+				break;
 		}
-	    break;
-	case 'n':
-	    if (sscanf(optarg, "%d", &rnd_iters) != 1) {
-		fprintf(stderr,
-			"error: offset is not an integer -- %s\n",
-			optarg);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (retval != EXIT_SUCCESS) {
+	} else if ((argc == 0) && (index.size() > 0)) {
+		fprintf(stderr, "error: cannot randomize stdin\n");
 		retval = EXIT_FAILURE;
-	    }
-	    break;
-	case 'i':
-	    {
-		FILE *file;
-
-		if ((file = fopen(optarg, "r")) == NULL) {
-		    perror("open");
-		    retval = EXIT_FAILURE;
-		}
-		else {
-		    int line_number = 1, line_offset;
-		    
-		    while (fscanf(file, "%d", &line_offset) == 1) {
-			index.push_back(
-				make_pair(line_number, line_offset));
-			line_number += 1;
-		    }
-		    fclose(file);
-		    file = NULL;
+	} else if ((argc > 0) && (fd = open(argv[0], O_RDONLY)) == -1) {
+		perror("open");
+		retval = EXIT_FAILURE;
+	} else if ((argc > 0) && (fstat(fd, &st) == -1)) {
+		perror("fstat");
+		retval = EXIT_FAILURE;
+	} else {
+		try {
+			off_t last_offset = offset;
+			line_buffer lb;
+			line_value lv;
+			char *maddr;
+
+			lb.set_fd(fd);
+			if (index.size() == 0) {
+				shared_buffer_ref sbr;
+
+				while (count && lb.read_line(offset, sbr, &lv)) {
+					printf("%.*s", (int) sbr.length(), sbr.get_data());
+					if ((off_t) (last_offset + lv.lv_len) < offset)
+						printf("\n");
+					last_offset = offset;
+					count -= 1;
+				}
+			} else if ((maddr = (char *) mmap(NULL,
+											  st.st_size,
+											  PROT_READ,
+											  MAP_FILE | MAP_PRIVATE,
+											  lb.get_fd(),
+											  0)) == MAP_FAILED) {
+				perror("mmap");
+				retval = EXIT_FAILURE;
+			} else {
+				off_t seq_offset = 0;
+
+				while (lb.read_line(seq_offset, lv)) {}
+				do {
+					bool ret;
+					size_t lpc;
+
+					random_shuffle(index.begin(), index.end());
+					for (lpc = 0; lpc < index.size(); lpc++) {
+
+						offset = index[lpc].second;
+						ret = lb.read_line(offset, lv);
+
+						assert(ret);
+						assert(offset >= 0);
+						assert(offset <= st.st_size);
+						assert(memcmp(lv.lv_start,
+									  &maddr[index[lpc].second],
+									  lv.lv_len) == 0);
+					}
+
+					rnd_iters -= 1;
+				} while (rnd_iters);
+
+				printf("All done\n");
+			}
 		}
-	    }
-	    break;
-	default:
-	    retval = EXIT_FAILURE;
-	    break;
-	}
-    }
-
-    argc -= optind;
-    argv += optind;
-
-    if (retval != EXIT_SUCCESS) {
-    }
-    else if ((argc == 0) && (index.size() > 0)) {
-	fprintf(stderr, "error: cannot randomize stdin\n");
-	retval = EXIT_FAILURE;
-    }
-    else if ((argc > 0) && (fd = open(argv[0], O_RDONLY)) == -1) {
-	perror("open");
-	retval = EXIT_FAILURE;
-    }
-    else if ((argc > 0) && (fstat(fd, &st) == -1)) {
-	perror("fstat");
-	retval = EXIT_FAILURE;
-    }
-    else {
-	try {
-	    off_t last_offset = offset;
-	    line_buffer lb;
-        line_value lv;
-	    char *maddr;
-
-	    lb.set_fd(fd);
-	    if (index.size() == 0) {
-		while (lb.read_line(offset, lv)) {
-            lv.terminate();
-		    printf("%s", lv.lv_start);
-		    if ((off_t)(last_offset + lv.lv_len) < offset)
-			printf("\n");
-		    last_offset = offset;
+		catch (line_buffer::error &e) {
+			fprintf(stderr, "error: %s\n", strerror(e.e_err));
+			retval = EXIT_FAILURE;
 		}
-	    }
-	    else if ((maddr = (char *)mmap(NULL,
-					   st.st_size,
-					   PROT_READ,
-					   MAP_FILE | MAP_PRIVATE,
-					   lb.get_fd(),
-					   0)) == MAP_FAILED) {
-		perror("mmap");
-		retval = EXIT_FAILURE;
-	    }
-	    else {
-                off_t seq_offset = 0;
-
-                while (lb.read_line(seq_offset, lv)) { }
-		do {
-            bool ret;
-		    size_t lpc;
-
-		    random_shuffle(index.begin(), index.end());
-		    for (lpc = 0; lpc < index.size(); lpc++) {
-
-			offset = index[lpc].second;
-			ret = lb.read_line(offset, lv);
-
-                        assert(ret);
-			assert(offset >= 0);
-			assert(offset <= st.st_size);
-			assert(memcmp(lv.lv_start,
-				      &maddr[index[lpc].second],
-				      lv.lv_len) == 0);
-		    }
-
-		    rnd_iters -= 1;
-		} while (rnd_iters);
-
-		printf("All done\n");
-	    }
 	}
-	catch (line_buffer::error &e) {
-	    fprintf(stderr, "error: %s\n", strerror(e.e_err));
-	    retval = EXIT_FAILURE;
-	}
-    }
-    
-    return retval;
+
+	return retval;
 }
diff --git a/test/drive_logfile.cc b/test/drive_logfile.cc
index ec6ece3f..83a2ae66 100644
--- a/test/drive_logfile.cc
+++ b/test/drive_logfile.cc
@@ -161,7 +161,9 @@ int main(int argc, char *argv[]) {
               break;
         case MODE_LEVELS:
           for (logfile::iterator iter = lf.begin(); iter != lf.end(); ++iter) {
-            printf("0x%02x\n", iter->get_level_and_flags());
+            log_level_t level = iter->get_level_and_flags();
+            printf("%s 0x%x\n", level_names[level & ~LEVEL__FLAGS],
+                level & LEVEL__FLAGS);
           }
               break;
       }
diff --git a/test/test_line_buffer.sh b/test/test_line_buffer.sh
index 014dac99..5d4982ec 100644
--- a/test/test_line_buffer.sh
+++ b/test/test_line_buffer.sh
@@ -42,6 +42,12 @@ check_output "Seeking in the line buffer doesn't work?" <<EOF
 5
 EOF
 
+run_test ./drive_line_buffer -o 4424 -c 1 ${srcdir}/UTF-8-test.txt
+
+check_output "Invalid UTF is not scrubbed?" <<EOF
+2.1.5  5 bytes (U-00200000):        "?????"                                       |
+EOF
+
 cat "${top_srcdir}/src/"*.hh "${top_srcdir}/src/"*.cc > lb-2.dat
 grep -b '$' lb-2.dat | cut -f 1 -d : > lb.index
 line_count=`wc -l lb-2.dat`
diff --git a/test/test_logfile.sh b/test/test_logfile.sh
index b4825138..4f6adc93 100644
--- a/test/test_logfile.sh
+++ b/test/test_logfile.sh
@@ -222,41 +222,41 @@ EOF
 run_test ./drive_logfile -v -f syslog_log ${srcdir}/logfile_syslog.0
 
 check_output "Syslog level interpreted incorrectly?" <<EOF
-0x0a
-0x07
-0x0a
-0x07
+error 0x0
+info 0x0
+error 0x0
+info 0x0
 EOF
 
 run_test ./drive_logfile -v -f tcsh_history ${srcdir}/logfile_tcsh_history.0
 
 check_output "TCSH level interpreted incorrectly?" <<EOF
-0x07
-0x87
-0x07
-0x87
+info 0x0
+info 0x80
+info 0x0
+info 0x80
 EOF
 
 run_test ./drive_logfile -v -f access_log ${srcdir}/logfile_access_log.0
 
 check_output "access_log level interpreted incorrectly?" <<EOF
-0x07
-0x0a
-0x07
+info 0x0
+error 0x0
+info 0x0
 EOF
 
 run_test ./drive_logfile -v -f strace_log ${srcdir}/logfile_strace_log.0
 
 check_output "strace_log level interpreted incorrectly?" <<EOF
-0x07
-0x07
-0x07
-0x0a
-0x07
-0x0a
-0x07
-0x07
-0x07
+info 0x0
+info 0x0
+info 0x0
+error 0x0
+info 0x0
+error 0x0
+info 0x0
+info 0x0
+info 0x0
 EOF
 
 run_test ./drive_logfile -t -f generic_log ${srcdir}/logfile_generic.0
@@ -269,22 +269,22 @@ EOF
 run_test ./drive_logfile -v -f generic_log ${srcdir}/logfile_generic.0
 
 check_output "generic_log level interpreted incorrectly?" <<EOF
-0x06
-0x09
+debug 0x0
+warning 0x0
 EOF
 
 run_test ./drive_logfile -v -f generic_log ${srcdir}/logfile_generic.1
 
 check_output "generic_log (1) level interpreted incorrectly?" <<EOF
-0x07
-0x0a
+info 0x0
+error 0x0
 EOF
 
 run_test ./drive_logfile -v -f generic_log ${srcdir}/logfile_generic.2
 
 check_output "generic_log (2) level interpreted incorrectly?" <<EOF
-0x0a
-0x0a
+error 0x0
+error 0x0
 EOF
 
 touch -t 200711030923 ${srcdir}/logfile_glog.0
@@ -303,13 +303,13 @@ EOF
 run_test ./drive_logfile -v -f glog_log ${srcdir}/logfile_glog.0
 
 check_output "glog_log level interpreted incorrectly?" <<EOF
-0x0a
-0x07
-0x07
-0x09
-0x07
-0x07
-0x0a
+error 0x0
+info 0x0
+info 0x0
+warning 0x0
+info 0x0
+info 0x0
+error 0x0
 EOF
 
 cp ${srcdir}/logfile_syslog.0 truncfile.0