diff --git a/CMakeLists.txt b/CMakeLists.txt index 48088dcf..b8b039f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ find_package(BZip2 REQUIRED) find_package(LibArchive REQUIRED) find_package(ZLIB REQUIRED) find_package(pcre REQUIRED) +find_package(pcre2 REQUIRED) find_package(readline REQUIRED) find_package(ncurses REQUIRED) find_package(CURL REQUIRED) @@ -29,6 +30,7 @@ set(lnav_LIBS BZip2::BZip2 ncurses::libcurses pcre::libpcre + pcre2::pcre2 readline::readline LibArchive::LibArchive ZLIB::ZLIB @@ -39,19 +41,19 @@ add_subdirectory(src) # ---- Install rules ---- -if(NOT CMAKE_SKIP_INSTALL_RULES) - include(cmake/install-rules.cmake) -endif() +if (NOT CMAKE_SKIP_INSTALL_RULES) + include(cmake/install-rules.cmake) +endif () # ---- Developer mode ---- -if(NOT lnav_DEVELOPER_MODE) - return() -elseif(NOT PROJECT_IS_TOP_LEVEL) - message( - AUTHOR_WARNING - "Developer mode is intended for developers of lnav" - ) -endif() +if (NOT lnav_DEVELOPER_MODE) + return() +elseif (NOT PROJECT_IS_TOP_LEVEL) + message( + AUTHOR_WARNING + "Developer mode is intended for developers of lnav" + ) +endif () include(cmake/dev-mode.cmake) diff --git a/NEWS b/NEWS index df861d08..c7a1c0a4 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +lnav v0.11.1: + Breaking changes: + * The regexp_capture() table-valued-function now returns NULL + instead of an empty string for the `capture_name` column if + the capture is not named. 
+ lnav v0.11.0: Features: * Redesigned the top status area to allow for user-specified diff --git a/aminclude_static.am b/aminclude_static.am index 9ab19cac..d89dbbd7 100644 --- a/aminclude_static.am +++ b/aminclude_static.am @@ -1,6 +1,6 @@ # aminclude_static.am generated automatically by Autoconf -# from AX_AM_MACROS_STATIC on Sat Aug 20 18:43:07 PDT 2022 +# from AX_AM_MACROS_STATIC on Sat Sep 10 09:23:23 PDT 2022 # Code coverage diff --git a/cmake/docs.cmake b/cmake/docs.cmake index e0e99394..286d027f 100644 --- a/cmake/docs.cmake +++ b/cmake/docs.cmake @@ -1,12 +1,18 @@ # ---- Dependencies ---- +set(extract_timestamps "") +if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") + set(extract_timestamps DOWNLOAD_EXTRACT_TIMESTAMP YES) +endif () + include(FetchContent) FetchContent_Declare( - mcss URL - https://github.com/friendlyanon/m.css/releases/download/release-1/mcss.zip - URL_MD5 00cd2757ebafb9bcba7f5d399b3bec7f - SOURCE_DIR "${PROJECT_BINARY_DIR}/mcss" - UPDATE_DISCONNECTED YES + mcss URL + https://github.com/friendlyanon/m.css/releases/download/release-1/mcss.zip + URL_MD5 00cd2757ebafb9bcba7f5d399b3bec7f + SOURCE_DIR "${PROJECT_BINARY_DIR}/mcss" + UPDATE_DISCONNECTED YES + ${extract_timestamps} ) FetchContent_MakeAvailable(mcss) @@ -15,26 +21,26 @@ find_package(Python3 3.6 REQUIRED) # ---- Declare documentation target ---- set( - DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/docs" - CACHE PATH "Path for the generated Doxygen documentation" + DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/docs" + CACHE PATH "Path for the generated Doxygen documentation" ) set(working_dir "${PROJECT_BINARY_DIR}/docs") -foreach(file IN ITEMS Doxyfile conf.py) - configure_file("docs/${file}.in" "${working_dir}/${file}" @ONLY) -endforeach() +foreach (file IN ITEMS Doxyfile conf.py) + configure_file("docs/${file}.in" "${working_dir}/${file}" @ONLY) +endforeach () set(mcss_script "${mcss_SOURCE_DIR}/documentation/doxygen.py") set(config "${working_dir}/conf.py") add_custom_target( 
- docs - COMMAND "${CMAKE_COMMAND}" -E remove_directory - "${DOXYGEN_OUTPUT_DIRECTORY}/html" - "${DOXYGEN_OUTPUT_DIRECTORY}/xml" - COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}" - COMMENT "Building documentation using Doxygen and m.css" - WORKING_DIRECTORY "${working_dir}" - VERBATIM + docs + COMMAND "${CMAKE_COMMAND}" -E remove_directory + "${DOXYGEN_OUTPUT_DIRECTORY}/html" + "${DOXYGEN_OUTPUT_DIRECTORY}/xml" + COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}" + COMMENT "Building documentation using Doxygen and m.css" + WORKING_DIRECTORY "${working_dir}" + VERBATIM ) diff --git a/conanfile.py b/conanfile.py index 1611e99e..e903c779 100644 --- a/conanfile.py +++ b/conanfile.py @@ -20,7 +20,7 @@ class LnavConan(ConanFile): "libarchive/3.6.0", "libcurl/7.80.0", "ncurses/6.3", - "pcre/8.45", + "pcre2/10.40", "readline/8.1.2", "sqlite3/3.38.0", "zlib/1.2.12", @@ -32,7 +32,8 @@ class LnavConan(ConanFile): "libarchive:with_lzo": True, "libarchive:with_lzma": True, "libarchive:with_zstd": True, - "pcre:with_jit": True, + "pcre2:support_jit": True, + "pcre2:build_pcre2_8": True, "sqlite3:enable_json1": True, "sqlite3:enable_soundex": True, "readline:with_library": "curses", diff --git a/configure.ac b/configure.ac index 293448ec..6683fe4f 100644 --- a/configure.ac +++ b/configure.ac @@ -184,7 +184,7 @@ AS_VAR_IF([ax_cv_curses],[yes],[], ) AX_PATH_LIB_ARCHIVE -AX_PATH_LIB_PCRE([], [AC_MSG_ERROR([pcre required to build])]) +AX_CHECK_PCRE2([8], [], [AC_MSG_ERROR([pcre2 is required to build])]) AX_PATH_LIB_READLINE AX_CODE_COVERAGE @@ -206,7 +206,7 @@ AS_VAR_SET(ALL_LDFLAGS, "$SQLITE3_LDFLAGS $READLINE_LDFLAGS $LIBARCHIVE_LDFLAGS AS_VAR_SET(static_lib_list, ["libncurses.a libncursesw.a libreadline.a libsqlite3.a libz.a libtinfo.a libtinfow.a"]) AS_VAR_SET(static_lib_list, - ["$static_lib_list libpcre.a libncursesw.a libbz2.a"]) + ["$static_lib_list libpcre2-8.a libncursesw.a libbz2.a"]) AS_VAR_SET(static_lib_list, ["$static_lib_list libgpm.a libcurl.a
libcrypto.a libssl.a libssh2.a"]) AS_VAR_SET(static_lib_list, diff --git a/docs/schemas/format-v1.schema.json b/docs/schemas/format-v1.schema.json index cda4dd1d..506e6adb 100644 --- a/docs/schemas/format-v1.schema.json +++ b/docs/schemas/format-v1.schema.json @@ -219,9 +219,9 @@ "([^/]+)": { "title": "//value//unit/scaling-factor/", "type": "object", - "patternProperties": { + "properties": { "op": { - "title": "//value//unit/scaling-factor//<>", + "title": "//value//unit/scaling-factor//op", "type": "string", "enum": [ "identity", @@ -230,7 +230,7 @@ ] }, "value": { - "title": "//value//unit/scaling-factor//<>", + "title": "//value//unit/scaling-factor//value", "type": "number" } }, diff --git a/m4/ax_check_pcre2.m4 b/m4/ax_check_pcre2.m4 new file mode 100644 index 00000000..9ae01add --- /dev/null +++ b/m4/ax_check_pcre2.m4 @@ -0,0 +1,163 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_pcre2.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_PCRE2([bits], [action-if-found], [action-if-not-found]) +# +# DESCRIPTION +# +# Search for an installed libpcre2-8 library. If nothing was specified +# when calling configure, it searches first in /usr/local and then in +# /usr, /opt/local and /sw. If the --with-pcre2=DIR is specified, it will +# try to find it in DIR/include/pcre2.h and DIR/lib/libpcre2-8. If +# --without-pcre2 is specified, the library is not searched at all. +# +# If 'bits' is empty or '8', PCRE2 8-bit character support is checked +# only. If 'bits' contains '16', PCRE2 8-bit and 16-bit character support +# are checked. If 'bits' contains '32', PCRE2 8-bit and 32-bit character +# support are checked. When 'bits' contains both '16' and '32', PCRE2 +# 8-bit, 16-bit, and 32-bit character support is checked. 
+# +# If either the header file (pcre2.h), or the library (libpcre2-8) is not +# found, or the specified PCRE2 character bit width is not supported, +# shell commands 'action-if-not-found' is run. If 'action-if-not-found' is +# not specified, the configuration exits on error, asking for a valid +# PCRE2 installation directory or --without-pcre2. +# +# If both header file and library are found, and the specified PCRE2 bit +# widths are supported, shell commands 'action-if-found' is run. If +# 'action-if-found' is not specified, the default action appends +# '-I${PCRE2_HOME}/include' to CPPFLAGS, appends '-L${PCRE2_HOME}/lib' to +# LDFLAGS, prepends '-lpcre2-8' to LIBS, and calls AC_DEFINE(HAVE_PCRE2). +# You should use autoheader to include a definition for this symbol in a +# config.h file. Sample usage in a C/C++ source is as follows: +# +# #ifdef HAVE_PCRE2 +# #define PCRE2_CODE_UNIT_WIDTH 8 +# #include <pcre2.h> +# #endif /* HAVE_PCRE2 */ +# +# LICENSE +# +# Copyright (c) 2020 Robert van Engelen +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro.
You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 2 + +AC_DEFUN([AX_CHECK_PCRE2], +# +# Handle user hints +# +[AC_MSG_CHECKING(if PCRE2 is wanted) +pcre2_places="/usr/local /usr /opt/local /sw" +AC_ARG_WITH([pcre2], +[ --with-pcre2=DIR root directory path of PCRE2 installation @<:@defaults to + /usr/local or /usr if not found in /usr/local@:>@ + --without-pcre2 to disable PCRE2 usage completely], +[if test "$withval" != "no" ; then + AC_MSG_RESULT(yes) + if test -d "$withval" + then + pcre2_places="$withval $pcre2_places" + else + AC_MSG_WARN([Sorry, $withval does not exist, checking usual places]) + fi +else + pcre2_places="" + AC_MSG_RESULT(no) +fi], +[AC_MSG_RESULT(yes)]) +# +# Locate PCRE2, if wanted +# +if test -n "${pcre2_places}" +then + # check the user supplied or any other more or less 'standard' place: + # Most UNIX systems : /usr/local and /usr + # MacPorts / Fink on OSX : /opt/local respectively /sw + for PCRE2_HOME in ${pcre2_places} ; do + if test -f "${PCRE2_HOME}/include/pcre2.h"; then break; fi + PCRE2_HOME="" + done + + PCRE2_OLD_LDFLAGS=$LDFLAGS + PCRE2_OLD_CPPFLAGS=$CPPFLAGS + if test -n "${PCRE2_HOME}"; then + LDFLAGS="$LDFLAGS -L${PCRE2_HOME}/lib" + CPPFLAGS="$CPPFLAGS -I${PCRE2_HOME}/include" + fi + AC_LANG_PUSH([C]) + AC_CHECK_LIB([pcre2-8], [pcre2_compile_8], [pcre2_cv_libpcre2=yes], [pcre2_cv_libpcre2=no]) + AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_h=yes], [pcre2_cv_pcre2_h=no], [#define 
PCRE2_CODE_UNIT_WIDTH 8]) + case "$1" in + *16*) + AC_CHECK_LIB([pcre2-16], [pcre2_compile_16], [pcre2_cv_libpcre2_16=yes], [pcre2_cv_libpcre2_16=no]) + AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_16_h=yes], [pcre2_cv_pcre2_16_h=no], [#define PCRE2_CODE_UNIT_WIDTH 16]) + if test "$pcre2_cv_libpcre2_16" = "no" || test "$pcre2_cv_pcre2_16_h" = "no"; then + pcre2_cv_libpcre2=no + fi + ;; + esac + case "$1" in + *32*) + AC_CHECK_LIB([pcre2-32], [pcre2_compile_32], [pcre2_cv_libpcre2_32=yes], [pcre2_cv_libpcre2_32=no]) + AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_32_h=yes], [pcre2_cv_pcre2_32_h=no], [#define PCRE2_CODE_UNIT_WIDTH 32]) + if test "$pcre2_cv_libpcre2_32" = "no" || test "$pcre2_cv_pcre2_32_h" = "no"; then + pcre2_cv_libpcre2=no + fi + esac + AC_LANG_POP([C]) + if test "$pcre2_cv_libpcre2" = "yes" && test "$pcre2_cv_pcre2_h" = "yes" + then + # + # If both library and header were found, action-if-found + # + m4_ifblank([$2],[ + CPPFLAGS="$CPPFLAGS -I${PCRE2_HOME}/include" + LDFLAGS="$LDFLAGS -L${PCRE2_HOME}/lib" + LIBS="-lpcre2-8 $LIBS" + AC_DEFINE([HAVE_PCRE2], [1], + [Define to 1 if you have `PCRE2' library (-lpcre2-$1)]) + ],[ + # Restore variables + LDFLAGS="$PCRE2_OLD_LDFLAGS" + CPPFLAGS="$PCRE2_OLD_CPPFLAGS" + $2 + ]) + else + # + # If either header or library was not found, action-if-not-found + # + m4_default([$3],[ + AC_MSG_ERROR([either specify a valid PCRE2 installation with --with-pcre2=DIR or disable PCRE2 usage with --without-pcre2]) + ]) + fi +fi +]) diff --git a/m4/lnav_with_pcre.m4 b/m4/lnav_with_pcre.m4 deleted file mode 100644 index 4e33d67d..00000000 --- a/m4/lnav_with_pcre.m4 +++ /dev/null @@ -1,74 +0,0 @@ -dnl -dnl Copyright (c) 2007-2015, Timothy Stack -dnl -dnl All rights reserved. 
-dnl -dnl Redistribution and use in source and binary forms, with or without -dnl modification, are permitted provided that the following conditions are met: -dnl -dnl dnl Redistributions of source code must retain the above copyright notice, this -dnl list of conditions and the following disclaimer. -dnl dnl Redistributions in binary form must reproduce the above copyright notice, -dnl this list of conditions and the following disclaimer in the documentation -dnl and/or other materials provided with the distribution. -dnl dnl Neither the name of Timothy Stack nor the names of its contributors -dnl may be used to endorse or promote products derived from this software -dnl without specific prior written permission. -dnl -dnl THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY -dnl EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -dnl WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -dnl DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY -dnl DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -dnl (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -dnl ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -dnl (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -dnl SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -dnl -dnl @file lnav_with_pcre.m4 -dnl -AC_DEFUN([AX_PATH_LIB_PCRE],[dnl -AC_MSG_CHECKING([lib pcre]) -AC_ARG_WITH(pcre, -[ --with-pcre[[=prefix]]],, - with_pcre="yes") -if test ".$with_pcre" = ".no" ; then - AC_MSG_RESULT([disabled]) - m4_ifval($2,$2) -else - AC_MSG_RESULT([(testing)]) - AS_VAR_SET(saved_LIBS, $LIBS) - if test ".$with_pcre" = "." 
&& test "$ac_cv_lib_pcre_pcre_study" = "yes" ; then - PCRE_LIBS="-lpcre" - AC_MSG_CHECKING([lib pcre]) - AC_CHECK_LIB(pcre, pcre_study) - AC_CHECK_HEADERS(pcre.h pcre/pcre.h) - AC_MSG_RESULT([$PCRE_LIBS]) - m4_ifval($1,$1) - else - OLDLDFLAGS="$LDFLAGS" ; LDFLAGS="$LDFLAGS -L$with_pcre/lib" - OLDCPPFLAGS="$CPPFLAGS" ; CPPFLAGS="$CPPFLAGS -I$with_pcre/include" - AC_CHECK_LIB(pcre, pcre_compile) - AC_CHECK_HEADERS(pcre.h pcre/pcre.h) - CPPFLAGS="$OLDCPPFLAGS" - LDFLAGS="$OLDLDFLAGS" - if test "$ac_cv_lib_pcre_pcre_compile" = "yes" ; then - AC_MSG_RESULT(.setting PCRE_LIBS -L$with_pcre/lib -lpcre) - PCRE_LDFLAGS="-L$with_pcre/lib" - PCRE_LIBS="-lpcre" - test -d "$with_pcre/include" && PCRE_CFLAGS="-I$with_pcre/include" - AC_MSG_CHECKING([lib pcre]) - AC_MSG_RESULT([$PCRE_LIBS]) - m4_ifval($1,$1) - else - AC_MSG_CHECKING([lib pcre]) - AC_MSG_RESULT([[no, (WARNING)]]) - m4_ifval($2,$2) - fi - fi -fi -AC_SUBST([PCRE_LIBS]) -AC_SUBST([PCRE_CFLAGS]) -]) - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 62c637c4..60e7e335 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -492,6 +492,7 @@ add_library( log_gutter_source.hh log_level.hh log_search_table.hh + log_search_table_fwd.hh logfile_sub_source.cfg.hh logfile.hh logfile_fwd.hh diff --git a/src/Makefile.am b/src/Makefile.am index 0b487bb0..5e5b9a8c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -239,6 +239,7 @@ noinst_HEADERS = \ log_level.hh \ log_level_re.re \ log_search_table.hh \ + log_search_table_fwd.hh \ logfile.hh \ logfile.cfg.hh \ logfile_fwd.hh \ diff --git a/src/all_logs_vtab.cc b/src/all_logs_vtab.cc index 87f920ab..f4468a6f 100644 --- a/src/all_logs_vtab.cc +++ b/src/all_logs_vtab.cc @@ -29,8 +29,8 @@ #include "all_logs_vtab.hh" -#include "config.h" #include "base/attr_line.hh" +#include "config.h" static auto intern_lifetime = intern_string::get_table_lifetime(); @@ -65,7 +65,7 @@ all_logs_vtab::extract(logfile* lf, logline_value_vector& values) { auto& line = values.lvv_sbr; - 
auto format = lf->get_format_ptr(); + auto* format = lf->get_format_ptr(); logline_value_vector sub_values; @@ -79,7 +79,8 @@ all_logs_vtab::extract(logfile* lf, body.lr_end = line.length(); } - data_scanner ds(line, body.lr_start, body.lr_end); + data_scanner ds( + line.to_string_fragment().sub_range(body.lr_start, body.lr_end)); data_parser dp(&ds); std::string str; diff --git a/src/base/CMakeLists.txt b/src/base/CMakeLists.txt index 6015b214..aa4143f6 100644 --- a/src/base/CMakeLists.txt +++ b/src/base/CMakeLists.txt @@ -46,6 +46,7 @@ add_library( isc.hh itertools.hh lnav.console.hh + lnav.console.into.hh log_level_enum.hh lrucache.hpp math_util.hh @@ -63,7 +64,7 @@ add_library( target_include_directories(base PUBLIC . .. ../third-party ${CMAKE_CURRENT_BINARY_DIR}/..) -target_link_libraries(base cppfmt cppscnlib pcre::libpcre ncurses::libcurses pthread) +target_link_libraries(base cppfmt cppscnlib pcrepp ncurses::libcurses pthread) add_executable( test_base diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 185f609f..4a459a63 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -45,6 +45,7 @@ noinst_HEADERS = \ itertools.hh \ lnav_log.hh \ lnav.console.hh \ + lnav.console.into.hh \ lnav.gzip.hh \ log_level_enum.hh \ lrucache.hpp \ diff --git a/src/base/ansi_scrubber.cc b/src/base/ansi_scrubber.cc index 2212c37f..d413490d 100644 --- a/src/base/ansi_scrubber.cc +++ b/src/base/ansi_scrubber.cc @@ -35,15 +35,15 @@ #include "base/opt_util.hh" #include "config.h" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "scn/scn.h" #include "view_curses.hh" -static const pcrepp& +static const lnav::pcre2pp::code& ansi_regex() { - static const pcrepp retval("\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+", - PCRE_UTF8); + static const auto retval = lnav::pcre2pp::code::from_const( + "\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+"); return retval; } @@ -51,16 +51,25 @@ ansi_regex() void scrub_ansi_string(std::string& str, 
string_attrs_t* sa) { - pcre_context_static<60> context; const auto& regex = ansi_regex(); - pcre_input pi(str); + auto md = regex.create_match_data(); int64_t origin_offset = 0; int last_origin_offset_end = 0; replace(str.begin(), str.end(), '\0', ' '); - while (regex.match(context, pi, PCRE_NO_UTF8_CHECK)) { - auto* caps = context.all(); - const auto sf = pi.get_string_fragment(caps); + auto matcher = regex.capture_from(str).into(md); + while (true) { + auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK); + + if (match_res.is()) { + break; + } + if (match_res.is()) { + log_error("ansi scrub regex failure"); + break; + } + + const auto sf = md[0].value(); auto bs_index_res = sf.codepoint_to_byte_index(1); if (sf.length() >= 3 && bs_index_res.isOk() @@ -139,7 +148,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) *sa, caps->c_begin + sf.length() / 3, -erased_size); #endif sa->emplace_back(line_range{last_origin_offset_end, - caps->c_begin + (int) output_size}, + sf.sf_begin + (int) output_size}, SA_ORIGIN_OFFSET.value(origin_offset)); } @@ -154,27 +163,28 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) bold_range.clear(); } - str.erase(str.begin() + fill_index, str.begin() + caps->c_end); - last_origin_offset_end = caps->c_begin + output_size; + str.erase(str.begin() + fill_index, str.begin() + sf.sf_end); + last_origin_offset_end = sf.sf_begin + output_size; origin_offset += erased_size; - pi.reset(str); - pi.pi_next_offset = last_origin_offset_end; + matcher.reload_input(str, last_origin_offset_end); continue; } + auto seq = md[1].value(); + auto terminator = md[2].value(); struct line_range lr; bool has_attrs = false; text_attrs attrs; auto role = nonstd::optional(); size_t lpc; - switch (pi.get_substr_start(&caps[2])[0]) { + switch (terminator[0]) { case 'm': - for (lpc = caps[1].c_begin; - lpc != std::string::npos && lpc < (size_t) caps[1].c_end;) + for (lpc = seq.sf_begin; + lpc != std::string::npos && lpc < (size_t) seq.sf_end;) { auto 
ansi_code_res = scn::scan_value( - scn::string_view{&str[lpc], &str[caps[1].c_end]}); + scn::string_view{&str[lpc], &str[seq.sf_end]}); if (ansi_code_res) { auto ansi_code = ansi_code_res.value(); @@ -215,11 +225,11 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) break; case 'C': { - auto spaces_res = scn::scan_value( - pi.to_string_view(&caps[1])); + auto spaces_res + = scn::scan_value(seq.to_string_view()); if (spaces_res && spaces_res.value() > 0) { - str.insert((std::string::size_type) caps[0].c_end, + str.insert((std::string::size_type) sf.sf_end, spaces_res.value(), ' '); } @@ -229,13 +239,13 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) case 'H': { unsigned int row = 0, spaces = 0; - if (scn::scan(pi.to_string_view(&caps[1]), "{};{}", row, spaces) + if (scn::scan(seq.to_string_view(), "{};{}", row, spaces) && spaces > 1) { int ispaces = spaces - 1; - if (ispaces > caps[0].c_begin) { - str.insert((unsigned long) caps[0].c_end, - ispaces - caps[0].c_begin, + if (ispaces > sf.sf_begin) { + str.insert((unsigned long) sf.sf_end, + ispaces - sf.sf_begin, ' '); } } @@ -243,8 +253,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) } case 'O': { - auto role_res - = scn::scan_value(pi.to_string_view(&caps[1])); + auto role_res = scn::scan_value(seq.to_string_view()); if (role_res) { role_t role_tmp = (role_t) role_res.value(); @@ -258,18 +267,18 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) break; } } - str.erase(str.begin() + caps[0].c_begin, str.begin() + caps[0].c_end); + str.erase(str.begin() + sf.sf_begin, str.begin() + sf.sf_end); if (sa != nullptr) { - shift_string_attrs(*sa, caps[0].c_begin, -caps[0].length()); + shift_string_attrs(*sa, sf.sf_begin, -sf.length()); if (has_attrs) { for (auto rit = sa->rbegin(); rit != sa->rend(); rit++) { if (rit->sa_range.lr_end != -1) { continue; } - rit->sa_range.lr_end = caps[0].c_begin; + rit->sa_range.lr_end = sf.sf_begin; } - lr.lr_start = caps[0].c_begin; + lr.lr_start = 
sf.sf_begin; lr.lr_end = -1; if (attrs.ta_attrs || attrs.ta_fg_color || attrs.ta_bg_color) { sa->emplace_back(lr, VC_STYLE.value(attrs)); @@ -278,14 +287,13 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa) sa->emplace_back(lr, VC_ROLE.value(r)); }; } - sa->emplace_back(line_range{last_origin_offset_end, caps->c_begin}, + sa->emplace_back(line_range{last_origin_offset_end, sf.sf_begin}, SA_ORIGIN_OFFSET.value(origin_offset)); - last_origin_offset_end = caps->c_begin; - origin_offset += caps->length(); + last_origin_offset_end = sf.sf_begin; + origin_offset += sf.length(); } - pi.reset(str); - pi.pi_next_offset = caps->c_begin; + matcher.reload_input(str, sf.sf_begin); } if (sa != nullptr && last_origin_offset_end > 0) { diff --git a/src/base/attr_line.cc b/src/base/attr_line.cc index 9b928a21..db9c0f17 100644 --- a/src/base/attr_line.cc +++ b/src/base/attr_line.cc @@ -37,7 +37,7 @@ #include "auto_mem.hh" #include "config.h" #include "lnav_log.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" attr_line_t& attr_line_t::with_ansi_string(const char* str, ...) 
@@ -91,18 +91,19 @@ using chunk = mapbox::util::variant; chunk consume(const string_fragment text) { - static const pcrepp WORD_RE(R"((*UTF)^[^\p{Z}\p{So}\p{C}]+)"); - static const pcrepp SPACE_RE(R"((*UTF)^\s)"); + static const auto WORD_RE + = lnav::pcre2pp::code::from_const(R"((*UTF)^[^\p{Z}\p{So}\p{C}]+)"); + static const auto SPACE_RE + = lnav::pcre2pp::code::from_const(R"((*UTF)^\s)"); if (text.empty()) { return eof{text}; } - pcre_input pi(text); - pcre_context_static<30> pc; - - if (WORD_RE.match(pc, pi, PCRE_NO_UTF8_CHECK)) { - auto split_res = text.split_n(pc.all()->length()).value(); + auto word_find_res + = WORD_RE.find_in(text, PCRE2_NO_UTF_CHECK).ignore_error(); + if (word_find_res) { + auto split_res = text.split_n(word_find_res->f_all.length()).value(); return word{split_res.first, split_res.second}; } @@ -113,8 +114,10 @@ consume(const string_fragment text) return space{split_res.first, split_res.second}; } - if (SPACE_RE.match(pc, pi, PCRE_NO_UTF8_CHECK)) { - auto split_res = text.split_n(pc.all()->length()).value(); + auto space_find_res + = SPACE_RE.find_in(text, PCRE2_NO_UTF_CHECK).ignore_error(); + if (space_find_res) { + auto split_res = text.split_n(space_find_res->f_all.length()).value(); return space{split_res.first, split_res.second}; } @@ -184,8 +187,6 @@ attr_line_t::insert(size_t index, return *this; } - static const pcrepp SPACE_RE(R"(\s?)"); - auto starting_line_index = this->al_string.rfind('\n', index); if (starting_line_index == std::string::npos) { starting_line_index = 0; diff --git a/src/base/attr_line.hh b/src/base/attr_line.hh index 87a0c108..e19e157d 100644 --- a/src/base/attr_line.hh +++ b/src/base/attr_line.hh @@ -70,7 +70,8 @@ struct line_range { bool empty() const { return this->length() == 0; } - void clear() { + void clear() + { this->lr_start = -1; this->lr_end = -1; } @@ -163,6 +164,12 @@ struct line_range { } }; +inline line_range +to_line_range(const string_fragment& frag) +{ + return line_range{frag.sf_begin, 
frag.sf_end}; +} + struct string_attr { string_attr(const struct line_range& lr, const string_attr_pair& value) : sa_range(lr), sa_type(value.first), sa_value(value.second) diff --git a/src/base/auto_mem.hh b/src/base/auto_mem.hh index e0d3122d..e6b456c4 100644 --- a/src/base/auto_mem.hh +++ b/src/base/auto_mem.hh @@ -86,6 +86,8 @@ public: ~auto_mem() { this->reset(); } + bool empty() const { return this->am_ptr == nullptr; } + operator T*() const { return this->am_ptr; } T* operator->() { return this->am_ptr; } diff --git a/src/base/humanize.network.cc b/src/base/humanize.network.cc index 004a435a..2bf390d7 100644 --- a/src/base/humanize.network.cc +++ b/src/base/humanize.network.cc @@ -30,39 +30,44 @@ #include "humanize.network.hh" #include "config.h" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" namespace humanize { namespace network { namespace path { nonstd::optional<::network::path> -from_str(const char* str) +from_str(string_fragment sf) { - static const pcrepp REMOTE_PATTERN( - "(?:(?[\\w\\._\\-]+)@)?" + static const auto REMOTE_PATTERN = lnav::pcre2pp::code::from_const( + "^(?:(?[\\w\\._\\-]+)@)?" "(?:\\[(?[^\\]]+)\\]|(?[^\\[/:]+)):" - "(?.*)"); + "(?.*)$"); + static thread_local auto REMOTE_MATCH_DATA + = REMOTE_PATTERN.create_match_data(); - pcre_context_static<30> pc; - pcre_input pi(str); + auto match_res = REMOTE_PATTERN.capture_from(sf) + .into(REMOTE_MATCH_DATA) + .matches() + .ignore_error(); - if (!REMOTE_PATTERN.match(pc, pi)) { + if (!match_res) { return nonstd::nullopt; } - const auto username = pi.get_substr_opt(pc["username"]); - const auto ipv6 = pi.get_substr_opt(pc["ipv6"]); - const auto hostname = pi.get_substr_opt(pc["hostname"]); + const auto username = REMOTE_MATCH_DATA["username"].map( + [](auto sf) { return sf.to_string(); }); + const auto ipv6 = REMOTE_MATCH_DATA["ipv6"]; + const auto hostname = REMOTE_MATCH_DATA["hostname"]; const auto locality_hostname = ipv6 ? 
ipv6.value() : hostname.value(); - auto path = pi.get_substr(pc["path"]); + auto path = *REMOTE_MATCH_DATA["path"]; if (path.empty()) { - path = "."; + path = string_fragment::from_const("."); } return ::network::path{ - {username, locality_hostname, nonstd::nullopt}, - path, + {username, locality_hostname.to_string(), nonstd::nullopt}, + path.to_string(), }; } diff --git a/src/base/humanize.network.hh b/src/base/humanize.network.hh index 08563f76..609f57dd 100644 --- a/src/base/humanize.network.hh +++ b/src/base/humanize.network.hh @@ -33,6 +33,7 @@ #include #include "fmt/format.h" +#include "intern_string.hh" #include "network.tcp.hh" #include "optional.hpp" @@ -99,13 +100,7 @@ namespace humanize { namespace network { namespace path { -nonstd::optional<::network::path> from_str(const char* str); - -inline nonstd::optional<::network::path> -from_str(const std::string& str) -{ - return from_str(str.c_str()); -} +nonstd::optional<::network::path> from_str(string_fragment sf); } // namespace path } // namespace network diff --git a/src/base/humanize.network.tests.cc b/src/base/humanize.network.tests.cc index 3cbfcb88..fe2e9d2b 100644 --- a/src/base/humanize.network.tests.cc +++ b/src/base/humanize.network.tests.cc @@ -36,17 +36,19 @@ TEST_CASE("humanize::network::path") { { - auto rp_opt = humanize::network::path::from_str("foobar"); + auto rp_opt = humanize::network::path::from_str( + string_fragment::from_const("foobar")); CHECK(!rp_opt); } { - auto rp_opt = humanize::network::path::from_str("dean@foobar/bar"); + auto rp_opt = humanize::network::path::from_str( + string_fragment::from_const("dean@foobar/bar")); CHECK(!rp_opt); } { auto rp_opt = humanize::network::path::from_str( - "dean@host1.example.com:/var/log"); + string_fragment::from_const("dean@host1.example.com:/var/log")); CHECK(rp_opt.has_value()); auto rp = *rp_opt; @@ -58,8 +60,9 @@ TEST_CASE("humanize::network::path") } { - auto rp_opt = humanize::network::path::from_str( - 
"dean@[fe80::184f:c67:baf1:fe02%en0]:/var/log"); + auto rp_opt + = humanize::network::path::from_str(string_fragment::from_const( + "dean@[fe80::184f:c67:baf1:fe02%en0]:/var/log")); CHECK(rp_opt.has_value()); auto rp = *rp_opt; @@ -74,8 +77,9 @@ TEST_CASE("humanize::network::path") } { - auto rp_opt = humanize::network::path::from_str( - "[fe80::184f:c67:baf1:fe02%en0]:/var/log"); + auto rp_opt + = humanize::network::path::from_str(string_fragment::from_const( + "[fe80::184f:c67:baf1:fe02%en0]:/var/log")); CHECK(rp_opt.has_value()); auto rp = *rp_opt; @@ -89,8 +93,8 @@ TEST_CASE("humanize::network::path") } { - auto rp_opt - = humanize::network::path::from_str("host1.example.com:/var/log"); + auto rp_opt = humanize::network::path::from_str( + string_fragment::from_const("host1.example.com:/var/log")); CHECK(rp_opt.has_value()); auto rp = *rp_opt; @@ -101,7 +105,8 @@ TEST_CASE("humanize::network::path") } { - auto rp_opt = humanize::network::path::from_str("host1.example.com:"); + auto rp_opt = humanize::network::path::from_str( + string_fragment::from_const("host1.example.com:")); CHECK(rp_opt.has_value()); auto rp = *rp_opt; diff --git a/src/base/intern_string.hh b/src/base/intern_string.hh index 3493bbce..9ed098da 100644 --- a/src/base/intern_string.hh +++ b/src/base/intern_string.hh @@ -32,6 +32,7 @@ #ifndef intern_string_hh #define intern_string_hh +#include #include #include @@ -48,9 +49,17 @@ struct string_fragment { using iterator = const char*; + static string_fragment invalid() + { + string_fragment retval; + + retval.invalidate(); + return retval; + } + static string_fragment from_c_str(const char* str) { - return string_fragment{str, 0, (int) strlen(str)}; + return string_fragment{str, 0, str != nullptr ? 
(int) strlen(str) : 0}; } template @@ -130,6 +139,11 @@ struct string_fragment { const char* data() const { return &this->sf_string[this->sf_begin]; } + const unsigned char* udata() const + { + return (const unsigned char*) &this->sf_string[this->sf_begin]; + } + char front() const { return this->sf_string[this->sf_begin]; } uint32_t front_codepoint() const @@ -252,6 +266,12 @@ struct string_fragment { this->sf_string, this->sf_begin + begin, this->sf_end}; } + string_fragment sub_range(int begin, int end) const + { + return string_fragment{ + this->sf_string, this->sf_begin + begin, this->sf_begin + end}; + } + nonstd::optional find(char ch) const { for (int lpc = this->sf_begin; lpc < this->sf_end; lpc++) { @@ -521,12 +541,25 @@ operator<(const char* left, const string_fragment& right) return rc < 0; } +inline void +operator+=(std::string& left, const string_fragment& right) +{ + left.append(right.data(), right.length()); +} + inline bool operator<(const string_fragment& left, const char* right) { return strncmp(left.data(), right, left.length()) < 0; } +inline std::ostream& +operator<<(std::ostream& os, const string_fragment& sf) +{ + os.write(sf.data(), sf.length()); + return os; +} + class intern_string { public: static const intern_string* lookup(const char* str, ssize_t len) noexcept; diff --git a/src/base/lnav.console.cc b/src/base/lnav.console.cc index 8eeeb024..a34ebac0 100644 --- a/src/base/lnav.console.cc +++ b/src/base/lnav.console.cc @@ -34,7 +34,10 @@ #include "config.h" #include "fmt/color.h" #include "itertools.hh" +#include "lnav.console.into.hh" #include "log_level_enum.hh" +#include "pcrepp/pcre2pp.hh" +#include "snippet_highlighters.hh" #include "view_curses.hh" using namespace lnav::roles::literals; @@ -462,5 +465,30 @@ print(FILE* file, const user_message& um) println(file, al); } +user_message +to_user_message(intern_string_t src, const lnav::pcre2pp::compile_error& ce) +{ + attr_line_t pcre_error_content{ce.ce_pattern}; + + 
lnav::snippets::regex_highlighter(pcre_error_content, + pcre_error_content.length(), + line_range{ + 0, + (int) pcre_error_content.length(), + }); + pcre_error_content.append("\n") + .append(ce.ce_offset, ' ') + .append(lnav::roles::error("^ ")) + .append(lnav::roles::error(ce.get_message())) + .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + + return user_message::error( + attr_line_t() + .append_quoted(ce.ce_pattern) + .append(" is not a valid regular expression")) + .with_reason(ce.get_message()) + .with_snippet(lnav::console::snippet::from(src, pcre_error_content)); +} + } // namespace console } // namespace lnav diff --git a/src/base/lnav.console.into.hh b/src/base/lnav.console.into.hh new file mode 100644 index 00000000..206d5634 --- /dev/null +++ b/src/base/lnav.console.into.hh @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef lnav_console_into_hh +#define lnav_console_into_hh + +#include "intern_string.hh" +#include "lnav.console.hh" + +namespace lnav { +namespace pcre2pp { + +struct compile_error; + +} + +namespace console { + +user_message to_user_message(intern_string_t src, + const pcre2pp::compile_error& ce); + +} +} // namespace lnav + +#endif diff --git a/src/base/lnav_log.cc b/src/base/lnav_log.cc index d1114f84..c1b08c41 100644 --- a/src/base/lnav_log.cc +++ b/src/base/lnav_log.cc @@ -56,20 +56,14 @@ #include #include +#define PCRE2_CODE_UNIT_WIDTH 8 +#include #include #include #include #include #include -#ifdef HAVE_PCRE_H -# include -#elif HAVE_PCRE_PCRE_H -# include -#else -# error "pcre.h not found?" 
-#endif - #if defined HAVE_NCURSESW_CURSES_H # include # include @@ -215,14 +209,14 @@ void log_host_info() { char cwd[MAXPATHLEN]; - const char* jittarget; + char jittarget[128]; struct utsname un; struct rusage ru; - int pcre_jit; + uint32_t pcre_jit; uname(&un); - pcre_config(PCRE_CONFIG_JIT, &pcre_jit); - pcre_config(PCRE_CONFIG_JITTARGET, &jittarget); + pcre2_config(PCRE2_CONFIG_JIT, &pcre_jit); + pcre2_config(PCRE2_CONFIG_JITTARGET, jittarget); log_info("uname:"); log_info(" sysname=%s", un.sysname); diff --git a/src/base/snippet_highlighters.cc b/src/base/snippet_highlighters.cc index 5cec8d74..058fa41f 100644 --- a/src/base/snippet_highlighters.cc +++ b/src/base/snippet_highlighters.cc @@ -30,7 +30,7 @@ #include "snippet_highlighters.hh" #include "attr_line.builder.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "view_curses.hh" namespace lnav { @@ -225,21 +225,24 @@ regex_highlighter(attr_line_t& al, int x, line_range sub) break; } case '>': { - static const pcrepp CAP_RE(R"(\(\?\<\w+$)"); + static const auto CAP_RE + = lnav::pcre2pp::code::from_const(R"(\(\?\<\w+$)"); auto capture_start = string_fragment::from_str_range( line, sub.lr_start, lpc) .find_left_boundary(lpc - sub.lr_start - 1, string_fragment::tag1{'('}); - pcre_context_static<30> pc; - pcre_input pi(capture_start); - if (CAP_RE.match(pc, pi)) { + auto cap_find_res + = CAP_RE.find_in(capture_start).ignore_error(); + + if (cap_find_res) { alb.overlay_attr( - line_range( - capture_start.sf_begin + pc.all()->c_begin + 3, - capture_start.sf_begin + pc.all()->c_end), + line_range(capture_start.sf_begin + + cap_find_res->f_all.sf_begin + 3, + capture_start.sf_begin + + cap_find_res->f_all.sf_end), VC_ROLE.value(role_t::VCR_IDENTIFIER)); alb.overlay_attr(line_range(lpc, lpc + 1), VC_ROLE.value(role_t::VCR_RE_SPECIAL)); diff --git a/src/data_parser.cc b/src/data_parser.cc index bfeca34e..902bdb6c 100644 --- a/src/data_parser.cc +++ b/src/data_parser.cc @@ -41,10 +41,10 @@ 
data_format data_parser::FORMAT_PLAIN("plain", DT_INVALID, DT_INVALID); data_parser::data_parser(data_scanner* ds) : dp_errors("dp_errors", __FILE__, __LINE__), dp_pairs("dp_pairs", __FILE__, __LINE__), dp_msg_format(nullptr), - dp_msg_format_begin(ds->get_input().pi_offset), dp_scanner(ds) + dp_msg_format_begin(ds->get_init_offset()), dp_scanner(ds) { if (TRACE_FILE != nullptr) { - fprintf(TRACE_FILE, "input %s\n", ds->get_input().get_string()); + fprintf(TRACE_FILE, "input %s\n", ds->get_input().to_string().c_str()); } } @@ -110,7 +110,8 @@ data_parser::pairup(data_parser::schema_id_t* schema, key_comps.POP_FRONT(); found = true; } else if (key_iter->e_token - == in_list.el_format.df_terminator) { + == in_list.el_format.df_terminator) + { std::vector key_copy; value.SPLICE(value.end(), @@ -259,17 +260,18 @@ data_parser::pairup(data_parser::schema_id_t* schema, if (!has_value) { element_list_t ELEMENT_LIST_T(blank_value); - pcre_input& pi = this->dp_scanner->get_input(); - const char* str = pi.get_string(); struct element blank; blank.e_token = DT_QUOTED_STRING; blank.e_capture.c_begin = blank.e_capture.c_end = pair_subs.front().e_capture.c_end; - if ((blank.e_capture.c_begin >= 0) - && ((size_t) blank.e_capture.c_begin < pi.pi_length)) + if (blank.e_capture.c_begin >= 0 + && blank.e_capture.c_begin + < this->dp_scanner->get_input().sf_end) { - switch (str[blank.e_capture.c_begin]) { + switch (this->dp_scanner->to_string_fragment(blank.e_capture) + .front()) + { case '=': case ':': blank.e_capture.c_begin += 1; @@ -387,23 +389,23 @@ data_parser::pairup(data_parser::schema_id_t* schema, } if (schema != nullptr && this->dp_msg_format != nullptr) { - pcre_input& pi = this->dp_scanner->get_input(); for (auto& fiter : pairs_out) { *(this->dp_msg_format) += this->get_string_up_to_value(fiter); this->dp_msg_format->append("#"); } - if ((size_t) this->dp_msg_format_begin < pi.pi_length) { - const char* str = pi.get_string(); - pcre_context::capture_t 
last(this->dp_msg_format_begin, - pi.pi_length); + if ((size_t) this->dp_msg_format_begin + < this->dp_scanner->get_input().length()) + { + auto last = this->dp_scanner->get_input().substr( + this->dp_msg_format_begin); - switch (str[last.c_begin]) { + switch (last.front()) { case '\'': case '"': - last.c_begin += 1; + last.sf_begin += 1; break; } - *(this->dp_msg_format) += pi.get_substr(&last); + *(this->dp_msg_format) += last.to_string(); } } @@ -415,21 +417,20 @@ data_parser::pairup(data_parser::schema_id_t* schema, void data_parser::discover_format() { - pcre_context_static<30> pc; std::stack state_stack; - struct element elem; - this->dp_group_token.push_back(DT_INVALID); this->dp_group_stack.resize(1); state_stack.push(discover_format_state()); - while (this->dp_scanner->tokenize2(pc, elem.e_token)) { - pcre_context::iterator pc_iter; - - pc_iter = std::find_if(pc.begin(), pc.end(), capture_if_not(-1)); - require(pc_iter != pc.end()); + while (true) { + auto tok_res = this->dp_scanner->tokenize2(); + if (!tok_res) { + break; + } - elem.e_capture = *pc_iter; + element elem; + elem.e_token = tok_res->tr_token; + elem.e_capture = tok_res->tr_inner_capture; require(elem.e_capture.c_begin >= 0); require(elem.e_capture.c_end >= 0); @@ -598,22 +599,19 @@ data_parser::parse() std::string data_parser::get_element_string(const data_parser::element& elem) const { - pcre_input& pi = this->dp_scanner->get_input(); - - return pi.get_substr(&elem.e_capture); + return this->dp_scanner->to_string_fragment(elem.e_capture).to_string(); } std::string data_parser::get_string_up_to_value(const data_parser::element& elem) { - pcre_input& pi = this->dp_scanner->get_input(); const element& val_elem = elem.e_token == DNT_PAIR ? 
elem.e_sub_elements->back() : elem; if (this->dp_msg_format_begin <= val_elem.e_capture.c_begin) { - pcre_context::capture_t leading_and_key = pcre_context::capture_t( + auto leading_and_key = data_scanner::capture_t( this->dp_msg_format_begin, val_elem.e_capture.c_begin); - const char* str = pi.get_string(); + auto str = this->dp_scanner->get_input().data(); if (leading_and_key.length() >= 2) { switch (str[leading_and_key.c_end - 1]) { case '\'': @@ -635,7 +633,8 @@ data_parser::get_string_up_to_value(const data_parser::element& elem) } } this->dp_msg_format_begin = val_elem.e_capture.c_end; - return pi.get_substr(&leading_and_key); + return this->dp_scanner->to_string_fragment(leading_and_key) + .to_string(); } else { this->dp_msg_format_begin = val_elem.e_capture.c_end; } @@ -646,19 +645,18 @@ const char* data_parser::get_element_string(const data_parser::element& elem, size_t& len_out) { - pcre_input& pi = this->dp_scanner->get_input(); - len_out = elem.e_capture.length(); - return pi.get_substr_start(&elem.e_capture); + return this->dp_scanner->to_string_fragment(elem.e_capture).data(); } void data_parser::print(FILE* out, data_parser::element_list_t& el) { - fprintf( - out, " %s\n", this->dp_scanner->get_input().get_string()); + fprintf(out, + " %s\n", + this->dp_scanner->get_input().to_string().c_str()); for (auto& iter : el) { - iter.print(out, this->dp_scanner->get_input()); + iter.print(out, *this->dp_scanner); } } @@ -939,7 +937,8 @@ data_parser::element::value_token() const if (this->e_token == DNT_VALUE) { if (this->e_sub_elements != nullptr - && this->e_sub_elements->size() == 1) { + && this->e_sub_elements->size() == 1) + { retval = this->e_sub_elements->front().e_token; } else { retval = DT_SYMBOL; @@ -955,7 +954,8 @@ data_parser::element::get_value_elem() const { if (this->e_token == DNT_VALUE) { if (this->e_sub_elements != nullptr - && this->e_sub_elements->size() == 1) { + && this->e_sub_elements->size() == 1) + { return 
this->e_sub_elements->front(); } } @@ -972,13 +972,13 @@ data_parser::element::get_pair_elem() const } void -data_parser::element::print(FILE* out, pcre_input& pi, int offset) const +data_parser::element::print(FILE* out, data_scanner& ds, int offset) const { int lpc; if (this->e_sub_elements != nullptr) { for (auto& e_sub_element : *this->e_sub_elements) { - e_sub_element.print(out, pi, offset + 1); + e_sub_element.print(out, ds, offset + 1); } } @@ -998,11 +998,11 @@ data_parser::element::print(FILE* out, pcre_input& pi, int offset) const fputc(' ', out); } } - for (; lpc < (int) pi.pi_length; lpc++) { + for (; lpc < (int) ds.get_input().length(); lpc++) { fputc(' ', out); } - std::string sub = pi.get_substr(&this->e_capture); + std::string sub = ds.to_string_fragment(this->e_capture).to_string(); fprintf(out, " %s\n", sub.c_str()); } diff --git a/src/data_parser.hh b/src/data_parser.hh index 1cb66dc3..e340245c 100644 --- a/src/data_parser.hh +++ b/src/data_parser.hh @@ -40,7 +40,6 @@ #include "base/lnav_log.hh" #include "byte_array.hh" #include "data_scanner.hh" -#include "pcrepp/pcrepp.hh" #define ELEMENT_LIST_T(var) var("" #var, __FILE__, __LINE__, group_depth) #define PUSH_FRONT(elem) push_front(elem, __FILE__, __LINE__) @@ -334,9 +333,9 @@ public: const element& get_pair_elem() const; - void print(FILE* out, pcre_input& pi, int offset = 0) const; + void print(FILE* out, data_scanner&, int offset = 0) const; - pcre_context::capture_t e_capture; + data_scanner::capture_t e_capture; data_token_t e_token; element_list_t* e_sub_elements; diff --git a/src/data_scanner.cc b/src/data_scanner.cc index 54be7528..4350d04e 100644 --- a/src/data_scanner.cc +++ b/src/data_scanner.cc @@ -29,228 +29,165 @@ #include "data_scanner.hh" -#include -#include -#include - #include "config.h" -#include "pcrepp/pcrepp.hh" + +void +data_scanner::capture_t::ltrim(const char* str) +{ + while (this->c_begin < this->c_end && isspace(str[this->c_begin])) { + this->c_begin += 1; + } +} 
static struct { const char* name; - pcrepp pcre; } MATCHERS[DT_TERMINAL_MAX] = { { "quot", - pcrepp("\\A(?:(?:u|r)?\"((?:\\\\.|[^\"])+)\"|" - "(?:u|r)?'((?:\\\\.|[^'])+)')"), }, { "url", - pcrepp("\\A([\\w]+://[^\\s'\"\\[\\](){}]+[/a-zA-Z0-9\\-=&])"), }, { "path", - pcrepp("\\A((?:/|\\./|\\.\\./)[\\w\\.\\-_\\~/]*)"), }, { "mac", - pcrepp( - "\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})(?!:)"), }, { "date", - pcrepp("\\A(" - "\\d{4}/\\d{1,2}/\\d{1,2}|" - "\\d{4}-\\d{1,2}-\\d{1,2}|" - "\\d{2}/\\w{3}/\\d{4}" - ")T?"), }, { "time", - pcrepp("\\A([\\s\\d]\\d:\\d\\d(?:(?!:\\d)|:\\d\\d(?:[\\.,]\\d{3,6})?Z?)" - ")\\b"), }, /* { "qual", pcrepp("\\A([^\\s:=]+:[^\\s:=,]+(?!,)(?::[^\\s:=,]+)*)"), }, */ { "ipv6", - pcrepp("\\A(::|[:\\da-fA-F\\.]+[a-fA-F\\d](?:%\\w+)?)"), }, { "hexd", - pcrepp("\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F])+)"), }, { "xmld", - pcrepp("\\A(]+)" - "))*\\s*>)"), }, { "xmlt", - pcrepp("\\A(<\\??[\\w:]+\\s*(?:[\\w:]+(?:\\s*=\\s*" - "(?:\"((?:\\\\.|[^\"])+)\"|'((?:\\\\.|[^'])+)'|[^>]+)" - "))*\\s*(?:/|\\?)>)"), }, { "xmlo", - pcrepp("\\A(<[\\w:]+\\s*(?:[\\w:]+(?:\\s*=\\s*" - "(?:\"((?:\\\\.|[^\"])+)\"|'((?:\\\\.|[^'])+)'|[^>]+)" - "))*\\s*>)"), }, { "xmlc", - pcrepp("\\A()"), }, { "h1", - pcrepp("\\A([A-Z \\-])"), }, { "h2", - pcrepp("\\A([A-Z \\-])"), }, { "h3", - pcrepp("\\A([A-Z \\-])"), }, { "coln", - pcrepp("\\A(:)"), }, { "eq", - pcrepp("\\A(=)"), }, { "comm", - pcrepp("\\A(,)"), }, { "semi", - pcrepp("\\A(;)"), }, { "empt", - pcrepp("\\A(\\(\\)|\\{\\}|\\[\\])"), }, { "lcurly", - pcrepp("\\A({)"), }, { "rcurly", - pcrepp("\\A(})"), }, { "lsquare", - pcrepp("\\A(\\[)"), }, { "rsquare", - pcrepp("\\A(\\])"), }, { "lparen", - pcrepp("\\A(\\()"), }, { "rparen", - pcrepp("\\A(\\))"), }, { "langle", - pcrepp("\\A(\\<)"), }, { "rangle", - pcrepp("\\A(\\>)"), }, { "ipv4", - pcrepp("\\A(" - "(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.){3}" - "(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(?![\\d]))"), }, { "uuid", - 
pcrepp("\\A([0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12})"), }, { "vers", - pcrepp("\\A(" - "[0-9]+(?:\\.[0-9]+\\w*){2,}(?:-\\w+)?|" - "[0-9]+(?:\\.[0-9]+\\w*)+(?\\~`\\|\\\\]+" - "(?:::[^\";\\s:=,\\(\\)\\{\\}\\[\\]\\+#!@%\\^&\\*'\\?<>\\~`\\|\\\\]" - "+)*)"), }, { "line", - pcrepp("\\A(\r?\n|\r|;)"), }, { "wspc", - pcrepp("\\A([ \\r\\t\\n]+)"), }, { "dot", - pcrepp("\\A(\\.)"), }, { "escc", - pcrepp("\\A(\\\\\\.)"), }, { "gbg", - pcrepp("\\A(.)"), }, }; @@ -272,11 +209,12 @@ data_scanner::token2name(data_token_t token) { if (token < 0) { return "inv"; - } else if (token < DT_TERMINAL_MAX) { + } + if (token < DT_TERMINAL_MAX) { return MATCHERS[token].name; - } else if (token == DT_ANY) { + } + if (token == DT_ANY) { return "any"; - } else { - return DNT_NAMES[token - DNT_KEY]; } + return DNT_NAMES[token - DNT_KEY]; } diff --git a/src/data_scanner.hh b/src/data_scanner.hh index 48c45a37..e694a667 100644 --- a/src/data_scanner.hh +++ b/src/data_scanner.hh @@ -32,7 +32,7 @@ #include -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "shared_buffer.hh" enum data_token_t { @@ -118,47 +118,90 @@ class data_scanner { public: static const char* token2name(data_token_t token); - data_scanner(const std::string& line, - size_t off = 0, - size_t len = (size_t) -1) - : ds_line(line), ds_pcre_input(ds_line.c_str(), off, len) + struct capture_t { + capture_t() + { /* We don't initialize anything since it's a perf hit. 
*/ + } + + capture_t(int begin, int end) : c_begin(begin), c_end(end) + { + assert(begin <= end); + } + + int c_begin; + int c_end; + + void ltrim(const char* str); + + bool contains(int pos) const + { + return this->c_begin <= pos && pos < this->c_end; + } + + bool is_valid() const { return this->c_begin != -1; } + + int length() const { return this->c_end - this->c_begin; } + + bool empty() const { return this->c_begin == this->c_end; } + }; + + data_scanner(const std::string& line, size_t off = 0) + : ds_line(line), ds_input(this->ds_line), ds_init_offset(off), + ds_next_offset(off) { - if (!line.empty() && line[line.length() - 1] == '.') { - this->ds_pcre_input.pi_length -= 1; + if (!line.empty() && line.back() == '.') { + this->ds_input.sf_end -= 1; } } - explicit data_scanner(string_fragment sf) : ds_pcre_input(sf) + explicit data_scanner(string_fragment sf) : ds_input(sf) { - if (!sf.empty() && sf[sf.length() - 1] == '.') { - this->ds_pcre_input.pi_length -= 1; + if (!sf.empty() && sf.back() == '.') { + this->ds_input.sf_end -= 1; } } - data_scanner(shared_buffer_ref& line, - size_t off = 0, - size_t len = (size_t) -1) - : ds_sbr(line), - ds_pcre_input( - line.get_data(), off, len == (size_t) -1 ? 
line.length() : len) + explicit data_scanner(shared_buffer_ref& line, size_t off, size_t end) + : ds_sbr(line), ds_input(line.to_string_fragment().sub_range(0, end)), + ds_init_offset(off), ds_next_offset(off) { - require(len == (size_t) -1 || len <= line.length()); - if (line.length() > 0 && line.get_data()[line.length() - 1] == '.') { - this->ds_pcre_input.pi_length -= 1; + if (!this->ds_input.empty() && this->ds_input.back() == '.') { + this->ds_input.sf_end -= 1; } } - bool tokenize(pcre_context& pc, data_token_t& token_out); - bool tokenize2(pcre_context& pc, data_token_t& token_out); + struct tokenize_result { + data_token_t tr_token{DT_INVALID}; + capture_t tr_capture; + capture_t tr_inner_capture; + const char* tr_data{nullptr}; + + std::string to_string() const + { + return {&this->tr_data[this->tr_capture.c_begin], + (size_t) this->tr_capture.length()}; + } + }; + + nonstd::optional tokenize2(); - pcre_input& get_input() { return this->ds_pcre_input; } + void reset() { this->ds_next_offset = this->ds_init_offset; } - void reset() { this->ds_pcre_input.reset_next_offset(); } + int get_init_offset() const { return this->ds_init_offset; } + + string_fragment get_input() const { return this->ds_input; } + + string_fragment to_string_fragment(capture_t cap) const + { + return this->ds_input.sub_range(cap.c_begin, cap.c_end); + } private: std::string ds_line; shared_buffer_ref ds_sbr; - pcre_input ds_pcre_input; + string_fragment ds_input; + int ds_init_offset{0}; + int ds_next_offset{0}; }; #endif diff --git a/src/data_scanner_re.cc b/src/data_scanner_re.cc index 201c5867..68ee9d4b 100644 --- a/src/data_scanner_re.cc +++ b/src/data_scanner_re.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 3.0 on Mon Aug 22 22:00:24 2022 */ +/* Generated by re2c 3.0 on Fri Sep 9 19:37:44 2022 */ #line 1 "../../lnav/src/data_scanner_re.re" /** * Copyright (c) 2015, Timothy Stack @@ -36,26 +36,28 @@ #include "config.h" #include "data_scanner.hh" -bool 
data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out) +nonstd::optional data_scanner::tokenize2() { + data_token_t token_out = DT_INVALID; + capture_t cap_all; + capture_t cap_inner; # define YYCTYPE unsigned char # define CAPTURE(tok) { \ if (YYCURSOR.val == EMPTY) { \ - pi.pi_next_offset = pi.pi_length; \ + this->ds_next_offset = this->ds_input.length(); \ } else { \ - pi.pi_next_offset = YYCURSOR.val - (const unsigned char *) pi.get_string(); \ + this->ds_next_offset = YYCURSOR.val - this->ds_input.udata(); \ } \ - cap[0].c_end = pi.pi_next_offset; \ - cap[1].c_end = pi.pi_next_offset; \ + cap_all.c_end = this->ds_next_offset; \ + cap_inner.c_end = this->ds_next_offset; \ token_out = tok; \ } # define RET(tok) { \ CAPTURE(tok); \ - return true; \ + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; \ } static const unsigned char *EMPTY = (const unsigned char *) ""; - pcre_input &pi = this->ds_pcre_input; struct _YYCURSOR { YYCTYPE operator*() const { if (this->val < this->lim) { @@ -93,22 +95,20 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out) const YYCTYPE *val{nullptr}; const YYCTYPE *lim{nullptr}; } YYCURSOR; - YYCURSOR = (const unsigned char *) pi.get_string() + pi.pi_next_offset; + YYCURSOR = (const unsigned char *) this->ds_input.udata() + this->ds_next_offset; _YYCURSOR yyt1; _YYCURSOR yyt2; _YYCURSOR yyt3; _YYCURSOR yyt4; - const YYCTYPE *YYLIMIT = (const unsigned char *) pi.get_string() + pi.pi_length; + const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end(); const YYCTYPE *YYMARKER = YYCURSOR; - pcre_context::capture_t *cap = pc.all(); YYCURSOR.lim = YYLIMIT; - pc.set_count(2); - cap[0].c_begin = pi.pi_next_offset; - cap[0].c_end = pi.pi_next_offset; - cap[1].c_begin = pi.pi_next_offset; - cap[1].c_end = pi.pi_next_offset; + cap_all.c_begin = this->ds_next_offset; + cap_all.c_end = this->ds_next_offset; + cap_inner.c_begin = this->ds_next_offset; + cap_inner.c_end = 
this->ds_next_offset; #line 115 "data_scanner_re.cc" @@ -561,7 +561,7 @@ yy1: yy2: ++YYCURSOR; #line 138 "../../lnav/src/data_scanner_re.re" - { return false; } + { return nonstd::nullopt; } #line 566 "data_scanner_re.cc" yy3: yyaccept = 0; @@ -1867,15 +1867,15 @@ yy70: #line 140 "../../lnav/src/data_scanner_re.re" { CAPTURE(DT_QUOTED_STRING); - switch (pi.get_string()[cap[1].c_begin]) { + switch (this->ds_input[cap_inner.c_begin]) { case 'u': case 'r': - cap[1].c_begin += 1; + cap_inner.c_begin += 1; break; } - cap[1].c_begin += 1; - cap[1].c_end -= 1; - return true; + cap_inner.c_begin += 1; + cap_inner.c_end -= 1; + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } #line 1881 "data_scanner_re.cc" yy71: @@ -4244,15 +4244,15 @@ yy155: #line 155 "../../lnav/src/data_scanner_re.re" { CAPTURE(DT_QUOTED_STRING); - switch (pi.get_string()[cap[1].c_begin]) { + switch (this->ds_input[cap_inner.c_begin]) { case 'u': case 'r': - cap[1].c_begin += 1; + cap_inner.c_begin += 1; break; } - cap[1].c_begin += 1; - cap[1].c_end -= 1; - return true; + cap_inner.c_begin += 1; + cap_inner.c_end -= 1; + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } #line 4258 "data_scanner_re.cc" yy156: @@ -11090,7 +11090,7 @@ yy347: yy348: #line 171 "../../lnav/src/data_scanner_re.re" { - if ((YYCURSOR - (const unsigned char *) pi.get_string()) == 17) { + if ((YYCURSOR - this->ds_input.udata()) == 17) { RET(DT_MAC_ADDRESS); } else { RET(DT_HEX_DUMP); diff --git a/src/data_scanner_re.re b/src/data_scanner_re.re index f8491e89..4d47ee70 100644 --- a/src/data_scanner_re.re +++ b/src/data_scanner_re.re @@ -34,26 +34,28 @@ #include "config.h" #include "data_scanner.hh" -bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out) +nonstd::optional data_scanner::tokenize2() { + data_token_t token_out = DT_INVALID; + capture_t cap_all; + capture_t cap_inner; # define YYCTYPE unsigned char # define CAPTURE(tok) { \ if (YYCURSOR.val 
== EMPTY) { \ - pi.pi_next_offset = pi.pi_length; \ + this->ds_next_offset = this->ds_input.length(); \ } else { \ - pi.pi_next_offset = YYCURSOR.val - (const unsigned char *) pi.get_string(); \ + this->ds_next_offset = YYCURSOR.val - this->ds_input.udata(); \ } \ - cap[0].c_end = pi.pi_next_offset; \ - cap[1].c_end = pi.pi_next_offset; \ + cap_all.c_end = this->ds_next_offset; \ + cap_inner.c_end = this->ds_next_offset; \ token_out = tok; \ } # define RET(tok) { \ CAPTURE(tok); \ - return true; \ + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; \ } static const unsigned char *EMPTY = (const unsigned char *) ""; - pcre_input &pi = this->ds_pcre_input; struct _YYCURSOR { YYCTYPE operator*() const { if (this->val < this->lim) { @@ -91,22 +93,20 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out) const YYCTYPE *val{nullptr}; const YYCTYPE *lim{nullptr}; } YYCURSOR; - YYCURSOR = (const unsigned char *) pi.get_string() + pi.pi_next_offset; + YYCURSOR = (const unsigned char *) this->ds_input.udata() + this->ds_next_offset; _YYCURSOR yyt1; _YYCURSOR yyt2; _YYCURSOR yyt3; _YYCURSOR yyt4; - const YYCTYPE *YYLIMIT = (const unsigned char *) pi.get_string() + pi.pi_length; + const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end(); const YYCTYPE *YYMARKER = YYCURSOR; - pcre_context::capture_t *cap = pc.all(); YYCURSOR.lim = YYLIMIT; - pc.set_count(2); - cap[0].c_begin = pi.pi_next_offset; - cap[0].c_end = pi.pi_next_offset; - cap[1].c_begin = pi.pi_next_offset; - cap[1].c_end = pi.pi_next_offset; + cap_all.c_begin = this->ds_next_offset; + cap_all.c_end = this->ds_next_offset; + cap_inner.c_begin = this->ds_next_offset; + cap_inner.c_end = this->ds_next_offset; /*!re2c re2c:yyfill:enable = 0; @@ -135,41 +135,41 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out) (IPV6SEG":"){1,4}":"IPV4ADDR ); - EOF { return false; } + EOF { return nonstd::nullopt; } 
("u"|"r")?'"'('\\'.|[^\x00"\\]|'""')*'"' { CAPTURE(DT_QUOTED_STRING); - switch (pi.get_string()[cap[1].c_begin]) { + switch (this->ds_input[cap_inner.c_begin]) { case 'u': case 'r': - cap[1].c_begin += 1; + cap_inner.c_begin += 1; break; } - cap[1].c_begin += 1; - cap[1].c_end -= 1; - return true; + cap_inner.c_begin += 1; + cap_inner.c_end -= 1; + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } [a-qstv-zA-QSTV-Z]"'" { CAPTURE(DT_WORD); } ("u"|"r")?"'"('\\'.|"''"|[^\x00'\\])*"'"/[^sS] { CAPTURE(DT_QUOTED_STRING); - switch (pi.get_string()[cap[1].c_begin]) { + switch (this->ds_input[cap_inner.c_begin]) { case 'u': case 'r': - cap[1].c_begin += 1; + cap_inner.c_begin += 1; break; } - cap[1].c_begin += 1; - cap[1].c_end -= 1; - return true; + cap_inner.c_begin += 1; + cap_inner.c_end -= 1; + return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } [a-zA-Z0-9]+":/""/"?[^\x00\r\n\t '"[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); } ("/"|"./"|"../"|[A-Z]":\\"|"\\\\")("Program Files"(" (x86)")?)?[a-zA-Z0-9_\.\-\~/\\!@#$%^&*()]* { RET(DT_PATH); } (SPACE|NUM)NUM":"NUM{2}/[^:] { RET(DT_TIME); } (SPACE|NUM)NUM?":"NUM{2}":"NUM{2}("."NUM{3,6})?/[^:] { RET(DT_TIME); } [0-9a-fA-F][0-9a-fA-F](":"[0-9a-fA-F][0-9a-fA-F])+ { - if ((YYCURSOR - (const unsigned char *) pi.get_string()) == 17) { + if ((YYCURSOR - this->ds_input.udata()) == 17) { RET(DT_MAC_ADDRESS); } else { RET(DT_HEX_DUMP); diff --git a/src/document.sections.cc b/src/document.sections.cc index 2c071c33..8541564d 100644 --- a/src/document.sections.cc +++ b/src/document.sections.cc @@ -251,13 +251,16 @@ public: metadata walk() { metadata_builder mb; - pcre_context_static<30> pc; data_token_t dt = DT_INVALID; - auto& pi = this->sw_scanner.get_input(); size_t garbage_count = 0; - while (garbage_count < 1000 && this->sw_scanner.tokenize2(pc, dt)) { - element el(dt, pc); + while (garbage_count < 1000) { + auto tokenize_res = this->sw_scanner.tokenize2(); + if (!tokenize_res) 
{ + break; + } + + element el(tokenize_res->tr_token, tokenize_res->tr_capture); switch (dt) { case DT_XML_DECL_TAG: @@ -271,7 +274,7 @@ public: this->sw_interval_state.back().is_line_number = this->sw_line_number; this->sw_interval_state.back().is_name - = pi.get_substr(&el.e_capture); + = tokenize_res->to_string(); this->sw_depth += 1; this->sw_interval_state.resize(this->sw_depth + 1); this->sw_hier_nodes.push_back( @@ -328,13 +331,14 @@ public: = std::move(this->sw_hier_nodes.back()); this->sw_hier_nodes.pop_back(); if (this->sw_interval_state.back().is_start) { - pcre_context::capture_t obj_cap = { + data_scanner::capture_t obj_cap = { static_cast(this->sw_interval_state.back() .is_start.value()), el.e_capture.c_end, }; - auto sf = pi.get_string_fragment(&obj_cap); + auto sf + = this->sw_scanner.to_string_fragment(obj_cap); if (!sf.find('\n')) { this->sw_hier_stage->hn_named_children.clear(); this->sw_hier_stage->hn_children.clear(); @@ -396,18 +400,13 @@ public: private: struct element { - element(data_token_t token, pcre_context& pc) - : e_token(token), e_capture(*pc.all()) - { - } - - element(data_token_t token, pcre_context::capture_t& cap) + element(data_token_t token, data_scanner::capture_t& cap) : e_token(token), e_capture(cap) { } data_token_t e_token; - pcre_context::capture_t e_capture; + data_scanner::capture_t e_capture; }; struct interval_state { @@ -416,11 +415,10 @@ private: std::string is_name; }; - nonstd::optional flush_values() + nonstd::optional flush_values() { - nonstd::optional last_key; - nonstd::optional retval; - auto& pi = this->sw_scanner.get_input(); + nonstd::optional last_key; + nonstd::optional retval; if (!this->sw_values.empty()) { if (!this->sw_interval_state.back().is_start) { @@ -443,7 +441,9 @@ private: case DT_EQUALS: if (last_key) { this->sw_interval_state.back().is_name - = pi.get_substr(&last_key.value()); + = this->sw_scanner + .to_string_fragment(last_key.value()) + .to_string(); if 
(!this->sw_interval_state.back().is_name.empty()) { this->sw_interval_state.back().is_start = static_cast( @@ -464,7 +464,7 @@ private: return retval; } - void append_child_node(nonstd::optional terminator) + void append_child_node(nonstd::optional terminator) { auto& ivstate = this->sw_interval_state.back(); if (!ivstate.is_start || !terminator || this->sw_depth == 0) { diff --git a/src/file_collection.cc b/src/file_collection.cc index a0208c9d..19e71c2e 100644 --- a/src/file_collection.cc +++ b/src/file_collection.cc @@ -44,7 +44,6 @@ #include "lnav_util.hh" #include "logfile.hh" #include "pcap_manager.hh" -#include "pcrepp/pcrepp.hh" #include "service_tags.hh" #include "tailer/tailer.looper.hh" @@ -84,7 +83,8 @@ file_collection::close_files(const std::vector>& files) auto path_str = actual_path_opt.value().string(); for (auto iter = REALPATH_CACHE.begin(); - iter != REALPATH_CACHE.end();) { + iter != REALPATH_CACHE.end();) + { if (iter->first == path_str || iter->second == path_str) { iter = REALPATH_CACHE.erase(iter); } else { @@ -339,7 +339,8 @@ file_collection::watch_logfile(const std::string& filename, error_queue = convert_res.cr_error_queue]( auto& fc, auto& child) { if (child.was_normal_exit() - && child.exit_status() == EXIT_SUCCESS) { + && child.exit_status() == EXIT_SUCCESS) + { log_info("pcap[%d] exited normally", child.in()); return; diff --git a/src/filter_sub_source.cc b/src/filter_sub_source.cc index 55f61d2c..a97cf02e 100644 --- a/src/filter_sub_source.cc +++ b/src/filter_sub_source.cc @@ -425,15 +425,15 @@ filter_sub_source::rl_change(readline_curses* rc) break; case filter_lang_t::REGEX: { auto regex_res - = pcrepp::shared_from_str(new_value, PCRE_CASELESS | PCRE_UTF8); + = lnav::pcre2pp::code::from(new_value, PCRE2_CASELESS); if (regex_res.isErr()) { auto pe = regex_res.unwrapErr(); lnav_data.ld_filter_help_status_source.fss_error.set_value( - "error: %s", pe.ce_msg); + "error: %s", pe.get_message().c_str()); } else { auto& hm = 
top_view->get_highlights(); - highlighter hl(regex_res.unwrap()); + highlighter hl(regex_res.unwrap().to_shared()); auto role = tf->get_type() == text_filter::EXCLUDE ? role_t::VCR_DIFF_DELETE : role_t::VCR_DIFF_ADD; @@ -508,21 +508,12 @@ filter_sub_source::rl_perform(readline_curses* rc) switch (tf->get_lang()) { case filter_lang_t::NONE: case filter_lang_t::REGEX: { - auto compile_res = pcrepp::shared_from_str( - new_value, PCRE_CASELESS | PCRE_UTF8); + auto compile_res + = lnav::pcre2pp::code::from(new_value, PCRE2_CASELESS); if (compile_res.isErr()) { auto ce = compile_res.unwrapErr(); - auto um = lnav::console::user_message::error( - "invalid regular expression") - .with_reason(ce.ce_msg) - .with_snippet(lnav::console::snippet::from( - INPUT_SRC, new_value)); - um.um_snippets.back() - .s_content.append("\n") - .append(ce.ce_offset, ' ') - .append("^ "_comment) - .append(lnav::roles::comment(ce.ce_msg)); + auto um = lnav::console::to_user_message(INPUT_SRC, ce); lnav_data.ld_exec_context.ec_error_callback_stack.back()( um); this->rl_abort(rc); @@ -530,11 +521,11 @@ filter_sub_source::rl_perform(readline_curses* rc) tf->lf_deleted = true; tss->text_filters_changed(); - auto pf - = std::make_shared(tf->get_type(), - new_value, - tf->get_index(), - compile_res.unwrap()); + auto pf = std::make_shared( + tf->get_type(), + new_value, + tf->get_index(), + compile_res.unwrap().to_shared()); *iter = pf; tss->text_filters_changed(); diff --git a/src/grep_proc.cc b/src/grep_proc.cc index 0cabe9f1..e67ae2ba 100644 --- a/src/grep_proc.cc +++ b/src/grep_proc.cc @@ -48,7 +48,7 @@ #include "vis_line.hh" template -grep_proc::grep_proc(pcre* code, +grep_proc::grep_proc(std::shared_ptr code, grep_proc_source& gps, std::shared_ptr ps) : pollable(ps, pollable::category::background), gp_pcre(code), @@ -189,40 +189,30 @@ grep_proc::child_loop() line_value.clear(); done = !this->gp_source.grep_value_for_line(line, line_value); if (!done) { - pcre_context_static<128> pc; - pcre_input 
pi(line_value); - - while (this->gp_pcre.match(pc, pi)) { - pcre_context::iterator pc_iter; - pcre_context::capture_t* m; - - if (pi.pi_offset == 0) { - fprintf(stdout, "%d\n", (int) line); - } - m = pc.all(); - fprintf(stdout, "[%d:%d]\n", m->c_begin, m->c_end); - for (pc_iter = pc.begin(); pc_iter != pc.end(); pc_iter++) { - if (!pc_iter->is_valid()) { - continue; + this->gp_pcre->capture_from(line_value) + .for_each([&](lnav::pcre2pp::match_data& md) { + if (md.leading().sf_begin == 0) { + fprintf(stdout, "%d\n", (int) line); } fprintf(stdout, - "(%d:%d)", - pc_iter->c_begin, - pc_iter->c_end); - - /* If the capture was conditional, pcre will return a -1 - * here. - */ - if (pc_iter->c_begin >= 0) { - fwrite(pi.get_substr_start(pc_iter), - 1, - pc_iter->length(), - stdout); + "[%d:%d]\n", + md[0]->sf_begin, + md[0]->sf_end); + for (int lpc = 1; lpc < md.get_count(); lpc++) { + if (!md[lpc]) { + continue; + } + fprintf(stdout, + "(%d:%d)", + md[lpc]->sf_begin, + md[lpc]->sf_end); + + fwrite( + md[lpc]->data(), 1, md[lpc]->length(), stdout); + fputc('\n', stdout); } - fputc('\n', stdout); - } - fprintf(stdout, "/\n"); - } + fprintf(stdout, "/\n"); + }); } if (((line + 1) % 10000) == 0) { diff --git a/src/grep_proc.hh b/src/grep_proc.hh index 2f185cbc..58010e3f 100644 --- a/src/grep_proc.hh +++ b/src/grep_proc.hh @@ -46,7 +46,7 @@ #include "base/auto_mem.hh" #include "base/lnav_log.hh" #include "line_buffer.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "pollable.hh" #include "strong_int.hh" @@ -180,7 +180,7 @@ public: * @param code The pcre code to run over the lines of input. * @param gps The source of the data to match. 
*/ - grep_proc(pcre* code, + grep_proc(std::shared_ptr code, grep_proc_source& gps, std::shared_ptr ps); @@ -274,7 +274,7 @@ protected: virtual void handle_match( int line, std::string& line_value, int off, int* matches, int count); - pcrepp gp_pcre; + std::shared_ptr gp_pcre; grep_proc_source& gp_source; /*< The data source delegate. */ auto_fd gp_err_pipe; /*< Standard error from the child. */ diff --git a/src/highlighter.cc b/src/highlighter.cc index 636e1b14..57afafd2 100644 --- a/src/highlighter.cc +++ b/src/highlighter.cc @@ -114,33 +114,33 @@ highlighter::annotate(attr_line_t& al, int start) const return; } - pcre_context_static<60> pc; - pcre_input pi(sf); - - while (this->h_regex->match(pc, pi)) { - if (pc.get_count() == 1) { - line_range lr{start + pc.all()->c_begin, start + pc.all()->c_end}; + this->h_regex->capture_from(sf).for_each( + [&](lnav::pcre2pp::match_data& md) { + if (md.get_count() == 1) { + this->annotate_capture(al, to_line_range(md[0].value())); + } else { + for (int lpc = 1; lpc < md.get_count(); lpc++) { + if (!md[lpc]) { + continue; + } - this->annotate_capture(al, lr); - } else { - for (int lpc = 0; lpc < pc.get_count() - 1; lpc++) { - line_range lr{start + pc[lpc]->c_begin, start + pc[lpc]->c_end}; - const auto* name = this->h_regex->name_for_capture(lpc); + const auto* name = this->h_regex->get_name_for_capture(lpc); + auto lr = to_line_range(md[lpc].value()); - if (name != nullptr && name[0]) { - auto ident_attrs = vc.attrs_for_ident(name); + if (name != nullptr && name[0]) { + auto ident_attrs = vc.attrs_for_ident(name); - ident_attrs.ta_attrs |= this->h_attrs.ta_attrs; - if (this->h_role != role_t::VCR_NONE) { - auto role_attrs = vc.attrs_for_role(this->h_role); + ident_attrs.ta_attrs |= this->h_attrs.ta_attrs; + if (this->h_role != role_t::VCR_NONE) { + auto role_attrs = vc.attrs_for_role(this->h_role); - ident_attrs.ta_attrs |= role_attrs.ta_attrs; + ident_attrs.ta_attrs |= role_attrs.ta_attrs; + } + sa.emplace_back(lr, 
VC_STYLE.value(ident_attrs)); + } else { + this->annotate_capture(al, lr); } - sa.emplace_back(lr, VC_STYLE.value(ident_attrs)); - } else { - this->annotate_capture(al, lr); } } - } - } + }); } diff --git a/src/highlighter.hh b/src/highlighter.hh index b6752110..25e8087e 100644 --- a/src/highlighter.hh +++ b/src/highlighter.hh @@ -36,15 +36,15 @@ #include #include "optional.hpp" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "text_format.hh" #include "view_curses.hh" struct highlighter { highlighter() = default; - explicit highlighter(std::shared_ptr regex) - : h_regex(std::move(regex)) + explicit highlighter(const std::shared_ptr& regex) + : h_regex(regex) { } @@ -113,7 +113,7 @@ struct highlighter { role_t h_role{role_t::VCR_NONE}; styling::color_unit h_fg{styling::color_unit::make_empty()}; styling::color_unit h_bg{styling::color_unit::make_empty()}; - std::shared_ptr h_regex; + std::shared_ptr h_regex; text_attrs h_attrs; std::set h_text_formats; intern_string_t h_format_name; diff --git a/src/internals/sql-ref.rst b/src/internals/sql-ref.rst index d466d065..da9d2b13 100644 --- a/src/internals/sql-ref.rst +++ b/src/internals/sql-ref.rst @@ -2727,12 +2727,12 @@ regexp_capture(*string*, *pattern*) ;SELECT * FROM regexp_capture('a=1; b=2', '(\w+)=(\d+)') match_index capture_index capture_name capture_count range_start range_stop content - 0 0 3 1 4 a=1 - 0 1 3 1 2 a - 0 2 3 3 4 1 - 1 0 3 6 9 b=2 - 1 1 3 6 7 b - 1 2 3 8 9 2 + 0 0 3 1 4 a=1 + 0 1 3 1 2 a + 0 2 3 3 4 1 + 1 0 3 6 9 b=2 + 1 1 3 6 7 b + 1 2 3 8 9 2 **See Also** :ref:`char`, :ref:`charindex`, :ref:`decode`, :ref:`encode`, :ref:`endswith`, :ref:`extract`, :ref:`group_concat`, :ref:`group_spooky_hash_agg`, :ref:`gunzip`, :ref:`gzip`, :ref:`humanize_duration`, :ref:`humanize_file_size`, :ref:`instr`, :ref:`leftstr`, :ref:`length`, :ref:`logfmt2json`, :ref:`lower`, :ref:`ltrim`, :ref:`padc`, :ref:`padl`, :ref:`padr`, :ref:`printf`, :ref:`proper`, :ref:`regexp_capture_into_json`, 
:ref:`regexp_match`, :ref:`regexp_replace`, :ref:`replace`, :ref:`replicate`, :ref:`reverse`, :ref:`rightstr`, :ref:`rtrim`, :ref:`sparkline`, :ref:`spooky_hash`, :ref:`startswith`, :ref:`strfilter`, :ref:`substr`, :ref:`trim`, :ref:`unicode`, :ref:`upper`, :ref:`xpath` diff --git a/src/lnav.cc b/src/lnav.cc index 856e119c..12eeb592 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -2939,6 +2939,9 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' isc::supervisor root_superv(injector::get()); try { + char pcre2_version[128]; + + pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version); log_info("startup: %s", VCS_PACKAGE_STRING); log_host_info(); log_info("Libraries:"); @@ -2952,7 +2955,7 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' log_info(" libarchive=%d", ARCHIVE_VERSION_NUMBER); #endif log_info(" ncurses=%s", NCURSES_VERSION); - log_info(" pcre=%s", pcre_version()); + log_info(" pcre2=%s", pcre2_version); log_info(" readline=%s", rl_library_version); log_info(" sqlite=%s", sqlite3_version); log_info(" zlib=%s", zlibVersion()); diff --git a/src/lnav_commands.cc b/src/lnav_commands.cc index 5195ca11..516dc10b 100644 --- a/src/lnav_commands.cc +++ b/src/lnav_commands.cc @@ -64,6 +64,7 @@ #include "log_data_helper.hh" #include "log_data_table.hh" #include "log_search_table.hh" +#include "log_search_table_fwd.hh" #include "readline_callbacks.hh" #include "readline_curses.hh" #include "readline_highlighters.hh" @@ -1669,6 +1670,8 @@ com_highlight(exec_context& ec, if (args.empty()) { args.emplace_back("filter"); } else if (args.size() > 1) { + const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern"); + auto* tc = *lnav_data.ld_view_stack.top(); auto& hm = tc->get_highlights(); auto re_frag = remaining_args_frag(cmdline, args); @@ -1678,22 +1681,14 @@ com_highlight(exec_context& ec, } auto compile_res - = pcrepp::shared_from_str(args[1], PCRE_CASELESS | PCRE_UTF8); + = 
lnav::pcre2pp::code::from(args[1], PCRE2_CASELESS); if (compile_res.isErr()) { auto ce = compile_res.unwrapErr(); - auto um = lnav::console::user_message::error( - "invalid regular expression") - .with_reason(ce.ce_msg) - .with_snippets(ec.ec_source); - um.um_snippets.back() - .s_content.append("\n") - .append(re_frag.sf_begin + ce.ce_offset, ' ') - .append("^ "_comment) - .append(lnav::roles::comment(ce.ce_msg)); + auto um = lnav::console::to_user_message(PATTERN_SRC, ce); return Err(um); } - highlighter hl(compile_res.unwrap()); + highlighter hl(compile_res.unwrap().to_shared()); auto hl_attrs = view_colors::singleton().attrs_for_ident(args[1]); if (ec.ec_dry_run) { @@ -1799,6 +1794,8 @@ com_filter(exec_context& ec, return ec.make_error("{} view does not support filtering", lnav_view_strings[tc - lnav_data.ld_views]); } else if (args.size() > 1) { + const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern"); + auto* tss = tc->get_sub_source(); auto& fs = tss->get_filters(); auto re_frag = remaining_args_frag(cmdline, args); @@ -1814,19 +1811,11 @@ com_filter(exec_context& ec, } auto compile_res - = pcrepp::shared_from_str(args[1], PCRE_CASELESS | PCRE_UTF8); + = lnav::pcre2pp::code::from(args[1], PCRE2_CASELESS); if (compile_res.isErr()) { auto ce = compile_res.unwrapErr(); - auto um = lnav::console::user_message::error( - "invalid regular expression") - .with_reason(ce.ce_msg) - .with_snippets(ec.ec_source); - um.um_snippets.back() - .s_content.append("\n") - .append(re_frag.sf_begin + ce.ce_offset, ' ') - .append("^ "_comment) - .append(lnav::roles::comment(ce.ce_msg)); + auto um = lnav::console::to_user_message(PATTERN_SRC, ce); return Err(um); } if (ec.ec_dry_run) { @@ -1837,7 +1826,7 @@ com_filter(exec_context& ec, retval = ""; } else { auto& hm = tc->get_highlights(); - highlighter hl(compile_res.unwrap()); + highlighter hl(compile_res.unwrap().to_shared()); auto role = (args[0] == "filter-out") ? 
role_t::VCR_DIFF_DELETE : role_t::VCR_DIFF_ADD; hl.with_role(role); @@ -1861,7 +1850,7 @@ com_filter(exec_context& ec, return ec.make_error("too many filters"); } auto pf = std::make_shared( - lt, args[1], *filter_index, compile_res.unwrap()); + lt, args[1], *filter_index, compile_res.unwrap().to_shared()); log_debug("%s [%d] %s", args[0].c_str(), @@ -2227,6 +2216,7 @@ com_create_search_table(exec_context& ec, if (args.empty()) { } else if (args.size() >= 2) { + const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern"); string_fragment regex_frag; std::string regex; @@ -2237,28 +2227,19 @@ com_create_search_table(exec_context& ec, regex = lnav_data.ld_views[LNV_LOG].get_current_search(); } - auto re_res = pcrepp::shared_from_str( - regex, log_search_table::pattern_options()); + auto compile_res = lnav::pcre2pp::code::from( + regex, log_search_table_ns::PATTERN_OPTIONS); - if (re_res.isErr()) { - auto re_err = re_res.unwrapErr(); - auto um = lnav::console::user_message::error( - "invalid regular expression") - .with_reason(re_err.ce_msg) - .with_snippets(ec.ec_source); - if (args.size() >= 3) { - um.um_snippets.back() - .s_content.append("\n") - .append(regex_frag.sf_begin + re_err.ce_offset, ' ') - .append("^ "_comment) - .append(lnav::roles::comment(re_err.ce_msg)); - } + if (compile_res.isErr()) { + auto re_err = compile_res.unwrapErr(); + auto um = lnav::console::to_user_message(PATTERN_SRC, re_err) + .with_snippets(ec.ec_source); return Err(um); } - auto re = re_res.unwrap(); + auto re = compile_res.unwrap().to_shared(); auto tab_name = intern_string::lookup(args[1]); - auto lst = std::make_shared(*re, tab_name); + auto lst = std::make_shared(re, tab_name); if (ec.ec_dry_run) { auto* tc = &lnav_data.ld_views[LNV_LOG]; auto& hm = tc->get_highlights(); @@ -4726,7 +4707,7 @@ search_files_prompt(std::vector& args) lnav_data.ld_mode = ln_mode_t::SEARCH_FILES; for (const auto& lf : lnav_data.ld_active_files.fc_files) { - auto path = 
pcrepp::quote(lf->get_unique_path()); + auto path = lnav::pcre2pp::quote(lf->get_unique_path()); lnav_data.ld_rl_view->add_possibility( ln_mode_t::SEARCH_FILES, "*", path); } diff --git a/src/lnav_config.cc b/src/lnav_config.cc index 19f17552..58c0583c 100644 --- a/src/lnav_config.cc +++ b/src/lnav_config.cc @@ -471,9 +471,7 @@ static const struct json_path_container keymap_def_handlers = { "an 'x' followed by the hexadecimal representation of the byte.") .with_obj_provider( [](const yajlpp_provider_context& ypc, key_map* km) { - key_command& retval - = km->km_seq_to_cmd[ypc.ypc_extractor.get_substr( - "key_seq")]; + auto& retval = km->km_seq_to_cmd[ypc.get_substr("key_seq")]; return &retval; }) @@ -492,8 +490,7 @@ static const struct json_path_container keymap_defs_handlers = { .with_obj_provider( [](const yajlpp_provider_context& ypc, _lnav_config* root) { key_map& retval - = root->lc_ui_keymaps[ypc.ypc_extractor.get_substr( - "keymap_name")]; + = root->lc_ui_keymaps[ypc.get_substr("keymap_name")]; return &retval; }) .with_path_provider<_lnav_config>( @@ -845,7 +842,7 @@ static const struct json_path_container theme_log_level_styles_handlers = { .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { auto& sc = root->lt_level_styles[string2level( - ypc.ypc_extractor.get_substr_i("level").get())]; + ypc.get_substr_i("level").get())]; if (ypc.ypc_parse_context != nullptr && sc.pp_path.empty()) { sc.pp_path = ypc.ypc_parse_context->get_full_path(); @@ -877,15 +874,14 @@ static const struct json_path_container highlighter_handlers = { static const struct json_path_container theme_highlights_handlers = { yajlpp::pattern_property_handler("(?[\\w\\-]+)") - .with_obj_provider( - [](const yajlpp_provider_context& ypc, lnav_theme* root) { - highlighter_config& hc - = root->lt_highlights[ypc.ypc_extractor - .get_substr_i("highlight_name") - .get()]; - - return &hc; - }) + .with_obj_provider([](const yajlpp_provider_context& ypc, + lnav_theme* root) { 
+ highlighter_config& hc + = root->lt_highlights[ypc.get_substr_i("highlight_name").get()]; + + return &hc; + }) .with_path_provider( [](struct lnav_theme* cfg, std::vector& paths_out) { for (const auto& pair : cfg->lt_highlights) { @@ -940,8 +936,7 @@ static const struct json_path_container theme_defs_handlers = { .with_obj_provider( [](const yajlpp_provider_context& ypc, _lnav_config* root) { lnav_theme& lt - = root->lc_ui_theme_defs[ypc.ypc_extractor.get_substr( - "theme_name")]; + = root->lc_ui_theme_defs[ypc.get_substr("theme_name")]; return < }) @@ -953,8 +948,7 @@ static const struct json_path_container theme_defs_handlers = { }) .with_obj_deleter( +[](const yajlpp_provider_context& ypc, _lnav_config* root) { - root->lc_ui_theme_defs.erase( - ypc.ypc_extractor.get_substr("theme_name")); + root->lc_ui_theme_defs.erase(ypc.get_substr("theme_name")); }) .with_children(theme_def_handlers), }; @@ -1137,9 +1131,8 @@ static const struct json_path_container sysclip_impls_handlers = { .with_obj_provider( [](const yajlpp_provider_context& ypc, _lnav_config* root) { auto& retval - = root->lc_sysclip - .c_clipboard_impls[ypc.ypc_extractor.get_substr( - "clipboard_impl_name")]; + = root->lc_sysclip.c_clipboard_impls[ypc.get_substr( + "clipboard_impl_name")]; return &retval; }) .with_path_provider<_lnav_config>( @@ -1178,8 +1171,7 @@ static const struct json_path_container log_source_watch_handlers = { _lnav_config>( [](const yajlpp_provider_context& ypc, _lnav_config* root) { auto& retval = root->lc_log_source - .c_watch_exprs[ypc.ypc_extractor.get_substr( - "watch_name")]; + .c_watch_exprs[ypc.get_substr("watch_name")]; return &retval; }) .with_path_provider<_lnav_config>( @@ -1191,7 +1183,7 @@ static const struct json_path_container log_source_watch_handlers = { .with_obj_deleter( +[](const yajlpp_provider_context& ypc, _lnav_config* root) { root->lc_log_source.c_watch_exprs.erase( - ypc.ypc_extractor.get_substr("watch_name")); + ypc.get_substr("watch_name")); }) 
.with_children(log_source_watch_expr_handlers), }; @@ -1525,14 +1517,12 @@ reset_config(const std::string& path) } if (jph != nullptr && jph->jph_children && jph->jph_obj_deleter) { - pcre_context_static<30> pc; auto key_start = ypc.ypc_path_index_stack.back(); - pcre_input pi(&ypc.ypc_path[key_start + 1], - 0, - ypc.ypc_path.size() - key_start - 2); - yajlpp_provider_context provider_ctx{{pc, pi}, - static_cast(-1)}; - jph->jph_regex->match(pc, pi); + auto path_frag = string_fragment::from_byte_range( + ypc.ypc_path.data(), key_start + 1, ypc.ypc_path.size()); + auto md = jph->jph_regex->create_match_data(); + yajlpp_provider_context provider_ctx{&md, static_cast(-1)}; + jph->jph_regex->capture_from(path_frag).into(md).matches(); jph->jph_obj_deleter(provider_ctx, ypc.ypc_obj_stack.top()); } diff --git a/src/lnav_util.cc b/src/lnav_util.cc index 2ee305da..e5540d95 100644 --- a/src/lnav_util.cc +++ b/src/lnav_util.cc @@ -144,7 +144,7 @@ to_json(yajlpp_gen& gen, const attr_line_t& al) }, [&](const intern_string_t& str) { elem_map.gen(str); }, [&](const std::string& str) { elem_map.gen(str); }, - [&](const text_attrs& ta) { elem_map.gen(""); }, + [&](const text_attrs& ta) { elem_map.gen(ta.ta_attrs); }, [&](const std::shared_ptr& lf) { elem_map.gen(""); }, @@ -234,7 +234,10 @@ read_string_attr_type(yajlpp_parse_context* ypc, sa->sa_type = &VC_ROLE; } else if (type == "preformatted") { sa->sa_type = &SA_PREFORMATTED; + } else if (type == "style") { + sa->sa_type = &VC_STYLE; } else { + log_error("unhandled string_attr type: %s", type.c_str()); ensure(false); } return 1; @@ -247,6 +250,10 @@ read_string_attr_int_value(yajlpp_parse_context* ypc, long long in) if (sa->sa_type == &VC_ROLE) { sa->sa_value = static_cast(in); + } else if (sa->sa_type == &VC_STYLE) { + sa->sa_value = text_attrs{ + static_cast(in), + }; } return 1; } diff --git a/src/log_data_helper.cc b/src/log_data_helper.cc index 8d5af7dd..e976d8fa 100644 --- a/src/log_data_helper.cc +++ 
b/src/log_data_helper.cc @@ -85,7 +85,8 @@ log_data_helper::parse_line(content_line_t line, bool allow_middle) body.lr_end = this->ldh_line_values.lvv_sbr.length(); } this->ldh_scanner = std::make_unique( - this->ldh_line_values.lvv_sbr, body.lr_start, body.lr_end); + this->ldh_line_values.lvv_sbr.to_string_fragment().sub_range( + body.lr_start, body.lr_end)); this->ldh_parser = std::make_unique(this->ldh_scanner.get()); this->ldh_msg_format.clear(); diff --git a/src/log_data_table.cc b/src/log_data_table.cc index 8a2bcc97..a664c9d9 100644 --- a/src/log_data_table.cc +++ b/src/log_data_table.cc @@ -39,7 +39,7 @@ log_data_table::log_data_table(logfile_sub_source& lss, : log_vtab_impl(table_name), ldt_log_source(lss), ldt_template_line(template_line) { - std::shared_ptr lf = lss.find(template_line); + auto lf = lss.find(template_line); auto format = lf->get_format(); this->vi_supports_indexes = false; @@ -53,7 +53,7 @@ log_data_table::get_columns_int() auto& cols = this->ldt_cols; auto& metas = this->ldt_value_metas; content_line_t cl_copy = this->ldt_template_line; - std::shared_ptr lf = this->ldt_log_source.find(cl_copy); + auto lf = this->ldt_log_source.find(cl_copy); struct line_range body; string_attrs_t sa; logline_value_vector line_values; diff --git a/src/log_format.cc b/src/log_format.cc index 31de1bd6..15ef8945 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -297,45 +297,38 @@ log_format::next_format(pcre_format* fmt, int& index, int& locked_index) const char* log_format::log_scanf(uint32_t line_number, - const char* line, - size_t len, + string_fragment line, pcre_format* fmt, const char* time_fmt[], struct exttm* tm_out, struct timeval* tv_out, - ...) 
+ + string_fragment* ts_out, + nonstd::optional* level_out) { int curr_fmt = -1; const char* retval = nullptr; bool done = false; - pcre_input pi(line, 0, len); - pcre_context_static<128> pc; - va_list args; int pat_index = this->last_pattern_index(); while (!done && next_format(fmt, curr_fmt, pat_index)) { - va_start(args, tv_out); - - pi.reset(line, 0, len); - if (!fmt[curr_fmt].pcre.match(pc, pi, PCRE_NO_UTF8_CHECK)) { + auto md = fmt[curr_fmt].pcre->create_match_data(); + auto match_res = fmt[curr_fmt] + .pcre->capture_from(line) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { retval = nullptr; } else { - pcre_context::capture_t* ts = pc[fmt[curr_fmt].pf_timestamp_index]; - - for (auto& iter : pc) { - pcre_context::capture_t* cap - = va_arg(args, pcre_context::capture_t*); - - *cap = iter; - } + auto ts = md[fmt[curr_fmt].pf_timestamp_index]; - retval = this->lf_date_time.scan(pi.get_substr_start(ts), - ts->length(), - nullptr, - tm_out, - *tv_out); + retval = this->lf_date_time.scan( + ts->data(), ts->length(), nullptr, tm_out, *tv_out); if (retval) { + *ts_out = ts.value(); + *level_out = md[2]; if (curr_fmt != pat_index) { uint32_t lock_line; @@ -351,8 +344,6 @@ log_format::log_scanf(uint32_t line_number, done = true; } } - - va_end(args); } return retval; @@ -495,11 +486,9 @@ read_json_int(yajlpp_parse_context* ypc, long long val) snprintf(level_buf, sizeof(level_buf), "%lld", val); - pcre_input pi(level_buf); - pcre_context::capture_t level_cap = {0, (int) strlen(level_buf)}; - jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( - pi, &level_cap, jlu->jlu_batch_context)); + string_fragment::from_c_str(level_buf), + jlu->jlu_batch_context)); } else { std::vector>::iterator iter; @@ -589,7 +578,7 @@ json_array_end(void* ctx) } static struct json_path_container json_log_handlers = { - json_path_handler(pcrepp("\\w+")) + yajlpp::pattern_property_handler("\\w+") .add_cb(read_json_null) .add_cb(read_json_bool) 
.add_cb(read_json_int) @@ -664,13 +653,14 @@ rewrite_json_double(yajlpp_parse_context* ypc, double val) return 1; } -static struct json_path_container json_log_rewrite_handlers - = {json_path_handler(pcrepp("\\w+")) - .add_cb(rewrite_json_null) - .add_cb(rewrite_json_bool) - .add_cb(rewrite_json_int) - .add_cb(rewrite_json_double) - .add_cb(rewrite_json_field)}; +static struct json_path_container json_log_rewrite_handlers = { + yajlpp::pattern_property_handler("\\w+") + .add_cb(rewrite_json_null) + .add_cb(rewrite_json_bool) + .add_cb(rewrite_json_int) + .add_cb(rewrite_json_double) + .add_cb(rewrite_json_field), +}; bool external_log_format::scan_for_partial(shared_buffer_ref& sbr, @@ -680,11 +670,9 @@ external_log_format::scan_for_partial(shared_buffer_ref& sbr, return false; } - auto& pat = this->elf_pattern_order[this->last_pattern_index()]; - pcre_input pi(sbr.get_data(), 0, sbr.length()); - + const auto& pat = this->elf_pattern_order[this->last_pattern_index()]; if (!this->lf_multiline) { - len_out = pat->p_pcre->match_partial(pi); + len_out = pat->p_pcre.value->match_partial(sbr.to_string_fragment()); return true; } @@ -694,7 +682,7 @@ external_log_format::scan_for_partial(shared_buffer_ref& sbr, return false; } - len_out = pat->p_pcre->match_partial(pi); + len_out = pat->p_pcre.value->match_partial(sbr.to_string_fragment()); return (int) len_out > pat->p_timestamp_end; } @@ -809,20 +797,24 @@ external_log_format::scan(logfile& lf, return log_format::SCAN_MATCH; } - pcre_input pi(sbr.get_data(), 0, sbr.length()); - pcre_context_static<128> pc; int curr_fmt = -1, orig_lock = this->last_pattern_index(); int pat_index = orig_lock; + auto line_sf = sbr.to_string_fragment(); while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) { auto* fpat = this->elf_pattern_order[curr_fmt].get(); - auto* pat = fpat->p_pcre.get(); + auto* pat = fpat->p_pcre.value.get(); if (fpat->p_module_format) { continue; } - if (!pat->match(pc, pi, PCRE_NO_UTF8_CHECK)) { + 
auto md = pat->create_match_data(); + auto match_res = pat->capture_from(line_sf) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { if (!this->lf_pattern_locks.empty() && pat_index != -1) { curr_fmt = -1; pat_index = -1; @@ -830,41 +822,39 @@ external_log_format::scan(logfile& lf, continue; } - pcre_context::capture_t* ts = pc[fpat->p_timestamp_field_index]; - pcre_context::capture_t* time_cap = pc[fpat->p_time_field_index]; - pcre_context::capture_t* level_cap = pc[fpat->p_level_field_index]; - pcre_context::capture_t* mod_cap = pc[fpat->p_module_field_index]; - pcre_context::capture_t* opid_cap = pc[fpat->p_opid_field_index]; - pcre_context::capture_t* body_cap = pc[fpat->p_body_field_index]; - const char* ts_str = pi.get_substr_start(ts); - auto ts_str_len = ts->length(); + auto ts = md[fpat->p_timestamp_field_index]; + auto time_cap = md[fpat->p_time_field_index]; + auto level_cap = md[fpat->p_level_field_index]; + auto mod_cap = md[fpat->p_module_field_index]; + auto opid_cap = md[fpat->p_opid_field_index]; + auto body_cap = md[fpat->p_body_field_index]; const char* last; struct exttm log_time_tm; struct timeval log_tv; uint8_t mod_index = 0, opid = 0; char combined_datetime_buf[512]; - if (time_cap != nullptr) { - ts_str_len = snprintf(combined_datetime_buf, - sizeof(combined_datetime_buf), - "%.*sT%.*s", - ts->length(), - ts_str, - time_cap->length(), - pi.get_substr_start(time_cap)); - ts_str = combined_datetime_buf; + if (ts && time_cap) { + auto ts_str_len = snprintf(combined_datetime_buf, + sizeof(combined_datetime_buf), + "%.*sT%.*s", + ts->length(), + ts->data(), + time_cap->length(), + time_cap->data()); + ts = string_fragment::from_bytes(combined_datetime_buf, ts_str_len); } - if ((last = this->lf_date_time.scan(ts_str, - ts_str_len, + if ((last = this->lf_date_time.scan(ts->data(), + ts->length(), this->get_timestamp_formats(), &log_time_tm, log_tv)) == nullptr) { this->lf_date_time.unlock(); - if ((last = 
this->lf_date_time.scan(ts_str, - ts_str_len, + if ((last = this->lf_date_time.scan(ts->data(), + ts->length(), this->get_timestamp_formats(), &log_time_tm, log_tv)) @@ -874,7 +864,8 @@ external_log_format::scan(logfile& lf, } } - auto level = this->convert_level(pi, level_cap, &sbc); + auto level = this->convert_level( + level_cap.value_or(string_fragment::invalid()), &sbc); this->lf_timestamp_flags = log_time_tm.et_flags; @@ -885,29 +876,27 @@ external_log_format::scan(logfile& lf, this->check_for_new_year(dst, log_time_tm, log_tv); } - if (opid_cap != nullptr && !opid_cap->empty()) { - auto opid_sf = pi.get_string_fragment(opid_cap); + if (opid_cap && !opid_cap->empty()) { { - auto opid_iter = sbc.sbc_opids.find(opid_sf); + auto opid_iter = sbc.sbc_opids.find(opid_cap.value()); if (opid_iter == sbc.sbc_opids.end()) { - auto opid_copy = opid_sf.to_owned(sbc.sbc_allocator); + auto opid_copy = opid_cap->to_owned(sbc.sbc_allocator); auto otr = opid_time_range{log_tv, log_tv}; sbc.sbc_opids.emplace(opid_copy, otr); } else { opid_iter->second.otr_end = log_tv; } } - opid = hash_str(pi.get_substr_start(opid_cap), opid_cap->length()); + opid = hash_str(opid_cap->data(), opid_cap->length()); } - if (mod_cap != nullptr) { - intern_string_t mod_name = intern_string::lookup( - pi.get_substr_start(mod_cap), mod_cap->length()); + if (mod_cap) { + intern_string_t mod_name = intern_string::lookup(mod_cap.value()); auto mod_iter = MODULE_FORMATS.find(mod_name); if (mod_iter == MODULE_FORMATS.end()) { - mod_index = module_scan(pi, body_cap, mod_name); + mod_index = this->module_scan(body_cap.value(), mod_name); mod_iter = MODULE_FORMATS.find(mod_name); } else if (mod_iter->second.mf_mod_format) { mod_index = mod_iter->second.mf_mod_format->lf_mod_index; @@ -918,24 +907,25 @@ external_log_format::scan(logfile& lf, mod_iter->second.mf_mod_format); if (mod_elf) { - pcre_context_static<128> mod_pc; shared_buffer_ref body_ref; - body_cap->ltrim(sbr.get_data()); + body_cap->trim(); - 
pcre_input mod_pi( - pi.get_substr_start(body_cap), 0, body_cap->length()); int mod_pat_index = mod_elf->last_pattern_index(); auto& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index]; - - if (mod_pat.p_pcre->match( - mod_pc, mod_pi, PCRE_NO_UTF8_CHECK)) - { - auto* mod_level_cap - = mod_pc[mod_pat.p_level_field_index]; + auto mod_md = mod_pat.p_pcre.value->create_match_data(); + auto match_res + = mod_pat.p_pcre.value->capture_from(body_cap.value()) + .into(mod_md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + auto mod_level_cap + = mod_md[mod_pat.p_level_field_index]; level = mod_elf->convert_level( - mod_pi, mod_level_cap, &sbc); + mod_level_cap.value_or(string_fragment::invalid()), + &sbc); } } } @@ -944,17 +934,17 @@ external_log_format::scan(logfile& lf, for (auto value_index : fpat->p_numeric_value_indexes) { const indexed_value_def& ivd = fpat->p_value_by_index[value_index]; const value_def& vd = *ivd.ivd_value_def; - pcre_context::capture_t* num_cap = pc[ivd.ivd_index]; + auto num_cap = md[ivd.ivd_index]; - if (num_cap != nullptr && num_cap->is_valid()) { + if (num_cap && num_cap->is_valid()) { const struct scaling_factor* scaling = nullptr; if (ivd.ivd_unit_field_index >= 0) { - auto unit_cap = pc[ivd.ivd_unit_field_index]; + auto unit_cap = md[ivd.ivd_unit_field_index]; - if (unit_cap != nullptr && unit_cap->is_valid()) { - intern_string_t unit_val = intern_string::lookup( - pi.get_substr_start(unit_cap), unit_cap->length()); + if (unit_cap && unit_cap->is_valid()) { + intern_string_t unit_val + = intern_string::lookup(unit_cap.value()); auto unit_iter = vd.vd_unit_scaling.find(unit_val); if (unit_iter != vd.vd_unit_scaling.end()) { @@ -966,7 +956,7 @@ external_log_format::scan(logfile& lf, } auto scan_res - = scn::scan_value(pi.to_string_view(num_cap)); + = scn::scan_value(num_cap->to_string_view()); if (scan_res) { auto dvalue = scan_res.value(); if (scaling != nullptr) { @@ -1012,15 +1002,12 @@ 
external_log_format::scan(logfile& lf, } uint8_t -external_log_format::module_scan(const pcre_input& pi, - pcre_context::capture_t* body_cap, +external_log_format::module_scan(string_fragment body_cap, const intern_string_t& mod_name) { uint8_t mod_index; - body_cap->ltrim(pi.get_string()); - pcre_input body_pi(pi.get_substr_start(body_cap), 0, body_cap->length()); + body_cap.trim(); auto& ext_fmts = GRAPH_ORDERED_FORMATS; - pcre_context_static<128> pc; module_format mf; for (auto& elf : ext_fmts) { @@ -1034,7 +1021,12 @@ external_log_format::module_scan(const pcre_input& pi, continue; } - if (!pat->match(pc, body_pi)) { + auto md = pat.value->create_match_data(); + auto match_res = pat.value->capture_from(body_cap) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { continue; } @@ -1063,10 +1055,7 @@ external_log_format::annotate(uint64_t line_number, bool annotate_module) const { auto& line = values.lvv_sbr; - pcre_context_static<128> pc; - pcre_input pi(line.get_data(), 0, line.length()); struct line_range lr; - pcre_context::capture_t *cap, *body_cap, *module_cap = nullptr; if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) { values = this->jlf_line_values; @@ -1083,14 +1072,20 @@ external_log_format::annotate(uint64_t line_number, int pat_index = this->pattern_index_for_line(line_number); auto& pat = *this->elf_pattern_order[pat_index]; - sa.reserve(pat.p_pcre->get_capture_count()); - if (!pat.p_pcre->match(pc, pi, PCRE_NO_UTF8_CHECK)) { + sa.reserve(pat.p_pcre.value->get_capture_count()); + auto md = pat.p_pcre.value->create_match_data(); + auto match_res = pat.p_pcre.value->capture_from(line.to_string_fragment()) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { // A continued line still needs a body. 
lr.lr_start = 0; lr.lr_end = line.length(); sa.emplace_back(lr, SA_BODY.value()); if (!this->lf_multiline) { - auto len = pat.p_pcre->match_partial(pi); + auto len + = pat.p_pcre.value->match_partial(line.to_string_fragment()); sa.emplace_back( line_range{(int) len, -1}, SA_INVALID.value("Log line does not match any pattern")); @@ -1098,45 +1093,43 @@ external_log_format::annotate(uint64_t line_number, return; } + nonstd::optional module_cap; if (!pat.p_module_format) { - cap = pc[pat.p_timestamp_field_index]; - if (cap->is_valid()) { - lr.lr_start = cap->c_begin; - lr.lr_end = cap->c_end; - sa.emplace_back(lr, logline::L_TIMESTAMP.value()); + auto ts_cap = md[pat.p_timestamp_field_index]; + if (ts_cap) { + sa.emplace_back(to_line_range(ts_cap.value()), + logline::L_TIMESTAMP.value()); } if (pat.p_module_field_index != -1) { - module_cap = pc[pat.p_module_field_index]; - if (module_cap != nullptr && module_cap->is_valid()) { - lr.lr_start = module_cap->c_begin; - lr.lr_end = module_cap->c_end; - sa.emplace_back(lr, logline::L_MODULE.value()); + module_cap = md[pat.p_module_field_index]; + if (module_cap) { + sa.emplace_back(to_line_range(module_cap.value()), + logline::L_MODULE.value()); } } - cap = pc[pat.p_opid_field_index]; - if (cap != nullptr && cap->is_valid()) { - lr.lr_start = cap->c_begin; - lr.lr_end = cap->c_end; - sa.emplace_back(lr, logline::L_OPID.value()); + auto opid_cap = md[pat.p_opid_field_index]; + if (opid_cap) { + sa.emplace_back(to_line_range(opid_cap.value()), + logline::L_OPID.value()); } } - body_cap = pc[pat.p_body_field_index]; + auto body_cap = md[pat.p_body_field_index]; for (size_t lpc = 0; lpc < pat.p_value_by_index.size(); lpc++) { const indexed_value_def& ivd = pat.p_value_by_index[lpc]; const struct scaling_factor* scaling = nullptr; - auto* cap = pc[ivd.ivd_index]; + auto cap = md[ivd.ivd_index]; const auto& vd = *ivd.ivd_value_def; if (ivd.ivd_unit_field_index >= 0) { - auto* unit_cap = pc[ivd.ivd_unit_field_index]; + auto 
unit_cap = md[ivd.ivd_unit_field_index]; - if (unit_cap != nullptr && unit_cap->c_begin != -1) { - intern_string_t unit_val = intern_string::lookup( - pi.get_substr_start(unit_cap), unit_cap->length()); + if (unit_cap) { + intern_string_t unit_val + = intern_string::lookup(unit_cap.value()); auto unit_iter = vd.vd_unit_scaling.find(unit_val); if (unit_iter != vd.vd_unit_scaling.end()) { const struct scaling_factor& sf = unit_iter->second; @@ -1146,9 +1139,9 @@ external_log_format::annotate(uint64_t line_number, } } - if (cap->is_valid()) { + if (cap) { values.lvv_values.emplace_back( - vd.vd_meta, line, line_range{cap->c_begin, cap->c_end}); + vd.vd_meta, line, to_line_range(cap.value())); values.lvv_values.back().apply_scaling(scaling); } else { values.lvv_values.emplace_back(vd.vd_meta); @@ -1159,11 +1152,8 @@ external_log_format::annotate(uint64_t line_number, } bool did_mod_annotate_body = false; - if (annotate_module && module_cap != nullptr && body_cap != nullptr - && body_cap->is_valid()) - { - intern_string_t mod_name = intern_string::lookup( - pi.get_substr_start(module_cap), module_cap->length()); + if (annotate_module && module_cap && body_cap && body_cap->is_valid()) { + intern_string_t mod_name = intern_string::lookup(module_cap.value()); auto mod_iter = MODULE_FORMATS.find(mod_name); if (mod_iter != MODULE_FORMATS.end() @@ -1171,9 +1161,9 @@ external_log_format::annotate(uint64_t line_number, { auto& mf = mod_iter->second; - body_cap->ltrim(line.get_data()); + body_cap->trim(); auto narrow_res - = line.narrow(body_cap->c_begin, body_cap->length()); + = line.narrow(body_cap->sf_begin, body_cap->length()); auto pre_mod_values_size = values.lvv_values.size(); auto pre_mod_sa_size = sa.size(); mf.mf_mod_format->annotate(line_number, sa, values, false); @@ -1181,19 +1171,18 @@ external_log_format::annotate(uint64_t line_number, lpc < values.lvv_values.size(); lpc++) { - values.lvv_values[lpc].lv_origin.shift(0, body_cap->c_begin); + 
values.lvv_values[lpc].lv_origin.shift(0, body_cap->sf_begin); } for (size_t lpc = pre_mod_sa_size; lpc < sa.size(); lpc++) { - sa[lpc].sa_range.shift(0, body_cap->c_begin); + sa[lpc].sa_range.shift(0, body_cap->sf_begin); } line.widen(narrow_res); did_mod_annotate_body = true; } } if (!did_mod_annotate_body) { - if (body_cap != nullptr && body_cap->is_valid()) { - lr.lr_start = body_cap->c_begin; - lr.lr_end = body_cap->c_end; + if (body_cap && body_cap->is_valid()) { + lr = to_line_range(body_cap.value()); } else { lr.lr_start = line.length(); lr.lr_end = line.length(); @@ -1282,26 +1271,21 @@ read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len) jlu->jlu_format->lf_timestamp_flags = tm_out.et_flags & ~ETF_MACHINE_ORIENTED; jlu->jlu_base_line->set_time(tv_out); - } else if (!jlu->jlu_format->elf_level_pointer.empty()) { - pcre_context_static<30> pc; - pcre_input pi(field_name); - - if (jlu->jlu_format->elf_level_pointer.match( - pc, pi, PCRE_NO_UTF8_CHECK)) + } else if (jlu->jlu_format->elf_level_pointer.value != nullptr) { + if (jlu->jlu_format->elf_level_pointer.value + ->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) { - pcre_input pi_level((const char*) str, 0, len); - pcre_context::capture_t level_cap = {0, (int) len}; - jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( - pi_level, &level_cap, jlu->jlu_batch_context)); + string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); } - } else if (jlu->jlu_format->elf_level_field == field_name) { - pcre_input pi((const char*) str, 0, len); - pcre_context::capture_t level_cap = {0, (int) len}; - + } + if (jlu->jlu_format->elf_level_field == field_name) { jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( - pi, &level_cap, jlu->jlu_batch_context)); - } else if (jlu->jlu_format->elf_opid_field == field_name) { + string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); + } + if 
(jlu->jlu_format->elf_opid_field == field_name) { uint8_t opid = hash_str((const char*) str, len); jlu->jlu_base_line->set_opid(opid); } @@ -1780,7 +1764,7 @@ external_log_format::build(std::vector& errors) { pattern& pat = *iter->second; - if (pat.p_pcre == nullptr) { + if (pat.p_pcre.value == nullptr) { continue; } @@ -1788,30 +1772,27 @@ external_log_format::build(std::vector& errors) this->elf_has_module_format = true; } - for (auto name_iter = pat.p_pcre->named_begin(); - name_iter != pat.p_pcre->named_end(); - ++name_iter) - { + for (auto named_cap : pat.p_pcre.value->get_named_captures()) { const intern_string_t name - = intern_string::lookup(name_iter->pnc_name, -1); + = intern_string::lookup(named_cap.get_name()); if (name == this->lf_timestamp_field) { - pat.p_timestamp_field_index = name_iter->index(); + pat.p_timestamp_field_index = named_cap.get_index(); } if (name == this->lf_time_field) { - pat.p_time_field_index = name_iter->index(); + pat.p_time_field_index = named_cap.get_index(); } if (name == this->elf_level_field) { - pat.p_level_field_index = name_iter->index(); + pat.p_level_field_index = named_cap.get_index(); } if (name == this->elf_module_id_field) { - pat.p_module_field_index = name_iter->index(); + pat.p_module_field_index = named_cap.get_index(); } if (name == this->elf_opid_field) { - pat.p_opid_field_index = name_iter->index(); + pat.p_opid_field_index = named_cap.get_index(); } if (name == this->elf_body_field) { - pat.p_body_field_index = name_iter->index(); + pat.p_body_field_index = named_cap.get_index(); } auto value_iter = this->elf_value_defs.find(name); @@ -1819,10 +1800,10 @@ external_log_format::build(std::vector& errors) auto vd = value_iter->second; indexed_value_def ivd; - ivd.ivd_index = name_iter->index(); + ivd.ivd_index = named_cap.get_index(); if (!vd->vd_unit_field.empty()) { ivd.ivd_unit_field_index - = pat.p_pcre->name_index(vd->vd_unit_field.get()); + = pat.p_pcre.value->name_index(vd->vd_unit_field.get()); } 
else { ivd.ivd_unit_field_index = -1; } @@ -1927,18 +1908,17 @@ external_log_format::build(std::vector& errors) bool found_in_pattern = false; for (const auto& pat : this->elf_patterns) { - auto cap_index = pat.second->p_pcre->name_index( + auto cap_index = pat.second->p_pcre.value->name_index( vd->vd_meta.lvm_name.get()); if (cap_index >= 0) { found_in_pattern = true; break; } - for (auto name_iter = pat.second->p_pcre->named_begin(); - name_iter != pat.second->p_pcre->named_end(); - ++name_iter) + for (auto named_cap : + pat.second->p_pcre.value->get_named_captures()) { - available_captures.insert(name_iter->pnc_name); + available_captures.insert(named_cap.get_name().to_string()); } } if (!found_in_pattern) { @@ -1986,7 +1966,9 @@ external_log_format::build(std::vector& errors) for (const auto& td_pair : this->lf_tag_defs) { const auto& td = td_pair.second; - if (td->ftd_pattern == nullptr || td->ftd_pattern->empty()) { + if (td->ftd_pattern.value == nullptr + || td->ftd_pattern.value->get_pattern().empty()) + { errors.emplace_back( lnav::console::user_message::error( attr_line_t("invalid tag definition ") @@ -2017,8 +1999,6 @@ external_log_format::build(std::vector& errors) for (auto& elf_sample : this->elf_samples) { auto sample_lines = string_fragment(elf_sample.s_line.pp_value).split_lines(); - pcre_context_static<128> pc; - pcre_input pi(sample_lines[0]); bool found = false; for (auto pat_iter = this->elf_pattern_order.begin(); @@ -2027,11 +2007,16 @@ external_log_format::build(std::vector& errors) { auto& pat = *(*pat_iter); - if (!pat.p_pcre) { + if (!pat.p_pcre.value) { continue; } - if (!pat.p_pcre->match(pc, pi)) { + auto md = pat.p_pcre.value->create_match_data(); + auto match_res = pat.p_pcre.value->capture_from(sample_lines[0]) + .into(md) + .matches() + .ignore_error(); + if (!match_res) { continue; } found = true; @@ -2040,23 +2025,21 @@ external_log_format::build(std::vector& errors) continue; } - if 
(pat.p_pcre->name_index(this->lf_timestamp_field.to_string()) + if (pat.p_pcre.value->name_index(this->lf_timestamp_field.get()) < 0) { attr_line_t notes; bool first_note = true; - if (pat.p_pcre->p_named_count > 0) { + if (pat.p_pcre.value->get_capture_count() > 0) { notes.append("the following captures are available:\n "); } - for (auto name_iter = pat.p_pcre->named_begin(); - name_iter != pat.p_pcre->named_end(); - ++name_iter) - { + for (auto named_cap : pat.p_pcre.value->get_named_captures()) { if (!first_note) { notes.append(", "); } - notes.append(lnav::roles::symbol(name_iter->pnc_name)); + notes.append( + lnav::roles::symbol(named_cap.get_name().to_string())); first_note = false; } errors.emplace_back( @@ -2075,21 +2058,23 @@ external_log_format::build(std::vector& errors) continue; } - const auto* ts_cap = pc[this->lf_timestamp_field.get()]; - const auto* level_cap = pc[pat.p_level_field_index]; - const char* ts = pi.get_substr_start(ts_cap); - auto ts_frag = pi.get_string_fragment(ts_cap); - ssize_t ts_len = pc[this->lf_timestamp_field.get()]->length(); + const auto ts_cap = md[pat.p_timestamp_field_index]; + const auto level_cap = md[pat.p_level_field_index]; const char* const* custom_formats = this->get_timestamp_formats(); date_time_scanner dts; struct timeval tv; struct exttm tm; - if (ts_cap->c_begin == 0) { - pat.p_timestamp_end = ts_cap->c_end; + if (ts_cap && ts_cap->sf_begin == 0) { + pat.p_timestamp_end = ts_cap->sf_end; } - if (ts_len == -1 - || dts.scan(ts, ts_len, custom_formats, &tm, tv) == nullptr) + if (ts_cap + && dts.scan(ts_cap->data(), + ts_cap->length(), + custom_formats, + &tm, + tv) + == nullptr) { attr_line_t notes; @@ -2100,9 +2085,10 @@ external_log_format::build(std::vector& errors) { off_t off = 0; - PTIMEC_FORMATS[lpc].pf_func(&tm, ts, off, ts_len); + PTIMEC_FORMATS[lpc].pf_func( + &tm, ts_cap->data(), off, ts_cap->length()); notes.append("\n ") - .append(ts_frag) + .append(ts_cap.value()) .append("\n") .append(2 + off, ' ') 
.append("^ "_snippet_border) @@ -2115,9 +2101,13 @@ external_log_format::build(std::vector& errors) for (int lpc = 0; custom_formats[lpc] != nullptr; lpc++) { off_t off = 0; - ptime_fmt(custom_formats[lpc], &tm, ts, off, ts_len); + ptime_fmt(custom_formats[lpc], + &tm, + ts_cap->data(), + off, + ts_cap->length()); notes.append("\n ") - .append(ts_frag) + .append(ts_cap.value()) .append("\n") .append(2 + off, ' ') .append("^ "_snippet_border) @@ -2132,7 +2122,7 @@ external_log_format::build(std::vector& errors) attr_line_t("invalid sample log message: ") .append(lnav::to_json(elf_sample.s_line.pp_value))) .with_reason(attr_line_t("unrecognized timestamp -- ") - .append(ts_frag)) + .append(ts_cap.value())) .with_snippet(elf_sample.s_line.to_snippet()) .with_note(notes) .with_help(attr_line_t("If the timestamp format is not " @@ -2142,7 +2132,8 @@ external_log_format::build(std::vector& errors) .append(" property"))); } - log_level_t level = this->convert_level(pi, level_cap, nullptr); + log_level_t level = this->convert_level( + level_cap.value_or(string_fragment::invalid()), nullptr); if (elf_sample.s_level != LEVEL_UNKNOWN && elf_sample.s_level != level) @@ -2153,8 +2144,7 @@ external_log_format::build(std::vector& errors) .append(lnav::roles::symbol(pat.p_name.to_string())) .append("\n") .append("captured level = ") - .append_quoted( - pi.get_string_fragment(level_cap).to_string()); + .append_quoted(level_cap->to_string()); errors.emplace_back( lnav::console::user_message::error( attr_line_t("invalid sample log message: ") @@ -2171,11 +2161,13 @@ external_log_format::build(std::vector& errors) } { - pcre_context_static<128> pc_full; - pcre_input pi_full(elf_sample.s_line.pp_value); - - if (!pat.p_pcre->match(pc_full, pi_full)) { - attr_line_t regex_al = pat.p_pcre->get_pattern(); + auto full_match_res + = pat.p_pcre.value->capture_from(elf_sample.s_line.pp_value) + .into(md) + .matches() + .ignore_error(); + if (!full_match_res) { + attr_line_t regex_al = 
pat.p_pcre.value->get_pattern(); lnav::snippets::regex_highlighter( regex_al, -1, line_range{0, (int) regex_al.length()}); errors.emplace_back( @@ -2194,14 +2186,14 @@ external_log_format::build(std::vector& errors) .with_help( attr_line_t("use ").append_quoted(".*").append( " to match new-lines"))); - } else if (static_cast(pc_full.all()->length()) + } else if (static_cast(full_match_res->f_all.length()) != elf_sample.s_line.pp_value.length()) { - attr_line_t regex_al = pat.p_pcre->get_pattern(); + attr_line_t regex_al = pat.p_pcre.value->get_pattern(); lnav::snippets::regex_highlighter( regex_al, -1, line_range{0, (int) regex_al.length()}); auto match_length - = static_cast(pc_full.all()->length()); + = static_cast(full_match_res->f_all.length()); attr_line_t sample_al = elf_sample.s_line.pp_value; sample_al.append("\n") .append(match_length, ' ') @@ -2235,14 +2227,15 @@ external_log_format::build(std::vector& errors) size_t max_name_width = 0; for (const auto& pat_iter : this->elf_pattern_order) { - pattern& pat = *pat_iter; + auto& pat = *pat_iter; - if (!pat.p_pcre) { + if (!pat.p_pcre.value) { continue; } - partial_indexes.emplace_back(pat.p_pcre->match_partial(pi), - pat.p_name); + partial_indexes.emplace_back( + pat.p_pcre.value->match_partial(sample_lines[0]), + pat.p_name); max_name_width = std::max(max_name_width, pat.p_name.size()); } for (const auto& line_frag : sample_lines) { @@ -2273,7 +2266,7 @@ external_log_format::build(std::vector& errors) attr_line_t regex_note; for (const auto& pat_iter : this->elf_pattern_order) { - if (!pat_iter->p_pcre) { + if (!pat_iter->p_pcre.value) { regex_note .append( lnav::roles::symbol(fmt::format(FMT_STRING("{:{}}"), @@ -2283,7 +2276,7 @@ external_log_format::build(std::vector& errors) continue; } - attr_line_t regex_al = pat_iter->p_pcre->get_pattern(); + attr_line_t regex_al = pat_iter->p_pcre.value->get_pattern(); lnav::snippets::regex_highlighter( regex_al, -1, line_range{0, (int) regex_al.length()}); @@ 
-2444,20 +2437,13 @@ external_log_format::build(std::vector& errors) attrs.ta_attrs |= A_BLINK; } - if (hd.hd_pattern != nullptr) { - auto regex = pcrepp::shared_from_str(hd.hd_pattern->get_pattern(), - PCRE_CASELESS | PCRE_UTF8); - - if (regex.isErr()) { - log_error("unable to recompile highlighter pattern"); - } else { - this->lf_highlighters.emplace_back(regex.unwrap()); - this->lf_highlighters.back() - .with_name(hd_pair.first.to_string()) - .with_format_name(this->elf_name) - .with_color(fg, bg) - .with_attrs(attrs); - } + if (hd.hd_pattern.value != nullptr) { + this->lf_highlighters.emplace_back(hd.hd_pattern.value); + this->lf_highlighters.back() + .with_name(hd_pair.first.to_string()) + .with_format_name(this->elf_name) + .with_color(fg, bg) + .with_attrs(attrs); } } } @@ -2468,12 +2454,12 @@ external_log_format::register_vtabs( std::vector& errors) { for (auto& elf_search_table : this->elf_search_tables) { - if (elf_search_table.second.std_pattern == nullptr) { + if (elf_search_table.second.std_pattern.value == nullptr) { continue; } auto lst = std::make_shared( - *elf_search_table.second.std_pattern, elf_search_table.first); + elf_search_table.second.std_pattern.value, elf_search_table.first); lst->lst_format = this; lst->lst_log_path_glob = elf_search_table.second.std_glob; if (elf_search_table.second.std_level != LEVEL_UNKNOWN) { @@ -2495,16 +2481,15 @@ external_log_format::match_samples(const std::vector& samples) const { for (const auto& sample_iter : samples) { for (const auto& pat_iter : this->elf_pattern_order) { - pattern& pat = *pat_iter; + auto& pat = *pat_iter; - if (!pat.p_pcre) { + if (!pat.p_pcre.value) { continue; } - pcre_context_static<128> pc; - pcre_input pi(sample_iter.s_line.pp_value); - - if (pat.p_pcre->match(pc, pi)) { + if (pat.p_pcre.value->find_in(sample_iter.s_line.pp_value) + .ignore_error()) + { return true; } } @@ -2522,7 +2507,7 @@ public: void get_columns(std::vector& cols) const override { - const external_log_format& elf 
= this->elt_format; + const auto& elf = this->elt_format; cols.resize(elf.elf_column_count); for (const auto& vd : elf.elf_value_def_order) { @@ -2684,14 +2669,13 @@ external_log_format::specialized(int fmt_lock) bool external_log_format::match_name(const std::string& filename) { - if (this->elf_file_pattern.empty()) { + if (this->elf_filename_pcre.value == nullptr) { return true; } - pcre_context_static<10> pc; - pcre_input pi(filename); - - return this->elf_filename_pcre->match(pc, pi); + return this->elf_filename_pcre.value->find_in(filename) + .ignore_error() + .has_value(); } bool @@ -2734,15 +2718,13 @@ external_log_format::value_line_count(const intern_string_t ist, } log_level_t -external_log_format::convert_level(const pcre_input& pi, - const pcre_context::capture_t* level_cap, +external_log_format::convert_level(string_fragment sf, scan_batch_context* sbc) const { log_level_t retval = LEVEL_INFO; - if (level_cap != nullptr && level_cap->is_valid()) { + if (sf.is_valid()) { if (sbc != nullptr && sbc->sbc_cached_level_count > 0) { - auto sf = pi.get_string_fragment(level_cap); auto cached_level_iter = std::find(std::begin(sbc->sbc_cached_level_strings), std::begin(sbc->sbc_cached_level_strings) @@ -2765,16 +2747,14 @@ external_log_format::convert_level(const pcre_input& pi, } } - pcre_context_static<128> pc_level; - pcre_input pi_level( - pi.get_substr_start(level_cap), 0, level_cap->length()); - if (this->elf_level_patterns.empty()) { - retval = string2level(pi_level.get_string(), level_cap->length()); + retval = string2level(sf.data(), sf.length()); } else { for (const auto& elf_level_pattern : this->elf_level_patterns) { - if (elf_level_pattern.second.lp_pcre->match( - pc_level, pi_level, PCRE_NO_UTF8_CHECK)) + if (elf_level_pattern.second.lp_pcre.value + ->find_in(sf, PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) { retval = elf_level_pattern.first; break; @@ -2782,7 +2762,7 @@ external_log_format::convert_level(const pcre_input& pi, } } - if 
(sbc != nullptr && level_cap->length() < 10) { + if (sbc != nullptr && sf.length() < 10) { size_t cache_index; if (sbc->sbc_cached_level_count == 4) { @@ -2791,8 +2771,7 @@ external_log_format::convert_level(const pcre_input& pi, cache_index = sbc->sbc_cached_level_count; sbc->sbc_cached_level_count += 1; } - sbc->sbc_cached_level_strings[cache_index] - = std::string(pi_level.get_string(), pi_level.pi_length); + sbc->sbc_cached_level_strings[cache_index] = sf.to_string(); sbc->sbc_cached_level_values[cache_index] = retval; } } diff --git a/src/log_format.hh b/src/log_format.hh index 5cfa05b2..88ca877a 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -57,7 +57,7 @@ #include "log_format_fwd.hh" #include "log_level.hh" #include "optional.hpp" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "shared_buffer.hh" struct sqlite3; @@ -522,29 +522,32 @@ protected: static std::vector> lf_root_formats; struct pcre_format { - explicit pcre_format(const char* regex) - : name(regex), pcre(regex), - pf_timestamp_index(this->pcre.name_index("timestamp")) + template + explicit pcre_format(const T (®ex)[N]) + : name(regex), + pcre(lnav::pcre2pp::code::from_const(regex).to_shared()), + pf_timestamp_index(this->pcre->name_index("timestamp")) { } - pcre_format() : name(nullptr), pcre("") {} + pcre_format() = default; - const char* name; - pcrepp pcre; + const char* name{nullptr}; + std::shared_ptr pcre; int pf_timestamp_index{-1}; }; static bool next_format(pcre_format* fmt, int& index, int& locked_index); const char* log_scanf(uint32_t line_number, - const char* line, - size_t len, + string_fragment line, pcre_format* fmt, const char* time_fmt[], struct exttm* tm_out, struct timeval* tv_out, - ...); + + string_fragment* ts_out, + nonstd::optional* level_out); }; #endif diff --git a/src/log_format_ext.hh b/src/log_format_ext.hh index 18f5a716..7aa2d9f8 100644 --- a/src/log_format_ext.hh +++ b/src/log_format_ext.hh @@ -35,6 +35,7 @@ #include #include 
"log_format.hh" +#include "log_search_table_fwd.hh" #include "yajlpp/yajlpp.hh" class module_format; @@ -99,7 +100,9 @@ public: struct pattern { intern_string_t p_name; std::string p_config_path; - std::shared_ptr> p_pcre; + factory_container::with_default_args + p_pcre; std::vector p_value_by_index; std::vector p_numeric_value_indexes; int p_timestamp_field_index{-1}; @@ -113,8 +116,7 @@ public: }; struct level_pattern { - std::string lp_regex; - std::shared_ptr lp_pcre; + factory_container lp_pcre; }; struct yajl_handle_deleter { @@ -273,7 +275,7 @@ public: }; struct highlighter_def { - std::shared_ptr hd_pattern; + factory_container hd_pattern; positioned_property hd_color; positioned_property hd_background_color; bool hd_underline{false}; @@ -309,11 +311,10 @@ public: return ""; } int pat_index = this->pattern_index_for_line(line_number); - return this->elf_pattern_order[pat_index]->p_pcre->get_pattern(); + return this->elf_pattern_order[pat_index]->p_pcre.value->get_pattern(); } - log_level_t convert_level(const pcre_input& pi, - const pcre_context::capture_t* level_cap, + log_level_t convert_level(string_fragment str, scan_batch_context* sbc) const; using mod_map_t = std::map; @@ -325,9 +326,8 @@ public: std::vector elf_format_source_order; std::map elf_format_sources; std::list elf_collision; - std::string elf_file_pattern; std::set elf_mime_types; - std::shared_ptr elf_filename_pcre; + factory_container elf_filename_pcre; std::map> elf_patterns; std::vector> elf_pattern_order; std::vector elf_samples; @@ -338,7 +338,7 @@ public: int elf_column_count{0}; double elf_timestamp_divisor{1.0}; intern_string_t elf_level_field; - pcrepp elf_level_pointer; + factory_container elf_level_pointer; intern_string_t elf_body_field; intern_string_t elf_module_id_field; intern_string_t elf_opid_field; @@ -348,10 +348,12 @@ public: bool elf_has_module_format{false}; bool elf_builtin_format{false}; + using search_table_pcre2pp + = factory_container::with_default_args< + 
log_search_table_ns::PATTERN_OPTIONS>; + struct search_table_def { - std::shared_ptr< - pcrepp_with_options> - std_pattern; + search_table_pcre2pp std_pattern; std::string std_glob; log_level_t std_level{LEVEL_UNKNOWN}; }; @@ -410,8 +412,7 @@ public: private: const intern_string_t elf_name; - static uint8_t module_scan(const pcre_input& pi, - pcre_context::capture_t* body_cap, + static uint8_t module_scan(string_fragment body_cap, const intern_string_t& mod_name); }; diff --git a/src/log_format_fwd.hh b/src/log_format_fwd.hh index 19ef9fcb..3305677a 100644 --- a/src/log_format_fwd.hh +++ b/src/log_format_fwd.hh @@ -32,6 +32,8 @@ #ifndef lnav_log_format_fwd_hh #define lnav_log_format_fwd_hh +#include + #include #include "ArenaAlloc/arenaalloc.h" @@ -39,9 +41,10 @@ #include "base/string_attr_type.hh" #include "byte_array.hh" #include "log_level.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "ptimec.hh" #include "robin_hood/robin_hood.h" +#include "yajlpp/yajlpp.hh" class log_format; @@ -303,7 +306,7 @@ private: }; struct format_tag_def { - format_tag_def(std::string name) : ftd_name(name) {} + explicit format_tag_def(std::string name) : ftd_name(std::move(name)) {} struct path_restriction { std::string p_glob; @@ -314,7 +317,8 @@ struct format_tag_def { std::string ftd_name; std::string ftd_description; std::vector ftd_paths; - std::shared_ptr> ftd_pattern; + factory_container::with_default_args + ftd_pattern; log_level_t ftd_level{LEVEL_UNKNOWN}; }; diff --git a/src/log_format_impls.cc b/src/log_format_impls.cc index d8a804c3..7b250775 100644 --- a/src/log_format_impls.cc +++ b/src/log_format_impls.cc @@ -41,59 +41,10 @@ #include "config.h" #include "formats/logfmt/logfmt.parser.hh" #include "log_vtab_impl.hh" -#include "pcrepp/pcrepp.hh" #include "sql_util.hh" #include "yajlpp/yajlpp.hh" -static const pcrepp RDNS_PATTERN( - "^(?:com|net|org|edu|[a-z][a-z])" - "(\\.\\w+)+(.+)"); - -/** - * Attempt to scrub a reverse-DNS string. 
- * - * @param str The string to scrub. If the string looks like a reverse-DNS - * string, the leading components of the name will be reduced to a single - * letter. For example, "com.example.foo" will be reduced to "c.e.foo". - * @return The scrubbed version of the input string or the original string - * if it is not a reverse-DNS string. - */ -static std::string -scrub_rdns(const std::string& str) -{ - pcre_context_static<30> context; - pcre_input input(str); - std::string retval; - - if (RDNS_PATTERN.match(context, input)) { - pcre_context::capture_t* cap; - - cap = context.begin(); - for (int index = 0; index < cap->c_begin; index++) { - if (index == 0 || str[index - 1] == '.') { - if (index > 0) { - retval.append(1, '.'); - } - retval.append(1, str[index]); - } - } - retval += input.get_substr(cap); - retval += input.get_substr(cap + 1); - } else { - retval = str; - } - return retval; -} - class generic_log_format : public log_format { - static pcrepp& scrub_pattern() - { - static pcrepp SCRUB_PATTERN( - "\\d+-(\\d+-\\d+ \\d+:\\d+:\\d+(?:,\\d+)?:)\\w+:(.*)"); - - return SCRUB_PATTERN; - } - static pcre_format* get_pcre_log_formats() { static pcre_format log_fmt[] = { @@ -140,23 +91,6 @@ class generic_log_format : public log_format { return intern_string::lookup("generic_log"); } - void scrub(std::string& line) override - { - pcre_context_static<30> context; - pcre_input pi(line); - std::string new_line; - - if (scrub_pattern().match(context, pi)) { - pcre_context::capture_t* cap; - - for (cap = context.begin(); cap != context.end(); cap++) { - new_line += scrub_rdns(pi.get_substr(cap)); - } - - line = new_line; - } - } - scan_result_t scan(logfile& lf, std::vector& dst, const line_info& li, @@ -165,12 +99,12 @@ class generic_log_format : public log_format { { struct exttm log_time; struct timeval log_tv; - pcre_context::capture_t ts, level; + string_fragment ts; + nonstd::optional level; const char* last_pos; if ((last_pos = this->log_scanf(dst.size(), - 
sbr.get_data(), - sbr.length(), + sbr.to_string_fragment(), get_pcre_log_formats(), nullptr, &log_time, @@ -180,8 +114,10 @@ class generic_log_format : public log_format { &level)) != nullptr) { - const char* level_str = &sbr.get_data()[level.c_begin]; - log_level_t level_val = string2level(level_str, level.length()); + log_level_t level_val = log_level_t::LEVEL_UNKNOWN; + if (level) { + level_val = string2level(level->data(), level->length()); + } if (!((log_time.et_flags & ETF_DAY_SET) && (log_time.et_flags & ETF_MONTH_SET) @@ -204,26 +140,28 @@ class generic_log_format : public log_format { { auto& line = values.lvv_sbr; int pat_index = this->pattern_index_for_line(line_number); - pcre_format& fmt = get_pcre_log_formats()[pat_index]; - struct line_range lr; + auto& fmt = get_pcre_log_formats()[pat_index]; int prefix_len = 0; - pcre_input pi(line.get_data(), 0, line.length()); - pcre_context_static<30> pc; - - if (!fmt.pcre.match(pc, pi)) { + auto md = fmt.pcre->create_match_data(); + auto match_res = fmt.pcre->capture_from(line.to_string_fragment()) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { return; } - lr.lr_start = pc[0]->c_begin; - lr.lr_end = pc[0]->c_end; + auto lr = to_line_range(md[fmt.pf_timestamp_index].value()); sa.emplace_back(lr, logline::L_TIMESTAMP.value()); - const char* level = &line.get_data()[pc[1]->c_begin]; - - if (string2level(level, pc[1]->length(), true) == LEVEL_UNKNOWN) { - prefix_len = pc[0]->c_end; - } else { - prefix_len = pc[1]->c_end; + prefix_len = lr.lr_end; + auto level_cap = md[2]; + if (level_cap) { + if (string2level(level_cap->data(), level_cap->length(), true) + != LEVEL_UNKNOWN) + { + prefix_len = level_cap->sf_end; + } } lr.lr_start = 0; @@ -377,15 +315,9 @@ struct separated_string { size_t index() const { return this->i_index; } }; - iterator begin() - { - return {*this, this->ss_str}; - } + iterator begin() { return {*this, this->ss_str}; } - iterator end() - { - return {*this, 
this->ss_str + this->ss_len}; - } + iterator end() { return {*this, this->ss_str + this->ss_len}; } }; class bro_log_format : public log_format { @@ -472,7 +404,8 @@ public: string_fragment sf = *iter; if (this->lf_date_time.scan( - sf.data(), sf.length(), nullptr, &tm, tv)) { + sf.data(), sf.length(), nullptr, &tm, tv)) + { this->lf_timestamp_flags = tm.et_flags; found_ts = true; } @@ -527,18 +460,19 @@ public: shared_buffer_ref& sbr, scan_batch_context& sbc) override { - static const pcrepp SEP_RE(R"(^#separator\s+(.+))"); + static const auto SEP_RE + = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))"); if (!this->blf_format_name.empty()) { return this->scan_int(dst, li, sbr); } if (dst.empty() || dst.size() > 20 || sbr.empty() - || sbr.get_data()[0] == '#') { + || sbr.get_data()[0] == '#') + { return SCAN_NO_MATCH; } - pcre_context_static<20> pc; auto line_iter = dst.begin(); auto read_result = lf.read_line(line_iter); @@ -547,16 +481,19 @@ public: } auto line = read_result.unwrap(); - pcre_input pi(line.get_data(), 0, line.length()); + auto md = SEP_RE.create_match_data(); - if (!SEP_RE.match(pc, pi)) { + auto match_res = SEP_RE.capture_from(line.to_string_fragment()) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { return SCAN_NO_MATCH; } this->clear(); - auto sep - = from_escaped_string(pi.get_substr_start(pc[0]), pc[0]->length()); + auto sep = from_escaped_string(md[1]->data(), md[1]->length()); this->blf_separator = intern_string::lookup(sep); for (++line_iter; line_iter != dst.end(); ++line_iter) { @@ -897,7 +834,8 @@ struct ws_separated_string { this->i_pos = this->i_next_pos; while (this->i_pos < (ss.ss_str + ss.ss_len) - && isspace(*this->i_pos)) { + && isspace(*this->i_pos)) + { this->i_pos += 1; this->i_next_pos += 1; } @@ -1070,7 +1008,8 @@ public: sbr_sf.length(), nullptr, &tm, - tv)) { + tv)) + { this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm); this->wlf_time_scanner.set_base_time(tv.tv_sec, @@ -1088,7 
+1027,8 @@ public: || F_DATE_UTC == fd.fd_name) { if (this->lf_date_time.scan( - sf.data(), sf.length(), nullptr, &date_tm, date_tv)) { + sf.data(), sf.length(), nullptr, &date_tm, date_tv)) + { this->lf_timestamp_flags |= date_tm.et_flags; found_date = true; } @@ -1173,7 +1113,8 @@ public: } if (dst.empty() || dst.size() > 20 || sbr.empty() - || sbr.get_data()[0] == '#') { + || sbr.get_data()[0] == '#') + { return SCAN_NO_MATCH; } @@ -1598,8 +1539,7 @@ struct logfmt_pair_handler { date_time_scanner& lph_dt_scanner; bool lph_found_time{false}; - struct exttm lph_time_tm { - }; + struct exttm lph_time_tm {}; struct timeval lph_tv { 0, 0 }; diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index ce65b165..d009aaa0 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -57,9 +57,7 @@ #include "yajlpp/yajlpp.hh" #include "yajlpp/yajlpp_def.hh" -static void extract_metadata(const char* contents, - size_t len, - struct script_metadata& meta_out); +static void extract_metadata(string_fragment, struct script_metadata& meta_out); using log_formats_map_t = std::map>; @@ -276,30 +274,7 @@ read_format_field(yajlpp_parse_context* ypc, leading_slash ? 
len - 1 : len); auto field_name = ypc->get_path_fragment(1); - if (field_name == "file-pattern") { - try { - elf->elf_file_pattern = value; - elf->elf_filename_pcre - = std::make_shared(elf->elf_file_pattern); - } catch (const pcrepp::error& e) { - pcrepp::compile_error ce; - - ce.ce_msg = e.what(); - ce.ce_offset = e.e_offset; - ypc->ypc_current_handler->report_regex_value_error(ypc, value, ce); - } - } else if (field_name == "level-pointer") { - auto pcre_res = pcrepp::from_str(value); - - if (pcre_res.isErr()) { - auto pcre_error = pcre_res.unwrapErr(); - - ypc->ypc_current_handler->report_regex_value_error( - ypc, value, pcre_error); - } else { - elf->elf_level_pointer = pcre_res.unwrap(); - } - } else if (field_name == "timestamp-format") { + if (field_name == "timestamp-format") { elf->lf_timestamp_format.push_back(intern_string::lookup(value)->get()); } else if (field_name == "module-field") { elf->elf_module_id_field = intern_string::lookup(value); @@ -321,17 +296,20 @@ read_levels(yajlpp_parse_context* ypc, const unsigned char* str, size_t len) auto regex = std::string((const char*) str, len); auto level_name_or_number = ypc->get_path_fragment(2); log_level_t level = string2level(level_name_or_number.c_str()); - elf->elf_level_patterns[level].lp_regex = regex; - - try { - elf->elf_level_patterns[level].lp_pcre - = std::make_shared(regex); - } catch (const pcrepp::error& e) { - pcrepp::compile_error ce; - - ce.ce_msg = e.what(); - ce.ce_offset = e.e_offset; - ypc->ypc_current_handler->report_regex_value_error(ypc, regex, ce); + auto value_frag = string_fragment::from_bytes(str, len); + + auto compile_res = lnav::pcre2pp::code::from(value_frag); + if (compile_res.isErr()) { + static const intern_string_t PATTERN_SRC + = intern_string::lookup("pattern"); + auto ce = compile_res.unwrapErr(); + ypc->ypc_current_handler->report_error( + ypc, + value_frag.to_string(), + lnav::console::to_user_message(PATTERN_SRC, ce)); + } else { + 
elf->elf_level_patterns[level].lp_pcre.value + = compile_res.unwrap().to_shared(); } return 1; @@ -781,7 +759,9 @@ static struct json_path_container action_def_handlers = { }; static struct json_path_container action_handlers = { - json_path_handler(pcrepp("(?\\w+)"), read_action_def) + json_path_handler( + lnav::pcre2pp::code::from_const("(?\\w+)").to_shared(), + read_action_def) .with_children(action_def_handlers), }; @@ -849,9 +829,10 @@ struct json_path_container format_handlers = { .with_synopsis("") .with_description( "The value to divide a numeric timestamp by in a JSON log."), - json_path_handler("file-pattern", read_format_field) + json_path_handler("file-pattern") .with_description("A regular expression that restricts this format to " - "log files with a matching name"), + "log files with a matching name") + .for_field(&external_log_format::elf_filename_pcre), json_path_handler("mime-types#", read_format_field) .with_description("A list of mime-types this format should be used for") .with_enum_values(MIME_TYPE_ENUM), @@ -859,9 +840,10 @@ struct json_path_container format_handlers = { .with_description( "The name of the level field in the log message pattern") .for_field(&external_log_format::elf_level_field), - json_path_handler("level-pointer", read_format_field) + json_path_handler("level-pointer") .with_description("A regular-expression that matches the JSON-pointer " - "of the level property"), + "of the level property") + .for_field(&external_log_format::elf_level_pointer), json_path_handler("timestamp-field", read_format_field) .with_description( "The name of the timestamp field in the log message pattern") @@ -876,7 +858,8 @@ struct json_path_container format_handlers = { .with_description( "The name of the body field in the log message pattern") .for_field(&external_log_format::elf_body_field), - json_path_handler("url", pcrepp("^url#?")) + json_path_handler("url", + lnav::pcre2pp::code::from_const("^url#?").to_shared()) .add_cb(read_format_field) 
.with_description("A URL with more information about this log format"), json_path_handler("title", read_format_field) @@ -1037,7 +1020,7 @@ write_sample_file() auto_fd script_fd; struct stat st; - extract_metadata(sf.data(), sf.length(), meta); + extract_metadata(sf, meta); auto path = fmt::format(FMT_STRING("formats/default/{}.lnav"), meta.sm_name); auto script_path = lnav::paths::dotlnav() / path; @@ -1346,23 +1329,24 @@ load_format_extra(sqlite3* db, } static void -extract_metadata(const char* contents, - size_t len, - struct script_metadata& meta_out) +extract_metadata(string_fragment contents, struct script_metadata& meta_out) { - static const pcrepp SYNO_RE("^#\\s+@synopsis:(.*)$", PCRE_MULTILINE); - static const pcrepp DESC_RE("^#\\s+@description:(.*)$", PCRE_MULTILINE); - - pcre_input pi(contents, 0, len); - pcre_context_static<16> pc; - - pi.reset(contents, 0, len); - if (SYNO_RE.match(pc, pi)) { - meta_out.sm_synopsis = trim(pi.get_substr(pc[0])); + static const auto SYNO_RE = lnav::pcre2pp::code::from_const( + "^#\\s+@synopsis:(.*)$", PCRE2_MULTILINE); + static const auto DESC_RE = lnav::pcre2pp::code::from_const( + "^#\\s+@description:(.*)$", PCRE2_MULTILINE); + + auto syno_md = SYNO_RE.create_match_data(); + auto syno_match_res + = SYNO_RE.capture_from(contents).into(syno_md).matches().ignore_error(); + if (syno_match_res) { + meta_out.sm_synopsis = syno_md[1]->trim().to_string(); } - pi.reset(contents, 0, len); - if (DESC_RE.match(pc, pi)) { - meta_out.sm_description = trim(pi.get_substr(pc[0])); + auto desc_md = DESC_RE.create_match_data(); + auto desc_match_res + = DESC_RE.capture_from(contents).into(desc_md).matches().ignore_error(); + if (desc_match_res) { + meta_out.sm_description = desc_md[1]->trim().to_string(); } if (!meta_out.sm_synopsis.empty()) { @@ -1390,7 +1374,7 @@ extract_metadata_from_file(struct script_metadata& meta_inout) size_t len; len = fread(buffer, 1, sizeof(buffer), fp.in()); - extract_metadata(buffer, len, meta_inout); + 
extract_metadata(string_fragment::from_bytes(buffer, len), meta_inout); } } diff --git a/src/log_search_table.cc b/src/log_search_table.cc index 038fa8e2..a2bf5c87 100644 --- a/src/log_search_table.cc +++ b/src/log_search_table.cc @@ -36,8 +36,10 @@ const static std::string MATCH_INDEX = "match_index"; static auto match_index_name = intern_string::lookup("match_index"); -log_search_table::log_search_table(pcrepp pattern, intern_string_t table_name) - : log_vtab_impl(table_name), lst_regex(std::move(pattern)) +log_search_table::log_search_table(std::shared_ptr code, + intern_string_t table_name) + : log_vtab_impl(table_name), lst_regex(code), + lst_match_data(this->lst_regex->create_match_data()) { } @@ -65,20 +67,18 @@ log_search_table::get_columns_int(std::vector& cols) const this->lst_column_metas.emplace_back( match_index_name, value_kind_t::VALUE_INTEGER, cols.size()); cols.emplace_back(MATCH_INDEX, SQLITE_INTEGER); - for (int lpc = 0; lpc < this->lst_regex.get_capture_count(); lpc++) { + cn.add_column(string_fragment::from_const("__all__")); + auto captures = this->lst_regex->get_captures(); + for (int lpc = 0; lpc < this->lst_regex->get_capture_count(); lpc++) { std::string collator; - std::string colname; int sqlite_type = SQLITE3_TEXT; - colname = cn.add_column(string_fragment::from_c_str( - this->lst_regex.name_for_capture(lpc))) - .to_string(); - if (this->lst_regex.captures().size() - == (size_t) this->lst_regex.get_capture_count()) - { - auto iter = this->lst_regex.cap_begin() + lpc; - auto cap_re = this->lst_regex.get_pattern().substr(iter->c_begin, - iter->length()); + auto colname + = cn.add_column(string_fragment::from_c_str( + this->lst_regex->get_name_for_capture(lpc + 1))) + .to_string(); + if (captures.size() == (size_t) this->lst_regex->get_capture_count()) { + auto cap_re = captures[lpc].to_string(); sqlite_type = guess_type_from_pcre(cap_re, collator); switch (sqlite_type) { case SQLITE_FLOAT: @@ -119,18 +119,24 @@ 
log_search_table::next(log_cursor& lc, logfile_sub_source& lss) this->lst_line_values_cache.lvv_values.clear(); if (this->lst_match_index >= 0) { - if (this->lst_regex.match( - this->lst_match_context, this->lst_input, PCRE_NO_UTF8_CHECK)) - { + auto match_res = this->lst_regex->capture_from(this->lst_content) + .at(this->lst_remaining) + .into(this->lst_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + + if (match_res) { #if 0 log_debug("matched within line: %d", this->lst_match_context.get_count()); #endif + this->lst_remaining = match_res->f_remaining; this->lst_match_index += 1; return true; } // log_debug("done matching message"); + this->lst_remaining.clear(); this->lst_match_index = -1; return false; } @@ -163,17 +169,20 @@ log_search_table::next(log_cursor& lc, logfile_sub_source& lss) lf->read_full_message(lf_iter, this->lst_line_values_cache.lvv_sbr); lf->get_format()->annotate( cl, this->vi_attrs, this->lst_line_values_cache, false); - this->lst_input.reset(this->lst_line_values_cache.lvv_sbr.get_data(), - 0, - this->lst_line_values_cache.lvv_sbr.length()); + this->lst_content + = this->lst_line_values_cache.lvv_sbr.to_string_fragment(); + + auto match_res = this->lst_regex->capture_from(this->lst_content) + .into(this->lst_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); - if (!this->lst_regex.match( - this->lst_match_context, this->lst_input, PCRE_NO_UTF8_CHECK)) - { + if (!match_res) { this->lst_mismatch_bitmap.set_bit(lc.lc_curr_line); return false; } + this->lst_remaining = match_res->f_remaining; this->lst_match_index = 0; return true; @@ -191,13 +200,13 @@ log_search_table::extract(logfile* lf, values.lvv_values.emplace_back( this->lst_column_metas[this->lst_format_column_count], this->lst_match_index); - for (int lpc = 0; lpc < this->lst_regex.get_capture_count(); lpc++) { - const auto* cap = this->lst_match_context[lpc]; - if (cap->is_valid()) { + for (int lpc = 0; lpc < this->lst_regex->get_capture_count(); lpc++) { + 
const auto cap = this->lst_match_data[lpc + 1]; + if (cap) { values.lvv_values.emplace_back( this->lst_column_metas[this->lst_format_column_count + 1 + lpc], line, - line_range{cap->c_begin, cap->c_end}); + to_line_range(cap.value())); } else { values.lvv_values.emplace_back( this->lst_column_metas[this->lst_format_column_count + 1 diff --git a/src/log_search_table.hh b/src/log_search_table.hh index b1db2a65..7c575de3 100644 --- a/src/log_search_table.hh +++ b/src/log_search_table.hh @@ -36,14 +36,13 @@ #include #include "log_vtab_impl.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "shared_buffer.hh" class log_search_table : public log_vtab_impl { public: - static int pattern_options() { return PCRE_CASELESS | PCRE_MULTILINE; } - - log_search_table(pcrepp pattern, intern_string_t table_name); + log_search_table(std::shared_ptr code, + intern_string_t table_name); void get_primary_keys(std::vector& keys_out) const override; @@ -65,13 +64,14 @@ public: uint64_t line_number, logline_value_vector& values) override; - pcrepp lst_regex; + std::shared_ptr lst_regex; + lnav::pcre2pp::match_data lst_match_data; + string_fragment lst_content; + string_fragment lst_remaining; log_format* lst_format{nullptr}; mutable size_t lst_format_column_count{0}; std::string lst_log_path_glob; nonstd::optional lst_log_level; - pcre_input lst_input{""}; - pcre_context_static<128> lst_match_context; mutable std::vector lst_column_metas; int64_t lst_match_index{-1}; mutable std::vector lst_cols; diff --git a/src/log_search_table_fwd.hh b/src/log_search_table_fwd.hh new file mode 100644 index 00000000..cc758e1b --- /dev/null +++ b/src/log_search_table_fwd.hh @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef lnav_log_search_table_fwd_hh +#define lnav_log_search_table_fwd_hh + +#include "pcrepp/pcre2pp.hh" + +namespace log_search_table_ns { +static constexpr int PATTERN_OPTIONS + = PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_DOTALL; +} + +#endif diff --git a/src/log_vtab_impl.cc b/src/log_vtab_impl.cc index a32312a5..54462859 100644 --- a/src/log_vtab_impl.cc +++ b/src/log_vtab_impl.cc @@ -1195,13 +1195,15 @@ log_cursor::string_constraint::string_constraint(unsigned char op, : sc_op(op), sc_value(std::move(value)) { if (op == SQLITE_INDEX_CONSTRAINT_REGEXP) { - try { - this->sc_pattern - = std::make_shared(this->sc_value, PCRE_UTF8); - } catch (const pcrepp::error& err) { + auto compile_res = lnav::pcre2pp::code::from(value); + + if (compile_res.isErr()) { + auto ce = compile_res.unwrapErr(); log_error("unable to compile regexp constraint: %s -- %s", this->sc_value.c_str(), - err.e_msg.c_str()); + ce.get_message().c_str()); + } else { + this->sc_pattern = compile_res.unwrap().to_shared(); } } } @@ -1230,10 +1232,9 @@ log_cursor::string_constraint::matches(const std::string& sf) const return sqlite3_strglob(this->sc_value.c_str(), sf.data()) == 0; case SQLITE_INDEX_CONSTRAINT_REGEXP: { if (this->sc_pattern != nullptr) { - pcre_context_static<30> pc; - pcre_input pi(sf); - - return this->sc_pattern->match(pc, pi, PCRE_NO_UTF8_CHECK); + return this->sc_pattern->find_in(sf, PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value(); } // return true here so that the regexp is actually run and fails return true; diff --git a/src/log_vtab_impl.hh b/src/log_vtab_impl.hh index 8e82864b..71d52115 100644 --- a/src/log_vtab_impl.hh +++ b/src/log_vtab_impl.hh @@ -37,6 +37,7 @@ #include #include "logfile_sub_source.hh" +#include "pcrepp/pcre2pp.hh" #include "robin_hood/robin_hood.h" class textview_curses; @@ -65,7 +66,7 @@ struct log_cursor { struct string_constraint { unsigned char sc_op; std::string sc_value; - std::shared_ptr sc_pattern; + std::shared_ptr sc_pattern; 
string_constraint(unsigned char op, std::string value); diff --git a/src/logfile.cc b/src/logfile.cc index 751395fd..6fe7ebcf 100644 --- a/src/logfile.cc +++ b/src/logfile.cc @@ -563,6 +563,12 @@ logfile::rebuild_index(nonstd::optional deadline) .unwrapOr(text_format_t::TF_UNKNOWN); log_debug("setting text format to %d", this->lf_text_format); } + if (!li.li_valid_utf + && this->lf_text_format != text_format_t::TF_MARKDOWN + && this->lf_text_format != text_format_t::TF_LOG) + { + this->lf_text_format = text_format_t::TF_BINARY; + } auto read_result = this->lf_line_buffer.read_range(li.li_file_range); @@ -634,9 +640,10 @@ logfile::rebuild_index(nonstd::optional deadline) continue; } - pcre_context_static<30> pc; - pcre_input pi(sf); - if (td->ftd_pattern->match(pc, pi, PCRE_NO_UTF8_CHECK)) + if (td->ftd_pattern.value + ->find_in(sf, PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) { curr_ll->set_mark(true); while (curr_ll->is_continued()) { diff --git a/src/logfile_sub_source.hh b/src/logfile_sub_source.hh index 9cfd7bcc..712f2201 100644 --- a/src/logfile_sub_source.hh +++ b/src/logfile_sub_source.hh @@ -85,8 +85,9 @@ public: pcre_filter(type_t type, const std::string& id, size_t index, - std::shared_ptr code) - : text_filter(type, filter_lang_t::REGEX, id, index), pf_pcre(code) + std::shared_ptr code) + : text_filter(type, filter_lang_t::REGEX, id, index), + pf_pcre(std::move(code)) { } @@ -96,10 +97,9 @@ public: logfile::const_iterator ll, shared_buffer_ref& line) override { - pcre_context_static<30> pc; - pcre_input pi(line.get_data(), 0, line.length()); - - return this->pf_pcre->match(pc, pi); + return this->pf_pcre->find_in(line.to_string_fragment()) + .ignore_error() + .has_value(); } std::string to_command() const override @@ -110,7 +110,7 @@ public: } protected: - std::shared_ptr pf_pcre; + std::shared_ptr pf_pcre; }; class sql_filter : public text_filter { diff --git a/src/md2attr_line.cc b/src/md2attr_line.cc index 4e1ddec0..a208616c 100644 --- 
a/src/md2attr_line.cc +++ b/src/md2attr_line.cc @@ -32,7 +32,7 @@ #include "base/attr_line.builder.hh" #include "base/itertools.hh" #include "base/lnav_log.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "pugixml/pugixml.hpp" #include "readline_highlighters.hh" #include "view_curses.hh" @@ -146,7 +146,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) last_block.append("\n"); } if (this->ml_list_stack.empty() - && !endswith(last_block.get_string(), "\n\n")) { + && !endswith(last_block.get_string(), "\n\n")) + { last_block.append("\n"); } } @@ -200,7 +201,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) || lang_sf.iequal( string_fragment::from_const("shellsession"))) { - static const pcrepp SH_PROMPT(R"([^\$>#%]*[\$>#%]\s+)"); + static const auto SH_PROMPT + = lnav::pcre2pp::code::from_const(R"([^\$>#%]*[\$>#%]\s+)"); attr_line_t new_block_text; attr_line_t cmd_block; @@ -208,7 +210,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) for (auto line : block_text.split_lines()) { if (!cmd_block.empty() - && endswith(cmd_block.get_string(), "\\\n")) { + && endswith(cmd_block.get_string(), "\\\n")) + { cmd_block.append(line).append("\n"); continue; } @@ -222,11 +225,11 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) cmd_block.clear(); } - pcre_context_static<10> pc; - pcre_input pi(line.get_string()); + auto sh_find_res + = SH_PROMPT.find_in(line.get_string()).ignore_error(); - if (SH_PROMPT.match(pc, pi)) { - prompt_size = pc.all()->length(); + if (sh_find_res) { + prompt_size = sh_find_res->f_all.length(); line.with_attr(string_attr{ line_range{0, prompt_size}, VC_ROLE.value(role_t::VCR_LIST_GLYPH), @@ -360,7 +363,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) } } for (size_t line_index = 0; line_index < max_cell_lines; - line_index++) { + line_index++) + { size_t col = 0; for (const auto& cell : cells) { block_text.append(" "); @@ -551,7 
+555,8 @@ md2attr_line::text(MD_TEXTTYPE tt, const string_fragment& sf) break; } default: { - static const pcrepp REPL_RE(R"(-{2,3}|:[^:\s]*(?:::[^:\s]*)*:)"); + static const auto REPL_RE = lnav::pcre2pp::code::from_const( + R"(-{2,3}|:[^:\s]*(?:::[^:\s]*)*:)"); static const auto& emojis = md4cpp::get_emoji_map(); if (this->ml_code_depth > 0) { @@ -559,33 +564,35 @@ md2attr_line::text(MD_TEXTTYPE tt, const string_fragment& sf) return Ok(); } - pcre_input pi(sf); - pcre_context_static<30> pc; std::string span_text; - while (REPL_RE.match(pc, pi)) { - auto prev = pi.get_up_to(pc.all()); - span_text.append(prev.data(), prev.length()); - - auto matched = pi.get_string_fragment(pc.all()); - - if (matched == "--") { - span_text.append("\u2013"); - } else if (matched == "---") { - span_text.append("\u2014"); - } else if (matched.startswith(":")) { - auto em_iter - = emojis.em_shortname2emoji.find(matched.to_string()); - if (em_iter == emojis.em_shortname2emoji.end()) { - span_text.append(matched.data(), matched.length()); - } else { - span_text.append(em_iter->second.get().e_value); + auto loop_res = REPL_RE.capture_from(sf).for_each( + [&span_text](lnav::pcre2pp::match_data& md) { + span_text += md.leading(); + + auto matched = *md[0]; + + if (matched == "--") { + span_text.append("\u2013"); + } else if (matched == "---") { + span_text.append("\u2014"); + } else if (matched.startswith(":")) { + auto em_iter = emojis.em_shortname2emoji.find( + matched.to_string()); + if (em_iter == emojis.em_shortname2emoji.end()) { + span_text += matched; + } else { + span_text.append(em_iter->second.get().e_value); + } } - } - } + }); - auto last_frag = sf.substr(pi.pi_offset); - span_text.append(last_frag.data(), last_frag.length()); + if (loop_res.isOk()) { + span_text += loop_res.unwrap(); + } else { + log_error("span replacement regex failed: %d", + loop_res.unwrapErr().e_error_code); + } text_wrap_settings tws = {0, this->ml_blocks.size() == 1 ? 
70 : 10000}; diff --git a/src/optional.hpp b/src/optional.hpp index d837ae40..a5f49610 100644 --- a/src/optional.hpp +++ b/src/optional.hpp @@ -1486,6 +1486,9 @@ public: has_value_ = false; } + template + auto map(F func) -> optionalvalue()))>; + private: void this_type_does_not_support_comparisons() const {} @@ -1747,6 +1750,17 @@ optional make_optional( T const & value ) #endif // optional_CPP11_OR_GREATER +template +template +auto optional::map(F func) -> optionalvalue()))> +{ + if (this->has_value()) { + return make_optional(func(this->value())); + } + + return nullopt; +} + } // namespace optional_lite using optional_lite::optional; diff --git a/src/pcrepp/CMakeLists.txt b/src/pcrepp/CMakeLists.txt index 9ced7df0..1af88453 100644 --- a/src/pcrepp/CMakeLists.txt +++ b/src/pcrepp/CMakeLists.txt @@ -1,9 +1,16 @@ -add_library(pcrepp STATIC ../config.h.in pcrepp.hh pcrepp.cc) +add_library(pcrepp STATIC + ../config.h.in + pcre2pp.hh + pcre2pp.cc) target_include_directories(pcrepp PUBLIC . .. ../third-party/scnlib/include ${CMAKE_CURRENT_BINARY_DIR}/..) 
-target_link_libraries(pcrepp cppfmt pcre::libpcre) +target_link_libraries(pcrepp cppfmt pcre::libpcre pcre2::pcre2) -add_executable(test_pcrepp test_pcrepp.cc) -target_link_libraries(test_pcrepp pcrepp) -add_test(NAME test_pcrepp COMMAND test_pcrepp) +add_executable(test_pcre2pp test_pcre2pp.cc) +target_include_directories( + test_pcre2pp + PUBLIC + ../third-party/doctest-root) +target_link_libraries(test_pcre2pp pcrepp) +add_test(NAME test_pcre2pp COMMAND test_pcre2pp) diff --git a/src/pcrepp/Makefile.am b/src/pcrepp/Makefile.am index 61fc1f11..72e83198 100644 --- a/src/pcrepp/Makefile.am +++ b/src/pcrepp/Makefile.am @@ -16,18 +16,18 @@ AM_CXXFLAGS = $(CODE_COVERAGE_CXXFLAGS) noinst_LIBRARIES = libpcrepp.a noinst_HEADERS = \ - pcrepp.hh + pcre2pp.hh libpcrepp_a_SOURCES = \ - pcrepp.cc + pcre2pp.cc -test_pcrepp_SOURCES = test_pcrepp.cc -test_pcrepp_LDADD = \ +test_pcre2pp_SOURCES = test_pcre2pp.cc +test_pcre2pp_LDADD = \ libpcrepp.a \ $(PCRE_LIBS) check_PROGRAMS = \ - test_pcrepp + test_pcre2pp TESTS = \ - test_pcrepp + test_pcre2pp diff --git a/src/pcrepp/pcre2pp.cc b/src/pcrepp/pcre2pp.cc new file mode 100644 index 00000000..b4d0a3c4 --- /dev/null +++ b/src/pcrepp/pcre2pp.cc @@ -0,0 +1,458 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @file pcrepp.cc + */ + +#include "pcre2pp.hh" + +#include "config.h" + +namespace lnav { +namespace pcre2pp { + +std::string +quote(const char* unquoted) +{ + std::string retval; + + for (int lpc = 0; unquoted[lpc]; lpc++) { + if (isalnum(unquoted[lpc]) || unquoted[lpc] == '_' + || unquoted[lpc] & 0x80) + { + retval.push_back(unquoted[lpc]); + } else { + retval.push_back('\\'); + retval.push_back(unquoted[lpc]); + } + } + + return retval; +} + +match_data +code::create_match_data() const +{ + auto_mem md(pcre2_match_data_free); + + md = pcre2_match_data_create_from_pattern(this->p_code, nullptr); + + return match_data{std::move(md)}; +} + +Result +code::from(string_fragment sf, int options) +{ + compile_error ce; + auto_mem co(pcre2_code_free); + + options |= PCRE2_UTF; + co = pcre2_compile( + sf.udata(), sf.length(), options, &ce.ce_code, &ce.ce_offset, nullptr); + + if (co == nullptr) { + ce.ce_pattern = sf.to_string(); + return Err(ce); + } + + auto jit_rc = pcre2_jit_compile(co, PCRE2_JIT_COMPLETE); + if (jit_rc < 0) { + // log_error("failed to JIT compile pattern: %d", jit_rc); + } + + return Ok(code{std::move(co), sf.to_string()}); +} + +code::named_captures 
+code::get_named_captures() const +{ + named_captures retval; + + pcre2_pattern_info( + this->p_code.in(), PCRE2_INFO_NAMECOUNT, &retval.nc_count); + pcre2_pattern_info( + this->p_code.in(), PCRE2_INFO_NAMEENTRYSIZE, &retval.nc_entry_size); + pcre2_pattern_info( + this->p_code.in(), PCRE2_INFO_NAMETABLE, &retval.nc_name_table); + + return retval; +} + +size_t +code::match_partial(string_fragment in) const +{ + auto md = this->create_match_data(); + auto length = in.length(); + + do { + auto rc = pcre2_match(this->p_code.in(), + in.udata(), + length, + 0, + PCRE2_PARTIAL_HARD, + md.md_data.in(), + nullptr); + + if (rc == PCRE2_ERROR_PARTIAL) { + return md.md_ovector[1]; + } + + if (length > 0) { + length -= 1; + } + } while (length > 0); + + return 0; +} + +const char* +code::get_name_for_capture(size_t index) const +{ + for (const auto cap : this->get_named_captures()) { + if (cap.get_index() == index) { + return cap.get_name().data(); + } + } + + return nullptr; +} + +size_t +code::get_capture_count() const +{ + uint32_t retval; + + pcre2_pattern_info(this->p_code.in(), PCRE2_INFO_CAPTURECOUNT, &retval); + + return retval; +} + +std::vector +code::get_captures() const +{ + bool in_class = false, in_escape = false, in_literal = false; + auto pat_frag = string_fragment::from_str(this->p_pattern); + std::vector cap_in_progress; + std::vector retval; + + for (int lpc = 0; this->p_pattern[lpc]; lpc++) { + if (in_escape) { + in_escape = false; + if (this->p_pattern[lpc] == 'Q') { + in_literal = true; + } + } else if (in_class) { + if (this->p_pattern[lpc] == ']') { + in_class = false; + } + if (this->p_pattern[lpc] == '\\') { + in_escape = true; + } + } else if (in_literal) { + if (this->p_pattern[lpc] == '\\' && this->p_pattern[lpc + 1] == 'E') + { + in_literal = false; + lpc += 1; + } + } else { + switch (this->p_pattern[lpc]) { + case '\\': + in_escape = true; + break; + case '[': + in_class = true; + break; + case '(': + 
cap_in_progress.emplace_back(pat_frag.sub_range(lpc, lpc)); + break; + case ')': { + if (!cap_in_progress.empty()) { + static const auto DEFINE_SF + = string_fragment::from_const("(?(DEFINE)"); + + auto& cap = cap_in_progress.back(); + char first = '\0', second = '\0', third = '\0'; + bool is_cap = false; + + cap.sf_end = lpc + 1; + if (cap.length() >= 2) { + first = this->p_pattern[cap.sf_begin + 1]; + } + if (cap.length() >= 3) { + second = this->p_pattern[cap.sf_begin + 2]; + } + if (cap.length() >= 4) { + third = this->p_pattern[cap.sf_begin + 3]; + } + if (cap.sf_begin >= 2) { + auto poss_define = string_fragment::from_str_range( + this->p_pattern, cap.sf_begin - 2, cap.sf_end); + if (poss_define == DEFINE_SF) { + cap_in_progress.pop_back(); + continue; + } + } + if (first == '?') { + if (second == '\'') { + is_cap = true; + } + if (second == '<' + && (isalpha(third) || third == '_')) + { + is_cap = true; + } + if (second == 'P' && third == '<') { + is_cap = true; + } + } else if (first != '*') { + is_cap = true; + } + if (is_cap) { + retval.emplace_back(cap); + } + cap_in_progress.pop_back(); + } + break; + } + } + } + } + + assert((size_t) this->get_capture_count() == retval.size()); + + return retval; +} + +std::string +code::replace(string_fragment str, const char* repl) const +{ + std::string retval; + std::string::size_type start = 0; + string_fragment remaining = str; + + auto md = this->create_match_data(); + while (remaining.is_valid()) { + auto find_res = this->capture_from(str) + .at(remaining) + .into(md) + .matches() + .ignore_error(); + if (!find_res) { + break; + } + auto all = find_res->f_all; + remaining = find_res->f_remaining; + bool in_escape = false; + + retval.append(str.data(), start, (all.sf_begin - start)); + start = all.sf_end; + for (int lpc = 0; repl[lpc]; lpc++) { + auto ch = repl[lpc]; + + if (in_escape) { + if (isdigit(ch)) { + auto capture_index = (ch - '0'); + + if (capture_index < md.get_count()) { + auto cap = 
md[capture_index]; + if (cap) { + retval.append(cap->data(), cap->length()); + } + } else if (capture_index > this->get_capture_count()) { + retval.push_back('\\'); + retval.push_back(ch); + } + } else { + if (ch != '\\') { + retval.push_back('\\'); + } + retval.push_back(ch); + } + in_escape = false; + } else { + switch (ch) { + case '\\': + in_escape = true; + break; + default: + retval.push_back(ch); + break; + } + } + } + } + if (remaining.is_valid()) { + retval.append(str.data(), remaining.sf_begin, std::string::npos); + } + + return retval; +} + +int +code::name_index(const char* name) const +{ + return pcre2_substring_number_from_name(this->p_code.in(), + (PCRE2_SPTR) name); +} + +size_t +code::named_capture::get_index() const +{ + return (this->nc_entry[0] << 8) | (this->nc_entry[1] & 0xff); +} + +string_fragment +code::named_capture::get_name() const +{ + return string_fragment::from_bytes( + &this->nc_entry[2], strlen((const char*) &this->nc_entry[2])); +} + +code::named_capture +code::named_captures::iterator::operator*() const +{ + return code::named_capture{this->i_entry}; +} + +code::named_captures::iterator& +code::named_captures::iterator::operator++() +{ + this->i_entry += this->i_entry_size; + + return *this; +} + +bool +code::named_captures::iterator::operator==(const iterator& other) const +{ + return this->i_entry == other.i_entry + && this->i_entry_size == other.i_entry_size; +} + +bool +code::named_captures::iterator::operator!=(const iterator& other) const +{ + return this->i_entry != other.i_entry + || this->i_entry_size != other.i_entry_size; +} + +code::named_captures::iterator +code::named_captures::begin() const +{ + return iterator{this->nc_entry_size, this->nc_name_table}; +} + +code::named_captures::iterator +code::named_captures::end() const +{ + return iterator{ + this->nc_entry_size, + this->nc_name_table + (this->nc_count * this->nc_entry_size), + }; +} + +matcher::matches_result +matcher::matches(uint32_t options) +{ + 
this->mb_input.i_offset = this->mb_input.i_next_offset; + + if (this->mb_input.i_offset == -1) { + return not_found{}; + } + + auto rc = pcre2_match(this->mb_code.p_code.in(), + this->mb_input.i_string.udata(), + this->mb_input.i_string.length(), + this->mb_input.i_offset, + options, + this->mb_match_data.md_data.in(), + nullptr); + + if (rc > 0) { + this->mb_match_data.md_input = this->mb_input; + this->mb_match_data.md_code = &this->mb_code; + this->mb_match_data.md_capture_end = rc; + if (this->mb_match_data[0]->empty() + && this->mb_match_data[0]->sf_end >= this->mb_input.i_string.sf_end) + { + this->mb_input.i_next_offset = -1; + } else if (this->mb_match_data[0]->empty()) { + this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end + 1; + } else { + this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end; + } + this->mb_match_data.md_input.i_next_offset + = this->mb_input.i_next_offset; + return found{ + this->mb_match_data[0].value(), + this->mb_match_data.remaining(), + }; + } + + this->mb_match_data.md_input = this->mb_input; + this->mb_match_data.md_ovector[0] = this->mb_input.i_offset; + this->mb_match_data.md_ovector[1] = this->mb_input.i_offset; + this->mb_match_data.md_capture_end = 1; + if (rc == PCRE2_ERROR_NOMATCH) { + return not_found{}; + } + + return error{&this->mb_code, rc}; +} + +void +matcher::matches_result::handle_error(matcher::error err) +{ + unsigned char buffer[1024]; + + pcre2_get_error_message(err.e_error_code, buffer, sizeof(buffer)); + // log_error("pcre2_match failure: %s", buffer); +} + +std::string +compile_error::get_message() const +{ + unsigned char buffer[1024]; + + pcre2_get_error_message(this->ce_code, buffer, sizeof(buffer)); + + return {(const char*) buffer}; +} + +std::string +matcher::error::get_message() +{ + unsigned char buffer[1024]; + + pcre2_get_error_message(this->e_error_code, buffer, sizeof(buffer)); + + return {(const char*) buffer}; +} + +} // namespace pcre2pp +} // namespace lnav diff --git 
a/src/pcrepp/pcre2pp.hh b/src/pcrepp/pcre2pp.hh new file mode 100644 index 00000000..9fa86c89 --- /dev/null +++ b/src/pcrepp/pcre2pp.hh @@ -0,0 +1,368 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef lnav_pcre2pp_hh +#define lnav_pcre2pp_hh + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include +#include +#include + +#include + +#include "base/auto_mem.hh" +#include "base/intern_string.hh" +#include "base/result.h" +#include "mapbox/variant.hpp" + +namespace lnav { +namespace pcre2pp { + +std::string quote(const char* unquoted); + +inline std::string +quote(const std::string& unquoted) +{ + return quote(unquoted.c_str()); +} + +class code; +struct capture_builder; +class matcher; + +struct input { + string_fragment i_string; + int i_offset{0}; + int i_next_offset{0}; +}; + +class match_data { +public: + static match_data unitialized() { return match_data{}; } + + string_fragment leading() const + { + return this->md_input.i_string.sub_range(this->md_input.i_offset, + this->md_ovector[0]); + } + + string_fragment remaining() const + { + if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) { + return string_fragment::invalid(); + } + + return string_fragment::from_byte_range( + this->md_input.i_string.sf_string, + this->md_input.i_next_offset, + this->md_input.i_string.sf_end); + } + + nonstd::optional operator[](size_t index) const + { + if (index >= this->md_capture_end) { + return nonstd::nullopt; + } + + auto start = this->md_ovector[(index * 2)]; + auto stop = this->md_ovector[(index * 2) + 1]; + if (start == PCRE2_UNSET || stop == PCRE2_UNSET) { + return nonstd::nullopt; + } + + return this->md_input.i_string.sub_range(start, stop); + } + + template + nonstd::optional operator[](const T (&name)[N]) const; + + int get_count() const { return this->md_capture_end; } + +private: + friend matcher; + friend code; + + match_data() = default; + + explicit match_data(auto_mem dat) + : md_data(std::move(dat)), + md_ovector(pcre2_get_ovector_pointer(this->md_data.in())), + md_ovector_count(pcre2_get_ovector_count(this->md_data.in())) + { + } + + auto_mem md_data; + const code* md_code{nullptr}; + input md_input; + PCRE2_SIZE* 
md_ovector{nullptr}; + uint32_t md_ovector_count{0}; + int md_capture_end{0}; +}; + +class matcher { +public: + struct found { + string_fragment f_all; + string_fragment f_remaining; + }; + struct not_found {}; + struct error { + const code* e_code{nullptr}; + int e_error_code{0}; + std::string get_message(); + }; + + class matches_result + : public mapbox::util::variant { + public: + using variant::variant; + + nonstd::optional ignore_error() + { + return this->match( + [](found fo) { return nonstd::make_optional(fo); }, + [](not_found) { return nonstd::nullopt; }, + [](error err) { + handle_error(err); + return nonstd::nullopt; + }); + } + + private: + static void handle_error(error err); + }; + + matcher& reload_input(string_fragment sf, int next_offset) + { + this->mb_input = input{sf, next_offset, next_offset}; + + return *this; + } + + matches_result matches(uint32_t options = 0); + + int get_next_offset() const { return this->mb_input.i_next_offset; } + +private: + friend capture_builder; + + matcher(const code& co, input& in, match_data& md) + : mb_code(co), mb_input(in), mb_match_data(md) + { + } + + const code& mb_code; + input mb_input; + match_data& mb_match_data; +}; + +struct capture_builder { + const code& mb_code; + input mb_input; + + capture_builder at(const string_fragment& remaining) && + { + this->mb_input.i_offset = this->mb_input.i_next_offset + = remaining.sf_begin; + return *this; + } + + matcher into(match_data& md) && + { + return matcher{ + this->mb_code, + this->mb_input, + md, + }; + } + + template + Result for_each(F func) &&; +}; + +struct compile_error { + std::string ce_pattern; + int ce_code{0}; + size_t ce_offset{0}; + + std::string get_message() const; +}; + +class code { +public: + class named_capture { + public: + size_t get_index() const; + string_fragment get_name() const; + + PCRE2_SPTR nc_entry; + }; + + class named_captures { + public: + struct iterator { + named_capture operator*() const; + iterator& operator++(); + bool 
operator==(const iterator& other) const; + bool operator!=(const iterator& other) const; + + uint32_t i_entry_size; + PCRE2_SPTR i_entry; + }; + + iterator begin() const; + iterator end() const; + bool empty() const { return this->nc_count == 0; } + size_t size() const { return this->nc_count; } + + private: + friend code; + + named_captures() = default; + + uint32_t nc_count{0}; + uint32_t nc_entry_size{0}; + PCRE2_SPTR nc_name_table{nullptr}; + }; + + static Result from(string_fragment sf, + int options = 0); + + template + static code from_const(const T (&str)[N], int options = 0) + { + return from(string_fragment::from_const(str), options).unwrap(); + } + + const std::string& get_pattern() const { return this->p_pattern; } + + named_captures get_named_captures() const; + + const char* get_name_for_capture(size_t index) const; + + size_t get_capture_count() const; + + int name_index(const char* name) const; + + std::vector get_captures() const; + + match_data create_match_data() const; + + capture_builder capture_from(string_fragment in) const + { + return capture_builder{ + *this, + input{in}, + }; + } + + matcher::matches_result find_in(string_fragment in, + uint32_t options = 0) const + { + static thread_local match_data md = this->create_match_data(); + + if (md.md_ovector_count < this->p_match_proto.md_ovector_count) { + md = this->create_match_data(); + } + + return this->capture_from(in).into(md).matches(options); + } + + size_t match_partial(string_fragment in) const; + + std::string replace(string_fragment str, const char* repl) const; + + std::shared_ptr to_shared() && + { + return std::make_shared(std::move(this->p_code), + std::move(this->p_pattern)); + } + + code(auto_mem code, std::string pattern) + : p_code(std::move(code)), p_pattern(std::move(pattern)), + p_match_proto(this->create_match_data()) + { + } + +private: + friend matcher; + friend match_data; + + auto_mem p_code; + std::string p_pattern; + match_data p_match_proto; +}; + +template 
+nonstd::optional +match_data::operator[](const T (&name)[N]) const +{ + auto index = pcre2_substring_number_from_name( + this->md_code->p_code.in(), + reinterpret_cast(name)); + + return this->operator[](index); +} + +template +Result +capture_builder::for_each(F func) && +{ + auto md = this->mb_code.create_match_data(); + auto mat = matcher{this->mb_code, this->mb_input, md}; + + bool done = false; + matcher::error eret; + + while (!done) { + auto match_res = mat.matches(Options); + done = match_res.match( + [mat, &func](matcher::found) { + func(mat.mb_match_data); + return false; + }, + [](matcher::not_found) { return true; }, + [&eret](matcher::error err) { + eret = err; + return true; + }); + } + + if (eret.e_error_code == 0) { + return Ok(md.remaining()); + } + return Err(eret); +} + +} // namespace pcre2pp +} // namespace lnav + +#endif diff --git a/src/pcrepp/pcrepp.cc b/src/pcrepp/pcrepp.cc deleted file mode 100644 index 943cc2ab..00000000 --- a/src/pcrepp/pcrepp.cc +++ /dev/null @@ -1,453 +0,0 @@ -/** - * Copyright (c) 2007-2012, Timothy Stack - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * * Neither the name of Timothy Stack nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * @file pcrepp.cc - */ - -#include "pcrepp.hh" - -const int JIT_STACK_MIN_SIZE = 32 * 1024; -const int JIT_STACK_MAX_SIZE = 512 * 1024; - -pcre_context::capture_t* -pcre_context::operator[](const char* name) const -{ - capture_t* retval = nullptr; - auto index = this->pc_pcre->name_index(name); - if (index != PCRE_ERROR_NOSUBSTRING) { - retval = &this->pc_captures[index + 1]; - } - - return retval; -} - -pcre_context::capture_t* -pcre_context::first_valid() const -{ - for (int lpc = 1; lpc < this->pc_count; lpc++) { - if (this->pc_captures[lpc].is_valid()) { - return &this->pc_captures[lpc]; - } - } - - return nullptr; -} - -std::string -pcrepp::quote(const char* unquoted) -{ - std::string retval; - - for (int lpc = 0; unquoted[lpc]; lpc++) { - if (isalnum(unquoted[lpc]) || unquoted[lpc] == '_' - || unquoted[lpc] & 0x80) - { - retval.push_back(unquoted[lpc]); - } else { - retval.push_back('\\'); - retval.push_back(unquoted[lpc]); - } - } - - return retval; -} - -Result -pcrepp::from_str(std::string pattern, int options) -{ - const char* errptr; - int eoff; - auto* code = pcre_compile( - pattern.c_str(), options | PCRE_UTF8, &errptr, &eoff, nullptr); - - if (!code) { - return 
Err(compile_error{errptr, eoff}); - } - - return Ok(pcrepp(std::move(pattern), code)); -} - -Result, pcrepp::compile_error> -pcrepp::shared_from_str(std::string pattern, int options) -{ - const char* errptr; - int eoff; - auto* code = pcre_compile( - pattern.c_str(), options | PCRE_UTF8, &errptr, &eoff, nullptr); - - if (!code) { - return Err(compile_error{errptr, eoff}); - } - - return Ok(std::make_shared(std::move(pattern), code)); -} - -void -pcrepp::find_captures(const char* pattern) -{ - bool in_class = false, in_escape = false, in_literal = false; - std::vector cap_in_progress; - - for (int lpc = 0; pattern[lpc]; lpc++) { - if (in_escape) { - in_escape = false; - if (pattern[lpc] == 'Q') { - in_literal = true; - } - } else if (in_class) { - if (pattern[lpc] == ']') { - in_class = false; - } - if (pattern[lpc] == '\\') { - in_escape = true; - } - } else if (in_literal) { - if (pattern[lpc] == '\\' && pattern[lpc + 1] == 'E') { - in_literal = false; - lpc += 1; - } - } else { - switch (pattern[lpc]) { - case '\\': - in_escape = true; - break; - case '[': - in_class = true; - break; - case '(': - cap_in_progress.emplace_back(lpc, lpc); - break; - case ')': { - if (!cap_in_progress.empty()) { - static const auto DEFINE_SF - = string_fragment::from_const("(?(DEFINE)"); - - auto& cap = cap_in_progress.back(); - char first = '\0', second = '\0', third = '\0'; - bool is_cap = false; - - cap.c_end = lpc + 1; - if (cap.length() >= 2) { - first = pattern[cap.c_begin + 1]; - } - if (cap.length() >= 3) { - second = pattern[cap.c_begin + 2]; - } - if (cap.length() >= 4) { - third = pattern[cap.c_begin + 3]; - } - if (cap.c_begin >= 2) { - auto poss_define = string_fragment::from_byte_range( - pattern, cap.c_begin - 2, cap.c_end); - if (poss_define == DEFINE_SF) { - cap_in_progress.pop_back(); - continue; - } - } - if (first == '?') { - if (second == '\'') { - is_cap = true; - } - if (second == '<' - && (isalpha(third) || third == '_')) - { - is_cap = true; - } - if (second 
== 'P' && third == '<') { - is_cap = true; - } - } else if (first != '*') { - is_cap = true; - } - if (is_cap) { - this->p_captures.push_back(cap); - } - cap_in_progress.pop_back(); - } - break; - } - } - } - } - - assert((size_t) this->p_capture_count == this->p_captures.size()); -} - -bool -pcrepp::match(pcre_context& pc, pcre_input& pi, int options) const -{ - int length, startoffset, filtered_options = options; - int count = pc.get_max_count(); - const char* str; - int rc; - - pc.set_pcrepp(this); - pi.pi_offset = pi.pi_next_offset; - - str = pi.get_string(); - if (filtered_options & PCRE_ANCHORED) { - filtered_options &= ~PCRE_ANCHORED; - str = &str[pi.pi_offset]; - startoffset = 0; - length = pi.pi_length - pi.pi_offset; - } else { - startoffset = pi.pi_offset; - length = pi.pi_length; - } - rc = pcre_exec(this->p_code, - this->p_code_extra.in(), - str, - length, - startoffset, - filtered_options, - (int*) pc.all(), - count * 2); - - if (rc < 0) { - switch (rc) { - case PCRE_ERROR_NOMATCH: - break; - case PCRE_ERROR_PARTIAL: - pc.set_count(1); - return true; - - default: - break; - } - } else if (rc == 0) { - rc = 0; - } else if (pc.all()->c_begin == pc.all()->c_end) { - rc = 0; - if (pi.pi_next_offset + 1 < pi.pi_length) { - pi.pi_next_offset += 1; - } - } else { - if (options & PCRE_ANCHORED) { - for (int lpc = 0; lpc < rc; lpc++) { - if (pc.all()[lpc].c_begin == -1) { - continue; - } - pc.all()[lpc].c_begin += pi.pi_offset; - pc.all()[lpc].c_end += pi.pi_offset; - } - } - pi.pi_next_offset = pc.all()->c_end; - } - - pc.set_count(rc); - - return rc > 0; -} - -std::string -pcrepp::replace(const char* str, const char* repl) const -{ - pcre_context_static<30> pc; - pcre_input pi(str); - std::string retval; - std::string::size_type start = 0; - - while (pi.pi_offset < pi.pi_length) { - this->match(pc, pi); - auto all = pc.all(); - bool in_escape = false; - - if (pc.get_count() < 0) { - break; - } - - retval.append(str, start, (all->c_begin - start)); - start = 
all->c_end; - for (int lpc = 0; repl[lpc]; lpc++) { - auto ch = repl[lpc]; - - if (in_escape) { - if (isdigit(ch)) { - auto capture_index = (ch - '0'); - - if (capture_index < pc.get_count()) { - retval.append(pi.get_substr_start(&all[capture_index]), - pi.get_substr_len(&all[capture_index])); - } else if (capture_index > this->p_capture_count) { - retval.push_back('\\'); - retval.push_back(ch); - } - } else { - if (ch != '\\') { - retval.push_back('\\'); - } - retval.push_back(ch); - } - in_escape = false; - } else { - switch (ch) { - case '\\': - in_escape = true; - break; - default: - retval.push_back(ch); - break; - } - } - } - } - retval.append(str, start, std::string::npos); - - return retval; -} - -void -pcrepp::study() -{ - const char* errptr; - - this->p_code_extra = pcre_study(this->p_code, -#ifdef PCRE_STUDY_JIT_COMPILE - PCRE_STUDY_JIT_COMPILE, -#else - 0, -#endif - &errptr); - if (!this->p_code_extra && errptr) { - // log_error("pcre_study error: %s", errptr); - } - if (this->p_code_extra != nullptr) { - pcre_extra* extra = this->p_code_extra; - - extra->flags - |= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION); - extra->match_limit = 10000; - extra->match_limit_recursion = 500; -#ifdef PCRE_STUDY_JIT_COMPILE - // pcre_assign_jit_stack(extra, nullptr, jit_stack()); -#endif - } - pcre_fullinfo( - this->p_code, this->p_code_extra, PCRE_INFO_OPTIONS, &this->p_options); - pcre_fullinfo(this->p_code, - this->p_code_extra, - PCRE_INFO_CAPTURECOUNT, - &this->p_capture_count); - pcre_fullinfo(this->p_code, - this->p_code_extra, - PCRE_INFO_NAMECOUNT, - &this->p_named_count); - pcre_fullinfo(this->p_code, - this->p_code_extra, - PCRE_INFO_NAMEENTRYSIZE, - &this->p_name_len); - pcre_fullinfo(this->p_code, - this->p_code_extra, - PCRE_INFO_NAMETABLE, - &this->p_named_entries); -} - -#ifdef PCRE_STUDY_JIT_COMPILE -pcre_jit_stack* -pcrepp::jit_stack() -{ - static pcre_jit_stack* retval = nullptr; - - if (retval == nullptr) { - retval = 
pcre_jit_stack_alloc(JIT_STACK_MIN_SIZE, JIT_STACK_MAX_SIZE); - } - - return retval; -} - -size_t -pcrepp::match_partial(pcre_input& pi) const -{ - size_t length = pi.pi_length; - int rc; - - do { - rc = pcre_exec(this->p_code, - this->p_code_extra.in(), - pi.get_string(), - length, - pi.pi_offset, - PCRE_PARTIAL, - nullptr, - 0); - switch (rc) { - case 0: - case PCRE_ERROR_PARTIAL: - return length; - } - if (length > 0) { - length -= 1; - } - } while (length > 0); - - return length; -} - -const char* -pcrepp::name_for_capture(int index) const -{ - for (pcre_named_capture::iterator iter = this->named_begin(); - iter != this->named_end(); - ++iter) - { - if (iter->index() == index) { - return iter->pnc_name; - } - } - return ""; -} - -int -pcrepp::name_index(const char* name) const -{ - int retval = pcre_get_stringnumber(this->p_code, name); - - if (retval == PCRE_ERROR_NOSUBSTRING) { - return retval; - } - - return retval - 1; -} - -#else -# warning "pcrejit is not available, search performance will be degraded" - -void -pcrepp::pcre_free_study(pcre_extra* extra) -{ - free(extra); -} -#endif - -void -pcre_context::capture_t::ltrim(const char* str) -{ - while (this->c_begin < this->c_end && isspace(str[this->c_begin])) { - this->c_begin += 1; - } -} diff --git a/src/pcrepp/pcrepp.hh b/src/pcrepp/pcrepp.hh deleted file mode 100644 index 477c3c55..00000000 --- a/src/pcrepp/pcrepp.hh +++ /dev/null @@ -1,617 +0,0 @@ -/** - * Copyright (c) 2007-2013, Timothy Stack - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * * Neither the name of Timothy Stack nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * @file pcrepp.hh - * - * A C++ adapter for the pcre library. The interface provided here has a - * different focus than the pcrecpp.h file included in the pcre distribution. - * The standard pcrecpp.h interface is more concerned with regular expressions - * that are digesting data to be used within the program itself. Whereas this - * interface is dealing with regular expression entered by the user and - * processing a series of matches on text files. - */ - -#ifndef pcrepp_hh -#define pcrepp_hh - -#include "config.h" - -#ifdef HAVE_PCRE_H -# include -#elif HAVE_PCRE_PCRE_H -# include -#else -# error "pcre.h not found?" 
-#endif - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "base/auto_mem.hh" -#include "base/intern_string.hh" -#include "base/result.h" -#include "scn/util/string_view.h" - -class pcrepp; - -/** - * Context that tracks captures found during a match operation. This class is a - * base that defines iterator methods and fields, but does not allocate space - * for the capture array. - */ -class pcre_context { -public: - struct capture_t { - capture_t() - { /* We don't initialize anything since it's a perf hit. */ - } - - capture_t(int begin, int end) : c_begin(begin), c_end(end) - { - assert(begin <= end); - } - - int c_begin; - int c_end; - - void ltrim(const char* str); - - bool contains(int pos) const - { - return this->c_begin <= pos && pos < this->c_end; - } - - bool is_valid() const { return this->c_begin != -1; } - - int length() const { return this->c_end - this->c_begin; } - - bool empty() const { return this->c_begin == this->c_end; } - }; - using iterator = capture_t*; - using const_iterator = const capture_t*; - - /** @return The maximum number of strings this context can capture. */ - int get_max_count() const { return this->pc_max_count; } - - void set_count(int count) { this->pc_count = count; } - - int get_count() const { return this->pc_count; } - - void set_pcrepp(const pcrepp* src) { this->pc_pcre = src; } - - /** - * @return a capture_t that covers all of the text that was matched. - */ - capture_t* all() const { return pc_captures; } - - /** @return An iterator to the first capture. */ - iterator begin() { return pc_captures + 1; } - /** @return An iterator that refers to the end of the capture array. 
*/ - iterator end() { return pc_captures + pc_count; }; - - capture_t* operator[](int offset) const - { - if (offset < 0) { - return nullptr; - } - return &this->pc_captures[offset + 1]; - } - - capture_t* operator[](const char* name) const; - - capture_t* operator[](const std::string& name) const - { - return (*this)[name.c_str()]; - } - - capture_t* first_valid() const; - -protected: - pcre_context(capture_t* captures, int max_count) - : pc_captures(captures), pc_max_count(max_count) - { - } - - const pcrepp* pc_pcre{nullptr}; - capture_t* pc_captures; - int pc_max_count; - int pc_count{0}; -}; - -struct capture_if_not { - capture_if_not(int begin) : cin_begin(begin) {} - - bool operator()(const pcre_context::capture_t& cap) const - { - return cap.c_begin != this->cin_begin; - } - - int cin_begin; -}; - -/** - * A pcre_context that allocates storage for the capture array within the object - * itself. - */ -template -class pcre_context_static : public pcre_context { -public: - pcre_context_static() - : pcre_context(this->pc_match_buffer, MAX_COUNT + 1){}; - -private: - capture_t pc_match_buffer[MAX_COUNT + 1]; -}; - -/** - * - */ -class pcre_input { -public: - pcre_input(const char* str, size_t off = 0, size_t len = -1) - : pi_offset(off), pi_next_offset(off), pi_length(len), pi_string(str) - { - if (this->pi_length == (size_t) -1) { - this->pi_length = strlen(str); - } - } - - pcre_input(const string_fragment& s) - : pi_offset(0), pi_next_offset(0), pi_length(s.length()), - pi_string(s.data()) - { - } - - pcre_input(const intern_string_t& s) - : pi_offset(0), pi_next_offset(0), pi_length(s.size()), - pi_string(s.get()) - { - } - - pcre_input(const string_fragment&&) = delete; - - pcre_input(const std::string& str, size_t off = 0) - : pi_offset(off), pi_next_offset(off), pi_length(str.length()), - pi_string(str.c_str()) - { - } - - pcre_input(const std::string&&, size_t off = 0) = delete; - - const char* get_string() const { return this->pi_string; } - - const 
char* get_substr_start(pcre_context::const_iterator iter) const - { - return &this->pi_string[iter->c_begin]; - } - - size_t get_substr_len(pcre_context::const_iterator iter) const - { - return iter->length(); - } - - std::string get_substr(pcre_context::const_iterator iter) const - { - if (iter->c_begin == -1) { - return ""; - } - return std::string(&this->pi_string[iter->c_begin], iter->length()); - } - - intern_string_t get_substr_i(pcre_context::const_iterator iter) const - { - return intern_string::lookup(&this->pi_string[iter->c_begin], - iter->length()); - } - - string_fragment get_string_fragment(pcre_context::const_iterator iter) const - { - return string_fragment::from_byte_range( - this->pi_string, iter->c_begin, iter->c_end); - } - - string_fragment get_up_to(pcre_context::const_iterator iter) const - { - return string_fragment::from_byte_range( - this->pi_string, this->pi_offset, iter->c_begin); - } - - nonstd::optional get_substr_opt( - pcre_context::const_iterator iter) const - { - if (iter->is_valid()) { - return std::string(&this->pi_string[iter->c_begin], iter->length()); - } - - return nonstd::nullopt; - } - - scn::string_view to_string_view(pcre_context::const_iterator iter) const - { - return scn::string_view{ - &this->pi_string[iter->c_begin], - &this->pi_string[iter->c_end], - }; - } - - void get_substr(pcre_context::const_iterator iter, char* dst) const - { - memcpy(dst, &this->pi_string[iter->c_begin], iter->length()); - dst[iter->length()] = '\0'; - } - - void reset_next_offset() { this->pi_next_offset = this->pi_offset; } - - void reset(const char* str, size_t off = 0, size_t len = -1) - { - this->pi_string = str; - this->pi_offset = off; - this->pi_next_offset = off; - if (this->pi_length == (size_t) -1) { - this->pi_length = strlen(str); - } else { - this->pi_length = len; - } - } - - void reset(const std::string& str, size_t off = 0) - { - this->reset(str.c_str(), off, str.length()); - } - - size_t pi_offset; - size_t pi_next_offset; - 
size_t pi_length; - -private: - const char* pi_string; -}; - -struct pcre_named_capture { - class iterator { - public: - iterator(pcre_named_capture* pnc, size_t name_len) - : i_named_capture(pnc), i_name_len(name_len) - { - } - - iterator() : i_named_capture(nullptr), i_name_len(0) {} - - const pcre_named_capture& operator*() const - { - return *this->i_named_capture; - } - - const pcre_named_capture* operator->() const - { - return this->i_named_capture; - } - - bool operator!=(const iterator& rhs) const - { - return this->i_named_capture != rhs.i_named_capture; - } - - iterator& operator++() - { - char* ptr = (char*) this->i_named_capture; - - ptr += this->i_name_len; - this->i_named_capture = (pcre_named_capture*) ptr; - return *this; - } - - private: - pcre_named_capture* i_named_capture; - size_t i_name_len; - }; - - int index() const - { - return (this->pnc_index_msb << 8 | this->pnc_index_lsb) - 1; - } - - char pnc_index_msb; - char pnc_index_lsb; - char pnc_name[]; -}; - -struct pcre_extractor { - const pcre_context& pe_context; - const pcre_input& pe_input; - - template - intern_string_t get_substr_i(T name) const - { - return this->pe_input.get_substr_i(this->pe_context[name]); - } - - template - std::string get_substr(T name) const - { - return this->pe_input.get_substr(this->pe_context[name]); - } -}; - -class pcrepp { -public: - class error : public std::exception { - public: - error(std::string msg, int offset = 0) - : e_msg(std::move(msg)), e_offset(offset) - { - } - - const char* what() const noexcept override - { - return this->e_msg.c_str(); - } - - const std::string e_msg; - int e_offset; - }; - - static std::string quote(const char* unquoted); - - static std::string quote(const std::string& unquoted) - { - return quote(unquoted.c_str()); - } - - struct compile_error { - const char* ce_msg{nullptr}; - int ce_offset{0}; - }; - - static Result from_str(std::string pattern, - int options = 0); - - static Result, compile_error> shared_from_str( - 
std::string pattern, int options = 0); - - pcrepp(pcre* code) : p_code(code), p_code_extra(pcre_free_study) - { - pcre_refcount(this->p_code, 1); - this->study(); - } - - pcrepp(std::string pattern, pcre* code) - : p_code(code), p_pattern(std::move(pattern)), - p_code_extra(pcre_free_study) - { - pcre_refcount(this->p_code, 1); - this->study(); - this->find_captures(this->p_pattern.c_str()); - } - - explicit pcrepp(const char* pattern, int options = 0) - : p_pattern(pattern), p_code_extra(pcre_free_study) - { - const char* errptr; - int eoff; - - if ((this->p_code - = pcre_compile(pattern, options, &errptr, &eoff, nullptr)) - == nullptr) - { - throw error(errptr, eoff); - } - - pcre_refcount(this->p_code, 1); - this->study(); - this->find_captures(pattern); - } - - explicit pcrepp(const std::string& pattern, int options = 0) - : p_pattern(pattern), p_code_extra(pcre_free_study) - { - const char* errptr; - int eoff; - - if ((this->p_code = pcre_compile( - pattern.c_str(), options | PCRE_UTF8, &errptr, &eoff, nullptr)) - == nullptr) - { - throw error(errptr, eoff); - } - - pcre_refcount(this->p_code, 1); - this->study(); - this->find_captures(pattern.c_str()); - } - - pcrepp() {} - - pcrepp(const pcrepp& other) - : p_code(other.p_code), p_pattern(other.p_pattern), - p_code_extra(pcre_free_study), p_captures(other.p_captures) - { - pcre_refcount(this->p_code, 1); - this->study(); - } - - pcrepp(pcrepp&& other) - : p_code(other.p_code), p_pattern(std::move(other.p_pattern)), - p_code_extra(pcre_free_study), p_capture_count(other.p_capture_count), - p_named_count(other.p_named_count), p_name_len(other.p_name_len), - p_options(other.p_options), p_named_entries(other.p_named_entries), - p_captures(std::move(other.p_captures)) - { - pcre_refcount(this->p_code, 1); - this->p_code_extra = std::move(other.p_code_extra); - } - - virtual ~pcrepp() { this->clear(); } - - pcrepp& operator=(pcrepp&& other) noexcept - { - if (this == &other) { - return *this; - } - - this->p_code = 
other.p_code; - pcre_refcount(this->p_code, 1); - this->p_pattern = std::move(other.p_pattern); - this->p_code_extra = std::move(other.p_code_extra); - this->p_capture_count = other.p_capture_count; - this->p_named_count = other.p_named_count; - this->p_name_len = other.p_name_len; - this->p_options = other.p_options; - this->p_named_entries = other.p_named_entries; - this->p_captures = std::move(other.p_captures); - - return *this; - } - - const std::string& get_pattern() const { return this->p_pattern; } - - bool empty() const { return this->p_pattern.empty(); } - - void clear() - { - if (this->p_code && pcre_refcount(this->p_code, -1) == 0) { - free(this->p_code); - this->p_code = nullptr; - } - this->p_pattern.clear(); - this->p_code_extra.reset(); - this->p_capture_count = 0; - this->p_named_count = 0; - this->p_name_len = 0; - this->p_options = 0; - this->p_named_entries = nullptr; - this->p_captures.clear(); - } - - pcre_named_capture::iterator named_begin() const - { - return {this->p_named_entries, static_cast(this->p_name_len)}; - } - - pcre_named_capture::iterator named_end() const - { - char* ptr = (char*) this->p_named_entries; - - ptr += this->p_named_count * this->p_name_len; - return {(pcre_named_capture*) ptr, - static_cast(this->p_name_len)}; - } - - const std::vector& captures() const - { - return this->p_captures; - } - - std::vector::const_iterator cap_begin() const - { - return this->p_captures.begin(); - } - - std::vector::const_iterator cap_end() const - { - return this->p_captures.end(); - } - - int name_index(const std::string& name) const - { - return this->name_index(name.c_str()); - } - - int name_index(const char* name) const; - - const char* name_for_capture(int index) const; - - int get_capture_count() const { return this->p_capture_count; } - - bool match(pcre_context& pc, pcre_input& pi, int options = 0) const; - - template - nonstd::optional> match(pcre_input& pi, - int options - = 0) const - { - pcre_context_static pc; - - if 
(this->match(pc, pi, options)) { - return pc; - } - - return nonstd::nullopt; - } - - std::string replace(const char* str, const char* repl) const; - - size_t match_partial(pcre_input& pi) const; - - pcre* release() { - auto retval = std::exchange(this->p_code, nullptr); - this->clear(); - - return retval; - } - -// #undef PCRE_STUDY_JIT_COMPILE -#ifdef PCRE_STUDY_JIT_COMPILE - static pcre_jit_stack* jit_stack(); - -#else - static void pcre_free_study(pcre_extra*); -#endif - - void study(); - - void find_captures(const char* pattern); - - pcre* p_code{nullptr}; - std::string p_pattern; - auto_mem p_code_extra; - int p_capture_count{0}; - int p_named_count{0}; - int p_name_len{0}; - unsigned long p_options{0}; - pcre_named_capture* p_named_entries{nullptr}; - std::vector p_captures; -}; - -template -class pcrepp_with_options : public pcrepp { -public: - template - pcrepp_with_options(Args... args) : pcrepp(args..., options) - { - } -}; - -#endif diff --git a/src/pcrepp/test_pcre2pp.cc b/src/pcrepp/test_pcre2pp.cc new file mode 100644 index 00000000..87dce4de --- /dev/null +++ b/src/pcrepp/test_pcre2pp.cc @@ -0,0 +1,246 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest/doctest.h" +#include "pcre2pp.hh" + +TEST_CASE("bad pattern") +{ + auto compile_res + = lnav::pcre2pp::code::from(string_fragment::from_const("[abc")); + + CHECK(compile_res.isErr()); + auto ce = compile_res.unwrapErr(); + CHECK(ce.ce_offset == 4); +} + +TEST_CASE("named captures") +{ + auto compile_res = lnav::pcre2pp::code::from( + string_fragment::from_const("(?a)(b)(?c)")); + + CHECK(compile_res.isOk()); + + const std::vector> expected_caps = { + {1, string_fragment::from_const("abc")}, + {3, string_fragment::from_const("def")}, + }; + + int caps_index = 0; + auto co = compile_res.unwrap(); + for (const auto cap : co.get_named_captures()) { + const auto& expected_cap = expected_caps[caps_index]; + + CHECK(expected_cap.first == cap.get_index()); + CHECK(expected_cap.second == cap.get_name()); + caps_index += 1; + } +} + +TEST_CASE("match") +{ + static const char INPUT[] = "key1=1234;key2=5678;"; + + auto co + = lnav::pcre2pp::code::from_const(R"((?\w+)=(?[^;]+);)"); + + co.capture_from(string_fragment::from_const(INPUT)) + .for_each([](lnav::pcre2pp::match_data& md) { + printf("got '%s' %s = %s\n", + 
md[0]->to_string().c_str(), + md[1]->to_string().c_str(), + md[2]->to_string().c_str()); + }); +} + +TEST_CASE("partial") +{ + static const char INPUT[] = "key1=1234"; + + auto co = lnav::pcre2pp::code::from_const(R"([a-z]+=.*)"); + auto matched = co.match_partial(string_fragment::from_const(INPUT)); + CHECK(matched == 3); +} + +TEST_CASE("capture_name") +{ + auto co = lnav::pcre2pp::code::from_const("(?def)(ghi)"); + + CHECK(co.get_capture_count() == 2); + CHECK(string_fragment::from_c_str(co.get_name_for_capture(1)) == "abc"); + CHECK(co.get_name_for_capture(2) == nullptr); +} + +TEST_CASE("get_capture_count") +{ + auto co = lnav::pcre2pp::code::from_const("(DEFINE)"); + + CHECK(co.get_capture_count() == 1); +} + +TEST_CASE("get_captures") +{ + auto co = lnav::pcre2pp::code::from_const(R"((?\w+)-(def)-)"); + + CHECK(co.get_capture_count() == 2); + const auto& caps = co.get_captures(); + CHECK(caps.size() == 2); + CHECK(caps[0].to_string() == R"((?\w+))"); + CHECK(caps[1].to_string() == R"((def))"); +} + +TEST_CASE("replace") +{ + static const char INPUT[] = "test 1 2 3"; + + auto co = lnav::pcre2pp::code::from_const(R"(\w*)"); + auto in = string_fragment::from_const(INPUT); + + auto res = co.replace(in, R"({\0})"); + CHECK(res == "{test}{} {1}{} {2}{} {3}{}"); +} + +TEST_CASE("replace-empty") +{ + static const char INPUT[] = ""; + + auto co = lnav::pcre2pp::code::from_const(R"(\w*)"); + auto in = string_fragment::from_const(INPUT); + + auto res = co.replace(in, R"({\0})"); + CHECK(res == "{}"); +} + +TEST_CASE("for_each-all") +{ + static const char INPUT[] = "Hello, World!\n"; + + auto co = lnav::pcre2pp::code::from_const(R"(.*)"); + auto in = string_fragment::from_const(INPUT); + + co.capture_from(in).for_each([](lnav::pcre2pp::match_data& md) { + printf("range %d:%d\n", md[0]->sf_begin, md[0]->sf_end); + }); +} + +TEST_CASE("capture_count") +{ + auto co = lnav::pcre2pp::code::from_const(R"(^(\w+)=([^;]+);)"); + + CHECK(co.get_capture_count() == 2); +} + 
+TEST_CASE("no-caps") +{ + const static std::string empty_cap_regexes[] = { + "foo (?:bar)", + "foo [(]", + "foo \\Q(bar)\\E", + "(?i)", + }; + + for (auto re : empty_cap_regexes) { + auto co = lnav::pcre2pp::code::from(re).unwrap(); + + CHECK(co.get_captures().empty()); + } +} + +TEST_CASE("ipmatcher") +{ + auto co = lnav::pcre2pp::code::from_const( + R"((?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)"); + auto inp = string_fragment::from_const("192.168.1.1"); + + auto find_res = co.find_in(inp).ignore_error(); + CHECK(find_res.has_value()); + CHECK(find_res->f_all.sf_begin == 0); +} + +TEST_CASE("get_captures-nested") +{ + auto re = lnav::pcre2pp::code::from_const("foo (bar (?:baz)?)"); + + CHECK(re.get_captures().size() == 1); + CHECK(re.get_captures()[0].sf_begin == 4); + CHECK(re.get_captures()[0].sf_end == 18); + CHECK(re.get_captures()[0].length() == 14); +} + +TEST_CASE("get_captures-basic") +{ + auto re = lnav::pcre2pp::code::from_const("(a)(b)(c)"); + + assert(re.get_captures().size() == 3); + assert(re.get_captures()[0].sf_begin == 0); + assert(re.get_captures()[0].sf_end == 3); + assert(re.get_captures()[1].sf_begin == 3); + assert(re.get_captures()[1].sf_end == 6); + assert(re.get_captures()[2].sf_begin == 6); + assert(re.get_captures()[2].sf_end == 9); +} + +TEST_CASE("get_captures-escape") +{ + auto re = lnav::pcre2pp::code::from_const("\\(a\\)(b)"); + + assert(re.get_captures().size() == 1); + assert(re.get_captures()[0].sf_begin == 5); + assert(re.get_captures()[0].sf_end == 8); +} + +TEST_CASE("get_captures-named") +{ + auto re = lnav::pcre2pp::code::from_const("(?b)"); + + assert(re.get_captures().size() == 1); + assert(re.get_captures()[0].sf_begin == 0); + assert(re.get_captures()[0].sf_end == 11); +} + +TEST_CASE("get_captures-namedP") +{ + auto re = lnav::pcre2pp::code::from_const("(?Pb)"); + + assert(re.get_captures().size() == 1); + assert(re.get_captures()[0].sf_begin == 0); + assert(re.get_captures()[0].sf_end == 
12); +} + +TEST_CASE("get_captures-namedq") +{ + auto re = lnav::pcre2pp::code::from_const("(?'named'b)"); + + assert(re.get_captures().size() == 1); + assert(re.get_captures()[0].sf_begin == 0); + assert(re.get_captures()[0].sf_end == 11); +} diff --git a/src/pcrepp/test_pcrepp.cc b/src/pcrepp/test_pcrepp.cc deleted file mode 100644 index 490de448..00000000 --- a/src/pcrepp/test_pcrepp.cc +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Copyright (c) 2007-2012, Timothy Stack - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * * Neither the name of Timothy Stack nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include -#include -#include - -#include "config.h" -#include "pcrepp/pcrepp.hh" - -int -main(int argc, char* argv[]) -{ - pcre_context_static<30> context; - int retval = EXIT_SUCCESS; - - { - pcrepp ipmatcher( - R"((?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)"); - pcre_input pi("192.168.1.1"); - - assert(ipmatcher.match(context, pi)); - assert(context.all()->c_begin == 0); - } - - { - pcrepp ipmatcher(R"((DEFINE))"); - - assert(ipmatcher.get_capture_count() == 1); - } - - { - pcrepp nomatch("nothing-to-match"); - pcre_input pi("dummy"); - - assert(!nomatch.match(context, pi)); - } - - { - pcrepp match1("(\\w*)=(\\d+)"); - pcre_input pi("a=1 b=2"); - pcre_context::capture_t* cap; - - assert(match1.match(context, pi)); - - cap = context.all(); - assert(cap->c_begin == 0); - assert(cap->c_end == 3); - - assert((context.end() - context.begin()) == 2); - assert(pi.get_substr(context.begin()) == "a"); - assert(pi.get_substr(context.begin() + 1) == "1"); - assert(pi.get_substr(context[1]) == "1"); - - assert(match1.match(context, pi)); - assert((context.end() - context.begin()) == 2); - assert(pi.get_substr(context.begin()) == "b"); - assert(pi.get_substr(context.begin() + 1) == "2"); - } - - { - pcrepp match2(""); - } - - { - pcrepp match3("(?\\d+)(?\\w+)"); - pcre_named_capture::iterator iter; - const char* expected_names[] = { - "var1", - "var2", - }; - int index = 0; - - for (iter = match3.named_begin(); iter != match3.named_end(); - ++iter, index++) - { - assert(strcmp(iter->pnc_name, expected_names[index]) == 0); - } - - assert(match3.name_index("var2") == 1); - - pcre_input pi("123foo"); - - match3.match(context, pi); - assert(pi.get_substr(context["var1"]) == "123"); - } - - { - pcre_context::capture_t cap(1, 4); - pcre_input pi("\0foo", 0, 4); - - assert("foo" == pi.get_substr(&cap)); - } - - const char* empty_cap_regexes[] = { - "foo (?:bar)", - "foo [(]", - "foo \\Q(bar)\\E", - "(?i)", - - nullptr, - }; - - for 
(int lpc = 0; empty_cap_regexes[lpc]; lpc++) { - pcrepp re(empty_cap_regexes[lpc]); - - assert(re.captures().empty()); - } - - { - pcrepp re("foo (bar (?:baz)?)"); - - assert(re.captures().size() == 1); - assert(re.captures()[0].c_begin == 4); - assert(re.captures()[0].c_end == 18); - assert(re.captures()[0].length() == 14); - } - - { - pcrepp re("(a)(b)(c)"); - - assert(re.captures().size() == 3); - assert(re.captures()[0].c_begin == 0); - assert(re.captures()[0].c_end == 3); - assert(re.captures()[1].c_begin == 3); - assert(re.captures()[1].c_end == 6); - assert(re.captures()[2].c_begin == 6); - assert(re.captures()[2].c_end == 9); - } - - { - pcrepp re("\\(a\\)(b)"); - - assert(re.captures().size() == 1); - assert(re.captures()[0].c_begin == 5); - assert(re.captures()[0].c_end == 8); - } - - { - pcrepp re("(?b)"); - - assert(re.captures().size() == 1); - assert(re.captures()[0].c_begin == 0); - assert(re.captures()[0].c_end == 11); - } - - { - pcrepp re("(?Pb)"); - - assert(re.captures().size() == 1); - assert(re.captures()[0].c_begin == 0); - assert(re.captures()[0].c_end == 12); - } - - { - pcrepp re("(?'named'b)"); - - assert(re.captures().size() == 1); - assert(re.captures()[0].c_begin == 0); - assert(re.captures()[0].c_end == 11); - } - - return retval; -} diff --git a/src/pretty_printer.cc b/src/pretty_printer.cc index 30083f6f..2ff3e11f 100644 --- a/src/pretty_printer.cc +++ b/src/pretty_printer.cc @@ -35,25 +35,26 @@ void pretty_printer::append_to(attr_line_t& al) { - auto& pi = this->pp_scanner->get_input(); - pcre_context_static<30> pc; - data_token_t dt; - - this->pp_scanner->reset(); - if (pi.pi_offset > 0) { - pcre_context::capture_t leading_cap = { + if (this->pp_scanner->get_init_offset() > 0) { + data_scanner::capture_t leading_cap = { 0, - static_cast(pi.pi_offset), + this->pp_scanner->get_init_offset(), }; // this->pp_stream << pi.get_substr(&leading_cap); this->pp_values.emplace_back(DT_WORD, leading_cap); } - while 
(this->pp_scanner->tokenize2(pc, dt)) { - element el(dt, pc); + this->pp_scanner->reset(); + while (true) { + auto tok_res = this->pp_scanner->tokenize2(); + if (!tok_res) { + break; + } + + element el(tok_res->tr_token, tok_res->tr_capture); - switch (dt) { + switch (el.e_token) { case DT_XML_DECL_TAG: case DT_XML_EMPTY_TAG: if (this->pp_is_xml && this->pp_line_length > 0) { @@ -71,7 +72,7 @@ pretty_printer::append_to(attr_line_t& al) this->pp_interval_state.back().is_start = this->pp_stream.tellp(); this->pp_interval_state.back().is_name - = pi.get_substr(&el.e_capture); + = tok_res->to_string(); this->descend(); } else { this->pp_values.emplace_back(el); @@ -118,7 +119,8 @@ pretty_printer::append_to(attr_line_t& al) break; case DT_WHITE: if (this->pp_values.empty() && this->pp_depth == 0 - && this->pp_line_length == 0) { + && this->pp_line_length == 0) + { this->pp_leading_indent = el.e_capture.length(); continue; } @@ -181,16 +183,17 @@ pretty_printer::write_element(const pretty_printer::element& el) } return; } - auto& pi = this->pp_scanner->get_input(); if (this->pp_line_length == 0) { this->append_indent(); } ssize_t start_size = this->pp_stream.tellp(); if (el.e_token == DT_QUOTED_STRING) { auto_mem unquoted_str((char*) malloc(el.e_capture.length() + 1)); - const char* start = pi.get_substr_start(&el.e_capture); - unquote(unquoted_str.in(), start, el.e_capture.length()); - data_scanner ds(unquoted_str.in()); + const char* start + = this->pp_scanner->to_string_fragment(el.e_capture).data(); + auto unq_len = unquote(unquoted_str.in(), start, el.e_capture.length()); + data_scanner ds( + string_fragment::from_bytes(unquoted_str.in(), unq_len)); string_attrs_t sa; pretty_printer str_pp( &ds, sa, this->pp_leading_indent + this->pp_depth * 4); @@ -214,10 +217,11 @@ pretty_printer::write_element(const pretty_printer::element& el) this->pp_stream << start[el.e_capture.length() - 1] << start[el.e_capture.length() - 1]; } else { - this->pp_stream << 
pi.get_substr(&el.e_capture); + this->pp_stream + << this->pp_scanner->to_string_fragment(el.e_capture); } } else { - this->pp_stream << pi.get_substr(&el.e_capture); + this->pp_stream << this->pp_scanner->to_string_fragment(el.e_capture); int shift_amount = start_size - el.e_capture.c_begin - this->pp_shift_accum; shift_string_attrs(this->pp_attrs, el.e_capture.c_begin, shift_amount); @@ -247,8 +251,7 @@ pretty_printer::append_indent() bool pretty_printer::flush_values(bool start_on_depth) { - nonstd::optional last_key; - auto& pi = this->pp_scanner->get_input(); + nonstd::optional last_key; bool retval = false; while (!this->pp_values.empty()) { @@ -266,7 +269,9 @@ pretty_printer::flush_values(bool start_on_depth) case DT_EQUALS: if (last_key) { this->pp_interval_state.back().is_name - = pi.get_substr(&last_key.value()); + = this->pp_scanner + ->to_string_fragment(last_key.value()) + .to_string(); if (!this->pp_interval_state.back().is_name.empty()) { this->pp_interval_state.back().is_start = static_cast(this->pp_stream.tellp()); @@ -278,7 +283,8 @@ pretty_printer::flush_values(bool start_on_depth) break; } if (start_on_depth - && (el.e_token == DT_LSQUARE || el.e_token == DT_LCURLY)) { + && (el.e_token == DT_LSQUARE || el.e_token == DT_LCURLY)) + { if (this->pp_line_length > 0) { this->pp_stream << std::endl; } diff --git a/src/pretty_printer.hh b/src/pretty_printer.hh index 85d46624..f10b9464 100644 --- a/src/pretty_printer.hh +++ b/src/pretty_printer.hh @@ -48,18 +48,13 @@ class pretty_printer { public: struct element { - element(data_token_t token, pcre_context& pc) - : e_token(token), e_capture(*pc.all()) - { - } - - element(data_token_t token, pcre_context::capture_t& cap) + element(data_token_t token, data_scanner::capture_t& cap) : e_token(token), e_capture(cap) { } data_token_t e_token; - pcre_context::capture_t e_capture; + data_scanner::capture_t e_capture; }; pretty_printer(data_scanner* ds, string_attrs_t sa, int leading_indent = 0) @@ -67,13 +62,15 
@@ public: pp_attrs(std::move(sa)) { this->pp_body_lines.push(0); - - pcre_context_static<30> pc; - data_token_t dt; - this->pp_scanner->reset(); - while (this->pp_scanner->tokenize2(pc, dt)) { - if (dt == DT_XML_CLOSE_TAG || dt == DT_XML_DECL_TAG) { + while (true) { + auto tok_res = this->pp_scanner->tokenize2(); + if (!tok_res) { + break; + } + if (tok_res->tr_token == DT_XML_CLOSE_TAG + || tok_res->tr_token == DT_XML_DECL_TAG) + { pp_is_xml = true; break; } diff --git a/src/readline_highlighters.cc b/src/readline_highlighters.cc index d75d34b5..58c537c5 100644 --- a/src/readline_highlighters.cc +++ b/src/readline_highlighters.cc @@ -35,7 +35,7 @@ #include "base/snippet_highlighters.hh" #include "base/string_util.hh" #include "config.h" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "shlex.hh" #include "sql_help.hh" #include "sql_util.hh" @@ -145,19 +145,23 @@ readline_regex_highlighter(attr_line_t& al, int x) void readline_command_highlighter_int(attr_line_t& al, int x, line_range sub) { - static const pcrepp RE_PREFIXES( + static const auto RE_PREFIXES = lnav::pcre2pp::code::from_const( R"(^:(filter-in|filter-out|delete-filter|enable-filter|disable-filter|highlight|clear-highlight|create-search-table\s+[^\s]+\s+))"); - static const pcrepp SH_PREFIXES( + static const auto SH_PREFIXES = lnav::pcre2pp::code::from_const( "^:(eval|open|append-to|write-to|write-csv-to|write-json-to)"); - static const pcrepp SQL_PREFIXES("^:(filter-expr|mark-expr)"); - static const pcrepp IDENT_PREFIXES("^:(tag|untag|delete-tags)"); - static const pcrepp COLOR_PREFIXES("^:(config)"); - static const pcrepp COLOR_RE("(#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))"); + static const auto SQL_PREFIXES + = lnav::pcre2pp::code::from_const("^:(filter-expr|mark-expr)"); + static const auto IDENT_PREFIXES + = lnav::pcre2pp::code::from_const("^:(tag|untag|delete-tags)"); + static const auto COLOR_PREFIXES + = lnav::pcre2pp::code::from_const("^:(config)"); + static const auto COLOR_RE 
= lnav::pcre2pp::code::from_const( + "(#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))"); attr_line_builder alb(al); const auto& line = al.get_string(); - pcre_context_static<30> pc; - pcre_input pi(&line[sub.lr_start], 0, sub.length()); + auto in_frag + = string_fragment::from_str_range(line, sub.lr_start, sub.lr_end); size_t ws_index; ws_index = line.find(' ', sub.lr_start); @@ -166,43 +170,37 @@ readline_command_highlighter_int(attr_line_t& al, int x, line_range sub) alb.overlay_attr(line_range(sub.lr_start + 1, ws_index), VC_ROLE.value(role_t::VCR_KEYWORD)); - if (RE_PREFIXES.match(pc, pi)) { + if (RE_PREFIXES.find_in(in_frag).ignore_error()) { lnav::snippets::regex_highlighter( al, x, line_range{(int) ws_index, sub.lr_end}); } - pi.reset(&line[sub.lr_start], 0, sub.length()); - if (SH_PREFIXES.match(pc, pi)) { + if (SH_PREFIXES.find_in(in_frag).ignore_error()) { readline_shlex_highlighter_int( al, x, line_range{(int) ws_index, sub.lr_end}); } - pi.reset(&line[sub.lr_start], 0, sub.length()); - if (SQL_PREFIXES.match(pc, pi)) { + if (SQL_PREFIXES.find_in(in_frag).ignore_error()) { readline_sqlite_highlighter_int( al, x, line_range{(int) ws_index, sub.lr_end}); } } - pi.reset(&line[sub.lr_start], 0, sub.length()); - if (COLOR_PREFIXES.match(pc, pi)) { - pi.reset(&line[sub.lr_start], 0, sub.length()); - if (COLOR_RE.match(pc, pi)) { - auto* cap = pc[0]; - auto hash_color = pi.get_substr(cap); - - styling::color_unit::from_str(hash_color) - .then([&](const auto& rgb_fg) { - auto color = view_colors::singleton().match_color(rgb_fg); - alb.template overlay_attr( - line_range{sub.lr_start + cap->c_begin, - sub.lr_start + cap->c_begin + 1}, - VC_STYLE.value(text_attrs{ - A_BOLD, - color, - })); - }); - } + if (COLOR_PREFIXES.find_in(in_frag).ignore_error()) { + COLOR_RE.capture_from(in_frag).for_each( + [&alb](lnav::pcre2pp::match_data& md) { + styling::color_unit::from_str(md[0].value()) + .then([&](const auto& rgb_fg) { + auto color + = 
view_colors::singleton().match_color(rgb_fg); + alb.template overlay_attr(to_line_range(md[0].value()), + VC_STYLE.value(text_attrs{ + A_BOLD, + color, + })); + }); + }); } - pi.reset(&line[sub.lr_start], 0, sub.length()); - if (IDENT_PREFIXES.match(pc, pi) && ws_index != std::string::npos) { + if (IDENT_PREFIXES.find_in(in_frag).ignore_error() + && ws_index != std::string::npos) + { size_t start = ws_index, last; do { @@ -308,7 +306,7 @@ readline_shlex_highlighter_int(attr_line_t& al, int x, line_range sub) { attr_line_builder alb(al); const auto& str = al.get_string(); - pcre_context::capture_t cap; + string_fragment cap; shlex_token_t token; nonstd::optional quote_start; shlex lexer(string_fragment{al.al_string.data(), sub.lr_start, sub.lr_end}); @@ -316,49 +314,50 @@ readline_shlex_highlighter_int(attr_line_t& al, int x, line_range sub) while (lexer.tokenize(cap, token)) { switch (token) { case shlex_token_t::ST_ERROR: - alb.overlay_attr(line_range(sub.lr_start + cap.c_begin, - sub.lr_start + cap.c_end), + alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin, + sub.lr_start + cap.sf_end), VC_STYLE.value(text_attrs{A_REVERSE})); - alb.overlay_attr(line_range(sub.lr_start + cap.c_begin, - sub.lr_start + cap.c_end), + alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin, + sub.lr_start + cap.sf_end), VC_ROLE.value(role_t::VCR_ERROR)); break; case shlex_token_t::ST_TILDE: case shlex_token_t::ST_ESCAPE: - alb.overlay_attr(line_range(sub.lr_start + cap.c_begin, - sub.lr_start + cap.c_end), + alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin, + sub.lr_start + cap.sf_end), VC_ROLE.value(role_t::VCR_SYMBOL)); break; case shlex_token_t::ST_DOUBLE_QUOTE_START: case shlex_token_t::ST_SINGLE_QUOTE_START: - quote_start = sub.lr_start + cap.c_begin; + quote_start = sub.lr_start + cap.sf_begin; break; case shlex_token_t::ST_DOUBLE_QUOTE_END: case shlex_token_t::ST_SINGLE_QUOTE_END: alb.overlay_attr( - line_range(quote_start.value(), sub.lr_start + cap.c_end), + 
line_range(quote_start.value(), sub.lr_start + cap.sf_end), VC_ROLE.value(role_t::VCR_STRING)); quote_start = nonstd::nullopt; break; case shlex_token_t::ST_VARIABLE_REF: case shlex_token_t::ST_QUOTED_VARIABLE_REF: { int extra = token == shlex_token_t::ST_VARIABLE_REF ? 0 : 1; - auto ident = str.substr(sub.lr_start + cap.c_begin + 1 + extra, + auto ident = str.substr(sub.lr_start + cap.sf_begin + 1 + extra, cap.length() - 1 - extra * 2); alb.overlay_attr( - line_range(sub.lr_start + cap.c_begin, - sub.lr_start + cap.c_begin + 1 + extra), + line_range(sub.lr_start + cap.sf_begin, + sub.lr_start + cap.sf_begin + 1 + extra), VC_ROLE.value(role_t::VCR_SYMBOL)); alb.overlay_attr( - line_range(sub.lr_start + cap.c_begin + 1 + extra, - sub.lr_start + cap.c_end - extra), - VC_ROLE.value(x == sub.lr_start + cap.c_end - || cap.contains(x) - ? role_t::VCR_SYMBOL - : role_t::VCR_IDENTIFIER)); + line_range(sub.lr_start + cap.sf_begin + 1 + extra, + sub.lr_start + cap.sf_end - extra), + VC_ROLE.value( + x == sub.lr_start + cap.sf_end + || (cap.sf_begin <= x && x < cap.sf_end) + ? 
role_t::VCR_SYMBOL + : role_t::VCR_IDENTIFIER)); if (extra) { alb.overlay_attr_for_char( - sub.lr_start + cap.c_end - 1, + sub.lr_start + cap.sf_end - 1, VC_ROLE.value(role_t::VCR_SYMBOL)); } break; @@ -412,7 +411,7 @@ readline_lnav_highlighter_int(attr_line_t& al, int x, line_range sub) void readline_lnav_highlighter(attr_line_t& al, int x) { - static const pcrepp COMMENT_RE{R"(^\s*#)"}; + static const auto COMMENT_RE = lnav::pcre2pp::code::from_const(R"(^\s*#)"); attr_line_builder alb(al); size_t start = 0, lf_pos; @@ -426,10 +425,11 @@ readline_lnav_highlighter(attr_line_t& al, int x) continue; } - pcre_input pi(&al.al_string[line.lr_start], 0, line.length()); - pcre_context_static<30> pc; + auto line_frag = string_fragment::from_str_range( + al.al_string, line.lr_start, line.lr_end); - if (COMMENT_RE.match(pc, pi)) { + auto find_res = COMMENT_RE.find_in(line_frag).ignore_error(); + if (find_res.has_value()) { if (section_start) { readline_lnav_highlighter_int(al, x, @@ -439,10 +439,8 @@ readline_lnav_highlighter(attr_line_t& al, int x) }); section_start = nonstd::nullopt; } - const auto* cap = pc.all(); - alb.overlay_attr( - line_range{line.lr_start + cap->c_begin, (int) lf_pos}, - VC_ROLE.value(role_t::VCR_COMMENT)); + alb.overlay_attr(line_range{find_res->f_all.sf_begin, line.lr_end}, + VC_ROLE.value(role_t::VCR_COMMENT)); } else { switch (al.al_string[line.lr_start]) { case ':': diff --git a/src/readline_possibilities.cc b/src/readline_possibilities.cc index d67b18db..68cab7de 100644 --- a/src/readline_possibilities.cc +++ b/src/readline_possibilities.cc @@ -128,16 +128,19 @@ add_text_possibilities(readline_curses* rlc, static const std::regex re_escape(R"(([.\^$*+?()\[\]{}\\|]))"); static const std::regex re_escape_no_dot(R"(([\^$*+?()\[\]{}\\|]))"); - pcre_context_static<30> pc; data_scanner ds(str); - data_token_t dt; - while (ds.tokenize2(pc, dt)) { - if (pc[0]->length() < 4) { + while (true) { + auto tok_res = ds.tokenize2(); + + if (!tok_res) { + 
break; + } + if (tok_res->tr_capture.length() < 4) { continue; } - switch (dt) { + switch (tok_res->tr_token) { case DT_DATE: case DT_TIME: case DT_WHITE: @@ -148,7 +151,7 @@ add_text_possibilities(readline_curses* rlc, switch (tq) { case text_quoting::sql: { - auto token_value = ds.get_input().get_substr(pc.all()); + auto token_value = tok_res->to_string(); auto_mem quoted_token; quoted_token = sqlite3_mprintf("%Q", token_value.c_str()); @@ -156,12 +159,9 @@ add_text_possibilities(readline_curses* rlc, break; } default: { - std::string token_value, token_value_no_dot; - - token_value_no_dot = token_value - = ds.get_input().get_substr(pc.all()); - token_value - = std::regex_replace(token_value, re_escape, R"(\\\1)"); + auto token_value_no_dot = tok_res->to_string(); + auto token_value = std::regex_replace( + token_value_no_dot, re_escape, R"(\\\1)"); token_value_no_dot = std::regex_replace( token_value_no_dot, re_escape_no_dot, R"(\\\1)"); rlc->add_possibility(context, type, token_value); @@ -172,10 +172,15 @@ add_text_possibilities(readline_curses* rlc, } } - switch (dt) { + switch (tok_res->tr_token) { case DT_QUOTED_STRING: add_text_possibilities( - rlc, context, type, ds.get_input().get_substr(pc[0]), tq); + rlc, + context, + type, + ds.to_string_fragment(tok_res->tr_inner_capture) + .to_string(), + tq); break; default: break; @@ -416,22 +421,21 @@ add_config_possibilities() const std::string& path, void* mem) { if (jph.jph_children) { - if (!jph.jph_regex->p_named_count) { + const auto named_caps = jph.jph_regex->get_named_captures(); + + if (named_caps.empty()) { rc->add_possibility(ln_mode_t::COMMAND, "config-option", path); } - for (auto named_iter = jph.jph_regex->named_begin(); - named_iter != jph.jph_regex->named_end(); - ++named_iter) - { - if (visited.count(named_iter->pnc_name) == 0) { + for (const auto named_cap : named_caps) { + if (visited.count(named_cap.get_name().to_string()) == 0) { rc->clear_possibilities(ln_mode_t::COMMAND, - 
named_iter->pnc_name); - visited.insert(named_iter->pnc_name); + named_cap.get_name().to_string()); + visited.insert(named_cap.get_name().to_string()); } ghc::filesystem::path path_obj(path); rc->add_possibility(ln_mode_t::COMMAND, - named_iter->pnc_name, + named_cap.get_name().to_string(), path_obj.parent_path().filename().string()); } } else { diff --git a/src/regex101.import.cc b/src/regex101.import.cc index 0ed54d2e..d0f7d345 100644 --- a/src/regex101.import.cc +++ b/src/regex101.import.cc @@ -35,7 +35,7 @@ #include "lnav_config.hh" #include "log_format.hh" #include "log_format_ext.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "regex101.client.hh" #include "session_data.hh" #include "yajlpp/yajlpp.hh" @@ -47,8 +47,10 @@ regex101::import(const std::string& url, const std::string& name, const std::string& pat_name) { - static const pcrepp USER_URL{R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)"}; - static const pcrepp NAME_RE{R"(^\w+$)"}; + static const auto USER_URL = lnav::pcre2pp::code::from_const( + R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)"); + static thread_local auto URL_MATCH_DATA = USER_URL.create_match_data(); + static const auto NAME_RE = lnav::pcre2pp::code::from_const(R"(^\w+$)"); if (url.empty()) { return Err(lnav::console::user_message::error( @@ -76,11 +78,9 @@ regex101::import(const std::string& url, } } - pcre_context_static<30> pc_name; - pcre_input pi_name{name}; - - if (!NAME_RE.match(pc_name, pi_name)) { - auto partial_len = NAME_RE.match_partial(pi_name); + auto name_find_res = NAME_RE.find_in(name).ignore_error(); + if (!name_find_res) { + auto partial_len = NAME_RE.match_partial(name); return Err( lnav::console::user_message::error( attr_line_t("unable to import: ") @@ -95,11 +95,12 @@ regex101::import(const std::string& url, .append("^ matched up to here"_comment))); } - pcre_context_static<30> pc; - pcre_input pi{url}; - - if (!USER_URL.match(pc, pi)) { - auto partial_len = USER_URL.match_partial(pi); + auto 
user_find_res = USER_URL.capture_from(url) + .into(URL_MATCH_DATA) + .matches() + .ignore_error(); + if (!user_find_res) { + auto partial_len = USER_URL.match_partial(url); return Err(lnav::console::user_message::error( attr_line_t("unrecognized regex101.com URL: ") .append(lnav::roles::file(url))) @@ -112,7 +113,7 @@ regex101::import(const std::string& url, .append("^ matched up to here"_comment))); } - auto permalink = pi.get_substr(pc[0]); + auto permalink = URL_MATCH_DATA[1]->to_string(); auto format_filename = existing_format ? fmt::format(FMT_STRING("{}.regex101-{}.json"), name, permalink) @@ -155,7 +156,7 @@ regex101::import(const std::string& url, .append(" flavor of regexes are supported"))); } - auto regex_res = pcrepp::from_str(entry.e_regex); + auto regex_res = lnav::pcre2pp::code::from(entry.e_regex); if (regex_res.isErr()) { auto parse_error = regex_res.unwrapErr(); return Err(lnav::console::user_message::error( @@ -163,7 +164,7 @@ regex101::import(const std::string& url, .append_quoted(lnav::roles::symbol(entry.e_regex)) .append(" from ") .append_quoted(lnav::roles::symbol(url))) - .with_reason(parse_error.ce_msg) + .with_reason(parse_error.get_message()) .with_help("fix the regex and try the import again")); } @@ -205,16 +206,13 @@ regex101::import(const std::string& url, { yajlpp_map value_map(gen); - for (auto named_iter = regex.named_begin(); - named_iter != regex.named_end(); - ++named_iter) - { - if (strcmp(named_iter->pnc_name, "body") == 0) { + for (auto named_cap : regex.get_named_captures()) { + if (named_cap.get_name() == "body") { // don't need to add this as a value continue; } - value_map.gen(named_iter->pnc_name); + value_map.gen(named_cap.get_name()); { yajlpp_map cap_map(gen); @@ -374,7 +372,7 @@ regex101::convert_format_pattern( { regex101::client::entry en; - en.e_regex = pattern->p_pcre->get_pattern(); + en.e_regex = pattern->p_pcre.value->get_pattern(); for (const auto& sample : format->elf_samples) { if 
(en.e_test_string.empty()) { en.e_test_string = sample.s_line.pp_value; diff --git a/src/regexp_vtab.cc b/src/regexp_vtab.cc index d0ab1230..457514a5 100644 --- a/src/regexp_vtab.cc +++ b/src/regexp_vtab.cc @@ -35,7 +35,7 @@ #include "column_namer.hh" #include "config.h" #include "lnav_util.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "scn/scn.h" #include "sql_help.hh" #include "sql_util.hh" @@ -75,31 +75,38 @@ CREATE TABLE regexp_capture ( struct cursor { sqlite3_vtab_cursor base; - pcrepp c_pattern; - pcre_context_static<30> c_context; - std::unique_ptr c_input; + std::shared_ptr c_pattern; + lnav::pcre2pp::match_data c_match_data{ + lnav::pcre2pp::match_data::unitialized()}; std::string c_content; + string_fragment c_remaining; bool c_content_as_blob{false}; int c_index{0}; bool c_matched{false}; int c_match_index{0}; sqlite3_int64 c_rowid{0}; - cursor(sqlite3_vtab* vt) : base({vt}) { this->c_context.set_count(0); } + cursor(sqlite3_vtab* vt) : base({vt}) {} int reset() { return SQLITE_OK; } int next() { - if (this->c_index >= (this->c_context.get_count() - 1)) { - this->c_input->pi_offset = this->c_input->pi_next_offset; - this->c_matched = this->c_pattern.match( - this->c_context, *(this->c_input), PCRE_NO_UTF8_CHECK); + if (this->c_index >= (this->c_match_data.get_count() - 1)) { + auto match_res = this->c_pattern->capture_from(this->c_content) + .at(this->c_remaining) + .into(this->c_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + this->c_remaining = match_res->f_remaining; + } + this->c_matched = match_res.has_value(); this->c_index = -1; this->c_match_index += 1; } - if (this->c_pattern.empty() || !this->c_matched) { + if (this->c_pattern == nullptr || !this->c_matched) { return SQLITE_OK; } @@ -108,7 +115,7 @@ CREATE TABLE regexp_capture ( return SQLITE_OK; } - int eof() { return this->c_pattern.empty() || !this->c_matched; } + int eof() { return this->c_pattern == nullptr || !this->c_matched; 
} int get_rowid(sqlite3_int64& rowid_out) { @@ -120,7 +127,7 @@ CREATE TABLE regexp_capture ( int get_column(const cursor& vc, sqlite3_context* ctx, int col) { - auto& cap = vc.c_context.all()[vc.c_index]; + const auto cap = vc.c_match_data[vc.c_index]; switch (col) { case RC_COL_MATCH_INDEX: @@ -133,28 +140,30 @@ CREATE TABLE regexp_capture ( if (vc.c_index == 0) { sqlite3_result_null(ctx); } else { - sqlite3_result_text( - ctx, - vc.c_pattern.name_for_capture(vc.c_index - 1), - -1, - SQLITE_TRANSIENT); + to_sqlite(ctx, + vc.c_pattern->get_name_for_capture(vc.c_index)); } break; case RC_COL_CAPTURE_COUNT: - sqlite3_result_int64(ctx, vc.c_context.get_count()); + sqlite3_result_int64(ctx, vc.c_match_data.get_count()); break; case RC_COL_RANGE_START: - sqlite3_result_int64(ctx, cap.c_begin + 1); + if (cap.has_value()) { + sqlite3_result_int64(ctx, cap->sf_begin + 1); + } else { + sqlite3_result_int64(ctx, 0); + } break; case RC_COL_RANGE_STOP: - sqlite3_result_int64(ctx, cap.c_end + 1); + if (cap.has_value()) { + sqlite3_result_int64(ctx, cap->sf_end + 1); + } else { + sqlite3_result_int64(ctx, 0); + } break; case RC_COL_CONTENT: - if (cap.is_valid()) { - sqlite3_result_text(ctx, - vc.c_input->get_substr_start(&cap), - cap.length(), - SQLITE_TRANSIENT); + if (cap.has_value()) { + to_sqlite(ctx, cap.value()); } else { sqlite3_result_null(ctx); } @@ -173,10 +182,7 @@ CREATE TABLE regexp_capture ( } break; case RC_COL_PATTERN: { - auto str = vc.c_pattern.get_pattern(); - - sqlite3_result_text( - ctx, str.c_str(), str.length(), SQLITE_TRANSIENT); + to_sqlite(ctx, vc.c_pattern->get_pattern()); break; } } @@ -219,7 +225,7 @@ rcFilter(sqlite3_vtab_cursor* pVtabCursor, if (argc != 2) { pCur->c_content.clear(); - pCur->c_pattern.clear(); + pCur->c_pattern.reset(); return SQLITE_OK; } @@ -229,22 +235,29 @@ rcFilter(sqlite3_vtab_cursor* pVtabCursor, pCur->c_content_as_blob = (sqlite3_value_type(argv[0]) == SQLITE_BLOB); pCur->c_content.assign(blob, byte_count); - const char* 
pattern = (const char*) sqlite3_value_text(argv[1]); - auto re_res = pcrepp::from_str(pattern); - if (re_res.isErr()) { - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "Invalid regular expression: %s", re_res.unwrapErr().ce_msg); + auto pattern = from_sqlite()(argc, argv, 1); + auto compile_res = lnav::pcre2pp::code::from(pattern); + if (compile_res.isErr()) { + pVtabCursor->pVtab->zErrMsg + = sqlite3_mprintf("Invalid regular expression: %s", + compile_res.unwrapErr().get_message().c_str()); return SQLITE_ERROR; } - pCur->c_pattern = re_res.unwrap(); + pCur->c_pattern = compile_res.unwrap().to_shared(); pCur->c_index = 0; - pCur->c_context.set_count(0); - - pCur->c_input = std::make_unique(pCur->c_content); - pCur->c_matched = pCur->c_pattern.match( - pCur->c_context, *(pCur->c_input), PCRE_NO_UTF8_CHECK); + pCur->c_match_data = pCur->c_pattern->create_match_data(); + + pCur->c_remaining.clear(); + auto match_res = pCur->c_pattern->capture_from(pCur->c_content) + .into(pCur->c_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + pCur->c_remaining = match_res->f_remaining; + } + pCur->c_matched = match_res.has_value(); pCur->c_match_index = 0; return SQLITE_OK; @@ -286,11 +299,12 @@ CREATE TABLE regexp_capture_into_json ( struct cursor { sqlite3_vtab_cursor base; - pcrepp c_pattern; - pcre_context_static<30> c_context; - std::unique_ptr c_input; + std::shared_ptr c_pattern; + lnav::pcre2pp::match_data c_match_data{ + lnav::pcre2pp::match_data::unitialized()}; std::unique_ptr c_namer; std::string c_content; + string_fragment c_remaining; bool c_content_as_blob{false}; bool c_matched{false}; size_t c_match_index{0}; @@ -298,25 +312,31 @@ CREATE TABLE regexp_capture_into_json ( std::string c_flags_string; nonstd::optional c_flags; - cursor(sqlite3_vtab* vt) : base({vt}) { this->c_context.set_count(0); } + cursor(sqlite3_vtab* vt) : base({vt}) {} int reset() { return SQLITE_OK; } int next() { - this->c_input->pi_offset = 
this->c_input->pi_next_offset; - this->c_matched = this->c_pattern.match( - this->c_context, *(this->c_input), PCRE_NO_UTF8_CHECK); + auto match_res = this->c_pattern->capture_from(this->c_content) + .at(this->c_remaining) + .into(this->c_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + this->c_remaining = match_res->f_remaining; + } + this->c_matched = match_res.has_value(); this->c_match_index += 1; - if (this->c_pattern.empty() || !this->c_matched) { + if (this->c_pattern == nullptr || !this->c_matched) { return SQLITE_OK; } return SQLITE_OK; } - int eof() { return this->c_pattern.empty() || !this->c_matched; } + int eof() { return this->c_pattern == nullptr || !this->c_matched; } int get_rowid(sqlite3_int64& rowid_out) { @@ -339,18 +359,19 @@ CREATE TABLE regexp_capture_into_json ( { yajlpp_map root_map(gen); - for (int lpc = 0; lpc < vc.c_pattern.get_capture_count(); - lpc++) + for (int lpc = 1; lpc < vc.c_match_data.get_count(); lpc++) { const auto& colname = vc.c_namer->cn_names[lpc]; - const auto* cap = vc.c_context[lpc]; + const auto cap = vc.c_match_data[lpc]; + + if (!cap) { + continue; + } yajl_gen_pstring(gen, colname.data(), colname.length()); - if (!cap->is_valid()) { - yajl_gen_null(gen); - } else if (!vc.c_flags || vc.c_flags->convert_numbers) { - auto cap_view = vc.c_input->to_string_view(cap); + if (!vc.c_flags || vc.c_flags->convert_numbers) { + auto cap_view = cap->to_string_view(); auto scan_int_res = scn::scan_value(cap_view); @@ -372,9 +393,7 @@ CREATE TABLE regexp_capture_into_json ( yajl_gen_pstring( gen, cap_view.data(), cap_view.length()); } else { - yajl_gen_pstring(gen, - vc.c_input->get_substr_start(cap), - cap->length()); + yajl_gen_pstring(gen, cap->data(), cap->length()); } } } @@ -399,10 +418,7 @@ CREATE TABLE regexp_capture_into_json ( } break; case RCJ_COL_PATTERN: { - auto str = vc.c_pattern.get_pattern(); - - sqlite3_result_text( - ctx, str.c_str(), str.length(), SQLITE_TRANSIENT); + 
to_sqlite(ctx, vc.c_pattern->get_pattern()); break; } case RCJ_COL_FLAGS: { @@ -454,7 +470,7 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor, if (argc < 2 || argc > 3) { pCur->c_content.clear(); - pCur->c_pattern.clear(); + pCur->c_pattern.reset(); pCur->c_flags_string.clear(); pCur->c_flags = nonstd::nullopt; return SQLITE_OK; @@ -466,11 +482,12 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor, pCur->c_content_as_blob = (sqlite3_value_type(argv[0]) == SQLITE_BLOB); pCur->c_content.assign(blob, byte_count); - const char* pattern = (const char*) sqlite3_value_text(argv[1]); - auto re_res = pcrepp::from_str(pattern); - if (re_res.isErr()) { - pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf( - "Invalid regular expression: %s", re_res.unwrapErr().ce_msg); + auto pattern = from_sqlite()(argc, argv, 1); + auto compile_res = lnav::pcre2pp::code::from(pattern); + if (compile_res.isErr()) { + pVtabCursor->pVtab->zErrMsg + = sqlite3_mprintf("Invalid regular expression: %s", + compile_res.unwrapErr().get_message().c_str()); return SQLITE_ERROR; } @@ -500,19 +517,25 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor, } } - pCur->c_pattern = re_res.unwrap(); + pCur->c_pattern = compile_res.unwrap().to_shared(); pCur->c_namer = std::make_unique(column_namer::language::JSON); - for (int lpc = 0; lpc < pCur->c_pattern.get_capture_count(); lpc++) { - pCur->c_namer->add_column( - string_fragment{pCur->c_pattern.name_for_capture(lpc)}); + pCur->c_namer->add_column(string_fragment::from_const("__all__")); + for (int lpc = 1; lpc <= pCur->c_pattern->get_capture_count(); lpc++) { + pCur->c_namer->add_column(string_fragment::from_c_str( + pCur->c_pattern->get_name_for_capture(lpc))); } - pCur->c_context.set_count(0); - - pCur->c_input = std::make_unique(pCur->c_content); - pCur->c_matched = pCur->c_pattern.match( - pCur->c_context, *(pCur->c_input), PCRE_NO_UTF8_CHECK); + pCur->c_match_data = pCur->c_pattern->create_match_data(); + pCur->c_remaining.clear(); + auto match_res = 
pCur->c_pattern->capture_from(pCur->c_content) + .into(pCur->c_match_data) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + pCur->c_remaining = match_res->f_remaining; + } + pCur->c_matched = match_res.has_value(); pCur->c_match_index = 0; return SQLITE_OK; diff --git a/src/relative_time.cc b/src/relative_time.cc index 02cae28a..ef1d9e02 100644 --- a/src/relative_time.cc +++ b/src/relative_time.cc @@ -33,55 +33,172 @@ #include "base/time_util.hh" #include "config.h" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "scn/scn.h" using namespace std::chrono_literals; static const struct { const char* name; - pcrepp pcre; + lnav::pcre2pp::code pcre; } MATCHERS[relative_time::RTT__MAX] = { - {"ws", pcrepp("\\A\\s+\\b")}, - {"am", pcrepp("\\Aam|a\\.m\\.\\b")}, - {"pm", pcrepp("\\Apm|p\\.m\\.\\b")}, - {"a", pcrepp("\\Aa\\b")}, - {"an", pcrepp("\\Aan\\b")}, - {"at", pcrepp("\\Aat\\b")}, - {"time", pcrepp("\\A(\\d{1,2}):(\\d{2})(?::(\\d{2})(?:\\.(\\d{3,6}))?)?")}, - {"num", pcrepp("\\A((?:-|\\+)?\\d+)")}, - - {"sun", pcrepp("\\Asun(days?)?\\b")}, - {"mon", pcrepp("\\Amon(days?)?\\b")}, - {"tue", pcrepp("\\Atue(s(days?)?)?\\b")}, - {"wed", pcrepp("\\Awed(nesdays?)?\\b")}, - {"thu", pcrepp("\\Athu(rsdays?)?\\b")}, - {"fri", pcrepp("\\Afri(days?)?\\b")}, - {"sat", pcrepp("\\Asat(urdays?)?\\b")}, - - {"us", pcrepp("\\A(?:micros(?:econds?)?|us(?![a-zA-Z]))")}, - {"ms", pcrepp("\\A(?:millis(?:econds?)?|ms(?![a-zA-Z]))")}, - {"sec", pcrepp("\\As(?:ec(?:onds?)?)?(?![a-zA-Z])")}, - {"min", pcrepp("\\Am(?:in(?:utes?)?)?(?![a-zA-Z])")}, - {"h", pcrepp("\\Ah(?:ours?)?(?![a-zA-Z])")}, - {"day", pcrepp("\\Ad(?:ays?)?(?![a-zA-Z])")}, - {"week", pcrepp("\\Aw(?:eeks?)?(?![a-zA-Z])")}, - {"mon", pcrepp("\\Amon(?:ths?)?(?![a-zA-Z])")}, - {"year", pcrepp("\\Ay(?:ears?)?(?![a-zA-Z])")}, - {"today", pcrepp("\\Atoday\\b")}, - {"yest", pcrepp("\\Ayesterday\\b")}, - {"tomo", pcrepp("\\Atomorrow\\b")}, - {"noon", pcrepp("\\Anoon\\b")}, - {"and", 
pcrepp("\\Aand\\b")}, - {"the", pcrepp("\\Athe\\b")}, - {"ago", pcrepp("\\Aago\\b")}, - {"lter", pcrepp("\\Alater\\b")}, - {"bfor", pcrepp("\\Abefore\\b")}, - {"aft", pcrepp("\\Aafter\\b")}, - {"now", pcrepp("\\Anow\\b")}, - {"here", pcrepp("\\Ahere\\b")}, - {"next", pcrepp("\\Anext\\b")}, - {"previous", pcrepp("\\A(?:previous\\b|last\\b)")}, + { + "ws", + lnav::pcre2pp::code::from_const("\\A\\s+\\b"), + }, + { + "am", + lnav::pcre2pp::code::from_const("\\Aam|a\\.m\\.\\b"), + }, + { + "pm", + lnav::pcre2pp::code::from_const("\\Apm|p\\.m\\.\\b"), + }, + { + "a", + lnav::pcre2pp::code::from_const("\\Aa\\b"), + }, + { + "an", + lnav::pcre2pp::code::from_const("\\Aan\\b"), + }, + { + "at", + lnav::pcre2pp::code::from_const("\\Aat\\b"), + }, + { + "time", + lnav::pcre2pp::code::from_const( + "\\A(\\d{1,2}):(\\d{2})(?::(\\d{2})(?:\\.(\\d{3,6}))?)?"), + }, + { + "num", + lnav::pcre2pp::code::from_const("\\A((?:-|\\+)?\\d+)"), + }, + + { + "sun", + lnav::pcre2pp::code::from_const("\\Asun(days?)?\\b"), + }, + { + "mon", + lnav::pcre2pp::code::from_const("\\Amon(days?)?\\b"), + }, + { + "tue", + lnav::pcre2pp::code::from_const("\\Atue(s(days?)?)?\\b"), + }, + { + "wed", + lnav::pcre2pp::code::from_const("\\Awed(nesdays?)?\\b"), + }, + { + "thu", + lnav::pcre2pp::code::from_const("\\Athu(rsdays?)?\\b"), + }, + { + "fri", + lnav::pcre2pp::code::from_const("\\Afri(days?)?\\b"), + }, + { + "sat", + lnav::pcre2pp::code::from_const("\\Asat(urdays?)?\\b"), + }, + + { + "us", + lnav::pcre2pp::code::from_const( + "\\A(?:micros(?:econds?)?|us(?![a-zA-Z]))"), + }, + { + "ms", + lnav::pcre2pp::code::from_const( + "\\A(?:millis(?:econds?)?|ms(?![a-zA-Z]))"), + }, + { + "sec", + lnav::pcre2pp::code::from_const("\\As(?:ec(?:onds?)?)?(?![a-zA-Z])"), + }, + { + "min", + lnav::pcre2pp::code::from_const("\\Am(?:in(?:utes?)?)?(?![a-zA-Z])"), + }, + { + "h", + lnav::pcre2pp::code::from_const("\\Ah(?:ours?)?(?![a-zA-Z])"), + }, + { + "day", + 
lnav::pcre2pp::code::from_const("\\Ad(?:ays?)?(?![a-zA-Z])"), + }, + { + "week", + lnav::pcre2pp::code::from_const("\\Aw(?:eeks?)?(?![a-zA-Z])"), + }, + { + "mon", + lnav::pcre2pp::code::from_const("\\Amon(?:ths?)?(?![a-zA-Z])"), + }, + { + "year", + lnav::pcre2pp::code::from_const("\\Ay(?:ears?)?(?![a-zA-Z])"), + }, + { + "today", + lnav::pcre2pp::code::from_const("\\Atoday\\b"), + }, + { + "yest", + lnav::pcre2pp::code::from_const("\\Ayesterday\\b"), + }, + { + "tomo", + lnav::pcre2pp::code::from_const("\\Atomorrow\\b"), + }, + { + "noon", + lnav::pcre2pp::code::from_const("\\Anoon\\b"), + }, + { + "and", + lnav::pcre2pp::code::from_const("\\Aand\\b"), + }, + { + "the", + lnav::pcre2pp::code::from_const("\\Athe\\b"), + }, + { + "ago", + lnav::pcre2pp::code::from_const("\\Aago\\b"), + }, + { + "lter", + lnav::pcre2pp::code::from_const("\\Alater\\b"), + }, + { + "bfor", + lnav::pcre2pp::code::from_const("\\Abefore\\b"), + }, + { + "aft", + lnav::pcre2pp::code::from_const("\\Aafter\\b"), + }, + { + "now", + lnav::pcre2pp::code::from_const("\\Anow\\b"), + }, + { + "here", + lnav::pcre2pp::code::from_const("\\Ahere\\b"), + }, + { + "next", + lnav::pcre2pp::code::from_const("\\Anext\\b"), + }, + { + "previous", + lnav::pcre2pp::code::from_const("\\A(?:previous\\b|last\\b)"), + }, }; static int64_t TIME_SCALES[] = { @@ -102,10 +219,8 @@ const char relative_time::FIELD_CHARS[] = { }; Result -relative_time::from_str(const char* str, size_t len) +relative_time::from_str(string_fragment str) { - pcre_input pi(str, 0, len); - pcre_context_static<30> pc; int64_t number = 0; bool number_set = false, number_was_set = false; bool next_set = false; @@ -118,10 +233,11 @@ relative_time::from_str(const char* str, size_t len) pe_out.pe_column = 0; pe_out.pe_msg.clear(); + auto remaining = str; while (true) { rt_field_type curr_field_type = RTF__MAX; - if (pi.pi_next_offset >= pi.pi_length) { + if (remaining.empty()) { if (number_set) { if (number > 1970 && number < 2050) { 
retval.rt_field[RTF_YEARS] = number - 1900; @@ -179,11 +295,18 @@ relative_time::from_str(const char* str, size_t len) bool found = false; for (int lpc = 0; lpc < RTT__MAX && !found; lpc++) { token_t token = (token_t) lpc; - if (!MATCHERS[lpc].pcre.match(pc, pi, PCRE_ANCHORED)) { + auto md = MATCHERS[lpc].pcre.create_match_data(); + auto match_res = MATCHERS[lpc] + .pcre.capture_from(remaining) + .into(md) + .matches() + .ignore_error(); + if (!match_res) { continue; } - pe_out.pe_column = pc.all()->c_begin; + remaining = match_res->f_remaining; + pe_out.pe_column = match_res->f_all.sf_begin; found = true; if (RTT_MICROS <= token && token <= RTT_YEARS) { if (!number_set) { @@ -333,15 +456,15 @@ relative_time::from_str(const char* str, size_t len) case RTT_AT: break; case RTT_TIME: { - const auto hstr = pi.get_substr(pc[0]); - const auto mstr = pi.get_substr(pc[1]); + const auto hstr = md[1]->to_string(); + const auto mstr = md[2]->to_string(); retval.rt_field[RTF_HOURS] = atoi(hstr.c_str()); retval.rt_field[RTF_MINUTES] = atoi(mstr.c_str()); - if (pc[2]->is_valid()) { - const auto sstr = pi.get_substr(pc[2]); + if (md[3]) { + const auto sstr = md[3]->to_string(); retval.rt_field[RTF_SECONDS] = atoi(sstr.c_str()); - if (pc[3]->is_valid()) { - const auto substr = pi.get_substr(pc[3]); + if (md[4]) { + const auto substr = md[4]->to_string(); switch (substr.length()) { case 3: @@ -373,12 +496,11 @@ relative_time::from_str(const char* str, size_t len) } auto num_scan_res - = scn::scan_value(pi.to_string_view(pc[0])); + = scn::scan_value(md[0]->to_string_view()); if (!num_scan_res) { - pe_out.pe_msg - = fmt::format(FMT_STRING("Invalid number: {}"), - pi.get_substr(pc[0])); + pe_out.pe_msg = fmt::format( + FMT_STRING("Invalid number: {}"), md[0].value()); return Err(pe_out); } number = num_scan_res.value(); diff --git a/src/relative_time.hh b/src/relative_time.hh index 305f2e41..89901f12 100644 --- a/src/relative_time.hh +++ b/src/relative_time.hh @@ -40,6 +40,7 @@ 
#include +#include "base/intern_string.hh" #include "base/result.h" #include "ptimec.hh" @@ -109,13 +110,7 @@ public: std::string pe_msg; }; - static Result from_str(const char* str, - size_t len); - - static Result from_str(const std::string& str) - { - return from_str(str.c_str(), str.length()); - } + static Result from_str(string_fragment str); static relative_time from_timeval(const struct timeval& tv); diff --git a/src/shlex.cc b/src/shlex.cc index b1f77822..8da44bb4 100644 --- a/src/shlex.cc +++ b/src/shlex.cc @@ -37,19 +37,19 @@ #include "shlex.hh" bool -shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) +shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) { while (this->s_index < this->s_len) { switch (this->s_str[this->s_index]) { case '\\': - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; if (this->s_index + 1 < this->s_len) { token_out = shlex_token_t::ST_ESCAPE; this->s_index += 2; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; } else { this->s_index += 1; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_ERROR; } return true; @@ -57,16 +57,16 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) if (!this->s_ignore_quotes) { switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; this->s_index += 1; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_DOUBLE_QUOTE_START; this->s_state = state_t::STATE_IN_DOUBLE_QUOTE; return true; case state_t::STATE_IN_DOUBLE_QUOTE: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; this->s_index += 1; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_DOUBLE_QUOTE_END; this->s_state = state_t::STATE_NORMAL; return true; @@ -79,16 +79,16 @@ shlex::tokenize(pcre_context::capture_t& cap_out, 
shlex_token_t& token_out) if (!this->s_ignore_quotes) { switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; this->s_index += 1; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_SINGLE_QUOTE_START; this->s_state = state_t::STATE_IN_SINGLE_QUOTE; return true; case state_t::STATE_IN_SINGLE_QUOTE: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; this->s_index += 1; - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_SINGLE_QUOTE_END; this->s_state = state_t::STATE_NORMAL; return true; @@ -110,7 +110,7 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) case '~': switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; this->s_index += 1; while (this->s_index < this->s_len && (isalnum(this->s_str[this->s_index]) @@ -119,7 +119,7 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) { this->s_index += 1; } - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_TILDE; return true; default: @@ -130,11 +130,11 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) case '\t': switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.c_begin = this->s_index; + cap_out.sf_begin = this->s_index; while (isspace(this->s_str[this->s_index])) { this->s_index += 1; } - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_WHITESPACE; return true; default: @@ -152,13 +152,12 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out) } void -shlex::scan_variable_ref(pcre_context::capture_t& cap_out, - shlex_token_t& token_out) +shlex::scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out) { - cap_out.c_begin = this->s_index; + 
cap_out.sf_begin = this->s_index; this->s_index += 1; if (this->s_index >= this->s_len) { - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; token_out = shlex_token_t::ST_ERROR; return; } @@ -189,32 +188,31 @@ shlex::scan_variable_ref(pcre_context::capture_t& cap_out, } } - cap_out.c_end = this->s_index; + cap_out.sf_end = this->s_index; if (token_out == shlex_token_t::ST_QUOTED_VARIABLE_REF && this->s_str[this->s_index - 1] != '}') { - cap_out.c_begin += 1; - cap_out.c_end = cap_out.c_begin + 1; + cap_out.sf_begin += 1; + cap_out.sf_end = cap_out.sf_begin + 1; token_out = shlex_token_t::ST_ERROR; } } void -shlex::resolve_home_dir(std::string& result, - const pcre_context::capture_t cap) const +shlex::resolve_home_dir(std::string& result, string_fragment cap) const { if (cap.length() == 1) { result.append(getenv_opt("HOME").value_or("~")); } else { auto username = (char*) alloca(cap.length()); - memcpy(username, &this->s_str[cap.c_begin + 1], cap.length() - 1); + memcpy(username, &this->s_str[cap.sf_begin + 1], cap.length() - 1); username[cap.length() - 1] = '\0'; auto pw = getpwnam(username); if (pw != nullptr) { result.append(pw->pw_dir); } else { - result.append(&this->s_str[cap.c_begin], cap.length()); + result.append(&this->s_str[cap.sf_begin], cap.length()); } } } diff --git a/src/shlex.hh b/src/shlex.hh index 6b06bf9b..2317a2c3 100644 --- a/src/shlex.hh +++ b/src/shlex.hh @@ -38,8 +38,8 @@ #include +#include "base/intern_string.hh" #include "base/opt_util.hh" -#include "pcrepp/pcrepp.hh" #include "shlex.resolver.hh" enum class shlex_token_t { @@ -73,32 +73,32 @@ public: return *this; } - bool tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out); + bool tokenize(string_fragment& cap_out, shlex_token_t& token_out); template bool eval(std::string& result, const Resolver& vars) { result.clear(); - pcre_context::capture_t cap; + string_fragment cap; shlex_token_t token; int last_index = 0; while (this->tokenize(cap, token)) { - 
result.append(&this->s_str[last_index], cap.c_begin - last_index); + result.append(&this->s_str[last_index], cap.sf_begin - last_index); switch (token) { case shlex_token_t::ST_ERROR: return false; case shlex_token_t::ST_ESCAPE: - result.append(1, this->s_str[cap.c_begin + 1]); + result.append(1, this->s_str[cap.sf_begin + 1]); break; case shlex_token_t::ST_WHITESPACE: - result.append(&this->s_str[cap.c_begin], cap.length()); + result.append(&this->s_str[cap.sf_begin], cap.length()); break; case shlex_token_t::ST_VARIABLE_REF: case shlex_token_t::ST_QUOTED_VARIABLE_REF: { int extra = token == shlex_token_t::ST_VARIABLE_REF ? 0 : 1; - std::string var_name(&this->s_str[cap.c_begin + 1 + extra], + std::string var_name(&this->s_str[cap.sf_begin + 1 + extra], cap.length() - 1 - extra * 2); auto local_var = vars.find(var_name); const char* var_value = getenv(var_name.c_str()); @@ -124,7 +124,7 @@ public: default: break; } - last_index = cap.c_end; + last_index = cap.sf_end; } result.append(&this->s_str[last_index], this->s_len - last_index); @@ -137,7 +137,7 @@ public: { result.clear(); - pcre_context::capture_t cap; + string_fragment cap; shlex_token_t token; int last_index = 0; bool start_new = true; @@ -151,12 +151,12 @@ public: start_new = false; } result.back().append(&this->s_str[last_index], - cap.c_begin - last_index); + cap.sf_begin - last_index); switch (token) { case shlex_token_t::ST_ERROR: return false; case shlex_token_t::ST_ESCAPE: - result.back().append(1, this->s_str[cap.c_begin + 1]); + result.back().append(1, this->s_str[cap.sf_begin + 1]); break; case shlex_token_t::ST_WHITESPACE: start_new = true; @@ -164,7 +164,7 @@ public: case shlex_token_t::ST_VARIABLE_REF: case shlex_token_t::ST_QUOTED_VARIABLE_REF: { int extra = token == shlex_token_t::ST_VARIABLE_REF ? 
0 : 1; - std::string var_name(&this->s_str[cap.c_begin + 1 + extra], + std::string var_name(&this->s_str[cap.sf_begin + 1 + extra], cap.length() - 1 - extra * 2); auto local_var = vars.find(var_name); const char* var_value = getenv(var_name.c_str()); @@ -182,7 +182,7 @@ public: default: break; } - last_index = cap.c_end; + last_index = cap.sf_end; } if (last_index < this->s_len) { @@ -202,11 +202,9 @@ public: this->s_state = state_t::STATE_NORMAL; } - void scan_variable_ref(pcre_context::capture_t& cap_out, - shlex_token_t& token_out); + void scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out); - void resolve_home_dir(std::string& result, - const pcre_context::capture_t cap) const; + void resolve_home_dir(std::string& result, string_fragment cap) const; enum class state_t { STATE_NORMAL, diff --git a/src/sql_util.cc b/src/sql_util.cc index 4bab0815..7469c5d5 100644 --- a/src/sql_util.cc +++ b/src/sql_util.cc @@ -47,7 +47,7 @@ #include "bound_tags.hh" #include "config.h" #include "lnav_util.hh" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "readline_context.hh" #include "readline_highlighters.hh" #include "shlex.resolver.hh" @@ -902,40 +902,42 @@ static struct { int guess_type_from_pcre(const std::string& pattern, std::string& collator) { - try { - static const std::vector number_matches = {1, 2}; + static const std::vector number_matches = {1, 2}; - pcrepp re(pattern); - std::vector matches; - int retval = SQLITE3_TEXT; - int index = 0; - - collator.clear(); - for (const auto& test_value : TYPE_TEST_VALUE) { - pcre_context_static<30> pc; - pcre_input pi(test_value.sample); - - if (re.match(pc, pi, PCRE_ANCHORED) && pc[0]->c_begin == 0 - && pc[0]->length() == (int) pi.pi_length) - { - matches.push_back(index); - } + auto compile_res = lnav::pcre2pp::code::from(pattern); + if (compile_res.isErr()) { + return SQLITE3_TEXT; + } - index += 1; + auto re = compile_res.unwrap(); + std::vector matches; + int retval = SQLITE3_TEXT; + 
int index = 0; + + collator.clear(); + for (const auto& test_value : TYPE_TEST_VALUE) { + auto find_res + = re.find_in(string_fragment::from_c_str(test_value.sample), + PCRE2_ANCHORED) + .ignore_error(); + if (find_res && find_res->f_all.sf_begin == 0 + && find_res->f_remaining.empty()) + { + matches.push_back(index); } - if (matches.size() == 1) { - retval = TYPE_TEST_VALUE[matches.front()].sqlite_type; - collator = TYPE_TEST_VALUE[matches.front()].collator; - } else if (matches == number_matches) { - retval = SQLITE_FLOAT; - collator = ""; - } + index += 1; + } - return retval; - } catch (pcrepp::error& e) { - return SQLITE3_TEXT; + if (matches.size() == 1) { + retval = TYPE_TEST_VALUE[matches.front()].sqlite_type; + collator = TYPE_TEST_VALUE[matches.front()].collator; + } else if (matches == number_matches) { + retval = SQLITE_FLOAT; + collator = ""; } + + return retval; } const char* @@ -1038,50 +1040,79 @@ annotate_sql_statement(attr_line_t& al) static const std::string keyword_re_str = R"(\A)" + sql_keyword_re(); static const struct { - pcrepp re; + lnav::pcre2pp::code re; string_attr_type* type; } PATTERNS[] = { - {pcrepp{R"(\A,)"}, &SQL_COMMA_ATTR}, - {pcrepp{R"(\A\(|\A\))"}, &SQL_PAREN_ATTR}, - {pcrepp{keyword_re_str, PCRE_CASELESS}, &SQL_KEYWORD_ATTR}, - {pcrepp{R"(\A'[^']*('(?:'[^']*')*|$))"}, &SQL_STRING_ATTR}, { - pcrepp{R"(\A-?\d+(?:\.\d*(?:[eE][\-\+]?\d+)?)?|0x[0-9a-fA-F]+$)"}, + lnav::pcre2pp::code::from_const(R"(\A,)"), + &SQL_COMMA_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A\(|\A\))"), + &SQL_PAREN_ATTR, + }, + { + lnav::pcre2pp::code::from(keyword_re_str, PCRE2_CASELESS).unwrap(), + &SQL_KEYWORD_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A'[^']*('(?:'[^']*')*|$))"), + &SQL_STRING_ATTR, + }, + { + lnav::pcre2pp::code::from_const( + R"(\A-?\d+(?:\.\d*(?:[eE][\-\+]?\d+)?)?|0x[0-9a-fA-F]+$)"), &SQL_NUMBER_ATTR, }, - {pcrepp{R"(\A(((\$|:|@)?\b[a-z_]\w*)|\"([^\"]+)\"|\[([^\]]+)]))", - PCRE_CASELESS}, - &SQL_IDENTIFIER_ATTR}, - 
{pcrepp{R"(\A--.*)"}, &SQL_COMMENT_ATTR}, - {pcrepp{R"(\A(\*|<|>|=|!|\-|\+|\|\|))"}, &SQL_OPERATOR_ATTR}, - {pcrepp{R"(\A.)"}, &SQL_GARBAGE_ATTR}, + { + lnav::pcre2pp::code::from_const( + R"(\A(((\$|:|@)?\b[a-z_]\w*)|\"([^\"]+)\"|\[([^\]]+)]))", + PCRE2_CASELESS), + &SQL_IDENTIFIER_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A--.*)"), + &SQL_COMMENT_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A(\*|<|>|=|!|\-|\+|\|\|))"), + &SQL_OPERATOR_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A.)"), + &SQL_GARBAGE_ATTR, + }, }; - static const pcrepp cmd_pattern{R"(^(\.\w+))"}; - static const pcrepp ws_pattern(R"(\A\s+)"); + static const auto cmd_pattern + = lnav::pcre2pp::code::from_const(R"(^(\.\w+))"); + static const auto ws_pattern = lnav::pcre2pp::code::from_const(R"(\A\s+)"); - pcre_context_static<30> pc; - pcre_input pi(al.get_string()); auto& line = al.get_string(); auto& sa = al.get_attrs(); - if (cmd_pattern.match(pc, pi, PCRE_ANCHORED)) { - auto* cap = pc.all(); - sa.emplace_back(line_range(cap->c_begin, cap->c_end), + auto cmd_find_res + = cmd_pattern.find_in(line, PCRE2_ANCHORED).ignore_error(); + if (cmd_find_res) { + auto cap = cmd_find_res->f_all; + sa.emplace_back(line_range(cap.sf_begin, cap.sf_end), SQL_COMMAND_ATTR.value()); return; } - while (pi.pi_next_offset < line.length()) { - if (ws_pattern.match(pc, pi, PCRE_ANCHORED)) { + auto remaining = string_fragment::from_str(line); + while (!remaining.empty()) { + auto ws_find_res = ws_pattern.find_in(remaining).ignore_error(); + if (ws_find_res) { + remaining = ws_find_res->f_remaining; continue; } for (const auto& pat : PATTERNS) { - if (pat.re.match(pc, pi, PCRE_ANCHORED)) { - auto* cap = pc.all(); - struct line_range lr(cap->c_begin, cap->c_end); - - sa.emplace_back(lr, pat.type->value()); + auto pat_find_res = pat.re.find_in(remaining).ignore_error(); + if (pat_find_res) { + sa.emplace_back(to_line_range(pat_find_res->f_all), + pat.type->value()); + remaining = 
pat_find_res->f_remaining; break; } } diff --git a/src/string-extension-functions.cc b/src/string-extension-functions.cc index af91d2e1..799aa18b 100644 --- a/src/string-extension-functions.cc +++ b/src/string-extension-functions.cc @@ -29,7 +29,7 @@ #include "libbase64.h" #include "mapbox/variant.hpp" #include "optional.hpp" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "safe/safe.h" #include "scn/scn.h" #include "spookyhash/SpookyV2.h" @@ -47,7 +47,7 @@ using namespace mapbox; struct cache_entry { - std::shared_ptr re2; + std::shared_ptr re2; std::shared_ptr cn{ std::make_shared(column_namer::language::JSON)}; }; @@ -61,15 +61,22 @@ find_re(string_fragment re) auto iter = cache.find(re); if (iter == cache.end()) { + auto compile_res = lnav::pcre2pp::code::from(re); + if (compile_res.isErr()) { + const static intern_string_t SRC = intern_string::lookup("arg"); + + throw lnav::console::to_user_message(SRC, compile_res.unwrapErr()); + } + cache_entry c; - c.re2 = std::make_shared(re.to_string()); + c.re2 = compile_res.unwrap().to_shared(); auto pair = cache.insert( std::make_pair(string_fragment::from_str(c.re2->get_pattern()), c)); for (int lpc = 0; lpc < c.re2->get_capture_count(); lpc++) { - c.cn->add_column( - string_fragment::from_c_str(c.re2->name_for_capture(lpc))); + c.cn->add_column(string_fragment::from_c_str( + c.re2->get_name_for_capture(lpc + 1))); } iter = pair.first; @@ -81,90 +88,78 @@ find_re(string_fragment re) static bool regexp(string_fragment re, string_fragment str) { - cache_entry* reobj = find_re(re); - pcre_context_static<30> pc; - pcre_input pi(str); + auto* reobj = find_re(re); - return reobj->re2->match(pc, pi); + return reobj->re2->find_in(str).ignore_error().has_value(); } static util::variant -regexp_match(string_fragment re, const char* str) +regexp_match(string_fragment re, string_fragment str) { - cache_entry* reobj = find_re(re); - pcre_context_static<30> pc; - pcre_input pi(str); - pcrepp& extractor = 
*reobj->re2; + auto* reobj = find_re(re); + auto& extractor = *reobj->re2; if (extractor.get_capture_count() == 0) { - throw pcrepp::error("regular expression does not have any captures"); + throw std::runtime_error( + "regular expression does not have any captures"); } - if (!extractor.match(pc, pi, PCRE_NO_UTF8_CHECK)) { + auto md = extractor.create_match_data(); + auto match_res = extractor.capture_from(str).into(md).matches(); + if (match_res.is()) { return static_cast(nullptr); } + if (match_res.is()) { + auto err = match_res.get(); + + throw std::runtime_error(err.get_message()); + } yajlpp_gen gen; yajl_gen_config(gen, yajl_gen_beautify, false); if (extractor.get_capture_count() == 1) { - pcre_context::capture_t* cap = pc[0]; - const char* cap_start = pi.get_substr_start(cap); + auto cap = md[1]; - if (!cap->is_valid()) { + if (!cap) { return static_cast(nullptr); } - char* cap_copy = (char*) alloca(cap->length() + 1); - long long int i_value; - double d_value; - int end_index; - - memcpy(cap_copy, cap_start, cap->length()); - cap_copy[cap->length()] = '\0'; - - if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1 - && (end_index == cap->length())) - { - return (int64_t) i_value; + auto scan_int_res = scn::scan_value(cap->to_string_view()); + if (scan_int_res && scan_int_res.empty()) { + return scan_int_res.value(); } - if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1 - && (end_index == cap->length())) - { - return d_value; + + auto scan_float_res = scn::scan_value(cap->to_string_view()); + if (scan_float_res && scan_float_res.empty()) { + return scan_float_res.value(); } - return string_fragment(str, cap->c_begin, cap->c_end); + + return cap.value(); } else { yajlpp_map root_map(gen); for (int lpc = 0; lpc < extractor.get_capture_count(); lpc++) { const auto& colname = reobj->cn->cn_names[lpc]; - const auto* cap = pc[lpc]; + const auto cap = md[lpc + 1]; yajl_gen_pstring(gen, colname.data(), colname.length()); - if (!cap->is_valid()) { + if 
(!cap) { yajl_gen_null(gen); } else { - const char* cap_start = pi.get_substr_start(cap); - char* cap_copy = (char*) alloca(cap->length() + 1); - long long int i_value; - double d_value; - int end_index; - - memcpy(cap_copy, cap_start, cap->length()); - cap_copy[cap->length()] = '\0'; - - if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1 - && (end_index == cap->length())) - { - yajl_gen_integer(gen, i_value); - } else if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1 - && (end_index == cap->length())) - { - yajl_gen_number(gen, cap_start, cap->length()); + auto scan_int_res + = scn::scan_value(cap->to_string_view()); + if (scan_int_res && scan_int_res.empty()) { + yajl_gen_integer(gen, scan_int_res.value()); } else { - yajl_gen_pstring(gen, cap_start, cap->length()); + auto scan_float_res + = scn::scan_value(cap->to_string_view()); + if (scan_float_res && scan_float_res.empty()) { + yajl_gen_number(gen, cap->data(), cap->length()); + } else { + yajl_gen_pstring(gen, cap->data(), cap->length()); + } } } } @@ -263,9 +258,9 @@ logfmt2json(string_fragment line) } static std::string -regexp_replace(const char* str, string_fragment re, const char* repl) +regexp_replace(string_fragment str, string_fragment re, const char* repl) { - cache_entry* reobj = find_re(re); + auto* reobj = find_re(re); return reobj->re2->replace(str, repl); } diff --git a/src/text_format.cc b/src/text_format.cc index 46afb579..dc2c2881 100644 --- a/src/text_format.cc +++ b/src/text_format.cc @@ -32,7 +32,7 @@ #include "text_format.hh" #include "config.h" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "yajl/api/yajl_parse.h" text_format_t @@ -44,56 +44,57 @@ detect_text_format(string_fragment sf, static const auto MD_EXT = ghc::filesystem::path(".md"); static const auto MARKDOWN_EXT = ghc::filesystem::path(".markdown"); - static const pcrepp MAN_MATCHERS - = pcrepp(R"(^[A-Z]+\(\d\)\s+)", PCRE_MULTILINE); + static const auto MAN_MATCHERS = 
lnav::pcre2pp::code::from_const( + R"(^[A-Z]+\(\d\)\s+)", PCRE2_MULTILINE); // XXX This is a pretty crude way of detecting format... - static const pcrepp PYTHON_MATCHERS = pcrepp( + static const auto PYTHON_MATCHERS = lnav::pcre2pp::code::from_const( "(?:" "^\\s*def\\s+\\w+\\([^)]*\\):[^\\n]*$|" "^\\s*try:[^\\n]*$" ")", - PCRE_MULTILINE); + PCRE2_MULTILINE); - static const pcrepp RUST_MATCHERS = pcrepp(R"( + static const auto RUST_MATCHERS + = lnav::pcre2pp::code::from_const(R"( (?: ^\s*use\s+[\w+:\{\}]+;$| ^\s*(?:pub)?\s+(?:const|enum|fn)\s+\w+.*$| ^\s*impl\s+\w+.*$ ) )", - PCRE_MULTILINE); + PCRE2_MULTILINE); - static const pcrepp JAVA_MATCHERS = pcrepp( + static const auto JAVA_MATCHERS = lnav::pcre2pp::code::from_const( "(?:" "^package\\s+|" "^import\\s+|" "^\\s*(?:public)?\\s*class\\s*(\\w+\\s+)*\\s*{" ")", - PCRE_MULTILINE); + PCRE2_MULTILINE); - static const pcrepp C_LIKE_MATCHERS = pcrepp( + static const auto C_LIKE_MATCHERS = lnav::pcre2pp::code::from_const( "(?:" "^#\\s*include\\s+|" "^#\\s*define\\s+|" "^\\s*if\\s+\\([^)]+\\)[^\\n]*$|" "^\\s*(?:\\w+\\s+)*class \\w+ {" ")", - PCRE_MULTILINE); + PCRE2_MULTILINE); - static const pcrepp SQL_MATCHERS = pcrepp( + static const auto SQL_MATCHERS = lnav::pcre2pp::code::from_const( "(?:" "select\\s+.+\\s+from\\s+|" "insert\\s+into\\s+.+\\s+values" ")", - PCRE_MULTILINE | PCRE_CASELESS); + PCRE2_MULTILINE | PCRE2_CASELESS); - static const pcrepp XML_MATCHERS = pcrepp( + static const auto XML_MATCHERS = lnav::pcre2pp::code::from_const( "(?:" R"(<\?xml(\s+\w+\s*=\s*"[^"]*")*\?>|)" R"()" ")", - PCRE_MULTILINE | PCRE_CASELESS); + PCRE2_MULTILINE | PCRE2_CASELESS); text_format_t retval = text_format_t::TF_UNKNOWN; @@ -110,45 +111,40 @@ detect_text_format(string_fragment sf, } } - pcre_input pi(sf); - pcre_context_static<30> pc; - { auto_mem jhandle(yajl_free); jhandle = yajl_alloc(nullptr, nullptr, nullptr); - if (yajl_parse(jhandle, (unsigned char*) sf.data(), sf.length()) - == yajl_status_ok) - { + if 
(yajl_parse(jhandle, sf.udata(), sf.length()) == yajl_status_ok) { return text_format_t::TF_JSON; } } - if (MAN_MATCHERS.match(pc, pi)) { + if (MAN_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_MAN; } - if (PYTHON_MATCHERS.match(pc, pi)) { + if (PYTHON_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_PYTHON; } - if (RUST_MATCHERS.match(pc, pi)) { + if (RUST_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_RUST; } - if (JAVA_MATCHERS.match(pc, pi)) { + if (JAVA_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_JAVA; } - if (C_LIKE_MATCHERS.match(pc, pi)) { + if (C_LIKE_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_C_LIKE; } - if (SQL_MATCHERS.match(pc, pi)) { + if (SQL_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_SQL; } - if (XML_MATCHERS.match(pc, pi)) { + if (XML_MATCHERS.find_in(sf).ignore_error()) { return text_format_t::TF_XML; } diff --git a/src/text_format.hh b/src/text_format.hh index 0eae47e8..fd476716 100644 --- a/src/text_format.hh +++ b/src/text_format.hh @@ -42,6 +42,7 @@ enum class text_format_t { TF_UNKNOWN, + TF_BINARY, TF_C_LIKE, TF_JAVA, TF_JSON, @@ -64,6 +65,9 @@ struct formatter : formatter { string_view name = "unknown"; switch (tf) { case text_format_t::TF_UNKNOWN: + name = "text/plain"; + break; + case text_format_t::TF_BINARY: name = "application/octet-stream"; break; case text_format_t::TF_LOG: diff --git a/src/textfile_highlighters.cc b/src/textfile_highlighters.cc index fe01901b..8eca8dae 100644 --- a/src/textfile_highlighters.cc +++ b/src/textfile_highlighters.cc @@ -33,21 +33,11 @@ #include "config.h" -static std::shared_ptr -xpcre_compile(const char* pattern, int options = 0) +template +static std::shared_ptr +xpcre_compile(const T (&pattern)[N], int options = 0) { - auto compile_res = pcrepp::shared_from_str(pattern, options); - - if (compile_res.isErr()) { - auto ce = compile_res.unwrapErr(); - - fprintf(stderr, "internal error: failed 
to compile -- %s\n", pattern); - fprintf(stderr, "internal error: %s\n", ce.ce_msg); - - exit(1); - } - - return compile_res.unwrap(); + return lnav::pcre2pp::code::from_const(pattern, options).to_shared(); } void @@ -382,7 +372,7 @@ setup_highlights(highlight_map_t& hm) "\\bWITH\\b|" "\\bWITHOUT\\b" ")", - PCRE_CASELESS)) + PCRE2_CASELESS)) .with_nestable(false) .with_text_format(text_format_t::TF_SQL) .with_role(role_t::VCR_KEYWORD); diff --git a/src/textfile_sub_source.cc b/src/textfile_sub_source.cc index 44ac1d7d..c871e232 100644 --- a/src/textfile_sub_source.cc +++ b/src/textfile_sub_source.cc @@ -570,7 +570,9 @@ textfile_sub_source::rescan_files( continue; } - if (!retval && lf->is_indexing()) { + if (!retval && lf->is_indexing() + && lf->get_text_format() != text_format_t::TF_BINARY) + { auto ms_iter = this->tss_doc_metadata.find(lf->get_filename()); if (ms_iter != this->tss_doc_metadata.end()) { diff --git a/src/textview_curses.cc b/src/textview_curses.cc index 2d0dae8f..82658069 100644 --- a/src/textview_curses.cc +++ b/src/textview_curses.cc @@ -175,15 +175,15 @@ textview_curses::reload_config(error_reporter& reporter) continue; } - auto regex = pcrepp::shared_from_str(hl_pair.second.hc_regex); + auto regex = lnav::pcre2pp::code::from(hl_pair.second.hc_regex); if (regex.isErr()) { + const static intern_string_t PATTERN_SRC + = intern_string::lookup("pattern"); + auto ce = regex.unwrapErr(); reporter(&hl_pair.second.hc_regex, - lnav::console::user_message::error(fmt::format( - FMT_STRING("invalid highlight regex: {} at {}"), - ce.ce_msg, - ce.ce_offset))); + lnav::console::to_user_message(PATTERN_SRC, ce)); continue; } @@ -228,7 +228,7 @@ textview_curses::reload_config(error_reporter& reporter) attrs.ta_attrs |= A_UNDERLINE; } this->tc_highlights[{highlight_source_t::THEME, hl_pair.first}] - = highlighter(regex.unwrap()) + = highlighter(regex.unwrap().to_shared()) .with_attrs(attrs) .with_color(fg, bg) .with_nestable(false); @@ -557,7 +557,7 @@ void 
textview_curses::execute_search(const std::string& regex_orig) { std::string regex = regex_orig; - std::shared_ptr code; + std::shared_ptr code; if ((this->tc_search_child == nullptr) || (regex != this->tc_current_search)) @@ -571,27 +571,26 @@ textview_curses::execute_search(const std::string& regex_orig) if (regex.empty()) { } else { - auto compile_res - = pcrepp::shared_from_str(regex, PCRE_CASELESS | PCRE_UTF8); + auto compile_res = lnav::pcre2pp::code::from(regex, PCRE2_CASELESS); if (compile_res.isErr()) { auto ce = compile_res.unwrapErr(); - regex = pcrepp::quote(regex); + regex = lnav::pcre2pp::quote(regex); log_info("invalid search regex (%s), using quoted: %s", - ce.ce_msg, + ce.get_message().c_str(), regex.c_str()); auto compile_quote_res - = pcrepp::shared_from_str(regex, PCRE_CASELESS | PCRE_UTF8); + = lnav::pcre2pp::code::from(regex, PCRE2_CASELESS); if (compile_quote_res.isErr()) { log_error("Unable to compile quoted regex: %s", regex.c_str()); } else { - code = compile_quote_res.unwrap(); + code = compile_quote_res.unwrap().to_shared(); } } else { - code = compile_res.unwrap(); + code = compile_res.unwrap().to_shared(); } } @@ -604,7 +603,7 @@ textview_curses::execute_search(const std::string& regex_orig) hm[{highlight_source_t::PREVIEW, "search"}] = hl; auto gp = injector::get>>( - code->p_code, *this); + code, *this); gp->set_sink(this); auto top = this->get_top(); @@ -626,7 +625,7 @@ textview_curses::execute_search(const std::string& regex_orig) this->tc_sub_source->get_grepper() | [this, code](auto pair) { auto sgp = injector::get>>( - code->p_code, *pair.first); + code, *pair.first); sgp->set_sink(pair.second); sgp->queue_request(0_vl); @@ -661,15 +660,15 @@ textview_curses::horiz_shift(vis_line_t start, vis_line_t end, int off_start) this->listview_value_for_rows(*this, start, rows); const auto& str = rows[0].get_string(); - pcre_context_static<60> pc; - pcre_input pi(str); - while (hl_iter->second.h_regex->match(pc, pi)) { - if 
(pc.all()->c_begin < off_start) { - prev_hit = std::max(prev_hit, pc.all()->c_begin); - } else if (pc.all()->c_begin > off_start) { - next_hit = std::min(next_hit, pc.all()->c_begin); - } - } + hl_iter->second.h_regex->capture_from(str).for_each( + [&](lnav::pcre2pp::match_data& md) { + auto cap = md[0].value(); + if (cap.sf_begin < off_start) { + prev_hit = std::max(prev_hit, cap.sf_begin); + } else if (cap.sf_begin > off_start) { + next_hit = std::min(next_hit, cap.sf_begin); + } + }); } if (prev_hit == -1 && next_hit == INT_MAX) { @@ -1127,8 +1126,7 @@ logfile_filter_state::content_line_to_vis_line(uint32_t line) std::string text_anchors::to_anchor_string(const std::string& raw) { - static const pcrepp ANCHOR_RE(R"([^\w]+)"); + static const auto ANCHOR_RE = lnav::pcre2pp::code::from_const(R"([^\w]+)"); - return fmt::format(FMT_STRING("#{}"), - ANCHOR_RE.replace(tolower(raw).c_str(), "-")); + return fmt::format(FMT_STRING("#{}"), ANCHOR_RE.replace(tolower(raw), "-")); } diff --git a/src/time-extension-functions.cc b/src/time-extension-functions.cc index caa55f3d..b92ec2a2 100644 --- a/src/time-extension-functions.cc +++ b/src/time-extension-functions.cc @@ -50,14 +50,15 @@ timeslice(sqlite3_value* time_in, nonstd::optional slice_in_opt) std::string c_slice_str; relative_time c_rel_time; } cache; - const auto slice_in = string_fragment(slice_in_opt.value_or("15m")); + const auto slice_in + = string_fragment::from_c_str(slice_in_opt.value_or("15m")); if (slice_in.empty()) { throw sqlite_func_error("no time slice value given"); } if (slice_in != cache.c_slice_str.c_str()) { - auto parse_res = relative_time::from_str(slice_in.data()); + auto parse_res = relative_time::from_str(slice_in); if (parse_res.isErr()) { throw sqlite_func_error( "unable to parse time slice value: {} -- {}", @@ -145,22 +146,26 @@ timeslice(sqlite3_value* time_in, nonstd::optional slice_in_opt) } static nonstd::optional -sql_timediff(const char* time1, const char* time2) 
+sql_timediff(string_fragment time1, string_fragment time2) { struct timeval tv1, tv2, retval; date_time_scanner dts1, dts2; - auto parse_res1 = relative_time::from_str(time1, -1); + auto parse_res1 = relative_time::from_str(time1); if (parse_res1.isOk()) { tv1 = parse_res1.unwrap().adjust_now().to_timeval(); - } else if (!dts1.convert_to_timeval(time1, -1, nullptr, tv1)) { + } else if (!dts1.convert_to_timeval( + time1.data(), time1.length(), nullptr, tv1)) + { return nonstd::nullopt; } - auto parse_res2 = relative_time::from_str(time2, -1); + auto parse_res2 = relative_time::from_str(time2); if (parse_res2.isOk()) { tv2 = parse_res2.unwrap().adjust_now().to_timeval(); - } else if (!dts2.convert_to_timeval(time2, -1, nullptr, tv2)) { + } else if (!dts2.convert_to_timeval( + time2.data(), time2.length(), nullptr, tv2)) + { return nonstd::nullopt; } diff --git a/src/views_vtab.cc b/src/views_vtab.cc index cd652516..cf0a7cbe 100644 --- a/src/views_vtab.cc +++ b/src/views_vtab.cc @@ -97,10 +97,10 @@ struct from_sqlite { }; template<> -struct from_sqlite> { - inline std::shared_ptr operator()(int argc, - sqlite3_value** val, - int argi) +struct from_sqlite> { + inline std::shared_ptr operator()(int argc, + sqlite3_value** val, + int argi) { const char* pattern = (const char*) sqlite3_value_text(val[argi]); @@ -108,18 +108,18 @@ struct from_sqlite> { throw sqlite_func_error("Expecting a non-empty pattern value"); } - auto compile_res - = pcrepp::shared_from_str(pattern, PCRE_CASELESS | PCRE_UTF8); + auto compile_res = lnav::pcre2pp::code::from( + string_fragment::from_c_str(pattern), PCRE2_CASELESS); if (compile_res.isErr()) { auto ce = compile_res.unwrapErr(); throw sqlite_func_error( "Invalid regular expression for pattern: {} at offset {}", - ce.ce_msg, + ce.get_message().c_str(), ce.ce_offset); } - return compile_res.unwrap(); + return compile_res.unwrap().to_shared(); } }; @@ -683,8 +683,9 @@ CREATE TABLE lnav_view_filters ( std::shared_ptr tf; switch 
(lang.value_or(filter_lang_t::REGEX)) { case filter_lang_t::REGEX: { - auto pattern = from_sqlite>()( - 1, &pattern_str, 0); + auto pattern + = from_sqlite>()( + 1, &pattern_str, 0); auto pf = std::make_shared( type.value_or(text_filter::type_t::EXCLUDE), pattern->get_pattern(), @@ -874,8 +875,8 @@ CREATE TABLE lnav_view_filters ( tf->lf_deleted = true; tss->text_filters_changed(); - auto pattern - = from_sqlite>()(1, &pattern_val, 0); + auto pattern = from_sqlite>()( + 1, &pattern_val, 0); auto pf = std::make_shared( type, pattern->get_pattern(), tf->get_index(), pattern); auto conflict_mode = sqlite3_vtab_on_conflict(mod_vt->v_db); diff --git a/src/yajlpp/yajlpp.cc b/src/yajlpp/yajlpp.cc index cc9382c5..14a31b70 100644 --- a/src/yajlpp/yajlpp.cc +++ b/src/yajlpp/yajlpp.cc @@ -145,8 +145,10 @@ json_path_handler_base::json_path_handler_base(const std::string& property) : jph_property(property.back() == '#' ? property.substr(0, property.size() - 1) : property), - jph_regex( - std::make_shared(pcrepp::quote(property), PCRE_ANCHORED)), + jph_regex(lnav::pcre2pp::code::from(lnav::pcre2pp::quote(property), + PCRE2_ANCHORED) + .unwrap() + .to_shared()), jph_is_array(property.back() == '#') { memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks)); @@ -160,28 +162,20 @@ scrub_pattern(const std::string& pattern) return std::regex_replace(pattern, CAPTURE, "("); } -json_path_handler_base::json_path_handler_base(const pcrepp& property) - : jph_property(scrub_pattern(property.p_pattern)), - jph_regex(std::make_shared(property)), - jph_is_array(property.p_pattern.back() == '#'), - jph_is_pattern_property(true) -{ - memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks)); -} - -json_path_handler_base::json_path_handler_base(std::string property, - const pcrepp& property_re) - : jph_property(std::move(property)), - jph_regex(std::make_shared(property_re)), - jph_is_array(property_re.p_pattern.find('#') != std::string::npos) 
+json_path_handler_base::json_path_handler_base( + const std::shared_ptr& property) + : jph_property(scrub_pattern(property->get_pattern())), jph_regex(property), + jph_is_array(property->get_pattern().find('#') != std::string::npos), + jph_is_pattern_property(property->get_capture_count() > 0) { memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks)); } json_path_handler_base::json_path_handler_base( - std::string property, const std::shared_ptr& property_re) + std::string property, + const std::shared_ptr& property_re) : jph_property(std::move(property)), jph_regex(property_re), - jph_is_array(property_re->p_pattern.find('#') != std::string::npos) + jph_is_array(property_re->get_pattern().find('#') != std::string::npos) { memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks)); } @@ -191,14 +185,12 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const { if (this->jph_is_array) { auto size = this->jph_size_provider(ygc.ygc_obj_stack.top()); + auto md = lnav::pcre2pp::match_data::unitialized(); yajl_gen_string(handle, this->jph_property); yajl_gen_array_open(handle); for (size_t index = 0; index < size; index++) { - pcre_context_static<30> pc; - pcre_input pi(""); - - yajlpp_provider_context ypc{{pc, pi}, index}; + yajlpp_provider_context ypc{&md, index}; yajlpp_gen_context elem_ygc(handle, *this->jph_children); elem_ygc.ygc_depth = 1; elem_ygc.ygc_obj_stack.push( @@ -232,16 +224,17 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const ygc.ygc_depth += 1; if (this->jph_obj_provider) { - pcre_context_static<30> pc; - pcre_input pi(full_path); + auto md = this->jph_regex->create_match_data(); + auto find_res = this->jph_regex->capture_from(full_path) + .into(md) + .matches(); - this->jph_regex->match(pc, pi); ygc.ygc_obj_stack.push(this->jph_obj_provider( - {{pc, pi}, yajlpp_provider_context::nindex}, + {&md, yajlpp_provider_context::nindex}, ygc.ygc_obj_stack.top())); if (!ygc.ygc_default_stack.empty()) { 
ygc.ygc_default_stack.push(this->jph_obj_provider( - {{pc, pi}, yajlpp_provider_context::nindex}, + {&md, yajlpp_provider_context::nindex}, ygc.ygc_default_stack.top())); } } @@ -301,8 +294,9 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const schema.gen(this->jph_description); } if (this->jph_is_pattern_property) { - ygc.ygc_path.emplace_back(fmt::format( - FMT_STRING("<{}>"), this->jph_regex->name_for_capture(0))); + ygc.ygc_path.emplace_back( + fmt::format(FMT_STRING("<{}>"), + this->jph_regex->get_name_for_capture(1))); } else { ygc.ygc_path.emplace_back(this->jph_property); } @@ -312,7 +306,7 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const fmt::join(ygc.ygc_path, "/"))); schema.gen("type"); if (this->jph_is_array) { - if (this->jph_regex->p_pattern.find("#?") + if (this->jph_regex->get_pattern().find("#?") == std::string::npos) { schema.gen("array"); @@ -349,7 +343,7 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const if (this->jph_is_pattern_property) { ygc.ygc_path.emplace_back(fmt::format( - FMT_STRING("<{}>"), this->jph_regex->name_for_capture(0))); + FMT_STRING("<{}>"), this->jph_regex->get_name_for_capture(1))); } else { ygc.ygc_path.emplace_back(this->jph_property); } @@ -365,7 +359,8 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const schema.gen("type"); if (this->jph_is_array) { - if (this->jph_regex->p_pattern.find("#?") == std::string::npos) { + if (this->jph_regex->get_pattern().find("#?") == std::string::npos) + { schema.gen("array"); } else { yajlpp_array type_array(ygc.ygc_handle); @@ -493,7 +488,7 @@ json_path_handler_base::walk( if (this->jph_children) { for (const auto& lpath : local_paths) { for (const auto& jph : this->jph_children->jpc_children) { - static const auto POSS_SRC + static const intern_string_t POSS_SRC = intern_string::lookup("possibilities"); std::string full_path = base + lpath; @@ -509,16 +504,18 @@ json_path_handler_base::walk( 
ypc.set_path(full_path).with_obj(root).update_callbacks(); if (this->jph_obj_provider) { + auto md = this->jph_regex->create_match_data(); std::string full_path = lpath + "/"; - pcre_input pi(full_path); - if (!this->jph_regex->match(ypc.ypc_pcre_context, pi)) { + if (!this->jph_regex->capture_from(full_path) + .into(md) + .matches() + .ignore_error()) + { ensure(false); } child_root = this->jph_obj_provider( - {{ypc.ypc_pcre_context, pi}, - yajlpp_provider_context::nindex}, - root); + {&md, yajlpp_provider_context::nindex}, root); } jph.walk(cb, child_root, full_path); @@ -683,8 +680,6 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, } this->ypc_sibling_handlers = orig_handlers; - pcre_input pi(&this->ypc_path[0], 0, this->ypc_path.size() - 1); - this->ypc_callbacks = DEFAULT_CALLBACKS; if (handlers == nullptr) { @@ -709,12 +704,16 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, } } + auto path_frag = string_fragment::from_byte_range( + this->ypc_path.data(), 1 + child_start, this->ypc_path.size() - 1); for (const auto& jph : handlers->jpc_children) { - pi.reset(&this->ypc_path[1 + child_start], - 0, - this->ypc_path.size() - 2 - child_start); - if (jph.jph_regex->match(this->ypc_pcre_context, pi)) { - pcre_context::capture_t* cap = this->ypc_pcre_context.all(); + auto md = jph.jph_regex->create_match_data(); + if (jph.jph_regex->capture_from(path_frag) + .into(md) + .matches() + .ignore_error()) + { + auto cap = md[0].value(); if (jph.jph_is_array) { this->ypc_array_handler_count += 1; @@ -724,31 +723,24 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, ? 
static_cast(-1) : this->ypc_array_index[this->ypc_array_handler_count - 1]; - if ((1 + child_start + cap->c_end - != (int) this->ypc_path.size() - 1) + if ((cap.sf_end != (int) this->ypc_path.size() - 1) && (!jph.is_array() || index != yajlpp_provider_context::nindex)) { this->ypc_obj_stack.push(jph.jph_obj_provider( - {{this->ypc_pcre_context, pi}, index, this}, - this->ypc_obj_stack.top())); + {&md, index, this}, this->ypc_obj_stack.top())); } } if (jph.jph_children) { this->ypc_handler_stack.emplace_back(&jph); - if (1 + child_start + cap->c_end - != (int) this->ypc_path.size() - 1) - { - this->update_callbacks(jph.jph_children, - 1 + child_start + cap->c_end); + if (cap.sf_end != (int) this->ypc_path.size() - 1) { + this->update_callbacks(jph.jph_children, cap.sf_end); return; } } else { - if (1 + child_start + cap->c_end - != (int) this->ypc_path.size() - 1) - { + if (cap.sf_end != (int) this->ypc_path.size() - 1) { continue; } @@ -953,13 +945,16 @@ yajlpp_parse_context::handle_unused_or_delete(void* ctx) if (!ypc->ypc_handler_stack.empty() && ypc->ypc_handler_stack.back()->jph_obj_deleter) { - pcre_context_static<30> pc; + auto& jph = ypc->ypc_handler_stack.back(); + auto md = jph->jph_regex->create_match_data(); auto key_start = ypc->ypc_path_index_stack.back(); - pcre_input pi(&ypc->ypc_path[key_start + 1], - 0, - ypc->ypc_path.size() - key_start - 2); - yajlpp_provider_context provider_ctx{{pc, pi}, static_cast(-1)}; - ypc->ypc_handler_stack.back()->jph_regex->match(pc, pi); + auto path_frag = string_fragment::from_byte_range( + ypc->ypc_path.data(), key_start + 1, ypc->ypc_path.size() - 1); + yajlpp_provider_context provider_ctx{&md, static_cast(-1)}; + ypc->ypc_handler_stack.back() + ->jph_regex->capture_from(path_frag) + .into(md) + .matches(); ypc->ypc_handler_stack.back()->jph_obj_deleter( provider_ctx, ypc->ypc_obj_stack.top()); @@ -1426,35 +1421,11 @@ json_path_handler_base::report_enum_error(yajlpp_parse_context* ypc, } void 
-json_path_handler_base::report_regex_value_error( - yajlpp_parse_context* ypc, - const std::string& value, - const pcrepp::compile_error& pcre_error) const +json_path_handler_base::report_error(yajlpp_parse_context* ypc, + const std::string& value, + lnav::console::user_message um) const { - attr_line_t pcre_error_content{value}; - - lnav::snippets::regex_highlighter(pcre_error_content, - pcre_error_content.length(), - line_range{ - 0, - (int) pcre_error_content.length(), - }); - pcre_error_content.append("\n") - .append(pcre_error.ce_offset, ' ') - .append(lnav::roles::error("^ ")) - .append(lnav::roles::error(pcre_error.ce_msg)) - .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE)); - ypc->report_error(lnav::console::user_message::error( - attr_line_t() - .append_quoted(value) - .append(" is not a valid regular expression for " - "property ") - .append_quoted(lnav::roles::symbol( - ypc->get_full_path().to_string()))) - .with_reason(pcre_error.ce_msg) - .with_snippet(ypc->get_snippet()) - .with_snippet(lnav::console::snippet::from( - ypc->get_full_path(), pcre_error_content)) + ypc->report_error(um.with_snippet(ypc->get_snippet()) .with_help(this->get_help_text(ypc))); } diff --git a/src/yajlpp/yajlpp.hh b/src/yajlpp/yajlpp.hh index c202b3ee..a6aa133b 100644 --- a/src/yajlpp/yajlpp.hh +++ b/src/yajlpp/yajlpp.hh @@ -48,10 +48,11 @@ #include "base/file_range.hh" #include "base/intern_string.hh" #include "base/lnav.console.hh" +#include "base/lnav.console.into.hh" #include "base/lnav_log.hh" #include "json_ptr.hh" #include "optional.hpp" -#include "pcrepp/pcrepp.hh" +#include "pcrepp/pcre2pp.hh" #include "relative_time.hh" #include "yajl/api/yajl_gen.h" #include "yajl/api/yajl_parse.h" @@ -88,38 +89,70 @@ struct positioned_property { } }; +template +struct factory_container { + template + struct with_default_args { + template + static Result from( + intern_string_t src, Args... 
args) + { + auto from_res = T::from(args..., DefaultArgs...); + + if (from_res.isOk()) { + return Ok(with_default_args{from_res.unwrap().to_shared()}); + } + + return Err( + lnav::console::to_user_message(src, from_res.unwrapErr())); + } + + std::shared_ptr value; + }; + + template + static Result from( + intern_string_t src, Args... args) + { + auto from_res = T::from(args...); + + if (from_res.isOk()) { + return Ok(factory_container{from_res.unwrap().to_shared()}); + } + + return Err( + lnav::console::to_user_message(src, from_res.unwrapErr())); + } + + std::shared_ptr value; +}; + class yajlpp_gen_context; class yajlpp_parse_context; struct yajlpp_provider_context { - pcre_extractor ypc_extractor; + lnav::pcre2pp::match_data* ypc_extractor; size_t ypc_index{0}; yajlpp_parse_context* ypc_parse_context; static constexpr size_t nindex = static_cast(-1); template - intern_string_t get_substr_i(T name) const + intern_string_t get_substr_i(T&& name) const { - pcre_context::iterator cap = this->ypc_extractor.pe_context[name]; - char path[cap->length() + 1]; - size_t len = json_ptr::decode( - path, - this->ypc_extractor.pe_input.get_substr_start(cap), - cap->length()); + auto cap = (*this->ypc_extractor)[std::forward(name)].value(); + char path[cap.length() + 1]; + size_t len = json_ptr::decode(path, cap.data(), cap.length()); return intern_string::lookup(path, len); } template - std::string get_substr(T name) const + std::string get_substr(T&& name) const { - pcre_context::iterator cap = this->ypc_extractor.pe_context[name]; - char path[cap->length() + 1]; - size_t len = json_ptr::decode( - path, - this->ypc_extractor.pe_input.get_substr_start(cap), - cap->length()); + auto cap = (*this->ypc_extractor)[std::forward(name)].value(); + char path[cap.length() + 1]; + size_t len = json_ptr::decode(path, cap.data(), cap.length()); return {path, len}; } @@ -158,14 +191,14 @@ struct json_path_handler_base { static const enum_value_t ENUM_TERMINATOR; - 
json_path_handler_base(const std::string& property); - - explicit json_path_handler_base(const pcrepp& property); + explicit json_path_handler_base(const std::string& property); - json_path_handler_base(std::string property, const pcrepp& property_re); + explicit json_path_handler_base( + const std::shared_ptr& property_re); - json_path_handler_base(std::string property, - const std::shared_ptr& property_re); + json_path_handler_base( + std::string property, + const std::shared_ptr& property_re); bool is_array() const { return this->jph_is_array; } @@ -194,7 +227,7 @@ struct json_path_handler_base { std::vector get_types() const; std::string jph_property; - std::shared_ptr jph_regex; + std::shared_ptr jph_regex; yajl_callbacks jph_callbacks{}; std::function @@ -214,7 +247,7 @@ struct json_path_handler_base { const char* jph_synopsis{""}; const char* jph_description{""}; const json_path_container* jph_children{nullptr}; - std::shared_ptr jph_pattern; + std::shared_ptr jph_pattern; const char* jph_pattern_re{nullptr}; std::function jph_string_validator; size_t jph_min_length{0}; @@ -241,9 +274,9 @@ struct json_path_handler_base { const relative_time::parse_error& pe) const; void report_enum_error(yajlpp_parse_context* ypc, const std::string& value_str) const; - void report_regex_value_error(yajlpp_parse_context* ypc, - const std::string& value_str, - const pcrepp::compile_error& ce) const; + void report_error(yajlpp_parse_context* ypc, + const std::string& value_str, + lnav::console::user_message um) const; attr_line_t get_help_text(const std::string& full_path) const; attr_line_t get_help_text(yajlpp_parse_context* ypc) const; @@ -410,7 +443,6 @@ public: std::vector ypc_array_index; std::vector ypc_handler_stack; size_t ypc_array_handler_count{0}; - pcre_context_static<30> ypc_pcre_context; bool ypc_ignore_unused{false}; const struct json_path_container* ypc_sibling_handlers{nullptr}; const struct json_path_handler_base* ypc_current_handler{nullptr}; diff --git 
a/src/yajlpp/yajlpp_def.hh b/src/yajlpp/yajlpp_def.hh index 2f1aedc3..ea9dfb5a 100644 --- a/src/yajlpp/yajlpp_def.hh +++ b/src/yajlpp/yajlpp_def.hh @@ -104,6 +104,11 @@ struct json_path_handler : public json_path_handler_base { this->jph_callbacks.yajl_double = (int (*)(void*, double)) double_func; } + template + json_path_handler(P path) : json_path_handler_base(path) + { + } + template json_path_handler(P path, int (*str_func)(yajlpp_parse_context*, @@ -115,18 +120,8 @@ struct json_path_handler : public json_path_handler_base { = (int (*)(void*, const unsigned char*, size_t)) str_func; } - template - json_path_handler(P path) : json_path_handler_base(path) - { - } - - json_path_handler(const std::string& path, const pcrepp& re) - : json_path_handler_base(path, re) - { - } - json_path_handler(const std::string& path, - const std::shared_ptr& re) + const std::shared_ptr& re) : json_path_handler_base(path, re) { } @@ -194,10 +189,11 @@ struct json_path_handler : public json_path_handler_base { return *this; } - json_path_handler& with_pattern(const char* re) + template + json_path_handler& with_pattern(const T (&re)[N]) { this->jph_pattern_re = re; - this->jph_pattern = std::make_shared(re); + this->jph_pattern = lnav::pcre2pp::code::from_const(re).to_shared(); return *this; } @@ -341,10 +337,8 @@ struct json_path_handler : public json_path_handler_base { if (jph.jph_pattern) { auto sf = to_string_fragment(field_ptr); - pcre_input pi(sf); - pcre_context_static<30> pc; - if (!jph.jph_pattern->match(pc, pi)) { + if (!jph.jph_pattern->find_in(sf).ignore_error()) { jph.report_pattern_error(&ypc, sf.to_string()); } } @@ -833,10 +827,7 @@ struct json_path_handler : public json_path_handler_base { auto jph = ypc->ypc_current_handler; if (jph->jph_pattern) { - pcre_input pi(value_str); - pcre_context_static<30> pc; - - if (!jph->jph_pattern->match(pc, pi)) { + if (!jph->jph_pattern->find_in(value_str).ignore_error()) { jph->report_pattern_error(ypc, value_str); } } @@ 
-891,10 +882,7 @@ struct json_path_handler : public json_path_handler_base { auto jph = ypc->ypc_current_handler; if (jph->jph_pattern) { - pcre_input pi(value_str); - pcre_context_static<30> pc; - - if (!jph->jph_pattern->match(pc, pi)) { + if (!jph->jph_pattern->find_in(value_str).ignore_error()) { jph->report_pattern_error(ypc, value_str); } } @@ -953,10 +941,7 @@ struct json_path_handler : public json_path_handler_base { auto jph = ypc->ypc_current_handler; if (jph->jph_pattern) { - pcre_input pi(value_str); - pcre_context_static<30> pc; - - if (!jph->jph_pattern->match(pc, pi)) { + if (!jph->jph_pattern->find_in(value_str).ignore_error()) { jph->report_pattern_error(ypc, value_str); } } @@ -1010,10 +995,7 @@ struct json_path_handler : public json_path_handler_base { auto jph = ypc->ypc_current_handler; if (jph->jph_pattern) { - pcre_input pi(value_str); - pcre_context_static<30> pc; - - if (!jph->jph_pattern->match(pc, pi)) { + if (!jph->jph_pattern->find_in(value_str).ignore_error()) { jph->report_pattern_error(ypc, value_str); } } @@ -1065,10 +1047,7 @@ struct json_path_handler : public json_path_handler_base { auto jph = ypc->ypc_current_handler; if (jph->jph_pattern) { - pcre_input pi(value_str); - pcre_context_static<30> pc; - - if (!jph->jph_pattern->match(pc, pi)) { + if (!jph->jph_pattern->find_in(value_str).ignore_error()) { jph->report_pattern_error(ypc, value_str); } } @@ -1107,55 +1086,35 @@ struct json_path_handler : public json_path_handler_base { return *this; } - template - json_path_handler& for_field(Args... args, std::shared_ptr C::*ptr_arg) + template + struct int_ { + typedef int type; + }; + template::type = 0, + typename... Args> + json_path_handler& for_field(Args... 
args, T C::*ptr_arg) { this->add_cb(str_field_cb2); this->jph_str_cb = [args..., ptr_arg](yajlpp_parse_context* ypc, const unsigned char* str, size_t len) { - auto obj = ypc->ypc_obj_stack.top(); - auto value_str = std::string((const char*) str, len); - auto jph = ypc->ypc_current_handler; + auto* obj = ypc->ypc_obj_stack.top(); + auto value_frag = string_fragment::from_bytes(str, len); + const auto* jph = ypc->ypc_current_handler; - try { - auto re = std::make_shared(value_str); + auto from_res = T::from(ypc->get_full_path(), value_frag); + if (from_res.isErr()) { + jph->report_error( + ypc, value_frag.to_string(), from_res.unwrapErr()); + } else { json_path_handler::get_field(obj, args..., ptr_arg) - = std::move(re); - } catch (const pcrepp::error& e) { - pcrepp::compile_error ce; - - ce.ce_msg = e.what(); - ce.ce_offset = e.e_offset; - jph->report_regex_value_error(ypc, value_str, ce); + = from_res.unwrap(); } return 1; }; - this->jph_gen_callback - = [args..., ptr_arg](yajlpp_gen_context& ygc, - const json_path_handler_base& jph, - yajl_gen handle) { - const auto& field = json_path_handler::get_field( - ygc.ygc_obj_stack.top(), args..., ptr_arg); - - if (!ygc.ygc_default_stack.empty()) { - const auto& field_def = json_path_handler::get_field( - ygc.ygc_default_stack.top(), args..., ptr_arg); - - if (field == field_def) { - return yajl_gen_status_ok; - } - } - - if (ygc.ygc_depth) { - yajl_gen_string(handle, jph.jph_property); - } - - yajlpp_generator gen(handle); - - return gen(field->get_pattern()); - }; return *this; } @@ -1225,7 +1184,8 @@ struct json_path_handler : public json_path_handler_base { size_t len) { auto obj = ypc->ypc_obj_stack.top(); auto handler = ypc->ypc_current_handler; - auto parse_res = relative_time::from_str((const char*) str, len); + auto parse_res = relative_time::from_str( + string_fragment::from_bytes(str, len)); if (parse_res.isErr()) { auto parse_error = parse_res.unwrapErr(); @@ -1495,10 +1455,11 @@ property_handler(const 
std::string& path) return {path}; } +template inline json_path_handler -pattern_property_handler(const std::string& path) +pattern_property_handler(const T (&path)[N]) { - return {pcrepp(path)}; + return {lnav::pcre2pp::code::from_const(path).to_shared()}; } } // namespace yajlpp diff --git a/test/drive_data_scanner.cc b/test/drive_data_scanner.cc index 63f40404..79113a92 100644 --- a/test/drive_data_scanner.cc +++ b/test/drive_data_scanner.cc @@ -162,12 +162,14 @@ main(int argc, char* argv[]) scan_batch_context sbc{allocator}; for (iter = root_formats.begin(); iter != root_formats.end() && !found; - ++iter) { + ++iter) + { line_info li = {{13}}; (*iter)->clear(); if ((*iter)->scan(*lf, index, li, sbr, sbc) - == log_format::SCAN_MATCH) { + == log_format::SCAN_MATCH) + { format = (*iter)->specialized(); found = true; } @@ -188,7 +190,7 @@ main(int argc, char* argv[]) data_parser::TRACE_FILE = fopen("scanned.dpt", "w"); - data_scanner ds(sub_line, body.lr_start, sub_line.length()); + data_scanner ds(sub_line, body.lr_start); data_parser dp(&ds); std::string msg_format; @@ -200,8 +202,7 @@ main(int argc, char* argv[]) fprintf(out, "format :%s\n", msg_format.c_str()); if (pretty_print) { - data_scanner ds2( - sub_line, body.lr_start, sub_line.length()); + data_scanner ds2(sub_line, body.lr_start); pretty_printer pp(&ds2, sa); attr_line_t pretty_out; diff --git a/test/drive_grep_proc.cc b/test/drive_grep_proc.cc index a0d743f8..1ed82466 100644 --- a/test/drive_grep_proc.cc +++ b/test/drive_grep_proc.cc @@ -44,10 +44,7 @@ using namespace std; class my_source : public grep_proc_source { public: - my_source(auto_fd& fd) - { - this->ms_buffer.set_fd(fd); - }; + my_source(auto_fd& fd) { this->ms_buffer.set_fd(fd); }; bool grep_value_for_line(vis_line_t line_number, string& value_out) { @@ -117,7 +114,6 @@ main(int argc, char* argv[]) int retval = EXIT_SUCCESS; const char* errptr; auto_fd fd; - pcre* code; int eoff; if (argc < 3) { @@ -126,29 +122,35 @@ main(int argc, char* 
argv[]) } else if ((fd = open(argv[2], O_RDONLY)) == -1) { perror("open"); retval = EXIT_FAILURE; - } else if ((code - = pcre_compile(argv[1], PCRE_CASELESS, &errptr, &eoff, NULL)) - == NULL) - { - fprintf(stderr, "error: invalid pattern -- %s\n", errptr); } else { - auto psuperv = std::make_shared(); - my_source ms(fd); - my_sink msink; + auto compile_res = lnav::pcre2pp::code::from( + string_fragment::from_c_str(argv[1]), PCRE2_CASELESS); - grep_proc gp(code, ms, psuperv); + if (compile_res.isErr()) { + auto ce = compile_res.unwrapErr(); + fprintf(stderr, + "error: invalid pattern -- %s\n", + ce.get_message().c_str()); + } else { + auto co = compile_res.unwrap().to_shared(); + auto psuperv = std::make_shared(); + my_source ms(fd); + my_sink msink; + + grep_proc gp(co, ms, psuperv); - gp.set_sink(&msink); - gp.queue_request(); - gp.start(); + gp.set_sink(&msink); + gp.queue_request(); + gp.start(); - while (!msink.ms_finished) { - vector pollfds; + while (!msink.ms_finished) { + vector pollfds; - psuperv->update_poll_set(pollfds); - poll(&pollfds[0], pollfds.size(), -1); + psuperv->update_poll_set(pollfds); + poll(&pollfds[0], pollfds.size(), -1); - psuperv->check_poll_set(pollfds); + psuperv->check_poll_set(pollfds); + } } } diff --git a/test/drive_sequencer.cc b/test/drive_sequencer.cc index 2c9494fb..4e39d30f 100644 --- a/test/drive_sequencer.cc +++ b/test/drive_sequencer.cc @@ -43,7 +43,6 @@ #include "config.h" #include "logfile.hh" -#include "pcrepp/pcrepp.hh" #include "sequence_matcher.hh" #include "sequence_sink.hh" #include "textview_curses.hh" @@ -52,10 +51,7 @@ using namespace std; class my_source : public grep_proc_source { public: - my_source(auto_fd& fd) : ms_offset(0) - { - this->ms_buffer.set_fd(fd); - }; + my_source(auto_fd& fd) : ms_offset(0) { this->ms_buffer.set_fd(fd); }; bool grep_value_for_line(vis_line_t line_number, string& value_out) { diff --git a/test/drive_shlexer.cc b/test/drive_shlexer.cc index ed3dfb7d..fe6f15fc 100644 --- 
a/test/drive_shlexer.cc +++ b/test/drive_shlexer.cc @@ -56,7 +56,7 @@ main(int argc, char* argv[]) } shlex lexer(argv[1], strlen(argv[1])); - pcre_context::capture_t cap; + string_fragment cap; shlex_token_t token; printf(" %s\n", argv[1]); @@ -64,12 +64,12 @@ main(int argc, char* argv[]) int lpc; printf("%s ", ST_TOKEN_NAMES[(int) token]); - for (lpc = 0; lpc < cap.c_end; lpc++) { - if (lpc == cap.c_begin) { + for (lpc = 0; lpc < cap.sf_end; lpc++) { + if (lpc == cap.sf_begin) { fputc('^', stdout); - } else if (lpc == (cap.c_end - 1)) { + } else if (lpc == (cap.sf_end - 1)) { fputc('^', stdout); - } else if (lpc > cap.c_begin) { + } else if (lpc > cap.sf_begin) { fputc('-', stdout); } else { fputc(' ', stdout); diff --git a/test/expected/expected.am b/test/expected/expected.am index 1e3cdfc8..3a7f53dc 100644 --- a/test/expected/expected.am +++ b/test/expected/expected.am @@ -132,8 +132,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_cmds.sh_968dac54dc80d91a5da2322890c6c26dfa0d8462.out \ $(srcdir)/%reldir%/test_cmds.sh_a00943ef715598c7554b85de8502454e41bb9e28.err \ $(srcdir)/%reldir%/test_cmds.sh_a00943ef715598c7554b85de8502454e41bb9e28.out \ - $(srcdir)/%reldir%/test_cmds.sh_a0e6214b2a85c90d31aee12efde850441cca7eb3.err \ - $(srcdir)/%reldir%/test_cmds.sh_a0e6214b2a85c90d31aee12efde850441cca7eb3.out \ $(srcdir)/%reldir%/test_cmds.sh_a1123427c31c022433d66d05ee5d5e1c8ab415e4.err \ $(srcdir)/%reldir%/test_cmds.sh_a1123427c31c022433d66d05ee5d5e1c8ab415e4.out \ $(srcdir)/%reldir%/test_cmds.sh_a190bfc279fa046a823864f1484f899d27d22953.err \ @@ -348,58 +346,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_pretty_print.sh_cd361eeca7e91bfab942b75d6c3422c7a456a111.out \ $(srcdir)/%reldir%/test_pretty_print.sh_f8feb52a321026d9562b271eb37a2c56dfaed329.err \ $(srcdir)/%reldir%/test_pretty_print.sh_f8feb52a321026d9562b271eb37a2c56dfaed329.out \ - $(srcdir)/%reldir%/test_regex101.sh_0fa3663a45aca6a328cb728872af7ed7ee896f1c.err \ - 
$(srcdir)/%reldir%/test_regex101.sh_0fa3663a45aca6a328cb728872af7ed7ee896f1c.out \ - $(srcdir)/%reldir%/test_regex101.sh_182ae9244db314a953af2bee969726e381bc5a32.err \ - $(srcdir)/%reldir%/test_regex101.sh_182ae9244db314a953af2bee969726e381bc5a32.out \ - $(srcdir)/%reldir%/test_regex101.sh_2158f1f011ba8e1b152396c072790c076fdb8ce8.err \ - $(srcdir)/%reldir%/test_regex101.sh_2158f1f011ba8e1b152396c072790c076fdb8ce8.out \ - $(srcdir)/%reldir%/test_regex101.sh_281af24141680330791db7f7c5fa70833ce08a6b.err \ - $(srcdir)/%reldir%/test_regex101.sh_281af24141680330791db7f7c5fa70833ce08a6b.out \ - $(srcdir)/%reldir%/test_regex101.sh_35703b13990785632cca82123fb3883797959c0b.err \ - $(srcdir)/%reldir%/test_regex101.sh_35703b13990785632cca82123fb3883797959c0b.out \ - $(srcdir)/%reldir%/test_regex101.sh_366730cac50b4a09b7de4b84641791470b1cb9a3.err \ - $(srcdir)/%reldir%/test_regex101.sh_366730cac50b4a09b7de4b84641791470b1cb9a3.out \ - $(srcdir)/%reldir%/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err \ - $(srcdir)/%reldir%/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.out \ - $(srcdir)/%reldir%/test_regex101.sh_442cc58676590a3604d5c2183f5fe0a75c98351a.err \ - $(srcdir)/%reldir%/test_regex101.sh_442cc58676590a3604d5c2183f5fe0a75c98351a.out \ - $(srcdir)/%reldir%/test_regex101.sh_566fd88d216a44bc1c6e23f2d6f2d0caf99d42f9.err \ - $(srcdir)/%reldir%/test_regex101.sh_566fd88d216a44bc1c6e23f2d6f2d0caf99d42f9.out \ - $(srcdir)/%reldir%/test_regex101.sh_5f2f7ecb6ab9cbec4b41385b91bd038906b8a7b2.err \ - $(srcdir)/%reldir%/test_regex101.sh_5f2f7ecb6ab9cbec4b41385b91bd038906b8a7b2.out \ - $(srcdir)/%reldir%/test_regex101.sh_629bde30483e0a6461076e9058f3a5eb81ae0425.err \ - $(srcdir)/%reldir%/test_regex101.sh_629bde30483e0a6461076e9058f3a5eb81ae0425.out \ - $(srcdir)/%reldir%/test_regex101.sh_630db454054cf92ec9bd0f4e3e83300047f583ff.err \ - $(srcdir)/%reldir%/test_regex101.sh_630db454054cf92ec9bd0f4e3e83300047f583ff.out \ - 
$(srcdir)/%reldir%/test_regex101.sh_771af6f3d29b8350542d5c6e98bdbf4c223cd531.err \ - $(srcdir)/%reldir%/test_regex101.sh_771af6f3d29b8350542d5c6e98bdbf4c223cd531.out \ - $(srcdir)/%reldir%/test_regex101.sh_7991a5b617867cf37c9f7baa85ffa425f7d455a2.err \ - $(srcdir)/%reldir%/test_regex101.sh_7991a5b617867cf37c9f7baa85ffa425f7d455a2.out \ - $(srcdir)/%reldir%/test_regex101.sh_79ee3f5fe71ccec97b2619d8c1f74ca97ffd2243.err \ - $(srcdir)/%reldir%/test_regex101.sh_79ee3f5fe71ccec97b2619d8c1f74ca97ffd2243.out \ - $(srcdir)/%reldir%/test_regex101.sh_7de76c174c58d67bf93e8f01d6d55ebb6a023f10.err \ - $(srcdir)/%reldir%/test_regex101.sh_7de76c174c58d67bf93e8f01d6d55ebb6a023f10.out \ - $(srcdir)/%reldir%/test_regex101.sh_8a43e6657d4f60e68d31eb8302542ca28e80d077.err \ - $(srcdir)/%reldir%/test_regex101.sh_8a43e6657d4f60e68d31eb8302542ca28e80d077.out \ - $(srcdir)/%reldir%/test_regex101.sh_8e93a3b6b941847c71409a297779fbb0a6666a51.err \ - $(srcdir)/%reldir%/test_regex101.sh_8e93a3b6b941847c71409a297779fbb0a6666a51.out \ - $(srcdir)/%reldir%/test_regex101.sh_95c56a9d146ec9a7c2196559d316f928b2ae6ae9.err \ - $(srcdir)/%reldir%/test_regex101.sh_95c56a9d146ec9a7c2196559d316f928b2ae6ae9.out \ - $(srcdir)/%reldir%/test_regex101.sh_9d101ee29c45cdb8c0f117ad736c9a5dd5da5839.err \ - $(srcdir)/%reldir%/test_regex101.sh_9d101ee29c45cdb8c0f117ad736c9a5dd5da5839.out \ - $(srcdir)/%reldir%/test_regex101.sh_c43e07df9b3068696fdc8759c7561135db981b38.err \ - $(srcdir)/%reldir%/test_regex101.sh_c43e07df9b3068696fdc8759c7561135db981b38.out \ - $(srcdir)/%reldir%/test_regex101.sh_cbd859487e4ea011cd6e0f0f114d70158bfd8b43.err \ - $(srcdir)/%reldir%/test_regex101.sh_cbd859487e4ea011cd6e0f0f114d70158bfd8b43.out \ - $(srcdir)/%reldir%/test_regex101.sh_cf6c0a9f0f04e24ce1fae7a0a434830b14447f83.err \ - $(srcdir)/%reldir%/test_regex101.sh_cf6c0a9f0f04e24ce1fae7a0a434830b14447f83.out \ - $(srcdir)/%reldir%/test_regex101.sh_d84597760285c3964b258726341e018f6cd49954.err \ - 
$(srcdir)/%reldir%/test_regex101.sh_d84597760285c3964b258726341e018f6cd49954.out \ - $(srcdir)/%reldir%/test_regex101.sh_f23e393dbf23d0d8e276e9b7610c7b74d79980f8.err \ - $(srcdir)/%reldir%/test_regex101.sh_f23e393dbf23d0d8e276e9b7610c7b74d79980f8.out \ - $(srcdir)/%reldir%/test_regex101.sh_fc41b6ee90cbf038620151f16d164b361acf82dd.err \ - $(srcdir)/%reldir%/test_regex101.sh_fc41b6ee90cbf038620151f16d164b361acf82dd.out \ $(srcdir)/%reldir%/test_sessions.sh_0300a1391c33b1c45ddfa90198a6bd0a5404a77f.err \ $(srcdir)/%reldir%/test_sessions.sh_0300a1391c33b1c45ddfa90198a6bd0a5404a77f.out \ $(srcdir)/%reldir%/test_sessions.sh_17b85654b929b2a8fc1705a170ced544783292fa.err \ @@ -762,6 +708,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_json_func.sh_f34205b59e04f261897ad89f659595c743a18ca9.out \ $(srcdir)/%reldir%/test_sql_json_func.sh_f34f5dfa938a1ac7721f924beb16bbceec127a1b.err \ $(srcdir)/%reldir%/test_sql_json_func.sh_f34f5dfa938a1ac7721f924beb16bbceec127a1b.out \ + $(srcdir)/%reldir%/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.err \ + $(srcdir)/%reldir%/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.out \ $(srcdir)/%reldir%/test_sql_regexp.sh_51293df041b6969ccecc60204dce3676d0fb006d.err \ $(srcdir)/%reldir%/test_sql_regexp.sh_51293df041b6969ccecc60204dce3676d0fb006d.out \ $(srcdir)/%reldir%/test_sql_regexp.sh_b841a0c09601e2419eeb99e85f7e286c889e4801.err \ @@ -770,6 +718,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_regexp.sh_bbd1128cf61a9af8f9dc937b46217443f42e1a7a.out \ $(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.err \ $(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.out \ + $(srcdir)/%reldir%/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.err \ + $(srcdir)/%reldir%/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.out \ $(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.err \ 
$(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.out \ $(srcdir)/%reldir%/test_sql_search_table.sh_1a0d872ebc492fcecb2e79a0993170d5fc771a5b.err \ @@ -850,6 +800,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_str_func.sh_8cef54f0617960320b5d3615068eb27333dcf6a3.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_8f4f0ed74c4dc6b821e02a44552b694614cd9353.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_8f4f0ed74c4dc6b821e02a44552b694614cd9353.out \ + $(srcdir)/%reldir%/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.err \ + $(srcdir)/%reldir%/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_a4d84a0082a7df34c95c2e6e070bbf6effaa5594.err \ $(srcdir)/%reldir%/test_sql_str_func.sh_a4d84a0082a7df34c95c2e6e070bbf6effaa5594.out \ $(srcdir)/%reldir%/test_sql_str_func.sh_a65d2fb2f841578619528ca10168ca4d650218e9.err \ @@ -986,8 +938,6 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \ - $(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err \ - $(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \ $(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \ diff --git a/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out b/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out index 64894b07..eb930dab 100644 --- a/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out +++ b/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out @@ -833,7 
+833,7 @@ For support questions, email: -:current-time +:current-time ══════════════════════════════════════════════════════════════════════ Print the current time in human-readable form and seconds since the epoch diff --git a/test/expected/test_format_loader.sh_3f1d6f35e8a9ae4fd3e91ffaa82a037b5a847ab7.err b/test/expected/test_format_loader.sh_3f1d6f35e8a9ae4fd3e91ffaa82a037b5a847ab7.err index 42cbe655..20ee86f6 100644 --- a/test/expected/test_format_loader.sh_3f1d6f35e8a9ae4fd3e91ffaa82a037b5a847ab7.err +++ b/test/expected/test_format_loader.sh_3f1d6f35e8a9ae4fd3e91ffaa82a037b5a847ab7.err @@ -1,56 +1,56 @@ -✘ error: “invalid(abc” is not a valid regular expression for property “/invalid_props_log/tags/badtag3/pattern” - reason: missing ) - --> {test_dir}/bad-config/formats/invalid-properties/format.json:35 - |  "pattern": "invalid(abc" +✘ error: “invalid(abc” is not a valid regular expression + reason: missing closing parenthesis  --> /invalid_props_log/tags/badtag3/pattern  | invalid(abc  - |  ^ missing )  + |  ^ missing closing parenthesis + --> {test_dir}/bad-config/formats/invalid-properties/format.json:35 + |  "pattern": "invalid(abc"  = help: Property Synopsis /invalid_props_log/tags/badtag3/pattern  Description The regular expression to match against the body of the log message Example \w+ is down -✘ error: “abc(def” is not a valid regular expression for property “/invalid_props_log/search-table/bad_table_regex/pattern” - reason: missing ) - --> {test_dir}/bad-config/formats/invalid-properties/format.json:40 - |  "pattern": "abc(def"  +✘ error: “abc(def” is not a valid regular expression + reason: missing closing parenthesis  --> /invalid_props_log/search-table/bad_table_regex/pattern  | abc(def  - |  ^ missing )  + |  ^ missing closing parenthesis  + --> {test_dir}/bad-config/formats/invalid-properties/format.json:40 + |  "pattern": "abc(def"   = help: Property Synopsis /invalid_props_log/search-table/bad_table_regex/pattern  Description The regular 
expression for this search table. -✘ error: “^(?\d+: (?.*)$” is not a valid regular expression for property “/bad_regex_log/regex/std/pattern” - reason: missing ) +✘ error: “^(?\d+: (?.*)$” is not a valid regular expression + reason: missing closing parenthesis + --> /bad_regex_log/regex/std/pattern + | ^(?<timestamp>\d+: (?<body>.*)$  + |  ^ missing closing parenthesis  --> {test_dir}/bad-config/formats/invalid-regex/format.json:6  |  "pattern": "^(?\\d+: (?.*)$" - --> /bad_regex_log/regex/std/pattern - | ^(?<timestamp>\d+: (?<body>.*)$  - |  ^ missing )  = help: Property Synopsis /bad_regex_log/regex/std/pattern  Description The regular expression to match a log message and capture fields. -✘ error: “(foo” is not a valid regular expression for property “/bad_regex_log/level/error” - reason: missing ) +✘ error: “(foo” is not a valid regular expression + reason: missing closing parenthesis + --> pattern + | (foo  + |  ^ missing closing parenthesis   --> {test_dir}/bad-config/formats/invalid-regex/format.json:13  |  "error": "(foo"  - --> /bad_regex_log/level/error - | (foo  - |  ^ missing )   = help: Property Synopsis /bad_regex_log/level/error  Description The regular expression used to match the log text for this level. For JSON logs with numeric levels, this should be the number for the corresponding level. 
-✘ error: “abc(” is not a valid regular expression for property “/bad_regex_log/highlights/foobar/pattern” - reason: missing ) - --> {test_dir}/bad-config/formats/invalid-regex/format.json:25 - |  "pattern": "abc("  +✘ error: “abc(” is not a valid regular expression + reason: missing closing parenthesis  --> /bad_regex_log/highlights/foobar/pattern  | abc(  - |  ^ missing )  + |  ^ missing closing parenthesis  + --> {test_dir}/bad-config/formats/invalid-regex/format.json:25 + |  "pattern": "abc("   = help: Property Synopsis /bad_regex_log/highlights/foobar/pattern  Description @@ -153,8 +153,11 @@  | CREATE TALE invalid (x y z);   |  ^ near "TALE": syntax error  ✘ error: failed to execute SQL statement - reason: ✘ error: call to regexp_match(re, str) failed - |  reason: missing ) + reason: ✘ error: “abc(” is not a valid regular expression + |  reason: missing closing parenthesis + |   --> arg + |   | abc(  + |   |  ^ missing closing parenthesis  --> {test_dir}/bad-config/formats/invalid-sql/init2.sql  | SELECT regexp_match('abc(', '123')   | FROM sqlite_master;  diff --git a/test/expected/test_format_loader.sh_a47f2b090a5d8a226783835c7ff7d1c8821f11ed.err b/test/expected/test_format_loader.sh_a47f2b090a5d8a226783835c7ff7d1c8821f11ed.err index 6efa6110..f657d874 100644 --- a/test/expected/test_format_loader.sh_a47f2b090a5d8a226783835c7ff7d1c8821f11ed.err +++ b/test/expected/test_format_loader.sh_a47f2b090a5d8a226783835c7ff7d1c8821f11ed.err @@ -4,24 +4,24 @@  |  ar_log": { "abc" } }  |  (right here) ------^  |  -✘ error: “abc(” is not a valid regular expression for property “/invalid_key_log/level-pointer” - reason: missing ) - --> {test_dir}/bad-config-json/formats/invalid-key/format.json:4 - |  "level-pointer": "abc(",  +✘ error: “abc(” is not a valid regular expression + reason: missing closing parenthesis  --> /invalid_key_log/level-pointer  | abc(  - |  ^ missing )  + |  ^ missing closing parenthesis  + --> 
{test_dir}/bad-config-json/formats/invalid-key/format.json:4 + |  "level-pointer": "abc(",   = help: Property Synopsis /invalid_key_log/level-pointer Description A regular-expression that matches the JSON-pointer of the level property -✘ error: “def[ghi” is not a valid regular expression for property “/invalid_key_log/file-pattern” +✘ error: “def[ghi” is not a valid regular expression reason: missing terminating ] for character class - --> {test_dir}/bad-config-json/formats/invalid-key/format.json:5 - |  "file-pattern": "def[ghi",   --> /invalid_key_log/file-pattern  | def[ghi   |  ^ missing terminating ] for character class + --> {test_dir}/bad-config-json/formats/invalid-key/format.json:5 + |  "file-pattern": "def[ghi",   = help: Property Synopsis /invalid_key_log/file-pattern Description diff --git a/test/expected/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err b/test/expected/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err index 59a51879..32c87d4b 100644 --- a/test/expected/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err +++ b/test/expected/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err @@ -1,14 +1,34 @@ +✘ error: invalid value “/unit_test_log/value/jobserver” + reason: no patterns have a capture named “jobserver” + = note: the following captures are available: + + = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name +✘ error: invalid value “/unit_test_log/value/processid” + reason: no patterns have a capture named “processid” + = note: the following captures are available: + + = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name +✘ error: invalid value “/unit_test_log/value/timestamp” + reason: no patterns have a capture named “timestamp” + = note: the following captures are available: + + = help: values are populated from captures in patterns, so at least one pattern 
must have a capture with this value name +✘ error: invalid value “/unit_test_log/value/workqueue” + reason: no patterns have a capture named “workqueue” + = note: the following captures are available: + + = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name ✘ error: invalid sample log message: "[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {\"ELAPSED\":\"0.011\",\"LEVEL\":\"info\",\"MESSAGE\":\"finished in 0.011\\n\",\"PREFIX\":\"YFgyWQriCmsAAofJAAAAHg\",\"ROUTINGKEY\":\"EXAMPLE1366.Example.Events._Publish\"}" reason: sample does not match any patterns  --> regex101-home/.lnav/formats/installed/unit_test_log.json:26  = note: the following shows how each pattern matched this sample: - [03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"} + [03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}  = note: std = “” ✘ error: invalid sample log message: "[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {\"ELAPSED\":\"0.011\",\"LEVEL\":\"info\",\"MESSAGE\":\"finished in 0.011\\n\",\"PREFIX\":\"YFgyWQriCmsAAofJAAAAHg\",\"ROUTINGKEY\":\"EXAMPLE1366.Example.Events._Publish\"}" reason: sample does not match any patterns  --> regex101-home/.lnav/formats/installed/unit_test_log.json:30  = note: the following shows how each pattern matched this sample: - [03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"} + [03/22/2021 02:00:02 job1074.example.com 
db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}  = note: std = “” diff --git a/test/expected/test_sql.sh_2532083f215ed44630621f18df3dd7b77c06ae10.err b/test/expected/test_sql.sh_2532083f215ed44630621f18df3dd7b77c06ae10.err index a3cbd2db..b57e6bc5 100644 --- a/test/expected/test_sql.sh_2532083f215ed44630621f18df3dd7b77c06ae10.err +++ b/test/expected/test_sql.sh_2532083f215ed44630621f18df3dd7b77c06ae10.err @@ -1,8 +1,10 @@ -✘ error: invalid regular expression - reason: missing ) +✘ error: “bad(” is not a valid regular expression + reason: missing closing parenthesis + --> pattern + | bad(  + |  ^ missing closing parenthesis   --> command-option:1 - | :create-search-table search_test1 bad(  - |  ^ missing ) + | :create-search-table search_test1 bad(   = help: :create-search-table table-name [pattern] ══════════════════════════════════════════════════════════════════════ Create an SQL table based on a regex search diff --git a/test/expected/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.err b/test/expected/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.out b/test/expected/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.out new file mode 100644 index 00000000..19aa1c4a --- /dev/null +++ b/test/expected/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.out @@ -0,0 +1,4 @@ +match_index capture_index capture_name capture_count range_start range_stop content  + 0  0  <NULL>  3  1  9 abc=def;  + 0 1 3 1 4 abc + 0  2  <NULL>  3  5  8 def  diff --git a/test/expected/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.err b/test/expected/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.err new file mode 100644 index 00000000..e69de29b diff --git 
a/test/expected/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.out b/test/expected/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.out new file mode 100644 index 00000000..6e952fb6 --- /dev/null +++ b/test/expected/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.out @@ -0,0 +1,2 @@ +match_index  content  + 0 {"col_0":"abc","col_1":"def"}  diff --git a/test/expected/test_sql_str_func.sh_11bcc5d32eabbedb6974f160dace9ef1ef0009e9.out b/test/expected/test_sql_str_func.sh_11bcc5d32eabbedb6974f160dace9ef1ef0009e9.out index b1833a9d..efa08d6d 100644 --- a/test/expected/test_sql_str_func.sh_11bcc5d32eabbedb6974f160dace9ef1ef0009e9.out +++ b/test/expected/test_sql_str_func.sh_11bcc5d32eabbedb6974f160dace9ef1ef0009e9.out @@ -9,7 +9,7 @@ Row 0: Row 1: Column match_index: 0 Column capture_index: 1 - Column capture_name: + Column capture_name: (null) Column capture_count: 2 Column range_start: 1 Column range_stop: 2 @@ -25,7 +25,7 @@ Row 2: Row 3: Column match_index: 1 Column capture_index: 1 - Column capture_name: + Column capture_name: (null) Column capture_count: 2 Column range_start: 3 Column range_stop: 4 @@ -41,7 +41,7 @@ Row 4: Row 5: Column match_index: 2 Column capture_index: 1 - Column capture_name: + Column capture_name: (null) Column capture_count: 2 Column range_start: 5 Column range_stop: 6 @@ -57,7 +57,7 @@ Row 6: Row 7: Column match_index: 3 Column capture_index: 1 - Column capture_name: + Column capture_name: (null) Column capture_count: 2 Column range_start: 7 Column range_stop: 9 diff --git a/test/expected/test_sql_str_func.sh_5e436fbd4efb140600999c5208886a5a57b8a30e.out b/test/expected/test_sql_str_func.sh_5e436fbd4efb140600999c5208886a5a57b8a30e.out index 8e9598e6..22d58c43 100644 --- a/test/expected/test_sql_str_func.sh_5e436fbd4efb140600999c5208886a5a57b8a30e.out +++ b/test/expected/test_sql_str_func.sh_5e436fbd4efb140600999c5208886a5a57b8a30e.out @@ -9,7 +9,7 @@ Row 0: Row 1: Column match_index: 0 Column capture_index: 
1 - Column capture_name: + Column capture_name: (null) Column capture_count: 3 Column range_start: 0 Column range_stop: 0 diff --git a/test/expected/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.err b/test/expected/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.out b/test/expected/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.out new file mode 100644 index 00000000..f93d3487 --- /dev/null +++ b/test/expected/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.out @@ -0,0 +1,2 @@ +regexp_match('^(\w+)=([^;]+);', 'abc=def;ghi=jkl;') +{"col_0":"abc","col_1":"def"} diff --git a/test/expected/test_sql_str_func.sh_b0e5bf23bbbc0defa8bb26817782c9d46a778ad8.out b/test/expected/test_sql_str_func.sh_b0e5bf23bbbc0defa8bb26817782c9d46a778ad8.out index 4a469755..f94307e7 100644 --- a/test/expected/test_sql_str_func.sh_b0e5bf23bbbc0defa8bb26817782c9d46a778ad8.out +++ b/test/expected/test_sql_str_func.sh_b0e5bf23bbbc0defa8bb26817782c9d46a778ad8.out @@ -9,7 +9,7 @@ Row 0: Row 1: Column match_index: 0 Column capture_index: 1 - Column capture_name: + Column capture_name: (null) Column capture_count: 2 Column range_start: 5 Column range_stop: 8 diff --git a/test/expected/test_sql_str_func.sh_d4bc869850f5b7e53353fc2506fea0c8e96f29c5.err b/test/expected/test_sql_str_func.sh_d4bc869850f5b7e53353fc2506fea0c8e96f29c5.err index 592bf949..f4c83996 100644 --- a/test/expected/test_sql_str_func.sh_d4bc869850f5b7e53353fc2506fea0c8e96f29c5.err +++ b/test/expected/test_sql_str_func.sh_d4bc869850f5b7e53353fc2506fea0c8e96f29c5.err @@ -1 +1 @@ -error: sqlite3_exec failed -- Invalid regular expression: missing ) +error: sqlite3_exec failed -- Invalid regular expression: missing closing parenthesis diff --git a/test/expected/test_sql_views_vtab.sh_ade121f29bedea0d1a54452cc994b2302ad9dabb.err 
b/test/expected/test_sql_views_vtab.sh_ade121f29bedea0d1a54452cc994b2302ad9dabb.err index b97c051c..dd46516b 100644 --- a/test/expected/test_sql_views_vtab.sh_ade121f29bedea0d1a54452cc994b2302ad9dabb.err +++ b/test/expected/test_sql_views_vtab.sh_ade121f29bedea0d1a54452cc994b2302ad9dabb.err @@ -1,4 +1,4 @@ ✘ error: SQL statement failed - reason: Invalid regular expression for pattern: missing ) at offset 4 + reason: Invalid regular expression for pattern: missing closing parenthesis at offset 4  --> command-option:1  | ;INSERT INTO lnav_view_filters VALUES ('log', 0, 1, 'out', 'regex', 'abc(') diff --git a/test/log-samples/sample-1aeb47c0a97d19bb7418f0172480e05e49c6e53e.txt b/test/log-samples/sample-1aeb47c0a97d19bb7418f0172480e05e49c6e53e.txt index 18f052de..289780a8 100644 --- a/test/log-samples/sample-1aeb47c0a97d19bb7418f0172480e05e49c6e53e.txt +++ b/test/log-samples/sample-1aeb47c0a97d19bb7418f0172480e05e49c6e53e.txt @@ -1,4 +1,4 @@ - Apr 29 22:32:27 tstack-centos5 dhclient: bound to 10.1.10.62 -- renewal in 55327 seconds. + Apr 29 22:32:27 tstack-centos5 dhclient: bound to 10.1.10.62 -- renewal in 55327 seconds key 50:50 ^ ipv4 50:60 ^--------^ 10.1.10.62 pair 50:60 ^--------^ 10.1.10.62 @@ -8,7 +8,7 @@ pair 61:63 ^^ key 75:75 ^ num 75:80 ^---^ 55327 pair 75:80 ^---^ 55327 -msg :bound to 10.1.10.62 -- renewal in 55327 seconds. +msg :bound to 10.1.10.62 -- renewal in 55327 seconds format :bound to # # renewal in # seconds { "col_0": "10.1.10.62", diff --git a/test/log-samples/sample-3856ad0f551a04fde41a020158d6b33ef97c870a.txt b/test/log-samples/sample-3856ad0f551a04fde41a020158d6b33ef97c870a.txt index 6a5c0320..5d5d3479 100644 --- a/test/log-samples/sample-3856ad0f551a04fde41a020158d6b33ef97c870a.txt +++ b/test/log-samples/sample-3856ad0f551a04fde41a020158d6b33ef97c870a.txt @@ -1,4 +1,4 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Leaving mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62. 
+ Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Leaving mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62 key 59:59 ^ sym 59:63 ^--^ mDNS pair 59:63 ^--^ mDNS @@ -8,7 +8,7 @@ pair 93:102 key 116:116 ^ ipv4 116:126 ^--------^ 10.1.10.62 pair 116:126 ^--------^ 10.1.10.62 -msg :Leaving mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62. +msg :Leaving mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62 format :Leaving # multicast group on interface # with address # { "col_0": "mDNS", diff --git a/test/log-samples/sample-500c9e492e04f5f58862c8086ca301de0dd976ce.txt b/test/log-samples/sample-500c9e492e04f5f58862c8086ca301de0dd976ce.txt index 8f397057..ed7fa8ec 100644 --- a/test/log-samples/sample-500c9e492e04f5f58862c8086ca301de0dd976ce.txt +++ b/test/log-samples/sample-500c9e492e04f5f58862c8086ca301de0dd976ce.txt @@ -1,11 +1,11 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: New relevant interface eth0.IPv4 for mDNS. + Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: New relevant interface eth0.IPv4 for mDNS key 74:74 ^ sym 74:83 ^-------^ eth0.IPv4 pair 74:83 ^-------^ eth0.IPv4 key 88:88 ^ sym 88:92 ^--^ mDNS pair 88:92 ^--^ mDNS -msg :New relevant interface eth0.IPv4 for mDNS. +msg :New relevant interface eth0.IPv4 for mDNS format :New relevant interface # for # { "col_0": "eth0.IPv4", diff --git a/test/log-samples/sample-6049d4309f26eefb1a3406d937a9ba8a0df592a7.txt b/test/log-samples/sample-6049d4309f26eefb1a3406d937a9ba8a0df592a7.txt index 5f2a2478..f424a866 100644 --- a/test/log-samples/sample-6049d4309f26eefb1a3406d937a9ba8a0df592a7.txt +++ b/test/log-samples/sample-6049d4309f26eefb1a3406d937a9ba8a0df592a7.txt @@ -1,11 +1,11 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Withdrawing address record for 10.1.10.62 on eth0. 
+ Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Withdrawing address record for 10.1.10.62 on eth0 key 82:82 ^ ipv4 82:92 ^--------^ 10.1.10.62 pair 82:92 ^--------^ 10.1.10.62 key 96:96 ^ sym 96:100 ^--^ eth0 pair 96:100 ^--^ eth0 -msg :Withdrawing address record for 10.1.10.62 on eth0. +msg :Withdrawing address record for 10.1.10.62 on eth0 format :Withdrawing address record for # on # { "col_0": "10.1.10.62", diff --git a/test/log-samples/sample-62315d884afdc4155b35f905415c74bfcfd39fc2.txt b/test/log-samples/sample-62315d884afdc4155b35f905415c74bfcfd39fc2.txt index 41a5d005..5a30cbc3 100644 --- a/test/log-samples/sample-62315d884afdc4155b35f905415c74bfcfd39fc2.txt +++ b/test/log-samples/sample-62315d884afdc4155b35f905415c74bfcfd39fc2.txt @@ -1,4 +1,4 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Joining mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62. + Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Joining mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62 key 59:59 ^ sym 59:63 ^--^ mDNS pair 59:63 ^--^ mDNS @@ -8,7 +8,7 @@ pair 93:102 key 116:116 ^ ipv4 116:126 ^--------^ 10.1.10.62 pair 116:126 ^--------^ 10.1.10.62 -msg :Joining mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62. +msg :Joining mDNS multicast group on interface eth0.IPv4 with address 10.1.10.62 format :Joining # multicast group on interface # with address # { "col_0": "mDNS", diff --git a/test/log-samples/sample-a74570613c082c7fe283672031e18e54e8887ffb.txt b/test/log-samples/sample-a74570613c082c7fe283672031e18e54e8887ffb.txt index 75edc9f7..b2e0426d 100644 --- a/test/log-samples/sample-a74570613c082c7fe283672031e18e54e8887ffb.txt +++ b/test/log-samples/sample-a74570613c082c7fe283672031e18e54e8887ffb.txt @@ -1,11 +1,11 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Interface eth0.IPv4 no longer relevant for mDNS. 
+ Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Interface eth0.IPv4 no longer relevant for mDNS key 61:61 ^ sym 61:70 ^-------^ eth0.IPv4 pair 61:70 ^-------^ eth0.IPv4 key 94:94 ^ sym 94:98 ^--^ mDNS pair 94:98 ^--^ mDNS -msg :Interface eth0.IPv4 no longer relevant for mDNS. +msg :Interface eth0.IPv4 no longer relevant for mDNS format :Interface # no longer relevant for # { "col_0": "eth0.IPv4", diff --git a/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt b/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt index 0a310200..63c22cc2 100644 --- a/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt +++ b/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt @@ -1,4 +1,4 @@ - Jun 2 00:34:32 Tim-Stacks-iMac kernel[0]: vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400. + Jun 2 00:34:32 Tim-Stacks-iMac kernel[0]: vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400 key 43:48 ^---^ vmnet quot 49:49 ^ val 49:49 ^ @@ -13,7 +13,7 @@ wspc 86:87 hex 87:105 ^----------------^ 0xffffff802644f400 val 69:105 ^----------------------------------^ created userIf at 0xffffff802644f400 pair 50:105 ^-----------------------------------------------------^ VNetUserIf_Create: created userIf at 0xffffff802644f400 -msg :vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400. +msg :vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400 format :vmnet:# VNetUserIf_Create: # { "vmnet": "", diff --git a/test/log-samples/sample-d4a0aedc8350f64b22403eeef4eca71fbf749d2b.txt b/test/log-samples/sample-d4a0aedc8350f64b22403eeef4eca71fbf749d2b.txt index dff52578..91a456ae 100644 --- a/test/log-samples/sample-d4a0aedc8350f64b22403eeef4eca71fbf749d2b.txt +++ b/test/log-samples/sample-d4a0aedc8350f64b22403eeef4eca71fbf749d2b.txt @@ -1,8 +1,8 @@ - Apr 29 23:02:45 tstack-centos5 avahi-daemon[2467]: Invalid response packet from host fe80::22c9:d0ff:fe15:1b7c. 
+ Apr 29 23:02:45 tstack-centos5 avahi-daemon[2467]: Invalid response packet from host fe80::22c9:d0ff:fe15:1b7c key 85:85 ^ ipv6 85:110 ^-----------------------^ fe80::22c9:d0ff:fe15:1b7c pair 85:110 ^-----------------------^ fe80::22c9:d0ff:fe15:1b7c -msg :Invalid response packet from host fe80::22c9:d0ff:fe15:1b7c. +msg :Invalid response packet from host fe80::22c9:d0ff:fe15:1b7c format :Invalid response packet from host # { "col_0": "fe80::22c9:d0ff:fe15:1b7c" diff --git a/test/log-samples/sample-d714b5e8cd354321f376ed1c0a70ec9a2f58076d.txt b/test/log-samples/sample-d714b5e8cd354321f376ed1c0a70ec9a2f58076d.txt index 2624be74..d7367ab9 100644 --- a/test/log-samples/sample-d714b5e8cd354321f376ed1c0a70ec9a2f58076d.txt +++ b/test/log-samples/sample-d714b5e8cd354321f376ed1c0a70ec9a2f58076d.txt @@ -1,8 +1,8 @@ - Apr 29 23:02:45 tstack-centos5 avahi-daemon[2467]: Invalid response packet from host 10.1.10.10. + Apr 29 23:02:45 tstack-centos5 avahi-daemon[2467]: Invalid response packet from host 10.1.10.10 key 85:85 ^ ipv4 85:95 ^--------^ 10.1.10.10 pair 85:95 ^--------^ 10.1.10.10 -msg :Invalid response packet from host 10.1.10.10. +msg :Invalid response packet from host 10.1.10.10 format :Invalid response packet from host # { "col_0": "10.1.10.10" diff --git a/test/log-samples/sample-fc8923633e57bacd641d80dde3ff878212230552.txt b/test/log-samples/sample-fc8923633e57bacd641d80dde3ff878212230552.txt index ecca50d8..f0f3afea 100644 --- a/test/log-samples/sample-fc8923633e57bacd641d80dde3ff878212230552.txt +++ b/test/log-samples/sample-fc8923633e57bacd641d80dde3ff878212230552.txt @@ -1,11 +1,11 @@ - Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Registering new address record for 10.1.10.62 on eth0. 
+ Apr 29 08:13:43 tstack-centos5 avahi-daemon[2467]: Registering new address record for 10.1.10.62 on eth0 key 86:86 ^ ipv4 86:96 ^--------^ 10.1.10.62 pair 86:96 ^--------^ 10.1.10.62 key 100:100 ^ sym 100:104 ^--^ eth0 pair 100:104 ^--^ eth0 -msg :Registering new address record for 10.1.10.62 on eth0. +msg :Registering new address record for 10.1.10.62 on eth0 format :Registering new address record for # on # { "col_0": "10.1.10.62", diff --git a/test/test_grep_proc.sh b/test/test_grep_proc.sh index 30e2efb3..70b19bae 100644 --- a/test/test_grep_proc.sh +++ b/test/test_grep_proc.sh @@ -21,7 +21,14 @@ EOF run_test grep_slice '.*' gp.dat -check_output "grep_proc didn't find all lines?" < gp.dat +check_output "grep_proc didn't find all lines?" <& gp) - { - this->ms_finished = true; - }; + void grep_end(grep_proc& gp) { this->ms_finished = true; }; bool ms_finished; }; @@ -116,13 +113,10 @@ looper(grep_proc& gp) int main(int argc, char* argv[]) { - int eoff, retval = EXIT_SUCCESS; - const char* errptr; - pcre* code; + int retval = EXIT_SUCCESS; - code = pcre_compile("foobar", PCRE_CASELESS, &errptr, &eoff, NULL); - pcre_refcount(code, 1); - assert(code != NULL); + auto code + = lnav::pcre2pp::code::from_const("foobar", PCRE2_CASELESS).to_shared(); auto psuperv = std::make_shared(); { @@ -152,7 +146,5 @@ main(int argc, char* argv[]) assert(errno == ECHILD); } - free(code); - return retval; } diff --git a/test/test_reltime.cc b/test/test_reltime.cc index 8069a497..a526c1dc 100644 --- a/test/test_reltime.cc +++ b/test/test_reltime.cc @@ -68,24 +68,26 @@ static struct { {"12pm", "12:00", "12:00"}, {"00:27:18.567", "0:27:18.567", "0:27:18.567"}, - {}}; + {}, +}; static struct { const char* reltime; const char* expected_error; -} BAD_TEST_DATA[] - = {{"10am am", "Time has already been set"}, - {"yesterday today", "Current time reference has already been used"}, - {"10am 10am", "Time has already been set"}, - {"ago", "Expecting a time unit"}, - {"minute", "Expecting a 
number before time unit"}, - {"1 2", "No time unit given for the previous number"}, - {"blah", "Unrecognized input"}, - {"before", "'before' requires a point in time (e.g. before 10am)"}, - {"after", "'after' requires a point in time (e.g. after 10am)"}, - {"before after", "Before/after ranges are not supported yet"}, - - {nullptr, nullptr}}; +} BAD_TEST_DATA[] = { + {"10am am", "Time has already been set"}, + {"yesterday today", "Current time reference has already been used"}, + {"10am 10am", "Time has already been set"}, + {"ago", "Expecting a time unit"}, + {"minute", "Expecting a number before time unit"}, + {"1 2", "No time unit given for the previous number"}, + {"blah", "Unrecognized input"}, + {"before", "'before' requires a point in time (e.g. before 10am)"}, + {"after", "'after' requires a point in time (e.g. after 10am)"}, + {"before after", "Before/after ranges are not supported yet"}, + + {nullptr, nullptr}, +}; TEST_CASE("reltime") { @@ -97,7 +99,8 @@ TEST_CASE("reltime") time_t new_time; { - auto rt_res = relative_time::from_str("before 2014"); + auto rt_res = relative_time::from_str( + string_fragment::from_const("before 2014")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -110,7 +113,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("after 2014"); + auto rt_res = relative_time::from_str( + string_fragment::from_const("after 2014")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -123,7 +127,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("after fri"); + auto rt_res + = relative_time::from_str(string_fragment::from_const("after fri")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -136,7 +141,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("before fri"); + auto rt_res = relative_time::from_str( + string_fragment::from_const("before fri")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -149,7 +155,8 @@ TEST_CASE("reltime") } { - auto rt_res = 
relative_time::from_str("before 12pm"); + auto rt_res = relative_time::from_str( + string_fragment::from_const("before 12pm")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -162,7 +169,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("sun after 1pm"); + auto rt_res = relative_time::from_str( + string_fragment::from_const("sun after 1pm")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -181,7 +189,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("0:05"); + auto rt_res + = relative_time::from_str(string_fragment::from_const("0:05")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -203,7 +212,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("mon"); + auto rt_res + = relative_time::from_str(string_fragment::from_const("mon")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -222,7 +232,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("tue"); + auto rt_res + = relative_time::from_str(string_fragment::from_const("tue")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -231,7 +242,8 @@ TEST_CASE("reltime") } { - auto rt_res = relative_time::from_str("1m"); + auto rt_res + = relative_time::from_str(string_fragment::from_const("1m")); CHECK(rt_res.isOk()); auto rt = rt_res.unwrap(); @@ -257,7 +269,8 @@ TEST_CASE("reltime") relative_time rt; for (int lpc = 0; TEST_DATA[lpc].reltime; lpc++) { - auto res = relative_time::from_str(TEST_DATA[lpc].reltime); + auto res = relative_time::from_str( + string_fragment::from_c_str(TEST_DATA[lpc].reltime)); CHECK_MESSAGE(res.isOk(), TEST_DATA[lpc].reltime); rt = res.unwrap(); CHECK(std::string(TEST_DATA[lpc].expected) == rt.to_string()); @@ -266,42 +279,51 @@ TEST_CASE("reltime") } for (int lpc = 0; BAD_TEST_DATA[lpc].reltime; lpc++) { - auto res = relative_time::from_str(BAD_TEST_DATA[lpc].reltime); + auto res = relative_time::from_str( + string_fragment::from_c_str(BAD_TEST_DATA[lpc].reltime)); CHECK(res.isErr()); 
CHECK(res.unwrapErr().pe_msg == string(BAD_TEST_DATA[lpc].expected_error)); } - rt = relative_time::from_str("").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("")).unwrap(); CHECK(rt.empty()); - rt = relative_time::from_str("a minute ago").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("a minute ago")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_MINUTES].value == -1); CHECK(rt.is_negative() == true); - rt = relative_time::from_str("5 milliseconds").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("5 milliseconds")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_MICROSECONDS].value == 5 * 1000); - rt = relative_time::from_str("5000 ms ago").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("5000 ms ago")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_SECONDS].value == -5); - rt = relative_time::from_str("5 hours 20 minutes ago").unwrap(); + rt = relative_time::from_str( + string_fragment::from_const("5 hours 20 minutes ago")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_HOURS].value == -5); CHECK(rt.rt_field[relative_time::RTF_MINUTES].value == -20); - rt = relative_time::from_str("5 hours and 20 minutes ago").unwrap(); + rt = relative_time::from_str( + string_fragment::from_const("5 hours and 20 minutes ago")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_HOURS].value == -5); CHECK(rt.rt_field[relative_time::RTF_MINUTES].value == -20); - rt = relative_time::from_str("1:23").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("1:23")).unwrap(); CHECK(rt.rt_field[relative_time::RTF_HOURS].value == 1); CHECK(rt.rt_field[relative_time::RTF_MINUTES].value == 23); CHECK(rt.is_absolute()); - rt = relative_time::from_str("1:23:45").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("1:23:45")) + .unwrap(); CHECK(rt.rt_field[relative_time::RTF_HOURS].value == 1); CHECK(rt.rt_field[relative_time::RTF_MINUTES].value == 23); @@ -316,7 
+338,8 @@ TEST_CASE("reltime") CHECK(tm.et_tm.tm_hour == 1); CHECK(tm.et_tm.tm_min == 23); - rt = relative_time::from_str("5 minutes ago").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("5 minutes ago")) + .unwrap(); tm = base_tm; tm = rt.adjust(tm); @@ -325,7 +348,8 @@ TEST_CASE("reltime") CHECK(new_time == (base_time - (5 * 60))); - rt = relative_time::from_str("today at 4pm").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("today at 4pm")) + .unwrap(); memset(&tm, 0, sizeof(tm)); memset(&tm2, 0, sizeof(tm2)); gettimeofday(&tv, nullptr); @@ -350,7 +374,9 @@ TEST_CASE("reltime") CHECK(tm.et_tm.tm_min == tm2.et_tm.tm_min); CHECK(tm.et_tm.tm_sec == tm2.et_tm.tm_sec); - rt = relative_time::from_str("yesterday at 4pm").unwrap(); + rt = relative_time::from_str( + string_fragment::from_const("yesterday at 4pm")) + .unwrap(); gettimeofday(&tv, nullptr); localtime_r(&tv.tv_sec, &tm.et_tm); localtime_r(&tv.tv_sec, &tm2.et_tm); @@ -374,7 +400,8 @@ TEST_CASE("reltime") CHECK(tm.et_tm.tm_min == tm2.et_tm.tm_min); CHECK(tm.et_tm.tm_sec == tm2.et_tm.tm_sec); - rt = relative_time::from_str("2 days ago").unwrap(); + rt = relative_time::from_str(string_fragment::from_const("2 days ago")) + .unwrap(); gettimeofday(&tv, nullptr); localtime_r(&tv.tv_sec, &tm.et_tm); localtime_r(&tv.tv_sec, &tm2.et_tm); diff --git a/test/test_sql_regexp.sh b/test/test_sql_regexp.sh index 6d148f5e..d2fc5f8f 100644 --- a/test/test_sql_regexp.sh +++ b/test/test_sql_regexp.sh @@ -22,3 +22,9 @@ run_cap_test ${lnav_test} -n \ run_cap_test ${lnav_test} -n \ -c ";SELECT * from regexp_capture_into_json('foo=123e;', '(?\w+)=(?[^;]+)')" \ ${test_dir}/logfile_syslog.3 + +run_cap_test ${lnav_test} -nN \ + -c ";SELECT * from regexp_capture('abc=def;ghi=jkl;', '^(\w+)=([^;]+);')" + +run_cap_test ${lnav_test} -nN \ + -c ";SELECT * from regexp_capture_into_json('abc=def;ghi=jkl;', '^(\w+)=([^;]+);')" diff --git a/test/test_sql_str_func.sh b/test/test_sql_str_func.sh index 
e6eb7394..a05d0bce 100644 --- a/test/test_sql_str_func.sh +++ b/test/test_sql_str_func.sh @@ -68,6 +68,9 @@ run_cap_test ./drive_sql "select regexp_match('foo=(?\w+); (\w+)', 'foo=abc run_cap_test ./drive_sql "select regexp_match('foo=(?\w+); (\w+\.\w+)', 'foo=abc; 123.456') as result" +run_cap_test ${lnav_test} -nN \ + -c ";SELECT regexp_match('^(\w+)=([^;]+);', 'abc=def;ghi=jkl;')" + run_cap_test ./drive_sql "select extract('foo=1') as result" run_cap_test ./drive_sql "select extract('foo=1; bar=2') as result"