[pcre2] migrate from pcre to pcre2

Fixes #974
pull/1062/head
Tim Stack 2 years ago
parent ca4e61ba02
commit 5a63ece31d

@ -19,6 +19,7 @@ find_package(BZip2 REQUIRED)
find_package(LibArchive REQUIRED)
find_package(ZLIB REQUIRED)
find_package(pcre REQUIRED)
find_package(pcre2 REQUIRED)
find_package(readline REQUIRED)
find_package(ncurses REQUIRED)
find_package(CURL REQUIRED)
@ -29,6 +30,7 @@ set(lnav_LIBS
BZip2::BZip2
ncurses::libcurses
pcre::libpcre
pcre2::pcre2
readline::readline
LibArchive::LibArchive
ZLIB::ZLIB
@ -39,19 +41,19 @@ add_subdirectory(src)
# ---- Install rules ----
if(NOT CMAKE_SKIP_INSTALL_RULES)
include(cmake/install-rules.cmake)
endif()
if (NOT CMAKE_SKIP_INSTALL_RULES)
include(cmake/install-rules.cmake)
endif ()
# ---- Developer mode ----
if(NOT lnav_DEVELOPER_MODE)
return()
elseif(NOT PROJECT_IS_TOP_LEVEL)
message(
AUTHOR_WARNING
"Developer mode is intended for developers of lnav"
)
endif()
if (NOT lnav_DEVELOPER_MODE)
return()
elseif (NOT PROJECT_IS_TOP_LEVEL)
message(
AUTHOR_WARNING
"Developer mode is intended for developers of lnav"
)
endif ()
include(cmake/dev-mode.cmake)

@ -1,3 +1,9 @@
lnav v0.11.1:
Breaking changes:
* The regexp_capture() table-valued-function now returns NULL
instead of an empty string for the `capture_name` column if
the capture is not named.
lnav v0.11.0:
Features:
* Redesigned the top status area to allow for user-specified

@ -1,6 +1,6 @@
# aminclude_static.am generated automatically by Autoconf
# from AX_AM_MACROS_STATIC on Sat Aug 20 18:43:07 PDT 2022
# from AX_AM_MACROS_STATIC on Sat Sep 10 09:23:23 PDT 2022
# Code coverage

@ -1,12 +1,18 @@
# ---- Dependencies ----
# DOWNLOAD_EXTRACT_TIMESTAMP is only understood by CMake 3.24 and newer, so
# the extra FetchContent arguments stay empty on older releases.
if (CMAKE_VERSION VERSION_LESS "3.24")
    set(extract_timestamps "")
else ()
    set(extract_timestamps DOWNLOAD_EXTRACT_TIMESTAMP YES)
endif ()
include(FetchContent)
FetchContent_Declare(
mcss URL
https://github.com/friendlyanon/m.css/releases/download/release-1/mcss.zip
URL_MD5 00cd2757ebafb9bcba7f5d399b3bec7f
SOURCE_DIR "${PROJECT_BINARY_DIR}/mcss"
UPDATE_DISCONNECTED YES
mcss URL
https://github.com/friendlyanon/m.css/releases/download/release-1/mcss.zip
URL_MD5 00cd2757ebafb9bcba7f5d399b3bec7f
SOURCE_DIR "${PROJECT_BINARY_DIR}/mcss"
UPDATE_DISCONNECTED YES
${extract_timestamps}
)
FetchContent_MakeAvailable(mcss)
@ -15,26 +21,26 @@ find_package(Python3 3.6 REQUIRED)
# ---- Declare documentation target ----
set(
DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/docs"
CACHE PATH "Path for the generated Doxygen documentation"
DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/docs"
CACHE PATH "Path for the generated Doxygen documentation"
)
set(working_dir "${PROJECT_BINARY_DIR}/docs")
foreach(file IN ITEMS Doxyfile conf.py)
configure_file("docs/${file}.in" "${working_dir}/${file}" @ONLY)
endforeach()
foreach (file IN ITEMS Doxyfile conf.py)
configure_file("docs/${file}.in" "${working_dir}/${file}" @ONLY)
endforeach ()
set(mcss_script "${mcss_SOURCE_DIR}/documentation/doxygen.py")
set(config "${working_dir}/conf.py")
add_custom_target(
docs
COMMAND "${CMAKE_COMMAND}" -E remove_directory
"${DOXYGEN_OUTPUT_DIRECTORY}/html"
"${DOXYGEN_OUTPUT_DIRECTORY}/xml"
COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}"
COMMENT "Building documentation using Doxygen and m.css"
WORKING_DIRECTORY "${working_dir}"
VERBATIM
docs
COMMAND "${CMAKE_COMMAND}" -E remove_directory
"${DOXYGEN_OUTPUT_DIRECTORY}/html"
"${DOXYGEN_OUTPUT_DIRECTORY}/xml"
COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}"
COMMENT "Building documentation using Doxygen and m.css"
WORKING_DIRECTORY "${working_dir}"
VERBATIM
)

@ -20,7 +20,7 @@ class LnavConan(ConanFile):
"libarchive/3.6.0",
"libcurl/7.80.0",
"ncurses/6.3",
"pcre/8.45",
"pcre2/10.40",
"readline/8.1.2",
"sqlite3/3.38.0",
"zlib/1.2.12",
@ -32,7 +32,8 @@ class LnavConan(ConanFile):
"libarchive:with_lzo": True,
"libarchive:with_lzma": True,
"libarchive:with_zstd": True,
"pcre:with_jit": True,
"pcre2:support_jit": True,
"pcre2:build_pcre2_8": True,
"sqlite3:enable_json1": True,
"sqlite3:enable_soundex": True,
"readline:with_library": "curses",

@ -184,7 +184,7 @@ AS_VAR_IF([ax_cv_curses],[yes],[],
)
AX_PATH_LIB_ARCHIVE
AX_PATH_LIB_PCRE([], [AC_MSG_ERROR([pcre required to build])])
AX_CHECK_PCRE2([8], [], [AC_MSG_ERROR([pcre2 is required to build])])
AX_PATH_LIB_READLINE
AX_CODE_COVERAGE
@ -206,7 +206,7 @@ AS_VAR_SET(ALL_LDFLAGS, "$SQLITE3_LDFLAGS $READLINE_LDFLAGS $LIBARCHIVE_LDFLAGS
AS_VAR_SET(static_lib_list,
["libncurses.a libncursesw.a libreadline.a libsqlite3.a libz.a libtinfo.a libtinfow.a"])
# The 8-bit PCRE2 library is installed as libpcre2-8.a; there is no plain
# libpcre2.a, so the static-link search must look for the suffixed name.
AS_VAR_SET(static_lib_list,
["$static_lib_list libpcre2-8.a libncursesw.a libbz2.a"])
AS_VAR_SET(static_lib_list,
["$static_lib_list libgpm.a libcurl.a libcrypto.a libssl.a libssh2.a"])
AS_VAR_SET(static_lib_list,

@ -219,9 +219,9 @@
"([^/]+)": {
"title": "/<format_name>/value/<value_name>/unit/scaling-factor/<scale>",
"type": "object",
"patternProperties": {
"properties": {
"op": {
"title": "/<format_name>/value/<value_name>/unit/scaling-factor/<scale>/<>",
"title": "/<format_name>/value/<value_name>/unit/scaling-factor/<scale>/op",
"type": "string",
"enum": [
"identity",
@ -230,7 +230,7 @@
]
},
"value": {
"title": "/<format_name>/value/<value_name>/unit/scaling-factor/<scale>/<>",
"title": "/<format_name>/value/<value_name>/unit/scaling-factor/<scale>/value",
"type": "number"
}
},

@ -0,0 +1,163 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_check_pcre2.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_PCRE2([bits], [action-if-found], [action-if-not-found])
#
# DESCRIPTION
#
# Search for an installed libpcre2-8 library. If nothing was specified
# when calling configure, it searches first in /usr/local and then in
# /usr, /opt/local and /sw. If the --with-pcre2=DIR is specified, it will
# try to find it in DIR/include/pcre2.h and DIR/lib/libpcre2-8. If
# --without-pcre2 is specified, the library is not searched at all.
#
# If 'bits' is empty or '8', PCRE2 8-bit character support is checked
# only. If 'bits' contains '16', PCRE2 8-bit and 16-bit character support
# are checked. If 'bits' contains '32', PCRE2 8-bit and 32-bit character
# support are checked. When 'bits' contains both '16' and '32', PCRE2
# 8-bit, 16-bit, and 32-bit character support is checked.
#
# If either the header file (pcre2.h), or the library (libpcre2-8) is not
# found, or the specified PCRE2 character bit width is not supported,
# shell commands 'action-if-not-found' is run. If 'action-if-not-found' is
# not specified, the configuration exits on error, asking for a valid
# PCRE2 installation directory or --without-pcre2.
#
# If both header file and library are found, and the specified PCRE2 bit
# widths are supported, shell commands 'action-if-found' is run. If
# 'action-if-found' is not specified, the default action appends
# '-I${PCRE2_HOME}/include' to CPPFLAGS, appends '-L${PCRE2_HOME}/lib' to
# LDFLAGS, prepends '-lpcre2-8' to LIBS, and calls AC_DEFINE(HAVE_PCRE2).
# You should use autoheader to include a definition for this symbol in a
# config.h file. Sample usage in a C/C++ source is as follows:
#
# #ifdef HAVE_PCRE2
# #define PCRE2_CODE_UNIT_WIDTH 8
# #include <pcre2.h>
# #endif /* HAVE_PCRE2 */
#
# LICENSE
#
# Copyright (c) 2020 Robert van Engelen <engelen@acm.org>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 2
# AX_CHECK_PCRE2([bits], [action-if-found], [action-if-not-found])
#
# Locates pcre2.h and libpcre2-8 (plus the 16/32-bit variants when requested
# through 'bits').  A prefix is searched for in --with-pcre2=DIR followed by
# /usr/local, /usr, /opt/local and /sw.  When no prefix contains the header,
# the compiler's default search paths are still probed and no -I/-L flags are
# added.  On failure the original CPPFLAGS/LDFLAGS are restored before the
# not-found action runs.
AC_DEFUN([AX_CHECK_PCRE2],
#
# Handle user hints
#
[AC_MSG_CHECKING(if PCRE2 is wanted)
pcre2_places="/usr/local /usr /opt/local /sw"
AC_ARG_WITH([pcre2],
[ --with-pcre2=DIR root directory path of PCRE2 installation @<:@defaults to
/usr/local or /usr if not found in /usr/local@:>@
--without-pcre2 to disable PCRE2 usage completely],
[if test "$withval" != "no" ; then
  AC_MSG_RESULT(yes)
  if test -d "$withval"
  then
    pcre2_places="$withval $pcre2_places"
  else
    AC_MSG_WARN([Sorry, $withval does not exist, checking usual places])
  fi
else
  pcre2_places=""
  AC_MSG_RESULT(no)
fi],
[AC_MSG_RESULT(yes)])
#
# Locate PCRE2, if wanted
#
if test -n "${pcre2_places}"
then
  # check the user supplied or any other more or less 'standard' place:
  # Most UNIX systems : /usr/local and /usr
  # MacPorts / Fink on OSX : /opt/local respectively /sw
  for PCRE2_HOME in ${pcre2_places} ; do
    if test -f "${PCRE2_HOME}/include/pcre2.h"; then break; fi
    PCRE2_HOME=""
  done

  # Remember the caller's flags so they can be put back later, then probe
  # with the candidate prefix on the search paths.  PCRE2_HOME is empty when
  # no prefix holds the header; the flags are left untouched in that case.
  PCRE2_OLD_LDFLAGS=$LDFLAGS
  PCRE2_OLD_CPPFLAGS=$CPPFLAGS
  if test -n "${PCRE2_HOME}"; then
    LDFLAGS="$LDFLAGS -L${PCRE2_HOME}/lib"
    CPPFLAGS="$CPPFLAGS -I${PCRE2_HOME}/include"
  fi
  AC_LANG_PUSH([C])
  AC_CHECK_LIB([pcre2-8], [pcre2_compile_8], [pcre2_cv_libpcre2=yes], [pcre2_cv_libpcre2=no])
  AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_h=yes], [pcre2_cv_pcre2_h=no], [#define PCRE2_CODE_UNIT_WIDTH 8])
  case "$1" in
    *16*)
      AC_CHECK_LIB([pcre2-16], [pcre2_compile_16], [pcre2_cv_libpcre2_16=yes], [pcre2_cv_libpcre2_16=no])
      AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_16_h=yes], [pcre2_cv_pcre2_16_h=no], [#define PCRE2_CODE_UNIT_WIDTH 16])
      if test "$pcre2_cv_libpcre2_16" = "no" || test "$pcre2_cv_pcre2_16_h" = "no"; then
        pcre2_cv_libpcre2=no
      fi
      ;;
  esac
  case "$1" in
    *32*)
      AC_CHECK_LIB([pcre2-32], [pcre2_compile_32], [pcre2_cv_libpcre2_32=yes], [pcre2_cv_libpcre2_32=no])
      AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_32_h=yes], [pcre2_cv_pcre2_32_h=no], [#define PCRE2_CODE_UNIT_WIDTH 32])
      if test "$pcre2_cv_libpcre2_32" = "no" || test "$pcre2_cv_pcre2_32_h" = "no"; then
        pcre2_cv_libpcre2=no
      fi
      ;;
  esac
  AC_LANG_POP([C])
  if test "$pcre2_cv_libpcre2" = "yes" && test "$pcre2_cv_pcre2_h" = "yes"
  then
    #
    # If both library and header were found, action-if-found
    #
    m4_ifblank([$2],[
                # The include and library search paths were already appended
                # for the probe when a prefix was located, so they are kept
                # as they are.  Appending them again here would duplicate
                # them, or add bogus -I/include and -L/lib entries when no
                # prefix was located.
                LIBS="-lpcre2-8 $LIBS"
                AC_DEFINE([HAVE_PCRE2], [1],
                          [Define to 1 if you have `PCRE2' library (-lpcre2-$1)])
               ],[
                # Restore variables
                LDFLAGS="$PCRE2_OLD_LDFLAGS"
                CPPFLAGS="$PCRE2_OLD_CPPFLAGS"
                $2
               ])
  else
    #
    # If either header or library was not found, action-if-not-found
    #
    # Drop the temporary probe paths first so a not-found action that keeps
    # going does not inherit flags pointing at an unusable prefix.
    LDFLAGS="$PCRE2_OLD_LDFLAGS"
    CPPFLAGS="$PCRE2_OLD_CPPFLAGS"
    m4_default([$3],[
                AC_MSG_ERROR([either specify a valid PCRE2 installation with --with-pcre2=DIR or disable PCRE2 usage with --without-pcre2])
                ])
  fi
fi
])

@ -1,74 +0,0 @@
dnl
dnl Copyright (c) 2007-2015, Timothy Stack
dnl
dnl All rights reserved.
dnl
dnl Redistribution and use in source and binary forms, with or without
dnl modification, are permitted provided that the following conditions are met:
dnl
dnl dnl Redistributions of source code must retain the above copyright notice, this
dnl list of conditions and the following disclaimer.
dnl dnl Redistributions in binary form must reproduce the above copyright notice,
dnl this list of conditions and the following disclaimer in the documentation
dnl and/or other materials provided with the distribution.
dnl dnl Neither the name of Timothy Stack nor the names of its contributors
dnl may be used to endorse or promote products derived from this software
dnl without specific prior written permission.
dnl
dnl THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
dnl EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dnl WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dnl DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
dnl DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
dnl (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
dnl ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
dnl (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
dnl SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dnl
dnl @file lnav_with_pcre.m4
dnl
dnl AX_PATH_LIB_PCRE(action-if-found, action-if-not-found)
dnl
dnl Probes for the legacy libpcre library and honors --with-pcre=prefix.
dnl   * --without-pcre reports "disabled" and runs action-if-not-found.
dnl   * When with_pcre is empty and the cached pcre_study check is "yes",
dnl     PCRE_LIBS is set to -lpcre and action-if-found runs.
dnl   * Otherwise -L$with_pcre/lib and -I$with_pcre/include are added
dnl     temporarily while pcre_compile and the pcre headers are checked.
dnl     On success PCRE_LDFLAGS, PCRE_LIBS and (if the include directory
dnl     exists) PCRE_CFLAGS are set and action-if-found runs; on failure
dnl     action-if-not-found runs.
dnl PCRE_LIBS and PCRE_CFLAGS are substituted; PCRE_LDFLAGS is not.
dnl NOTE(review): with_pcre defaults to "yes", so without an explicit prefix
dnl the second branch appears to probe -Lyes/lib and -Iyes/include; confirm.
dnl NOTE(review): saved_LIBS is assigned but never read in this macro.
AC_DEFUN([AX_PATH_LIB_PCRE],[dnl
AC_MSG_CHECKING([lib pcre])
AC_ARG_WITH(pcre,
[ --with-pcre[[=prefix]]],,
with_pcre="yes")
if test ".$with_pcre" = ".no" ; then
AC_MSG_RESULT([disabled])
m4_ifval($2,$2)
else
AC_MSG_RESULT([(testing)])
AS_VAR_SET(saved_LIBS, $LIBS)
if test ".$with_pcre" = "." && test "$ac_cv_lib_pcre_pcre_study" = "yes" ; then
PCRE_LIBS="-lpcre"
AC_MSG_CHECKING([lib pcre])
AC_CHECK_LIB(pcre, pcre_study)
AC_CHECK_HEADERS(pcre.h pcre/pcre.h)
AC_MSG_RESULT([$PCRE_LIBS])
m4_ifval($1,$1)
else
OLDLDFLAGS="$LDFLAGS" ; LDFLAGS="$LDFLAGS -L$with_pcre/lib"
OLDCPPFLAGS="$CPPFLAGS" ; CPPFLAGS="$CPPFLAGS -I$with_pcre/include"
AC_CHECK_LIB(pcre, pcre_compile)
AC_CHECK_HEADERS(pcre.h pcre/pcre.h)
CPPFLAGS="$OLDCPPFLAGS"
LDFLAGS="$OLDLDFLAGS"
if test "$ac_cv_lib_pcre_pcre_compile" = "yes" ; then
AC_MSG_RESULT(.setting PCRE_LIBS -L$with_pcre/lib -lpcre)
PCRE_LDFLAGS="-L$with_pcre/lib"
PCRE_LIBS="-lpcre"
test -d "$with_pcre/include" && PCRE_CFLAGS="-I$with_pcre/include"
AC_MSG_CHECKING([lib pcre])
AC_MSG_RESULT([$PCRE_LIBS])
m4_ifval($1,$1)
else
AC_MSG_CHECKING([lib pcre])
AC_MSG_RESULT([[no, (WARNING)]])
m4_ifval($2,$2)
fi
fi
fi
AC_SUBST([PCRE_LIBS])
AC_SUBST([PCRE_CFLAGS])
])

@ -492,6 +492,7 @@ add_library(
log_gutter_source.hh
log_level.hh
log_search_table.hh
log_search_table_fwd.hh
logfile_sub_source.cfg.hh
logfile.hh
logfile_fwd.hh

@ -239,6 +239,7 @@ noinst_HEADERS = \
log_level.hh \
log_level_re.re \
log_search_table.hh \
log_search_table_fwd.hh \
logfile.hh \
logfile.cfg.hh \
logfile_fwd.hh \

@ -29,8 +29,8 @@
#include "all_logs_vtab.hh"
#include "config.h"
#include "base/attr_line.hh"
#include "config.h"
static auto intern_lifetime = intern_string::get_table_lifetime();
@ -65,7 +65,7 @@ all_logs_vtab::extract(logfile* lf,
logline_value_vector& values)
{
auto& line = values.lvv_sbr;
auto format = lf->get_format_ptr();
auto* format = lf->get_format_ptr();
logline_value_vector sub_values;
@ -79,7 +79,8 @@ all_logs_vtab::extract(logfile* lf,
body.lr_end = line.length();
}
data_scanner ds(line, body.lr_start, body.lr_end);
data_scanner ds(
line.to_string_fragment().sub_range(body.lr_start, body.lr_end));
data_parser dp(&ds);
std::string str;

@ -46,6 +46,7 @@ add_library(
isc.hh
itertools.hh
lnav.console.hh
lnav.console.into.hh
log_level_enum.hh
lrucache.hpp
math_util.hh
@ -63,7 +64,7 @@ add_library(
target_include_directories(base PUBLIC . .. ../third-party
${CMAKE_CURRENT_BINARY_DIR}/..)
target_link_libraries(base cppfmt cppscnlib pcre::libpcre ncurses::libcurses pthread)
target_link_libraries(base cppfmt cppscnlib pcrepp ncurses::libcurses pthread)
add_executable(
test_base

@ -45,6 +45,7 @@ noinst_HEADERS = \
itertools.hh \
lnav_log.hh \
lnav.console.hh \
lnav.console.into.hh \
lnav.gzip.hh \
log_level_enum.hh \
lrucache.hpp \

@ -35,15 +35,15 @@
#include "base/opt_util.hh"
#include "config.h"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "scn/scn.h"
#include "view_curses.hh"
static const pcrepp&
static const lnav::pcre2pp::code&
ansi_regex()
{
static const pcrepp retval("\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+",
PCRE_UTF8);
static const auto retval = lnav::pcre2pp::code::from_const(
"\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+");
return retval;
}
@ -51,16 +51,25 @@ ansi_regex()
void
scrub_ansi_string(std::string& str, string_attrs_t* sa)
{
pcre_context_static<60> context;
const auto& regex = ansi_regex();
pcre_input pi(str);
auto md = regex.create_match_data();
int64_t origin_offset = 0;
int last_origin_offset_end = 0;
replace(str.begin(), str.end(), '\0', ' ');
while (regex.match(context, pi, PCRE_NO_UTF8_CHECK)) {
auto* caps = context.all();
const auto sf = pi.get_string_fragment(caps);
auto matcher = regex.capture_from(str).into(md);
while (true) {
auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK);
if (match_res.is<lnav::pcre2pp::matcher::not_found>()) {
break;
}
if (match_res.is<lnav::pcre2pp::matcher::error>()) {
log_error("ansi scrub regex failure");
break;
}
const auto sf = md[0].value();
auto bs_index_res = sf.codepoint_to_byte_index(1);
if (sf.length() >= 3 && bs_index_res.isOk()
@ -139,7 +148,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
*sa, caps->c_begin + sf.length() / 3, -erased_size);
#endif
sa->emplace_back(line_range{last_origin_offset_end,
caps->c_begin + (int) output_size},
sf.sf_begin + (int) output_size},
SA_ORIGIN_OFFSET.value(origin_offset));
}
@ -154,27 +163,28 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
bold_range.clear();
}
str.erase(str.begin() + fill_index, str.begin() + caps->c_end);
last_origin_offset_end = caps->c_begin + output_size;
str.erase(str.begin() + fill_index, str.begin() + sf.sf_end);
last_origin_offset_end = sf.sf_begin + output_size;
origin_offset += erased_size;
pi.reset(str);
pi.pi_next_offset = last_origin_offset_end;
matcher.reload_input(str, last_origin_offset_end);
continue;
}
auto seq = md[1].value();
auto terminator = md[2].value();
struct line_range lr;
bool has_attrs = false;
text_attrs attrs;
auto role = nonstd::optional<role_t>();
size_t lpc;
switch (pi.get_substr_start(&caps[2])[0]) {
switch (terminator[0]) {
case 'm':
for (lpc = caps[1].c_begin;
lpc != std::string::npos && lpc < (size_t) caps[1].c_end;)
for (lpc = seq.sf_begin;
lpc != std::string::npos && lpc < (size_t) seq.sf_end;)
{
auto ansi_code_res = scn::scan_value<int>(
scn::string_view{&str[lpc], &str[caps[1].c_end]});
scn::string_view{&str[lpc], &str[seq.sf_end]});
if (ansi_code_res) {
auto ansi_code = ansi_code_res.value();
@ -215,11 +225,11 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
break;
case 'C': {
auto spaces_res = scn::scan_value<unsigned int>(
pi.to_string_view(&caps[1]));
auto spaces_res
= scn::scan_value<unsigned int>(seq.to_string_view());
if (spaces_res && spaces_res.value() > 0) {
str.insert((std::string::size_type) caps[0].c_end,
str.insert((std::string::size_type) sf.sf_end,
spaces_res.value(),
' ');
}
@ -229,13 +239,13 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
case 'H': {
unsigned int row = 0, spaces = 0;
if (scn::scan(pi.to_string_view(&caps[1]), "{};{}", row, spaces)
if (scn::scan(seq.to_string_view(), "{};{}", row, spaces)
&& spaces > 1)
{
int ispaces = spaces - 1;
if (ispaces > caps[0].c_begin) {
str.insert((unsigned long) caps[0].c_end,
ispaces - caps[0].c_begin,
if (ispaces > sf.sf_begin) {
str.insert((unsigned long) sf.sf_end,
ispaces - sf.sf_begin,
' ');
}
}
@ -243,8 +253,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
}
case 'O': {
auto role_res
= scn::scan_value<int>(pi.to_string_view(&caps[1]));
auto role_res = scn::scan_value<int>(seq.to_string_view());
if (role_res) {
role_t role_tmp = (role_t) role_res.value();
@ -258,18 +267,18 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
break;
}
}
str.erase(str.begin() + caps[0].c_begin, str.begin() + caps[0].c_end);
str.erase(str.begin() + sf.sf_begin, str.begin() + sf.sf_end);
if (sa != nullptr) {
shift_string_attrs(*sa, caps[0].c_begin, -caps[0].length());
shift_string_attrs(*sa, sf.sf_begin, -sf.length());
if (has_attrs) {
for (auto rit = sa->rbegin(); rit != sa->rend(); rit++) {
if (rit->sa_range.lr_end != -1) {
continue;
}
rit->sa_range.lr_end = caps[0].c_begin;
rit->sa_range.lr_end = sf.sf_begin;
}
lr.lr_start = caps[0].c_begin;
lr.lr_start = sf.sf_begin;
lr.lr_end = -1;
if (attrs.ta_attrs || attrs.ta_fg_color || attrs.ta_bg_color) {
sa->emplace_back(lr, VC_STYLE.value(attrs));
@ -278,14 +287,13 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
sa->emplace_back(lr, VC_ROLE.value(r));
};
}
sa->emplace_back(line_range{last_origin_offset_end, caps->c_begin},
sa->emplace_back(line_range{last_origin_offset_end, sf.sf_begin},
SA_ORIGIN_OFFSET.value(origin_offset));
last_origin_offset_end = caps->c_begin;
origin_offset += caps->length();
last_origin_offset_end = sf.sf_begin;
origin_offset += sf.length();
}
pi.reset(str);
pi.pi_next_offset = caps->c_begin;
matcher.reload_input(str, sf.sf_begin);
}
if (sa != nullptr && last_origin_offset_end > 0) {

@ -37,7 +37,7 @@
#include "auto_mem.hh"
#include "config.h"
#include "lnav_log.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
attr_line_t&
attr_line_t::with_ansi_string(const char* str, ...)
@ -91,18 +91,19 @@ using chunk = mapbox::util::variant<word, space, corrupt, eof>;
chunk
consume(const string_fragment text)
{
static const pcrepp WORD_RE(R"((*UTF)^[^\p{Z}\p{So}\p{C}]+)");
static const pcrepp SPACE_RE(R"((*UTF)^\s)");
static const auto WORD_RE
= lnav::pcre2pp::code::from_const(R"((*UTF)^[^\p{Z}\p{So}\p{C}]+)");
static const auto SPACE_RE
= lnav::pcre2pp::code::from_const(R"((*UTF)^\s)");
if (text.empty()) {
return eof{text};
}
pcre_input pi(text);
pcre_context_static<30> pc;
if (WORD_RE.match(pc, pi, PCRE_NO_UTF8_CHECK)) {
auto split_res = text.split_n(pc.all()->length()).value();
auto word_find_res
= WORD_RE.find_in(text, PCRE2_NO_UTF_CHECK).ignore_error();
if (word_find_res) {
auto split_res = text.split_n(word_find_res->f_all.length()).value();
return word{split_res.first, split_res.second};
}
@ -113,8 +114,10 @@ consume(const string_fragment text)
return space{split_res.first, split_res.second};
}
if (SPACE_RE.match(pc, pi, PCRE_NO_UTF8_CHECK)) {
auto split_res = text.split_n(pc.all()->length()).value();
auto space_find_res
= SPACE_RE.find_in(text, PCRE2_NO_UTF_CHECK).ignore_error();
if (space_find_res) {
auto split_res = text.split_n(space_find_res->f_all.length()).value();
return space{split_res.first, split_res.second};
}
@ -184,8 +187,6 @@ attr_line_t::insert(size_t index,
return *this;
}
static const pcrepp SPACE_RE(R"(\s?)");
auto starting_line_index = this->al_string.rfind('\n', index);
if (starting_line_index == std::string::npos) {
starting_line_index = 0;

@ -70,7 +70,8 @@ struct line_range {
bool empty() const { return this->length() == 0; }
void clear() {
void clear()
{
this->lr_start = -1;
this->lr_end = -1;
}
@ -163,6 +164,12 @@ struct line_range {
}
};
/**
 * Build a line_range from the offsets stored in a string_fragment.
 *
 * @param frag The fragment whose sf_begin and sf_end offsets are copied.
 * @return A line_range constructed from frag.sf_begin and frag.sf_end.
 */
inline line_range
to_line_range(const string_fragment& frag)
{
    const auto range_start = frag.sf_begin;
    const auto range_end = frag.sf_end;

    return line_range{range_start, range_end};
}
struct string_attr {
string_attr(const struct line_range& lr, const string_attr_pair& value)
: sa_range(lr), sa_type(value.first), sa_value(value.second)

@ -86,6 +86,8 @@ public:
~auto_mem() { this->reset(); }
/** @return true when no pointer is currently held (am_ptr is null). */
bool empty() const
{
    return nullptr == this->am_ptr;
}
operator T*() const { return this->am_ptr; }
T* operator->() { return this->am_ptr; }

@ -30,39 +30,44 @@
#include "humanize.network.hh"
#include "config.h"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
namespace humanize {
namespace network {
namespace path {
nonstd::optional<::network::path>
from_str(const char* str)
from_str(string_fragment sf)
{
static const pcrepp REMOTE_PATTERN(
"(?:(?<username>[\\w\\._\\-]+)@)?"
static const auto REMOTE_PATTERN = lnav::pcre2pp::code::from_const(
"^(?:(?<username>[\\w\\._\\-]+)@)?"
"(?:\\[(?<ipv6>[^\\]]+)\\]|(?<hostname>[^\\[/:]+)):"
"(?<path>.*)");
"(?<path>.*)$");
static thread_local auto REMOTE_MATCH_DATA
= REMOTE_PATTERN.create_match_data();
pcre_context_static<30> pc;
pcre_input pi(str);
auto match_res = REMOTE_PATTERN.capture_from(sf)
.into(REMOTE_MATCH_DATA)
.matches()
.ignore_error();
if (!REMOTE_PATTERN.match(pc, pi)) {
if (!match_res) {
return nonstd::nullopt;
}
const auto username = pi.get_substr_opt(pc["username"]);
const auto ipv6 = pi.get_substr_opt(pc["ipv6"]);
const auto hostname = pi.get_substr_opt(pc["hostname"]);
const auto username = REMOTE_MATCH_DATA["username"].map(
[](auto sf) { return sf.to_string(); });
const auto ipv6 = REMOTE_MATCH_DATA["ipv6"];
const auto hostname = REMOTE_MATCH_DATA["hostname"];
const auto locality_hostname = ipv6 ? ipv6.value() : hostname.value();
auto path = pi.get_substr(pc["path"]);
auto path = *REMOTE_MATCH_DATA["path"];
if (path.empty()) {
path = ".";
path = string_fragment::from_const(".");
}
return ::network::path{
{username, locality_hostname, nonstd::nullopt},
path,
{username, locality_hostname.to_string(), nonstd::nullopt},
path.to_string(),
};
}

@ -33,6 +33,7 @@
#include <string>
#include "fmt/format.h"
#include "intern_string.hh"
#include "network.tcp.hh"
#include "optional.hpp"
@ -99,13 +100,7 @@ namespace humanize {
namespace network {
namespace path {
nonstd::optional<::network::path> from_str(const char* str);
inline nonstd::optional<::network::path>
from_str(const std::string& str)
{
return from_str(str.c_str());
}
nonstd::optional<::network::path> from_str(string_fragment sf);
} // namespace path
} // namespace network

@ -36,17 +36,19 @@
TEST_CASE("humanize::network::path")
{
{
auto rp_opt = humanize::network::path::from_str("foobar");
auto rp_opt = humanize::network::path::from_str(
string_fragment::from_const("foobar"));
CHECK(!rp_opt);
}
{
auto rp_opt = humanize::network::path::from_str("dean@foobar/bar");
auto rp_opt = humanize::network::path::from_str(
string_fragment::from_const("dean@foobar/bar"));
CHECK(!rp_opt);
}
{
auto rp_opt = humanize::network::path::from_str(
"dean@host1.example.com:/var/log");
string_fragment::from_const("dean@host1.example.com:/var/log"));
CHECK(rp_opt.has_value());
auto rp = *rp_opt;
@ -58,8 +60,9 @@ TEST_CASE("humanize::network::path")
}
{
auto rp_opt = humanize::network::path::from_str(
"dean@[fe80::184f:c67:baf1:fe02%en0]:/var/log");
auto rp_opt
= humanize::network::path::from_str(string_fragment::from_const(
"dean@[fe80::184f:c67:baf1:fe02%en0]:/var/log"));
CHECK(rp_opt.has_value());
auto rp = *rp_opt;
@ -74,8 +77,9 @@ TEST_CASE("humanize::network::path")
}
{
auto rp_opt = humanize::network::path::from_str(
"[fe80::184f:c67:baf1:fe02%en0]:/var/log");
auto rp_opt
= humanize::network::path::from_str(string_fragment::from_const(
"[fe80::184f:c67:baf1:fe02%en0]:/var/log"));
CHECK(rp_opt.has_value());
auto rp = *rp_opt;
@ -89,8 +93,8 @@ TEST_CASE("humanize::network::path")
}
{
auto rp_opt
= humanize::network::path::from_str("host1.example.com:/var/log");
auto rp_opt = humanize::network::path::from_str(
string_fragment::from_const("host1.example.com:/var/log"));
CHECK(rp_opt.has_value());
auto rp = *rp_opt;
@ -101,7 +105,8 @@ TEST_CASE("humanize::network::path")
}
{
auto rp_opt = humanize::network::path::from_str("host1.example.com:");
auto rp_opt = humanize::network::path::from_str(
string_fragment::from_const("host1.example.com:"));
CHECK(rp_opt.has_value());
auto rp = *rp_opt;

@ -32,6 +32,7 @@
#ifndef intern_string_hh
#define intern_string_hh
#include <ostream>
#include <string>
#include <vector>
@ -48,9 +49,17 @@
struct string_fragment {
using iterator = const char*;
/**
 * @return A default-constructed fragment on which invalidate() has been
 * called.
 */
static string_fragment invalid()
{
    string_fragment invalid_frag;

    invalid_frag.invalidate();
    return invalid_frag;
}
static string_fragment from_c_str(const char* str)
{
return string_fragment{str, 0, (int) strlen(str)};
return string_fragment{str, 0, str != nullptr ? (int) strlen(str) : 0};
}
template<typename T, std::size_t N>
@ -130,6 +139,11 @@ struct string_fragment {
const char* data() const { return &this->sf_string[this->sf_begin]; }
/**
 * @return The same address as data(), viewed as unsigned bytes.
 */
const unsigned char* udata() const
{
    const char* first = &this->sf_string[this->sf_begin];

    return reinterpret_cast<const unsigned char*>(first);
}
char front() const { return this->sf_string[this->sf_begin]; }
uint32_t front_codepoint() const
@ -252,6 +266,12 @@ struct string_fragment {
this->sf_string, this->sf_begin + begin, this->sf_end};
}
/**
 * Create a fragment over the same backing string whose bounds are given
 * relative to the start of this fragment.
 *
 * @param begin Offset of the new start, added to this->sf_begin.
 * @param end Offset of the new end, also added to this->sf_begin.
 * @return The fragment spanning the translated offsets.  No bounds checking
 * is performed here.
 */
string_fragment sub_range(int begin, int end) const
{
    const int abs_begin = this->sf_begin + begin;
    const int abs_end = this->sf_begin + end;

    return string_fragment{this->sf_string, abs_begin, abs_end};
}
nonstd::optional<size_t> find(char ch) const
{
for (int lpc = this->sf_begin; lpc < this->sf_end; lpc++) {
@ -521,12 +541,25 @@ operator<(const char* left, const string_fragment& right)
return rc < 0;
}
/**
 * Append the bytes covered by a string_fragment to a std::string.
 *
 * @param left The string that is extended in place.
 * @param right The fragment whose data()/length() span is appended.
 */
inline void
operator+=(std::string& left, const string_fragment& right)
{
    const char* bytes = right.data();

    left.append(bytes, right.length());
}
/**
 * Order a string_fragment before a C string.
 *
 * Only the first left.length() bytes of the two operands are compared.
 *
 * NOTE(review): because the comparison stops after left.length() bytes, a
 * fragment that is a strict prefix of `right` is reported as not-less;
 * confirm callers rely on this prefix behavior.
 *
 * @param left The fragment on the left-hand side.
 * @param right The NUL-terminated string on the right-hand side.
 * @return true if strncmp() over left.length() bytes yields a negative value.
 */
inline bool
operator<(const string_fragment& left, const char* right)
{
    const auto rc = strncmp(left.data(), right, left.length());

    return rc < 0;
}
/**
 * Write the bytes covered by a string_fragment to an output stream.
 *
 * @param os The destination stream.
 * @param sf The fragment whose data()/length() span is written unformatted.
 * @return The stream that was passed in.
 */
inline std::ostream&
operator<<(std::ostream& os, const string_fragment& sf)
{
    return os.write(sf.data(), sf.length());
}
class intern_string {
public:
static const intern_string* lookup(const char* str, ssize_t len) noexcept;

@ -34,7 +34,10 @@
#include "config.h"
#include "fmt/color.h"
#include "itertools.hh"
#include "lnav.console.into.hh"
#include "log_level_enum.hh"
#include "pcrepp/pcre2pp.hh"
#include "snippet_highlighters.hh"
#include "view_curses.hh"
using namespace lnav::roles::literals;
@ -462,5 +465,30 @@ print(FILE* file, const user_message& um)
println(file, al);
}
/**
 * Turn a PCRE2 compile error into an error-level user_message.
 *
 * The message text quotes the offending pattern, the reason is the text
 * returned by ce.get_message(), and the attached snippet shows the pattern
 * followed by a line with a caret placed ce.ce_offset columns in.
 *
 * @param src The source name passed to lnav::console::snippet::from().
 * @param ce The compile error providing the pattern, offset and message.
 * @return The assembled error message.
 */
user_message
to_user_message(intern_string_t src, const lnav::pcre2pp::compile_error& ce)
{
    attr_line_t snippet_al{ce.ce_pattern};
    const auto whole_pattern = line_range{
        0,
        (int) snippet_al.length(),
    };

    // Highlight the regex syntax across the entire pattern text.
    lnav::snippets::regex_highlighter(
        snippet_al, snippet_al.length(), whole_pattern);

    // Second snippet line: pad out to the error offset, then the caret and
    // the error text.
    snippet_al.append("\n")
        .append(ce.ce_offset, ' ')
        .append(lnav::roles::error("^ "))
        .append(lnav::roles::error(ce.get_message()))
        .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE));

    return user_message::error(
               attr_line_t()
                   .append_quoted(ce.ce_pattern)
                   .append(" is not a valid regular expression"))
        .with_reason(ce.get_message())
        .with_snippet(lnav::console::snippet::from(src, snippet_al));
}
} // namespace console
} // namespace lnav

@ -0,0 +1,51 @@
/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lnav_console_into_hh
#define lnav_console_into_hh
#include "intern_string.hh"
#include "lnav.console.hh"
namespace lnav {
namespace pcre2pp {
// Forward declaration so this header does not need to include the pcre2pp
// header.
struct compile_error;
}
namespace console {
/**
 * Convert a PCRE2 compile error into a user_message.
 *
 * @param src The source name to associate with the message's snippet.
 * @param ce The compile error to describe.
 */
user_message to_user_message(intern_string_t src,
const pcre2pp::compile_error& ce);
}
} // namespace lnav
#endif

@ -56,20 +56,14 @@
#include <thread>
#include <vector>
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#ifdef HAVE_PCRE_H
# include <pcre.h>
#elif HAVE_PCRE_PCRE_H
# include <pcre/pcre.h>
#else
# error "pcre.h not found?"
#endif
#if defined HAVE_NCURSESW_CURSES_H
# include <ncursesw/curses.h>
# include <ncursesw/termcap.h>
@ -215,14 +209,14 @@ void
log_host_info()
{
char cwd[MAXPATHLEN];
const char* jittarget;
char jittarget[128];
struct utsname un;
struct rusage ru;
int pcre_jit;
uint32_t pcre_jit;
uname(&un);
pcre_config(PCRE_CONFIG_JIT, &pcre_jit);
pcre_config(PCRE_CONFIG_JITTARGET, &jittarget);
pcre2_config(PCRE2_CONFIG_JIT, &pcre_jit);
pcre2_config(PCRE2_CONFIG_JITTARGET, jittarget);
log_info("uname:");
log_info(" sysname=%s", un.sysname);

@ -30,7 +30,7 @@
#include "snippet_highlighters.hh"
#include "attr_line.builder.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "view_curses.hh"
namespace lnav {
@ -225,21 +225,24 @@ regex_highlighter(attr_line_t& al, int x, line_range sub)
break;
}
case '>': {
static const pcrepp CAP_RE(R"(\(\?\<\w+$)");
static const auto CAP_RE
= lnav::pcre2pp::code::from_const(R"(\(\?\<\w+$)");
auto capture_start
= string_fragment::from_str_range(
line, sub.lr_start, lpc)
.find_left_boundary(lpc - sub.lr_start - 1,
string_fragment::tag1{'('});
pcre_context_static<30> pc;
pcre_input pi(capture_start);
if (CAP_RE.match(pc, pi)) {
auto cap_find_res
= CAP_RE.find_in(capture_start).ignore_error();
if (cap_find_res) {
alb.overlay_attr(
line_range(
capture_start.sf_begin + pc.all()->c_begin + 3,
capture_start.sf_begin + pc.all()->c_end),
line_range(capture_start.sf_begin
+ cap_find_res->f_all.sf_begin + 3,
capture_start.sf_begin
+ cap_find_res->f_all.sf_end),
VC_ROLE.value(role_t::VCR_IDENTIFIER));
alb.overlay_attr(line_range(lpc, lpc + 1),
VC_ROLE.value(role_t::VCR_RE_SPECIAL));

@ -41,10 +41,10 @@ data_format data_parser::FORMAT_PLAIN("plain", DT_INVALID, DT_INVALID);
data_parser::data_parser(data_scanner* ds)
: dp_errors("dp_errors", __FILE__, __LINE__),
dp_pairs("dp_pairs", __FILE__, __LINE__), dp_msg_format(nullptr),
dp_msg_format_begin(ds->get_input().pi_offset), dp_scanner(ds)
dp_msg_format_begin(ds->get_init_offset()), dp_scanner(ds)
{
if (TRACE_FILE != nullptr) {
fprintf(TRACE_FILE, "input %s\n", ds->get_input().get_string());
fprintf(TRACE_FILE, "input %s\n", ds->get_input().to_string().c_str());
}
}
@ -110,7 +110,8 @@ data_parser::pairup(data_parser::schema_id_t* schema,
key_comps.POP_FRONT();
found = true;
} else if (key_iter->e_token
== in_list.el_format.df_terminator) {
== in_list.el_format.df_terminator)
{
std::vector<element> key_copy;
value.SPLICE(value.end(),
@ -259,17 +260,18 @@ data_parser::pairup(data_parser::schema_id_t* schema,
if (!has_value) {
element_list_t ELEMENT_LIST_T(blank_value);
pcre_input& pi = this->dp_scanner->get_input();
const char* str = pi.get_string();
struct element blank;
blank.e_token = DT_QUOTED_STRING;
blank.e_capture.c_begin = blank.e_capture.c_end
= pair_subs.front().e_capture.c_end;
if ((blank.e_capture.c_begin >= 0)
&& ((size_t) blank.e_capture.c_begin < pi.pi_length))
if (blank.e_capture.c_begin >= 0
&& blank.e_capture.c_begin
< this->dp_scanner->get_input().sf_end)
{
switch (str[blank.e_capture.c_begin]) {
switch (this->dp_scanner->to_string_fragment(blank.e_capture)
.front())
{
case '=':
case ':':
blank.e_capture.c_begin += 1;
@ -387,23 +389,23 @@ data_parser::pairup(data_parser::schema_id_t* schema,
}
if (schema != nullptr && this->dp_msg_format != nullptr) {
pcre_input& pi = this->dp_scanner->get_input();
for (auto& fiter : pairs_out) {
*(this->dp_msg_format) += this->get_string_up_to_value(fiter);
this->dp_msg_format->append("#");
}
if ((size_t) this->dp_msg_format_begin < pi.pi_length) {
const char* str = pi.get_string();
pcre_context::capture_t last(this->dp_msg_format_begin,
pi.pi_length);
if ((size_t) this->dp_msg_format_begin
< this->dp_scanner->get_input().length())
{
auto last = this->dp_scanner->get_input().substr(
this->dp_msg_format_begin);
switch (str[last.c_begin]) {
switch (last.front()) {
case '\'':
case '"':
last.c_begin += 1;
last.sf_begin += 1;
break;
}
*(this->dp_msg_format) += pi.get_substr(&last);
*(this->dp_msg_format) += last.to_string();
}
}
@ -415,21 +417,20 @@ data_parser::pairup(data_parser::schema_id_t* schema,
void
data_parser::discover_format()
{
pcre_context_static<30> pc;
std::stack<discover_format_state> state_stack;
struct element elem;
this->dp_group_token.push_back(DT_INVALID);
this->dp_group_stack.resize(1);
state_stack.push(discover_format_state());
while (this->dp_scanner->tokenize2(pc, elem.e_token)) {
pcre_context::iterator pc_iter;
pc_iter = std::find_if(pc.begin(), pc.end(), capture_if_not(-1));
require(pc_iter != pc.end());
while (true) {
auto tok_res = this->dp_scanner->tokenize2();
if (!tok_res) {
break;
}
elem.e_capture = *pc_iter;
element elem;
elem.e_token = tok_res->tr_token;
elem.e_capture = tok_res->tr_inner_capture;
require(elem.e_capture.c_begin >= 0);
require(elem.e_capture.c_end >= 0);
@ -598,22 +599,19 @@ data_parser::parse()
std::string
data_parser::get_element_string(const data_parser::element& elem) const
{
pcre_input& pi = this->dp_scanner->get_input();
return pi.get_substr(&elem.e_capture);
return this->dp_scanner->to_string_fragment(elem.e_capture).to_string();
}
std::string
data_parser::get_string_up_to_value(const data_parser::element& elem)
{
pcre_input& pi = this->dp_scanner->get_input();
const element& val_elem
= elem.e_token == DNT_PAIR ? elem.e_sub_elements->back() : elem;
if (this->dp_msg_format_begin <= val_elem.e_capture.c_begin) {
pcre_context::capture_t leading_and_key = pcre_context::capture_t(
auto leading_and_key = data_scanner::capture_t(
this->dp_msg_format_begin, val_elem.e_capture.c_begin);
const char* str = pi.get_string();
auto str = this->dp_scanner->get_input().data();
if (leading_and_key.length() >= 2) {
switch (str[leading_and_key.c_end - 1]) {
case '\'':
@ -635,7 +633,8 @@ data_parser::get_string_up_to_value(const data_parser::element& elem)
}
}
this->dp_msg_format_begin = val_elem.e_capture.c_end;
return pi.get_substr(&leading_and_key);
return this->dp_scanner->to_string_fragment(leading_and_key)
.to_string();
} else {
this->dp_msg_format_begin = val_elem.e_capture.c_end;
}
@ -646,19 +645,18 @@ const char*
data_parser::get_element_string(const data_parser::element& elem,
size_t& len_out)
{
pcre_input& pi = this->dp_scanner->get_input();
len_out = elem.e_capture.length();
return pi.get_substr_start(&elem.e_capture);
return this->dp_scanner->to_string_fragment(elem.e_capture).data();
}
void
data_parser::print(FILE* out, data_parser::element_list_t& el)
{
fprintf(
out, " %s\n", this->dp_scanner->get_input().get_string());
fprintf(out,
" %s\n",
this->dp_scanner->get_input().to_string().c_str());
for (auto& iter : el) {
iter.print(out, this->dp_scanner->get_input());
iter.print(out, *this->dp_scanner);
}
}
@ -939,7 +937,8 @@ data_parser::element::value_token() const
if (this->e_token == DNT_VALUE) {
if (this->e_sub_elements != nullptr
&& this->e_sub_elements->size() == 1) {
&& this->e_sub_elements->size() == 1)
{
retval = this->e_sub_elements->front().e_token;
} else {
retval = DT_SYMBOL;
@ -955,7 +954,8 @@ data_parser::element::get_value_elem() const
{
if (this->e_token == DNT_VALUE) {
if (this->e_sub_elements != nullptr
&& this->e_sub_elements->size() == 1) {
&& this->e_sub_elements->size() == 1)
{
return this->e_sub_elements->front();
}
}
@ -972,13 +972,13 @@ data_parser::element::get_pair_elem() const
}
void
data_parser::element::print(FILE* out, pcre_input& pi, int offset) const
data_parser::element::print(FILE* out, data_scanner& ds, int offset) const
{
int lpc;
if (this->e_sub_elements != nullptr) {
for (auto& e_sub_element : *this->e_sub_elements) {
e_sub_element.print(out, pi, offset + 1);
e_sub_element.print(out, ds, offset + 1);
}
}
@ -998,11 +998,11 @@ data_parser::element::print(FILE* out, pcre_input& pi, int offset) const
fputc(' ', out);
}
}
for (; lpc < (int) pi.pi_length; lpc++) {
for (; lpc < (int) ds.get_input().length(); lpc++) {
fputc(' ', out);
}
std::string sub = pi.get_substr(&this->e_capture);
std::string sub = ds.to_string_fragment(this->e_capture).to_string();
fprintf(out, " %s\n", sub.c_str());
}

@ -40,7 +40,6 @@
#include "base/lnav_log.hh"
#include "byte_array.hh"
#include "data_scanner.hh"
#include "pcrepp/pcrepp.hh"
#define ELEMENT_LIST_T(var) var("" #var, __FILE__, __LINE__, group_depth)
#define PUSH_FRONT(elem) push_front(elem, __FILE__, __LINE__)
@ -334,9 +333,9 @@ public:
const element& get_pair_elem() const;
void print(FILE* out, pcre_input& pi, int offset = 0) const;
void print(FILE* out, data_scanner&, int offset = 0) const;
pcre_context::capture_t e_capture;
data_scanner::capture_t e_capture;
data_token_t e_token;
element_list_t* e_sub_elements;

@ -29,228 +29,165 @@
#include "data_scanner.hh"
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include "config.h"
#include "pcrepp/pcrepp.hh"
/**
 * Trim leading whitespace from this capture by advancing c_begin.
 *
 * The capture never becomes inverted: scanning stops as soon as c_begin
 * reaches c_end, which leaves an empty capture when the whole range is
 * whitespace.
 *
 * @param str The buffer that c_begin/c_end index into.
 */
void
data_scanner::capture_t::ltrim(const char* str)
{
    // isspace() has undefined behavior for negative values other than EOF,
    // which is what a plain `char` holding a byte >= 0x80 becomes on
    // platforms where char is signed.  Convert to unsigned char first.
    while (this->c_begin < this->c_end
           && isspace(static_cast<unsigned char>(str[this->c_begin])))
    {
        this->c_begin += 1;
    }
}
static struct {
const char* name;
pcrepp pcre;
} MATCHERS[DT_TERMINAL_MAX] = {
{
"quot",
pcrepp("\\A(?:(?:u|r)?\"((?:\\\\.|[^\"])+)\"|"
"(?:u|r)?'((?:\\\\.|[^'])+)')"),
},
{
"url",
pcrepp("\\A([\\w]+://[^\\s'\"\\[\\](){}]+[/a-zA-Z0-9\\-=&])"),
},
{
"path",
pcrepp("\\A((?:/|\\./|\\.\\./)[\\w\\.\\-_\\~/]*)"),
},
{
"mac",
pcrepp(
"\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})(?!:)"),
},
{
"date",
pcrepp("\\A("
"\\d{4}/\\d{1,2}/\\d{1,2}|"
"\\d{4}-\\d{1,2}-\\d{1,2}|"
"\\d{2}/\\w{3}/\\d{4}"
")T?"),
},
{
"time",
pcrepp("\\A([\\s\\d]\\d:\\d\\d(?:(?!:\\d)|:\\d\\d(?:[\\.,]\\d{3,6})?Z?)"
")\\b"),
},
/* { "qual", pcrepp("\\A([^\\s:=]+:[^\\s:=,]+(?!,)(?::[^\\s:=,]+)*)"), }, */
{
"ipv6",
pcrepp("\\A(::|[:\\da-fA-F\\.]+[a-fA-F\\d](?:%\\w+)?)"),
},
{
"hexd",
pcrepp("\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F])+)"),
},
{
"xmld",
pcrepp("\\A(<!\\??[\\w:]+\\s*(?:[\\w:]+(?:\\s*=\\s*"
"(?:\"((?:\\\\.|[^\"])+)\"|'((?:\\\\.|[^'])+)'|[^>]+)"
"))*\\s*>)"),
},
{
"xmlt",
pcrepp("\\A(<\\??[\\w:]+\\s*(?:[\\w:]+(?:\\s*=\\s*"
"(?:\"((?:\\\\.|[^\"])+)\"|'((?:\\\\.|[^'])+)'|[^>]+)"
"))*\\s*(?:/|\\?)>)"),
},
{
"xmlo",
pcrepp("\\A(<[\\w:]+\\s*(?:[\\w:]+(?:\\s*=\\s*"
"(?:\"((?:\\\\.|[^\"])+)\"|'((?:\\\\.|[^'])+)'|[^>]+)"
"))*\\s*>)"),
},
{
"xmlc",
pcrepp("\\A(</[\\w:]+\\s*>)"),
},
{
"h1",
pcrepp("\\A([A-Z \\-])"),
},
{
"h2",
pcrepp("\\A([A-Z \\-])"),
},
{
"h3",
pcrepp("\\A([A-Z \\-])"),
},
{
"coln",
pcrepp("\\A(:)"),
},
{
"eq",
pcrepp("\\A(=)"),
},
{
"comm",
pcrepp("\\A(,)"),
},
{
"semi",
pcrepp("\\A(;)"),
},
{
"empt",
pcrepp("\\A(\\(\\)|\\{\\}|\\[\\])"),
},
{
"lcurly",
pcrepp("\\A({)"),
},
{
"rcurly",
pcrepp("\\A(})"),
},
{
"lsquare",
pcrepp("\\A(\\[)"),
},
{
"rsquare",
pcrepp("\\A(\\])"),
},
{
"lparen",
pcrepp("\\A(\\()"),
},
{
"rparen",
pcrepp("\\A(\\))"),
},
{
"langle",
pcrepp("\\A(\\<)"),
},
{
"rangle",
pcrepp("\\A(\\>)"),
},
{
"ipv4",
pcrepp("\\A("
"(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\\.){3}"
"(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(?![\\d]))"),
},
{
"uuid",
pcrepp("\\A([0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12})"),
},
{
"vers",
pcrepp("\\A("
"[0-9]+(?:\\.[0-9]+\\w*){2,}(?:-\\w+)?|"
"[0-9]+(?:\\.[0-9]+\\w*)+(?<!\\d[eE])-\\w+?"
")\\b"),
},
{
"oct",
pcrepp("\\A(-?0[0-7]+\\b)"),
},
{
"pcnt",
pcrepp("\\A(-?[0-9]+(\\.[0-9]+)?[ ]*%\\b)"),
},
{
"num",
pcrepp("\\A(-?[0-9]+(\\.[0-9]+)?([eE][\\-+][0-9]+)?)"
"\\b(?![\\._\\-][a-zA-Z])"),
},
{
"hex",
pcrepp("\\A(-?(?:0x|[0-9])[0-9a-fA-F]+)"
"\\b(?![\\._\\-][a-zA-Z])"),
},
{
"mail",
pcrepp("\\A([a-zA-Z0-9\\._%+-]+@[a-zA-Z0-9\\.-]+\\.[a-zA-Z]+)\\b"),
},
{"cnst", pcrepp("\\A(true|True|TRUE|false|False|FALSE|None|null)\\b")},
{
"cnst",
},
{
"word",
pcrepp("\\A([a-zA-Z][a-z']+(?=[\\s\\(\\)!\\*:;'\\\"\\?,]|[\\.\\!,\\?]"
"\\s|$))"),
},
{
"sym",
pcrepp(
"\\A([^\";\\s:=,\\(\\)\\{\\}\\[\\]\\+#!@%\\^&\\*'\\?<>\\~`\\|\\\\]+"
"(?:::[^\";\\s:=,\\(\\)\\{\\}\\[\\]\\+#!@%\\^&\\*'\\?<>\\~`\\|\\\\]"
"+)*)"),
},
{
"line",
pcrepp("\\A(\r?\n|\r|;)"),
},
{
"wspc",
pcrepp("\\A([ \\r\\t\\n]+)"),
},
{
"dot",
pcrepp("\\A(\\.)"),
},
{
"escc",
pcrepp("\\A(\\\\\\.)"),
},
{
"gbg",
pcrepp("\\A(.)"),
},
};
@ -272,11 +209,12 @@ data_scanner::token2name(data_token_t token)
{
if (token < 0) {
return "inv";
} else if (token < DT_TERMINAL_MAX) {
}
if (token < DT_TERMINAL_MAX) {
return MATCHERS[token].name;
} else if (token == DT_ANY) {
}
if (token == DT_ANY) {
return "any";
} else {
return DNT_NAMES[token - DNT_KEY];
}
return DNT_NAMES[token - DNT_KEY];
}

@ -32,7 +32,7 @@
#include <string>
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "shared_buffer.hh"
enum data_token_t {
@ -118,47 +118,90 @@ class data_scanner {
public:
static const char* token2name(data_token_t token);
data_scanner(const std::string& line,
size_t off = 0,
size_t len = (size_t) -1)
: ds_line(line), ds_pcre_input(ds_line.c_str(), off, len)
/**
 * A half-open [c_begin, c_end) range of integer offsets.
 */
struct capture_t {
    /* Deliberately leaves the members uninitialized: setting them here
     * is a perf hit. */
    capture_t() {}

    capture_t(int begin, int end) : c_begin(begin), c_end(end)
    {
        assert(begin <= end);
    }

    int c_begin;
    int c_end;

    /** Advance c_begin past leading whitespace found in str. */
    void ltrim(const char* str);

    /** @return True if pos lies inside [c_begin, c_end). */
    bool contains(int pos) const
    {
        if (pos < this->c_begin) {
            return false;
        }
        return pos < this->c_end;
    }

    /** @return False when c_begin holds the -1 "no capture" marker. */
    bool is_valid() const
    {
        return !(this->c_begin == -1);
    }

    /** @return The number of offsets covered by the range. */
    int length() const
    {
        const int span = this->c_end - this->c_begin;

        return span;
    }

    /** @return True when the range covers nothing. */
    bool empty() const
    {
        return this->c_end == this->c_begin;
    }
};
data_scanner(const std::string& line, size_t off = 0)
: ds_line(line), ds_input(this->ds_line), ds_init_offset(off),
ds_next_offset(off)
{
if (!line.empty() && line[line.length() - 1] == '.') {
this->ds_pcre_input.pi_length -= 1;
if (!line.empty() && line.back() == '.') {
this->ds_input.sf_end -= 1;
}
}
explicit data_scanner(string_fragment sf) : ds_pcre_input(sf)
explicit data_scanner(string_fragment sf) : ds_input(sf)
{
if (!sf.empty() && sf[sf.length() - 1] == '.') {
this->ds_pcre_input.pi_length -= 1;
if (!sf.empty() && sf.back() == '.') {
this->ds_input.sf_end -= 1;
}
}
data_scanner(shared_buffer_ref& line,
size_t off = 0,
size_t len = (size_t) -1)
: ds_sbr(line),
ds_pcre_input(
line.get_data(), off, len == (size_t) -1 ? line.length() : len)
explicit data_scanner(shared_buffer_ref& line, size_t off, size_t end)
: ds_sbr(line), ds_input(line.to_string_fragment().sub_range(0, end)),
ds_init_offset(off), ds_next_offset(off)
{
require(len == (size_t) -1 || len <= line.length());
if (line.length() > 0 && line.get_data()[line.length() - 1] == '.') {
this->ds_pcre_input.pi_length -= 1;
if (!this->ds_input.empty() && this->ds_input.back() == '.') {
this->ds_input.sf_end -= 1;
}
}
bool tokenize(pcre_context& pc, data_token_t& token_out);
bool tokenize2(pcre_context& pc, data_token_t& token_out);
/**
 * The outcome of one successful tokenize2() step.
 *
 * Member order matters: tokenize2() builds this with brace
 * initialization in the order token, capture, inner capture, data.
 */
struct tokenize_result {
    /** The kind of token that was recognized. */
    data_token_t tr_token{DT_INVALID};
    /** The full extent of the token. */
    capture_t tr_capture;
    /** The token's content; for quoted strings tokenize2() narrows this
     * to exclude the u/r prefix and the surrounding quotes. */
    capture_t tr_inner_capture;
    /** Start of the buffer that the capture offsets index into. */
    const char* tr_data{nullptr};

    /** @return A copy of the text covered by tr_capture. */
    std::string to_string() const
    {
        const char* start = this->tr_data + this->tr_capture.c_begin;
        const auto len = static_cast<size_t>(this->tr_capture.length());

        return std::string(start, len);
    }
};
nonstd::optional<tokenize_result> tokenize2();
pcre_input& get_input() { return this->ds_pcre_input; }
void reset() { this->ds_next_offset = this->ds_init_offset; }
void reset() { this->ds_pcre_input.reset_next_offset(); }
int get_init_offset() const { return this->ds_init_offset; }
string_fragment get_input() const { return this->ds_input; }
/**
 * Turn a capture into the slice of the scanner's input that it covers.
 *
 * @param cap The begin/end offsets to slice with.
 * @return The result of ds_input.sub_range() for those offsets.
 */
string_fragment to_string_fragment(capture_t cap) const
{
    const auto first = cap.c_begin;
    const auto last = cap.c_end;

    return this->ds_input.sub_range(first, last);
}
private:
std::string ds_line;
shared_buffer_ref ds_sbr;
pcre_input ds_pcre_input;
string_fragment ds_input;
int ds_init_offset{0};
int ds_next_offset{0};
};
#endif

@ -1,4 +1,4 @@
/* Generated by re2c 3.0 on Mon Aug 22 22:00:24 2022 */
/* Generated by re2c 3.0 on Fri Sep 9 19:37:44 2022 */
#line 1 "../../lnav/src/data_scanner_re.re"
/**
* Copyright (c) 2015, Timothy Stack
@ -36,26 +36,28 @@
#include "config.h"
#include "data_scanner.hh"
bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
{
data_token_t token_out = DT_INVALID;
capture_t cap_all;
capture_t cap_inner;
# define YYCTYPE unsigned char
# define CAPTURE(tok) { \
if (YYCURSOR.val == EMPTY) { \
pi.pi_next_offset = pi.pi_length; \
this->ds_next_offset = this->ds_input.length(); \
} else { \
pi.pi_next_offset = YYCURSOR.val - (const unsigned char *) pi.get_string(); \
this->ds_next_offset = YYCURSOR.val - this->ds_input.udata(); \
} \
cap[0].c_end = pi.pi_next_offset; \
cap[1].c_end = pi.pi_next_offset; \
cap_all.c_end = this->ds_next_offset; \
cap_inner.c_end = this->ds_next_offset; \
token_out = tok; \
}
# define RET(tok) { \
CAPTURE(tok); \
return true; \
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; \
}
static const unsigned char *EMPTY = (const unsigned char *) "";
pcre_input &pi = this->ds_pcre_input;
struct _YYCURSOR {
YYCTYPE operator*() const {
if (this->val < this->lim) {
@ -93,22 +95,20 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
const YYCTYPE *val{nullptr};
const YYCTYPE *lim{nullptr};
} YYCURSOR;
YYCURSOR = (const unsigned char *) pi.get_string() + pi.pi_next_offset;
YYCURSOR = (const unsigned char *) this->ds_input.udata() + this->ds_next_offset;
_YYCURSOR yyt1;
_YYCURSOR yyt2;
_YYCURSOR yyt3;
_YYCURSOR yyt4;
const YYCTYPE *YYLIMIT = (const unsigned char *) pi.get_string() + pi.pi_length;
const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end();
const YYCTYPE *YYMARKER = YYCURSOR;
pcre_context::capture_t *cap = pc.all();
YYCURSOR.lim = YYLIMIT;
pc.set_count(2);
cap[0].c_begin = pi.pi_next_offset;
cap[0].c_end = pi.pi_next_offset;
cap[1].c_begin = pi.pi_next_offset;
cap[1].c_end = pi.pi_next_offset;
cap_all.c_begin = this->ds_next_offset;
cap_all.c_end = this->ds_next_offset;
cap_inner.c_begin = this->ds_next_offset;
cap_inner.c_end = this->ds_next_offset;
#line 115 "data_scanner_re.cc"
@ -561,7 +561,7 @@ yy1:
yy2:
++YYCURSOR;
#line 138 "../../lnav/src/data_scanner_re.re"
{ return false; }
{ return nonstd::nullopt; }
#line 566 "data_scanner_re.cc"
yy3:
yyaccept = 0;
@ -1867,15 +1867,15 @@ yy70:
#line 140 "../../lnav/src/data_scanner_re.re"
{
CAPTURE(DT_QUOTED_STRING);
switch (pi.get_string()[cap[1].c_begin]) {
switch (this->ds_input[cap_inner.c_begin]) {
case 'u':
case 'r':
cap[1].c_begin += 1;
cap_inner.c_begin += 1;
break;
}
cap[1].c_begin += 1;
cap[1].c_end -= 1;
return true;
cap_inner.c_begin += 1;
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
#line 1881 "data_scanner_re.cc"
yy71:
@ -4244,15 +4244,15 @@ yy155:
#line 155 "../../lnav/src/data_scanner_re.re"
{
CAPTURE(DT_QUOTED_STRING);
switch (pi.get_string()[cap[1].c_begin]) {
switch (this->ds_input[cap_inner.c_begin]) {
case 'u':
case 'r':
cap[1].c_begin += 1;
cap_inner.c_begin += 1;
break;
}
cap[1].c_begin += 1;
cap[1].c_end -= 1;
return true;
cap_inner.c_begin += 1;
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
#line 4258 "data_scanner_re.cc"
yy156:
@ -11090,7 +11090,7 @@ yy347:
yy348:
#line 171 "../../lnav/src/data_scanner_re.re"
{
if ((YYCURSOR - (const unsigned char *) pi.get_string()) == 17) {
if ((YYCURSOR - this->ds_input.udata()) == 17) {
RET(DT_MAC_ADDRESS);
} else {
RET(DT_HEX_DUMP);

@ -34,26 +34,28 @@
#include "config.h"
#include "data_scanner.hh"
bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
{
data_token_t token_out = DT_INVALID;
capture_t cap_all;
capture_t cap_inner;
# define YYCTYPE unsigned char
# define CAPTURE(tok) { \
if (YYCURSOR.val == EMPTY) { \
pi.pi_next_offset = pi.pi_length; \
this->ds_next_offset = this->ds_input.length(); \
} else { \
pi.pi_next_offset = YYCURSOR.val - (const unsigned char *) pi.get_string(); \
this->ds_next_offset = YYCURSOR.val - this->ds_input.udata(); \
} \
cap[0].c_end = pi.pi_next_offset; \
cap[1].c_end = pi.pi_next_offset; \
cap_all.c_end = this->ds_next_offset; \
cap_inner.c_end = this->ds_next_offset; \
token_out = tok; \
}
# define RET(tok) { \
CAPTURE(tok); \
return true; \
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; \
}
static const unsigned char *EMPTY = (const unsigned char *) "";
pcre_input &pi = this->ds_pcre_input;
struct _YYCURSOR {
YYCTYPE operator*() const {
if (this->val < this->lim) {
@ -91,22 +93,20 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
const YYCTYPE *val{nullptr};
const YYCTYPE *lim{nullptr};
} YYCURSOR;
YYCURSOR = (const unsigned char *) pi.get_string() + pi.pi_next_offset;
YYCURSOR = (const unsigned char *) this->ds_input.udata() + this->ds_next_offset;
_YYCURSOR yyt1;
_YYCURSOR yyt2;
_YYCURSOR yyt3;
_YYCURSOR yyt4;
const YYCTYPE *YYLIMIT = (const unsigned char *) pi.get_string() + pi.pi_length;
const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end();
const YYCTYPE *YYMARKER = YYCURSOR;
pcre_context::capture_t *cap = pc.all();
YYCURSOR.lim = YYLIMIT;
pc.set_count(2);
cap[0].c_begin = pi.pi_next_offset;
cap[0].c_end = pi.pi_next_offset;
cap[1].c_begin = pi.pi_next_offset;
cap[1].c_end = pi.pi_next_offset;
cap_all.c_begin = this->ds_next_offset;
cap_all.c_end = this->ds_next_offset;
cap_inner.c_begin = this->ds_next_offset;
cap_inner.c_end = this->ds_next_offset;
/*!re2c
re2c:yyfill:enable = 0;
@ -135,41 +135,41 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
(IPV6SEG":"){1,4}":"IPV4ADDR
);
EOF { return false; }
EOF { return nonstd::nullopt; }
("u"|"r")?'"'('\\'.|[^\x00"\\]|'""')*'"' {
CAPTURE(DT_QUOTED_STRING);
switch (pi.get_string()[cap[1].c_begin]) {
switch (this->ds_input[cap_inner.c_begin]) {
case 'u':
case 'r':
cap[1].c_begin += 1;
cap_inner.c_begin += 1;
break;
}
cap[1].c_begin += 1;
cap[1].c_end -= 1;
return true;
cap_inner.c_begin += 1;
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
[a-qstv-zA-QSTV-Z]"'" {
CAPTURE(DT_WORD);
}
("u"|"r")?"'"('\\'.|"''"|[^\x00'\\])*"'"/[^sS] {
CAPTURE(DT_QUOTED_STRING);
switch (pi.get_string()[cap[1].c_begin]) {
switch (this->ds_input[cap_inner.c_begin]) {
case 'u':
case 'r':
cap[1].c_begin += 1;
cap_inner.c_begin += 1;
break;
}
cap[1].c_begin += 1;
cap[1].c_end -= 1;
return true;
cap_inner.c_begin += 1;
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
[a-zA-Z0-9]+":/""/"?[^\x00\r\n\t '"[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); }
("/"|"./"|"../"|[A-Z]":\\"|"\\\\")("Program Files"(" (x86)")?)?[a-zA-Z0-9_\.\-\~/\\!@#$%^&*()]* { RET(DT_PATH); }
(SPACE|NUM)NUM":"NUM{2}/[^:] { RET(DT_TIME); }
(SPACE|NUM)NUM?":"NUM{2}":"NUM{2}("."NUM{3,6})?/[^:] { RET(DT_TIME); }
[0-9a-fA-F][0-9a-fA-F](":"[0-9a-fA-F][0-9a-fA-F])+ {
if ((YYCURSOR - (const unsigned char *) pi.get_string()) == 17) {
if ((YYCURSOR - this->ds_input.udata()) == 17) {
RET(DT_MAC_ADDRESS);
} else {
RET(DT_HEX_DUMP);

@ -251,13 +251,16 @@ public:
metadata walk()
{
metadata_builder mb;
pcre_context_static<30> pc;
data_token_t dt = DT_INVALID;
auto& pi = this->sw_scanner.get_input();
size_t garbage_count = 0;
while (garbage_count < 1000 && this->sw_scanner.tokenize2(pc, dt)) {
element el(dt, pc);
while (garbage_count < 1000) {
    auto tokenize_res = this->sw_scanner.tokenize2();
    if (!tokenize_res) {
        // The scanner has no more tokens to hand out.
        break;
    }
    // tokenize2() now returns the token type in its result instead of
    // writing it through an out-parameter, so `dt` must be refreshed
    // here.  Without this assignment it keeps its initial DT_INVALID
    // value and the switch below never matches a real token.
    dt = tokenize_res->tr_token;
    element el(dt, tokenize_res->tr_capture);
    switch (dt) {
case DT_XML_DECL_TAG:
@ -271,7 +274,7 @@ public:
this->sw_interval_state.back().is_line_number
= this->sw_line_number;
this->sw_interval_state.back().is_name
= pi.get_substr(&el.e_capture);
= tokenize_res->to_string();
this->sw_depth += 1;
this->sw_interval_state.resize(this->sw_depth + 1);
this->sw_hier_nodes.push_back(
@ -328,13 +331,14 @@ public:
= std::move(this->sw_hier_nodes.back());
this->sw_hier_nodes.pop_back();
if (this->sw_interval_state.back().is_start) {
pcre_context::capture_t obj_cap = {
data_scanner::capture_t obj_cap = {
static_cast<int>(this->sw_interval_state.back()
.is_start.value()),
el.e_capture.c_end,
};
auto sf = pi.get_string_fragment(&obj_cap);
auto sf
= this->sw_scanner.to_string_fragment(obj_cap);
if (!sf.find('\n')) {
this->sw_hier_stage->hn_named_children.clear();
this->sw_hier_stage->hn_children.clear();
@ -396,18 +400,13 @@ public:
private:
struct element {
element(data_token_t token, pcre_context& pc)
: e_token(token), e_capture(*pc.all())
{
}
element(data_token_t token, pcre_context::capture_t& cap)
element(data_token_t token, data_scanner::capture_t& cap)
: e_token(token), e_capture(cap)
{
}
data_token_t e_token;
pcre_context::capture_t e_capture;
data_scanner::capture_t e_capture;
};
struct interval_state {
@ -416,11 +415,10 @@ private:
std::string is_name;
};
nonstd::optional<pcre_context::capture_t> flush_values()
nonstd::optional<data_scanner::capture_t> flush_values()
{
nonstd::optional<pcre_context::capture_t> last_key;
nonstd::optional<pcre_context::capture_t> retval;
auto& pi = this->sw_scanner.get_input();
nonstd::optional<data_scanner::capture_t> last_key;
nonstd::optional<data_scanner::capture_t> retval;
if (!this->sw_values.empty()) {
if (!this->sw_interval_state.back().is_start) {
@ -443,7 +441,9 @@ private:
case DT_EQUALS:
if (last_key) {
this->sw_interval_state.back().is_name
= pi.get_substr(&last_key.value());
= this->sw_scanner
.to_string_fragment(last_key.value())
.to_string();
if (!this->sw_interval_state.back().is_name.empty()) {
this->sw_interval_state.back().is_start
= static_cast<ssize_t>(
@ -464,7 +464,7 @@ private:
return retval;
}
void append_child_node(nonstd::optional<pcre_context::capture_t> terminator)
void append_child_node(nonstd::optional<data_scanner::capture_t> terminator)
{
auto& ivstate = this->sw_interval_state.back();
if (!ivstate.is_start || !terminator || this->sw_depth == 0) {

@ -44,7 +44,6 @@
#include "lnav_util.hh"
#include "logfile.hh"
#include "pcap_manager.hh"
#include "pcrepp/pcrepp.hh"
#include "service_tags.hh"
#include "tailer/tailer.looper.hh"
@ -84,7 +83,8 @@ file_collection::close_files(const std::vector<std::shared_ptr<logfile>>& files)
auto path_str = actual_path_opt.value().string();
for (auto iter = REALPATH_CACHE.begin();
iter != REALPATH_CACHE.end();) {
iter != REALPATH_CACHE.end();)
{
if (iter->first == path_str || iter->second == path_str) {
iter = REALPATH_CACHE.erase(iter);
} else {
@ -339,7 +339,8 @@ file_collection::watch_logfile(const std::string& filename,
error_queue = convert_res.cr_error_queue](
auto& fc, auto& child) {
if (child.was_normal_exit()
&& child.exit_status() == EXIT_SUCCESS) {
&& child.exit_status() == EXIT_SUCCESS)
{
log_info("pcap[%d] exited normally",
child.in());
return;

@ -425,15 +425,15 @@ filter_sub_source::rl_change(readline_curses* rc)
break;
case filter_lang_t::REGEX: {
auto regex_res
= pcrepp::shared_from_str(new_value, PCRE_CASELESS | PCRE_UTF8);
= lnav::pcre2pp::code::from(new_value, PCRE2_CASELESS);
if (regex_res.isErr()) {
auto pe = regex_res.unwrapErr();
lnav_data.ld_filter_help_status_source.fss_error.set_value(
"error: %s", pe.ce_msg);
"error: %s", pe.get_message().c_str());
} else {
auto& hm = top_view->get_highlights();
highlighter hl(regex_res.unwrap());
highlighter hl(regex_res.unwrap().to_shared());
auto role = tf->get_type() == text_filter::EXCLUDE
? role_t::VCR_DIFF_DELETE
: role_t::VCR_DIFF_ADD;
@ -508,21 +508,12 @@ filter_sub_source::rl_perform(readline_curses* rc)
switch (tf->get_lang()) {
case filter_lang_t::NONE:
case filter_lang_t::REGEX: {
auto compile_res = pcrepp::shared_from_str(
new_value, PCRE_CASELESS | PCRE_UTF8);
auto compile_res
= lnav::pcre2pp::code::from(new_value, PCRE2_CASELESS);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
auto um = lnav::console::user_message::error(
"invalid regular expression")
.with_reason(ce.ce_msg)
.with_snippet(lnav::console::snippet::from(
INPUT_SRC, new_value));
um.um_snippets.back()
.s_content.append("\n")
.append(ce.ce_offset, ' ')
.append("^ "_comment)
.append(lnav::roles::comment(ce.ce_msg));
auto um = lnav::console::to_user_message(INPUT_SRC, ce);
lnav_data.ld_exec_context.ec_error_callback_stack.back()(
um);
this->rl_abort(rc);
@ -530,11 +521,11 @@ filter_sub_source::rl_perform(readline_curses* rc)
tf->lf_deleted = true;
tss->text_filters_changed();
auto pf
= std::make_shared<pcre_filter>(tf->get_type(),
new_value,
tf->get_index(),
compile_res.unwrap());
auto pf = std::make_shared<pcre_filter>(
tf->get_type(),
new_value,
tf->get_index(),
compile_res.unwrap().to_shared());
*iter = pf;
tss->text_filters_changed();

@ -48,7 +48,7 @@
#include "vis_line.hh"
template<typename LineType>
grep_proc<LineType>::grep_proc(pcre* code,
grep_proc<LineType>::grep_proc(std::shared_ptr<lnav::pcre2pp::code> code,
grep_proc_source<LineType>& gps,
std::shared_ptr<pollable_supervisor> ps)
: pollable(ps, pollable::category::background), gp_pcre(code),
@ -189,40 +189,30 @@ grep_proc<LineType>::child_loop()
line_value.clear();
done = !this->gp_source.grep_value_for_line(line, line_value);
if (!done) {
pcre_context_static<128> pc;
pcre_input pi(line_value);
while (this->gp_pcre.match(pc, pi)) {
pcre_context::iterator pc_iter;
pcre_context::capture_t* m;
if (pi.pi_offset == 0) {
fprintf(stdout, "%d\n", (int) line);
}
m = pc.all();
fprintf(stdout, "[%d:%d]\n", m->c_begin, m->c_end);
for (pc_iter = pc.begin(); pc_iter != pc.end(); pc_iter++) {
if (!pc_iter->is_valid()) {
continue;
this->gp_pcre->capture_from(line_value)
.for_each([&](lnav::pcre2pp::match_data& md) {
if (md.leading().sf_begin == 0) {
fprintf(stdout, "%d\n", (int) line);
}
fprintf(stdout,
"(%d:%d)",
pc_iter->c_begin,
pc_iter->c_end);
/* If the capture was conditional, pcre will return a -1
* here.
*/
if (pc_iter->c_begin >= 0) {
fwrite(pi.get_substr_start(pc_iter),
1,
pc_iter->length(),
stdout);
"[%d:%d]\n",
md[0]->sf_begin,
md[0]->sf_end);
for (int lpc = 1; lpc < md.get_count(); lpc++) {
if (!md[lpc]) {
continue;
}
fprintf(stdout,
"(%d:%d)",
md[lpc]->sf_begin,
md[lpc]->sf_end);
fwrite(
md[lpc]->data(), 1, md[lpc]->length(), stdout);
fputc('\n', stdout);
}
fputc('\n', stdout);
}
fprintf(stdout, "/\n");
}
fprintf(stdout, "/\n");
});
}
if (((line + 1) % 10000) == 0) {

@ -46,7 +46,7 @@
#include "base/auto_mem.hh"
#include "base/lnav_log.hh"
#include "line_buffer.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "pollable.hh"
#include "strong_int.hh"
@ -180,7 +180,7 @@ public:
* @param code The pcre code to run over the lines of input.
* @param gps The source of the data to match.
*/
grep_proc(pcre* code,
grep_proc(std::shared_ptr<lnav::pcre2pp::code> code,
grep_proc_source<LineType>& gps,
std::shared_ptr<pollable_supervisor> ps);
@ -274,7 +274,7 @@ protected:
virtual void handle_match(
int line, std::string& line_value, int off, int* matches, int count);
pcrepp gp_pcre;
std::shared_ptr<lnav::pcre2pp::code> gp_pcre;
grep_proc_source<LineType>& gp_source; /*< The data source delegate. */
auto_fd gp_err_pipe; /*< Standard error from the child. */

@ -114,33 +114,33 @@ highlighter::annotate(attr_line_t& al, int start) const
return;
}
pcre_context_static<60> pc;
pcre_input pi(sf);
while (this->h_regex->match(pc, pi)) {
if (pc.get_count() == 1) {
line_range lr{start + pc.all()->c_begin, start + pc.all()->c_end};
this->h_regex->capture_from(sf).for_each(
[&](lnav::pcre2pp::match_data& md) {
if (md.get_count() == 1) {
this->annotate_capture(al, to_line_range(md[0].value()));
} else {
for (int lpc = 1; lpc < md.get_count(); lpc++) {
if (!md[lpc]) {
continue;
}
this->annotate_capture(al, lr);
} else {
for (int lpc = 0; lpc < pc.get_count() - 1; lpc++) {
line_range lr{start + pc[lpc]->c_begin, start + pc[lpc]->c_end};
const auto* name = this->h_regex->name_for_capture(lpc);
const auto* name = this->h_regex->get_name_for_capture(lpc);
auto lr = to_line_range(md[lpc].value());
if (name != nullptr && name[0]) {
auto ident_attrs = vc.attrs_for_ident(name);
if (name != nullptr && name[0]) {
auto ident_attrs = vc.attrs_for_ident(name);
ident_attrs.ta_attrs |= this->h_attrs.ta_attrs;
if (this->h_role != role_t::VCR_NONE) {
auto role_attrs = vc.attrs_for_role(this->h_role);
ident_attrs.ta_attrs |= this->h_attrs.ta_attrs;
if (this->h_role != role_t::VCR_NONE) {
auto role_attrs = vc.attrs_for_role(this->h_role);
ident_attrs.ta_attrs |= role_attrs.ta_attrs;
ident_attrs.ta_attrs |= role_attrs.ta_attrs;
}
sa.emplace_back(lr, VC_STYLE.value(ident_attrs));
} else {
this->annotate_capture(al, lr);
}
sa.emplace_back(lr, VC_STYLE.value(ident_attrs));
} else {
this->annotate_capture(al, lr);
}
}
}
}
});
}

@ -36,15 +36,15 @@
#include <utility>
#include "optional.hpp"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "text_format.hh"
#include "view_curses.hh"
struct highlighter {
highlighter() = default;
explicit highlighter(std::shared_ptr<pcrepp> regex)
: h_regex(std::move(regex))
explicit highlighter(const std::shared_ptr<lnav::pcre2pp::code>& regex)
: h_regex(regex)
{
}
@ -113,7 +113,7 @@ struct highlighter {
role_t h_role{role_t::VCR_NONE};
styling::color_unit h_fg{styling::color_unit::make_empty()};
styling::color_unit h_bg{styling::color_unit::make_empty()};
std::shared_ptr<pcrepp> h_regex;
std::shared_ptr<lnav::pcre2pp::code> h_regex;
text_attrs h_attrs;
std::set<text_format_t> h_text_formats;
intern_string_t h_format_name;

@ -2727,12 +2727,12 @@ regexp_capture(*string*, *pattern*)
;SELECT * FROM regexp_capture('a=1; b=2', '(\w+)=(\d+)')
match_index capture_index capture_name capture_count range_start range_stop content
0 0 <NULL> 3 1 4 a=1
0 1 3 1 2 a
0 2 3 3 4 1
1 0 <NULL> 3 6 9 b=2
1 1 3 6 7 b
1 2 3 8 9 2
0 0 <NULL> 3 1 4 a=1
0 1 <NULL> 3 1 2 a
0 2 <NULL> 3 3 4 1
1 0 <NULL> 3 6 9 b=2
1 1 <NULL> 3 6 7 b
1 2 <NULL> 3 8 9 2
**See Also**
:ref:`char`, :ref:`charindex`, :ref:`decode`, :ref:`encode`, :ref:`endswith`, :ref:`extract`, :ref:`group_concat`, :ref:`group_spooky_hash_agg`, :ref:`gunzip`, :ref:`gzip`, :ref:`humanize_duration`, :ref:`humanize_file_size`, :ref:`instr`, :ref:`leftstr`, :ref:`length`, :ref:`logfmt2json`, :ref:`lower`, :ref:`ltrim`, :ref:`padc`, :ref:`padl`, :ref:`padr`, :ref:`printf`, :ref:`proper`, :ref:`regexp_capture_into_json`, :ref:`regexp_match`, :ref:`regexp_replace`, :ref:`replace`, :ref:`replicate`, :ref:`reverse`, :ref:`rightstr`, :ref:`rtrim`, :ref:`sparkline`, :ref:`spooky_hash`, :ref:`startswith`, :ref:`strfilter`, :ref:`substr`, :ref:`trim`, :ref:`unicode`, :ref:`upper`, :ref:`xpath`

@ -2939,6 +2939,9 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%'
isc::supervisor root_superv(injector::get<isc::service_list>());
try {
char pcre2_version[128];
pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version);
log_info("startup: %s", VCS_PACKAGE_STRING);
log_host_info();
log_info("Libraries:");
@ -2952,7 +2955,7 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%'
log_info(" libarchive=%d", ARCHIVE_VERSION_NUMBER);
#endif
log_info(" ncurses=%s", NCURSES_VERSION);
log_info(" pcre=%s", pcre_version());
log_info(" pcre2=%s", pcre2_version);
log_info(" readline=%s", rl_library_version);
log_info(" sqlite=%s", sqlite3_version);
log_info(" zlib=%s", zlibVersion());

@ -64,6 +64,7 @@
#include "log_data_helper.hh"
#include "log_data_table.hh"
#include "log_search_table.hh"
#include "log_search_table_fwd.hh"
#include "readline_callbacks.hh"
#include "readline_curses.hh"
#include "readline_highlighters.hh"
@ -1669,6 +1670,8 @@ com_highlight(exec_context& ec,
if (args.empty()) {
args.emplace_back("filter");
} else if (args.size() > 1) {
const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern");
auto* tc = *lnav_data.ld_view_stack.top();
auto& hm = tc->get_highlights();
auto re_frag = remaining_args_frag(cmdline, args);
@ -1678,22 +1681,14 @@ com_highlight(exec_context& ec,
}
auto compile_res
= pcrepp::shared_from_str(args[1], PCRE_CASELESS | PCRE_UTF8);
= lnav::pcre2pp::code::from(args[1], PCRE2_CASELESS);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
auto um = lnav::console::user_message::error(
"invalid regular expression")
.with_reason(ce.ce_msg)
.with_snippets(ec.ec_source);
um.um_snippets.back()
.s_content.append("\n")
.append(re_frag.sf_begin + ce.ce_offset, ' ')
.append("^ "_comment)
.append(lnav::roles::comment(ce.ce_msg));
auto um = lnav::console::to_user_message(PATTERN_SRC, ce);
return Err(um);
}
highlighter hl(compile_res.unwrap());
highlighter hl(compile_res.unwrap().to_shared());
auto hl_attrs = view_colors::singleton().attrs_for_ident(args[1]);
if (ec.ec_dry_run) {
@ -1799,6 +1794,8 @@ com_filter(exec_context& ec,
return ec.make_error("{} view does not support filtering",
lnav_view_strings[tc - lnav_data.ld_views]);
} else if (args.size() > 1) {
const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern");
auto* tss = tc->get_sub_source();
auto& fs = tss->get_filters();
auto re_frag = remaining_args_frag(cmdline, args);
@ -1814,19 +1811,11 @@ com_filter(exec_context& ec,
}
auto compile_res
= pcrepp::shared_from_str(args[1], PCRE_CASELESS | PCRE_UTF8);
= lnav::pcre2pp::code::from(args[1], PCRE2_CASELESS);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
auto um = lnav::console::user_message::error(
"invalid regular expression")
.with_reason(ce.ce_msg)
.with_snippets(ec.ec_source);
um.um_snippets.back()
.s_content.append("\n")
.append(re_frag.sf_begin + ce.ce_offset, ' ')
.append("^ "_comment)
.append(lnav::roles::comment(ce.ce_msg));
auto um = lnav::console::to_user_message(PATTERN_SRC, ce);
return Err(um);
}
if (ec.ec_dry_run) {
@ -1837,7 +1826,7 @@ com_filter(exec_context& ec,
retval = "";
} else {
auto& hm = tc->get_highlights();
highlighter hl(compile_res.unwrap());
highlighter hl(compile_res.unwrap().to_shared());
auto role = (args[0] == "filter-out") ? role_t::VCR_DIFF_DELETE
: role_t::VCR_DIFF_ADD;
hl.with_role(role);
@ -1861,7 +1850,7 @@ com_filter(exec_context& ec,
return ec.make_error("too many filters");
}
auto pf = std::make_shared<pcre_filter>(
lt, args[1], *filter_index, compile_res.unwrap());
lt, args[1], *filter_index, compile_res.unwrap().to_shared());
log_debug("%s [%d] %s",
args[0].c_str(),
@ -2227,6 +2216,7 @@ com_create_search_table(exec_context& ec,
if (args.empty()) {
} else if (args.size() >= 2) {
const static intern_string_t PATTERN_SRC = intern_string::lookup("pattern");
string_fragment regex_frag;
std::string regex;
@ -2237,28 +2227,19 @@ com_create_search_table(exec_context& ec,
regex = lnav_data.ld_views[LNV_LOG].get_current_search();
}
auto re_res = pcrepp::shared_from_str(
regex, log_search_table::pattern_options());
auto compile_res = lnav::pcre2pp::code::from(
regex, log_search_table_ns::PATTERN_OPTIONS);
if (re_res.isErr()) {
auto re_err = re_res.unwrapErr();
auto um = lnav::console::user_message::error(
"invalid regular expression")
.with_reason(re_err.ce_msg)
.with_snippets(ec.ec_source);
if (args.size() >= 3) {
um.um_snippets.back()
.s_content.append("\n")
.append(regex_frag.sf_begin + re_err.ce_offset, ' ')
.append("^ "_comment)
.append(lnav::roles::comment(re_err.ce_msg));
}
if (compile_res.isErr()) {
auto re_err = compile_res.unwrapErr();
auto um = lnav::console::to_user_message(PATTERN_SRC, re_err)
.with_snippets(ec.ec_source);
return Err(um);
}
auto re = re_res.unwrap();
auto re = compile_res.unwrap().to_shared();
auto tab_name = intern_string::lookup(args[1]);
auto lst = std::make_shared<log_search_table>(*re, tab_name);
auto lst = std::make_shared<log_search_table>(re, tab_name);
if (ec.ec_dry_run) {
auto* tc = &lnav_data.ld_views[LNV_LOG];
auto& hm = tc->get_highlights();
@ -4726,7 +4707,7 @@ search_files_prompt(std::vector<std::string>& args)
lnav_data.ld_mode = ln_mode_t::SEARCH_FILES;
for (const auto& lf : lnav_data.ld_active_files.fc_files) {
auto path = pcrepp::quote(lf->get_unique_path());
auto path = lnav::pcre2pp::quote(lf->get_unique_path());
lnav_data.ld_rl_view->add_possibility(
ln_mode_t::SEARCH_FILES, "*", path);
}

@ -471,9 +471,7 @@ static const struct json_path_container keymap_def_handlers = {
"an 'x' followed by the hexadecimal representation of the byte.")
.with_obj_provider<key_command, key_map>(
[](const yajlpp_provider_context& ypc, key_map* km) {
key_command& retval
= km->km_seq_to_cmd[ypc.ypc_extractor.get_substr(
"key_seq")];
auto& retval = km->km_seq_to_cmd[ypc.get_substr("key_seq")];
return &retval;
})
@ -492,8 +490,7 @@ static const struct json_path_container keymap_defs_handlers = {
.with_obj_provider<key_map, _lnav_config>(
[](const yajlpp_provider_context& ypc, _lnav_config* root) {
key_map& retval
= root->lc_ui_keymaps[ypc.ypc_extractor.get_substr(
"keymap_name")];
= root->lc_ui_keymaps[ypc.get_substr("keymap_name")];
return &retval;
})
.with_path_provider<_lnav_config>(
@ -845,7 +842,7 @@ static const struct json_path_container theme_log_level_styles_handlers = {
.with_obj_provider<style_config, lnav_theme>(
[](const yajlpp_provider_context& ypc, lnav_theme* root) {
auto& sc = root->lt_level_styles[string2level(
ypc.ypc_extractor.get_substr_i("level").get())];
ypc.get_substr_i("level").get())];
if (ypc.ypc_parse_context != nullptr && sc.pp_path.empty()) {
sc.pp_path = ypc.ypc_parse_context->get_full_path();
@ -877,15 +874,14 @@ static const struct json_path_container highlighter_handlers = {
static const struct json_path_container theme_highlights_handlers = {
yajlpp::pattern_property_handler("(?<highlight_name>[\\w\\-]+)")
.with_obj_provider<highlighter_config, lnav_theme>(
[](const yajlpp_provider_context& ypc, lnav_theme* root) {
highlighter_config& hc
= root->lt_highlights[ypc.ypc_extractor
.get_substr_i("highlight_name")
.get()];
return &hc;
})
.with_obj_provider<highlighter_config,
lnav_theme>([](const yajlpp_provider_context& ypc,
lnav_theme* root) {
highlighter_config& hc
= root->lt_highlights[ypc.get_substr_i("highlight_name").get()];
return &hc;
})
.with_path_provider<lnav_theme>(
[](struct lnav_theme* cfg, std::vector<std::string>& paths_out) {
for (const auto& pair : cfg->lt_highlights) {
@ -940,8 +936,7 @@ static const struct json_path_container theme_defs_handlers = {
.with_obj_provider<lnav_theme, _lnav_config>(
[](const yajlpp_provider_context& ypc, _lnav_config* root) {
lnav_theme& lt
= root->lc_ui_theme_defs[ypc.ypc_extractor.get_substr(
"theme_name")];
= root->lc_ui_theme_defs[ypc.get_substr("theme_name")];
return &lt;
})
@ -953,8 +948,7 @@ static const struct json_path_container theme_defs_handlers = {
})
.with_obj_deleter(
+[](const yajlpp_provider_context& ypc, _lnav_config* root) {
root->lc_ui_theme_defs.erase(
ypc.ypc_extractor.get_substr("theme_name"));
root->lc_ui_theme_defs.erase(ypc.get_substr("theme_name"));
})
.with_children(theme_def_handlers),
};
@ -1137,9 +1131,8 @@ static const struct json_path_container sysclip_impls_handlers = {
.with_obj_provider<sysclip::clipboard, _lnav_config>(
[](const yajlpp_provider_context& ypc, _lnav_config* root) {
auto& retval
= root->lc_sysclip
.c_clipboard_impls[ypc.ypc_extractor.get_substr(
"clipboard_impl_name")];
= root->lc_sysclip.c_clipboard_impls[ypc.get_substr(
"clipboard_impl_name")];
return &retval;
})
.with_path_provider<_lnav_config>(
@ -1178,8 +1171,7 @@ static const struct json_path_container log_source_watch_handlers = {
_lnav_config>(
[](const yajlpp_provider_context& ypc, _lnav_config* root) {
auto& retval = root->lc_log_source
.c_watch_exprs[ypc.ypc_extractor.get_substr(
"watch_name")];
.c_watch_exprs[ypc.get_substr("watch_name")];
return &retval;
})
.with_path_provider<_lnav_config>(
@ -1191,7 +1183,7 @@ static const struct json_path_container log_source_watch_handlers = {
.with_obj_deleter(
+[](const yajlpp_provider_context& ypc, _lnav_config* root) {
root->lc_log_source.c_watch_exprs.erase(
ypc.ypc_extractor.get_substr("watch_name"));
ypc.get_substr("watch_name"));
})
.with_children(log_source_watch_expr_handlers),
};
@ -1525,14 +1517,12 @@ reset_config(const std::string& path)
}
if (jph != nullptr && jph->jph_children && jph->jph_obj_deleter) {
pcre_context_static<30> pc;
auto key_start = ypc.ypc_path_index_stack.back();
pcre_input pi(&ypc.ypc_path[key_start + 1],
0,
ypc.ypc_path.size() - key_start - 2);
yajlpp_provider_context provider_ctx{{pc, pi},
static_cast<size_t>(-1)};
jph->jph_regex->match(pc, pi);
auto path_frag = string_fragment::from_byte_range(
ypc.ypc_path.data(), key_start + 1, ypc.ypc_path.size());
auto md = jph->jph_regex->create_match_data();
yajlpp_provider_context provider_ctx{&md, static_cast<size_t>(-1)};
jph->jph_regex->capture_from(path_frag).into(md).matches();
jph->jph_obj_deleter(provider_ctx, ypc.ypc_obj_stack.top());
}

@ -144,7 +144,7 @@ to_json(yajlpp_gen& gen, const attr_line_t& al)
},
[&](const intern_string_t& str) { elem_map.gen(str); },
[&](const std::string& str) { elem_map.gen(str); },
[&](const text_attrs& ta) { elem_map.gen(""); },
[&](const text_attrs& ta) { elem_map.gen(ta.ta_attrs); },
[&](const std::shared_ptr<logfile>& lf) {
elem_map.gen("");
},
@ -234,7 +234,10 @@ read_string_attr_type(yajlpp_parse_context* ypc,
sa->sa_type = &VC_ROLE;
} else if (type == "preformatted") {
sa->sa_type = &SA_PREFORMATTED;
} else if (type == "style") {
sa->sa_type = &VC_STYLE;
} else {
log_error("unhandled string_attr type: %s", type.c_str());
ensure(false);
}
return 1;
@ -247,6 +250,10 @@ read_string_attr_int_value(yajlpp_parse_context* ypc, long long in)
if (sa->sa_type == &VC_ROLE) {
sa->sa_value = static_cast<role_t>(in);
} else if (sa->sa_type == &VC_STYLE) {
sa->sa_value = text_attrs{
static_cast<int32_t>(in),
};
}
return 1;
}

@ -85,7 +85,8 @@ log_data_helper::parse_line(content_line_t line, bool allow_middle)
body.lr_end = this->ldh_line_values.lvv_sbr.length();
}
this->ldh_scanner = std::make_unique<data_scanner>(
this->ldh_line_values.lvv_sbr, body.lr_start, body.lr_end);
this->ldh_line_values.lvv_sbr.to_string_fragment().sub_range(
body.lr_start, body.lr_end));
this->ldh_parser
= std::make_unique<data_parser>(this->ldh_scanner.get());
this->ldh_msg_format.clear();

@ -39,7 +39,7 @@ log_data_table::log_data_table(logfile_sub_source& lss,
: log_vtab_impl(table_name), ldt_log_source(lss),
ldt_template_line(template_line)
{
std::shared_ptr<logfile> lf = lss.find(template_line);
auto lf = lss.find(template_line);
auto format = lf->get_format();
this->vi_supports_indexes = false;
@ -53,7 +53,7 @@ log_data_table::get_columns_int()
auto& cols = this->ldt_cols;
auto& metas = this->ldt_value_metas;
content_line_t cl_copy = this->ldt_template_line;
std::shared_ptr<logfile> lf = this->ldt_log_source.find(cl_copy);
auto lf = this->ldt_log_source.find(cl_copy);
struct line_range body;
string_attrs_t sa;
logline_value_vector line_values;

@ -297,45 +297,38 @@ log_format::next_format(pcre_format* fmt, int& index, int& locked_index)
const char*
log_format::log_scanf(uint32_t line_number,
const char* line,
size_t len,
string_fragment line,
pcre_format* fmt,
const char* time_fmt[],
struct exttm* tm_out,
struct timeval* tv_out,
...)
string_fragment* ts_out,
nonstd::optional<string_fragment>* level_out)
{
int curr_fmt = -1;
const char* retval = nullptr;
bool done = false;
pcre_input pi(line, 0, len);
pcre_context_static<128> pc;
va_list args;
int pat_index = this->last_pattern_index();
while (!done && next_format(fmt, curr_fmt, pat_index)) {
va_start(args, tv_out);
pi.reset(line, 0, len);
if (!fmt[curr_fmt].pcre.match(pc, pi, PCRE_NO_UTF8_CHECK)) {
auto md = fmt[curr_fmt].pcre->create_match_data();
auto match_res = fmt[curr_fmt]
.pcre->capture_from(line)
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
retval = nullptr;
} else {
pcre_context::capture_t* ts = pc[fmt[curr_fmt].pf_timestamp_index];
for (auto& iter : pc) {
pcre_context::capture_t* cap
= va_arg(args, pcre_context::capture_t*);
*cap = iter;
}
auto ts = md[fmt[curr_fmt].pf_timestamp_index];
retval = this->lf_date_time.scan(pi.get_substr_start(ts),
ts->length(),
nullptr,
tm_out,
*tv_out);
retval = this->lf_date_time.scan(
ts->data(), ts->length(), nullptr, tm_out, *tv_out);
if (retval) {
*ts_out = ts.value();
*level_out = md[2];
if (curr_fmt != pat_index) {
uint32_t lock_line;
@ -351,8 +344,6 @@ log_format::log_scanf(uint32_t line_number,
done = true;
}
}
va_end(args);
}
return retval;
@ -495,11 +486,9 @@ read_json_int(yajlpp_parse_context* ypc, long long val)
snprintf(level_buf, sizeof(level_buf), "%lld", val);
pcre_input pi(level_buf);
pcre_context::capture_t level_cap = {0, (int) strlen(level_buf)};
jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level(
pi, &level_cap, jlu->jlu_batch_context));
string_fragment::from_c_str(level_buf),
jlu->jlu_batch_context));
} else {
std::vector<std::pair<int64_t, log_level_t>>::iterator iter;
@ -589,7 +578,7 @@ json_array_end(void* ctx)
}
static struct json_path_container json_log_handlers = {
json_path_handler(pcrepp("\\w+"))
yajlpp::pattern_property_handler("\\w+")
.add_cb(read_json_null)
.add_cb(read_json_bool)
.add_cb(read_json_int)
@ -664,13 +653,14 @@ rewrite_json_double(yajlpp_parse_context* ypc, double val)
return 1;
}
static struct json_path_container json_log_rewrite_handlers
= {json_path_handler(pcrepp("\\w+"))
.add_cb(rewrite_json_null)
.add_cb(rewrite_json_bool)
.add_cb(rewrite_json_int)
.add_cb(rewrite_json_double)
.add_cb(rewrite_json_field)};
static struct json_path_container json_log_rewrite_handlers = {
yajlpp::pattern_property_handler("\\w+")
.add_cb(rewrite_json_null)
.add_cb(rewrite_json_bool)
.add_cb(rewrite_json_int)
.add_cb(rewrite_json_double)
.add_cb(rewrite_json_field),
};
bool
external_log_format::scan_for_partial(shared_buffer_ref& sbr,
@ -680,11 +670,9 @@ external_log_format::scan_for_partial(shared_buffer_ref& sbr,
return false;
}
auto& pat = this->elf_pattern_order[this->last_pattern_index()];
pcre_input pi(sbr.get_data(), 0, sbr.length());
const auto& pat = this->elf_pattern_order[this->last_pattern_index()];
if (!this->lf_multiline) {
len_out = pat->p_pcre->match_partial(pi);
len_out = pat->p_pcre.value->match_partial(sbr.to_string_fragment());
return true;
}
@ -694,7 +682,7 @@ external_log_format::scan_for_partial(shared_buffer_ref& sbr,
return false;
}
len_out = pat->p_pcre->match_partial(pi);
len_out = pat->p_pcre.value->match_partial(sbr.to_string_fragment());
return (int) len_out > pat->p_timestamp_end;
}
@ -809,20 +797,24 @@ external_log_format::scan(logfile& lf,
return log_format::SCAN_MATCH;
}
pcre_input pi(sbr.get_data(), 0, sbr.length());
pcre_context_static<128> pc;
int curr_fmt = -1, orig_lock = this->last_pattern_index();
int pat_index = orig_lock;
auto line_sf = sbr.to_string_fragment();
while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) {
auto* fpat = this->elf_pattern_order[curr_fmt].get();
auto* pat = fpat->p_pcre.get();
auto* pat = fpat->p_pcre.value.get();
if (fpat->p_module_format) {
continue;
}
if (!pat->match(pc, pi, PCRE_NO_UTF8_CHECK)) {
auto md = pat->create_match_data();
auto match_res = pat->capture_from(line_sf)
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
if (!this->lf_pattern_locks.empty() && pat_index != -1) {
curr_fmt = -1;
pat_index = -1;
@ -830,41 +822,39 @@ external_log_format::scan(logfile& lf,
continue;
}
pcre_context::capture_t* ts = pc[fpat->p_timestamp_field_index];
pcre_context::capture_t* time_cap = pc[fpat->p_time_field_index];
pcre_context::capture_t* level_cap = pc[fpat->p_level_field_index];
pcre_context::capture_t* mod_cap = pc[fpat->p_module_field_index];
pcre_context::capture_t* opid_cap = pc[fpat->p_opid_field_index];
pcre_context::capture_t* body_cap = pc[fpat->p_body_field_index];
const char* ts_str = pi.get_substr_start(ts);
auto ts_str_len = ts->length();
auto ts = md[fpat->p_timestamp_field_index];
auto time_cap = md[fpat->p_time_field_index];
auto level_cap = md[fpat->p_level_field_index];
auto mod_cap = md[fpat->p_module_field_index];
auto opid_cap = md[fpat->p_opid_field_index];
auto body_cap = md[fpat->p_body_field_index];
const char* last;
struct exttm log_time_tm;
struct timeval log_tv;
uint8_t mod_index = 0, opid = 0;
char combined_datetime_buf[512];
if (time_cap != nullptr) {
ts_str_len = snprintf(combined_datetime_buf,
sizeof(combined_datetime_buf),
"%.*sT%.*s",
ts->length(),
ts_str,
time_cap->length(),
pi.get_substr_start(time_cap));
ts_str = combined_datetime_buf;
if (ts && time_cap) {
auto ts_str_len = snprintf(combined_datetime_buf,
sizeof(combined_datetime_buf),
"%.*sT%.*s",
ts->length(),
ts->data(),
time_cap->length(),
time_cap->data());
ts = string_fragment::from_bytes(combined_datetime_buf, ts_str_len);
}
if ((last = this->lf_date_time.scan(ts_str,
ts_str_len,
if ((last = this->lf_date_time.scan(ts->data(),
ts->length(),
this->get_timestamp_formats(),
&log_time_tm,
log_tv))
== nullptr)
{
this->lf_date_time.unlock();
if ((last = this->lf_date_time.scan(ts_str,
ts_str_len,
if ((last = this->lf_date_time.scan(ts->data(),
ts->length(),
this->get_timestamp_formats(),
&log_time_tm,
log_tv))
@ -874,7 +864,8 @@ external_log_format::scan(logfile& lf,
}
}
auto level = this->convert_level(pi, level_cap, &sbc);
auto level = this->convert_level(
level_cap.value_or(string_fragment::invalid()), &sbc);
this->lf_timestamp_flags = log_time_tm.et_flags;
@ -885,29 +876,27 @@ external_log_format::scan(logfile& lf,
this->check_for_new_year(dst, log_time_tm, log_tv);
}
if (opid_cap != nullptr && !opid_cap->empty()) {
auto opid_sf = pi.get_string_fragment(opid_cap);
if (opid_cap && !opid_cap->empty()) {
{
auto opid_iter = sbc.sbc_opids.find(opid_sf);
auto opid_iter = sbc.sbc_opids.find(opid_cap.value());
if (opid_iter == sbc.sbc_opids.end()) {
auto opid_copy = opid_sf.to_owned(sbc.sbc_allocator);
auto opid_copy = opid_cap->to_owned(sbc.sbc_allocator);
auto otr = opid_time_range{log_tv, log_tv};
sbc.sbc_opids.emplace(opid_copy, otr);
} else {
opid_iter->second.otr_end = log_tv;
}
}
opid = hash_str(pi.get_substr_start(opid_cap), opid_cap->length());
opid = hash_str(opid_cap->data(), opid_cap->length());
}
if (mod_cap != nullptr) {
intern_string_t mod_name = intern_string::lookup(
pi.get_substr_start(mod_cap), mod_cap->length());
if (mod_cap) {
intern_string_t mod_name = intern_string::lookup(mod_cap.value());
auto mod_iter = MODULE_FORMATS.find(mod_name);
if (mod_iter == MODULE_FORMATS.end()) {
mod_index = module_scan(pi, body_cap, mod_name);
mod_index = this->module_scan(body_cap.value(), mod_name);
mod_iter = MODULE_FORMATS.find(mod_name);
} else if (mod_iter->second.mf_mod_format) {
mod_index = mod_iter->second.mf_mod_format->lf_mod_index;
@ -918,24 +907,25 @@ external_log_format::scan(logfile& lf,
mod_iter->second.mf_mod_format);
if (mod_elf) {
pcre_context_static<128> mod_pc;
shared_buffer_ref body_ref;
body_cap->ltrim(sbr.get_data());
body_cap->trim();
pcre_input mod_pi(
pi.get_substr_start(body_cap), 0, body_cap->length());
int mod_pat_index = mod_elf->last_pattern_index();
auto& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index];
if (mod_pat.p_pcre->match(
mod_pc, mod_pi, PCRE_NO_UTF8_CHECK))
{
auto* mod_level_cap
= mod_pc[mod_pat.p_level_field_index];
auto mod_md = mod_pat.p_pcre.value->create_match_data();
auto match_res
= mod_pat.p_pcre.value->capture_from(body_cap.value())
.into(mod_md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
auto mod_level_cap
= mod_md[mod_pat.p_level_field_index];
level = mod_elf->convert_level(
mod_pi, mod_level_cap, &sbc);
mod_level_cap.value_or(string_fragment::invalid()),
&sbc);
}
}
}
@ -944,17 +934,17 @@ external_log_format::scan(logfile& lf,
for (auto value_index : fpat->p_numeric_value_indexes) {
const indexed_value_def& ivd = fpat->p_value_by_index[value_index];
const value_def& vd = *ivd.ivd_value_def;
pcre_context::capture_t* num_cap = pc[ivd.ivd_index];
auto num_cap = md[ivd.ivd_index];
if (num_cap != nullptr && num_cap->is_valid()) {
if (num_cap && num_cap->is_valid()) {
const struct scaling_factor* scaling = nullptr;
if (ivd.ivd_unit_field_index >= 0) {
auto unit_cap = pc[ivd.ivd_unit_field_index];
auto unit_cap = md[ivd.ivd_unit_field_index];
if (unit_cap != nullptr && unit_cap->is_valid()) {
intern_string_t unit_val = intern_string::lookup(
pi.get_substr_start(unit_cap), unit_cap->length());
if (unit_cap && unit_cap->is_valid()) {
intern_string_t unit_val
= intern_string::lookup(unit_cap.value());
auto unit_iter = vd.vd_unit_scaling.find(unit_val);
if (unit_iter != vd.vd_unit_scaling.end()) {
@ -966,7 +956,7 @@ external_log_format::scan(logfile& lf,
}
auto scan_res
= scn::scan_value<double>(pi.to_string_view(num_cap));
= scn::scan_value<double>(num_cap->to_string_view());
if (scan_res) {
auto dvalue = scan_res.value();
if (scaling != nullptr) {
@ -1012,15 +1002,12 @@ external_log_format::scan(logfile& lf,
}
uint8_t
external_log_format::module_scan(const pcre_input& pi,
pcre_context::capture_t* body_cap,
external_log_format::module_scan(string_fragment body_cap,
const intern_string_t& mod_name)
{
uint8_t mod_index;
body_cap->ltrim(pi.get_string());
pcre_input body_pi(pi.get_substr_start(body_cap), 0, body_cap->length());
body_cap.trim();
auto& ext_fmts = GRAPH_ORDERED_FORMATS;
pcre_context_static<128> pc;
module_format mf;
for (auto& elf : ext_fmts) {
@ -1034,7 +1021,12 @@ external_log_format::module_scan(const pcre_input& pi,
continue;
}
if (!pat->match(pc, body_pi)) {
auto md = pat.value->create_match_data();
auto match_res = pat.value->capture_from(body_cap)
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
continue;
}
@ -1063,10 +1055,7 @@ external_log_format::annotate(uint64_t line_number,
bool annotate_module) const
{
auto& line = values.lvv_sbr;
pcre_context_static<128> pc;
pcre_input pi(line.get_data(), 0, line.length());
struct line_range lr;
pcre_context::capture_t *cap, *body_cap, *module_cap = nullptr;
if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
values = this->jlf_line_values;
@ -1083,14 +1072,20 @@ external_log_format::annotate(uint64_t line_number,
int pat_index = this->pattern_index_for_line(line_number);
auto& pat = *this->elf_pattern_order[pat_index];
sa.reserve(pat.p_pcre->get_capture_count());
if (!pat.p_pcre->match(pc, pi, PCRE_NO_UTF8_CHECK)) {
sa.reserve(pat.p_pcre.value->get_capture_count());
auto md = pat.p_pcre.value->create_match_data();
auto match_res = pat.p_pcre.value->capture_from(line.to_string_fragment())
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
// A continued line still needs a body.
lr.lr_start = 0;
lr.lr_end = line.length();
sa.emplace_back(lr, SA_BODY.value());
if (!this->lf_multiline) {
auto len = pat.p_pcre->match_partial(pi);
auto len
= pat.p_pcre.value->match_partial(line.to_string_fragment());
sa.emplace_back(
line_range{(int) len, -1},
SA_INVALID.value("Log line does not match any pattern"));
@ -1098,45 +1093,43 @@ external_log_format::annotate(uint64_t line_number,
return;
}
nonstd::optional<string_fragment> module_cap;
if (!pat.p_module_format) {
cap = pc[pat.p_timestamp_field_index];
if (cap->is_valid()) {
lr.lr_start = cap->c_begin;
lr.lr_end = cap->c_end;
sa.emplace_back(lr, logline::L_TIMESTAMP.value());
auto ts_cap = md[pat.p_timestamp_field_index];
if (ts_cap) {
sa.emplace_back(to_line_range(ts_cap.value()),
logline::L_TIMESTAMP.value());
}
if (pat.p_module_field_index != -1) {
module_cap = pc[pat.p_module_field_index];
if (module_cap != nullptr && module_cap->is_valid()) {
lr.lr_start = module_cap->c_begin;
lr.lr_end = module_cap->c_end;
sa.emplace_back(lr, logline::L_MODULE.value());
module_cap = md[pat.p_module_field_index];
if (module_cap) {
sa.emplace_back(to_line_range(module_cap.value()),
logline::L_MODULE.value());
}
}
cap = pc[pat.p_opid_field_index];
if (cap != nullptr && cap->is_valid()) {
lr.lr_start = cap->c_begin;
lr.lr_end = cap->c_end;
sa.emplace_back(lr, logline::L_OPID.value());
auto opid_cap = md[pat.p_opid_field_index];
if (opid_cap) {
sa.emplace_back(to_line_range(opid_cap.value()),
logline::L_OPID.value());
}
}
body_cap = pc[pat.p_body_field_index];
auto body_cap = md[pat.p_body_field_index];
for (size_t lpc = 0; lpc < pat.p_value_by_index.size(); lpc++) {
const indexed_value_def& ivd = pat.p_value_by_index[lpc];
const struct scaling_factor* scaling = nullptr;
auto* cap = pc[ivd.ivd_index];
auto cap = md[ivd.ivd_index];
const auto& vd = *ivd.ivd_value_def;
if (ivd.ivd_unit_field_index >= 0) {
auto* unit_cap = pc[ivd.ivd_unit_field_index];
auto unit_cap = md[ivd.ivd_unit_field_index];
if (unit_cap != nullptr && unit_cap->c_begin != -1) {
intern_string_t unit_val = intern_string::lookup(
pi.get_substr_start(unit_cap), unit_cap->length());
if (unit_cap) {
intern_string_t unit_val
= intern_string::lookup(unit_cap.value());
auto unit_iter = vd.vd_unit_scaling.find(unit_val);
if (unit_iter != vd.vd_unit_scaling.end()) {
const struct scaling_factor& sf = unit_iter->second;
@ -1146,9 +1139,9 @@ external_log_format::annotate(uint64_t line_number,
}
}
if (cap->is_valid()) {
if (cap) {
values.lvv_values.emplace_back(
vd.vd_meta, line, line_range{cap->c_begin, cap->c_end});
vd.vd_meta, line, to_line_range(cap.value()));
values.lvv_values.back().apply_scaling(scaling);
} else {
values.lvv_values.emplace_back(vd.vd_meta);
@ -1159,11 +1152,8 @@ external_log_format::annotate(uint64_t line_number,
}
bool did_mod_annotate_body = false;
if (annotate_module && module_cap != nullptr && body_cap != nullptr
&& body_cap->is_valid())
{
intern_string_t mod_name = intern_string::lookup(
pi.get_substr_start(module_cap), module_cap->length());
if (annotate_module && module_cap && body_cap && body_cap->is_valid()) {
intern_string_t mod_name = intern_string::lookup(module_cap.value());
auto mod_iter = MODULE_FORMATS.find(mod_name);
if (mod_iter != MODULE_FORMATS.end()
@ -1171,9 +1161,9 @@ external_log_format::annotate(uint64_t line_number,
{
auto& mf = mod_iter->second;
body_cap->ltrim(line.get_data());
body_cap->trim();
auto narrow_res
= line.narrow(body_cap->c_begin, body_cap->length());
= line.narrow(body_cap->sf_begin, body_cap->length());
auto pre_mod_values_size = values.lvv_values.size();
auto pre_mod_sa_size = sa.size();
mf.mf_mod_format->annotate(line_number, sa, values, false);
@ -1181,19 +1171,18 @@ external_log_format::annotate(uint64_t line_number,
lpc < values.lvv_values.size();
lpc++)
{
values.lvv_values[lpc].lv_origin.shift(0, body_cap->c_begin);
values.lvv_values[lpc].lv_origin.shift(0, body_cap->sf_begin);
}
for (size_t lpc = pre_mod_sa_size; lpc < sa.size(); lpc++) {
sa[lpc].sa_range.shift(0, body_cap->c_begin);
sa[lpc].sa_range.shift(0, body_cap->sf_begin);
}
line.widen(narrow_res);
did_mod_annotate_body = true;
}
}
if (!did_mod_annotate_body) {
if (body_cap != nullptr && body_cap->is_valid()) {
lr.lr_start = body_cap->c_begin;
lr.lr_end = body_cap->c_end;
if (body_cap && body_cap->is_valid()) {
lr = to_line_range(body_cap.value());
} else {
lr.lr_start = line.length();
lr.lr_end = line.length();
@ -1282,26 +1271,21 @@ read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len)
jlu->jlu_format->lf_timestamp_flags
= tm_out.et_flags & ~ETF_MACHINE_ORIENTED;
jlu->jlu_base_line->set_time(tv_out);
} else if (!jlu->jlu_format->elf_level_pointer.empty()) {
pcre_context_static<30> pc;
pcre_input pi(field_name);
if (jlu->jlu_format->elf_level_pointer.match(
pc, pi, PCRE_NO_UTF8_CHECK))
} else if (jlu->jlu_format->elf_level_pointer.value != nullptr) {
if (jlu->jlu_format->elf_level_pointer.value
->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK)
.ignore_error()
.has_value())
{
pcre_input pi_level((const char*) str, 0, len);
pcre_context::capture_t level_cap = {0, (int) len};
jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level(
pi_level, &level_cap, jlu->jlu_batch_context));
string_fragment::from_bytes(str, len), jlu->jlu_batch_context));
}
} else if (jlu->jlu_format->elf_level_field == field_name) {
pcre_input pi((const char*) str, 0, len);
pcre_context::capture_t level_cap = {0, (int) len};
}
if (jlu->jlu_format->elf_level_field == field_name) {
jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level(
pi, &level_cap, jlu->jlu_batch_context));
} else if (jlu->jlu_format->elf_opid_field == field_name) {
string_fragment::from_bytes(str, len), jlu->jlu_batch_context));
}
if (jlu->jlu_format->elf_opid_field == field_name) {
uint8_t opid = hash_str((const char*) str, len);
jlu->jlu_base_line->set_opid(opid);
}
@ -1780,7 +1764,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
{
pattern& pat = *iter->second;
if (pat.p_pcre == nullptr) {
if (pat.p_pcre.value == nullptr) {
continue;
}
@ -1788,30 +1772,27 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
this->elf_has_module_format = true;
}
for (auto name_iter = pat.p_pcre->named_begin();
name_iter != pat.p_pcre->named_end();
++name_iter)
{
for (auto named_cap : pat.p_pcre.value->get_named_captures()) {
const intern_string_t name
= intern_string::lookup(name_iter->pnc_name, -1);
= intern_string::lookup(named_cap.get_name());
if (name == this->lf_timestamp_field) {
pat.p_timestamp_field_index = name_iter->index();
pat.p_timestamp_field_index = named_cap.get_index();
}
if (name == this->lf_time_field) {
pat.p_time_field_index = name_iter->index();
pat.p_time_field_index = named_cap.get_index();
}
if (name == this->elf_level_field) {
pat.p_level_field_index = name_iter->index();
pat.p_level_field_index = named_cap.get_index();
}
if (name == this->elf_module_id_field) {
pat.p_module_field_index = name_iter->index();
pat.p_module_field_index = named_cap.get_index();
}
if (name == this->elf_opid_field) {
pat.p_opid_field_index = name_iter->index();
pat.p_opid_field_index = named_cap.get_index();
}
if (name == this->elf_body_field) {
pat.p_body_field_index = name_iter->index();
pat.p_body_field_index = named_cap.get_index();
}
auto value_iter = this->elf_value_defs.find(name);
@ -1819,10 +1800,10 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
auto vd = value_iter->second;
indexed_value_def ivd;
ivd.ivd_index = name_iter->index();
ivd.ivd_index = named_cap.get_index();
if (!vd->vd_unit_field.empty()) {
ivd.ivd_unit_field_index
= pat.p_pcre->name_index(vd->vd_unit_field.get());
= pat.p_pcre.value->name_index(vd->vd_unit_field.get());
} else {
ivd.ivd_unit_field_index = -1;
}
@ -1927,18 +1908,17 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
bool found_in_pattern = false;
for (const auto& pat : this->elf_patterns) {
auto cap_index = pat.second->p_pcre->name_index(
auto cap_index = pat.second->p_pcre.value->name_index(
vd->vd_meta.lvm_name.get());
if (cap_index >= 0) {
found_in_pattern = true;
break;
}
for (auto name_iter = pat.second->p_pcre->named_begin();
name_iter != pat.second->p_pcre->named_end();
++name_iter)
for (auto named_cap :
pat.second->p_pcre.value->get_named_captures())
{
available_captures.insert(name_iter->pnc_name);
available_captures.insert(named_cap.get_name().to_string());
}
}
if (!found_in_pattern) {
@ -1986,7 +1966,9 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
for (const auto& td_pair : this->lf_tag_defs) {
const auto& td = td_pair.second;
if (td->ftd_pattern == nullptr || td->ftd_pattern->empty()) {
if (td->ftd_pattern.value == nullptr
|| td->ftd_pattern.value->get_pattern().empty())
{
errors.emplace_back(
lnav::console::user_message::error(
attr_line_t("invalid tag definition ")
@ -2017,8 +1999,6 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
for (auto& elf_sample : this->elf_samples) {
auto sample_lines
= string_fragment(elf_sample.s_line.pp_value).split_lines();
pcre_context_static<128> pc;
pcre_input pi(sample_lines[0]);
bool found = false;
for (auto pat_iter = this->elf_pattern_order.begin();
@ -2027,11 +2007,16 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
{
auto& pat = *(*pat_iter);
if (!pat.p_pcre) {
if (!pat.p_pcre.value) {
continue;
}
if (!pat.p_pcre->match(pc, pi)) {
auto md = pat.p_pcre.value->create_match_data();
auto match_res = pat.p_pcre.value->capture_from(sample_lines[0])
.into(md)
.matches()
.ignore_error();
if (!match_res) {
continue;
}
found = true;
@ -2040,23 +2025,21 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
continue;
}
if (pat.p_pcre->name_index(this->lf_timestamp_field.to_string())
if (pat.p_pcre.value->name_index(this->lf_timestamp_field.get())
< 0)
{
attr_line_t notes;
bool first_note = true;
if (pat.p_pcre->p_named_count > 0) {
if (pat.p_pcre.value->get_capture_count() > 0) {
notes.append("the following captures are available:\n ");
}
for (auto name_iter = pat.p_pcre->named_begin();
name_iter != pat.p_pcre->named_end();
++name_iter)
{
for (auto named_cap : pat.p_pcre.value->get_named_captures()) {
if (!first_note) {
notes.append(", ");
}
notes.append(lnav::roles::symbol(name_iter->pnc_name));
notes.append(
lnav::roles::symbol(named_cap.get_name().to_string()));
first_note = false;
}
errors.emplace_back(
@ -2075,21 +2058,23 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
continue;
}
const auto* ts_cap = pc[this->lf_timestamp_field.get()];
const auto* level_cap = pc[pat.p_level_field_index];
const char* ts = pi.get_substr_start(ts_cap);
auto ts_frag = pi.get_string_fragment(ts_cap);
ssize_t ts_len = pc[this->lf_timestamp_field.get()]->length();
const auto ts_cap = md[pat.p_timestamp_field_index];
const auto level_cap = md[pat.p_level_field_index];
const char* const* custom_formats = this->get_timestamp_formats();
date_time_scanner dts;
struct timeval tv;
struct exttm tm;
if (ts_cap->c_begin == 0) {
pat.p_timestamp_end = ts_cap->c_end;
if (ts_cap && ts_cap->sf_begin == 0) {
pat.p_timestamp_end = ts_cap->sf_end;
}
if (ts_len == -1
|| dts.scan(ts, ts_len, custom_formats, &tm, tv) == nullptr)
if (ts_cap
&& dts.scan(ts_cap->data(),
ts_cap->length(),
custom_formats,
&tm,
tv)
== nullptr)
{
attr_line_t notes;
@ -2100,9 +2085,10 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
{
off_t off = 0;
PTIMEC_FORMATS[lpc].pf_func(&tm, ts, off, ts_len);
PTIMEC_FORMATS[lpc].pf_func(
&tm, ts_cap->data(), off, ts_cap->length());
notes.append("\n ")
.append(ts_frag)
.append(ts_cap.value())
.append("\n")
.append(2 + off, ' ')
.append("^ "_snippet_border)
@ -2115,9 +2101,13 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
for (int lpc = 0; custom_formats[lpc] != nullptr; lpc++) {
off_t off = 0;
ptime_fmt(custom_formats[lpc], &tm, ts, off, ts_len);
ptime_fmt(custom_formats[lpc],
&tm,
ts_cap->data(),
off,
ts_cap->length());
notes.append("\n ")
.append(ts_frag)
.append(ts_cap.value())
.append("\n")
.append(2 + off, ' ')
.append("^ "_snippet_border)
@ -2132,7 +2122,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
attr_line_t("invalid sample log message: ")
.append(lnav::to_json(elf_sample.s_line.pp_value)))
.with_reason(attr_line_t("unrecognized timestamp -- ")
.append(ts_frag))
.append(ts_cap.value()))
.with_snippet(elf_sample.s_line.to_snippet())
.with_note(notes)
.with_help(attr_line_t("If the timestamp format is not "
@ -2142,7 +2132,8 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
.append(" property")));
}
log_level_t level = this->convert_level(pi, level_cap, nullptr);
log_level_t level = this->convert_level(
level_cap.value_or(string_fragment::invalid()), nullptr);
if (elf_sample.s_level != LEVEL_UNKNOWN
&& elf_sample.s_level != level)
@ -2153,8 +2144,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
.append(lnav::roles::symbol(pat.p_name.to_string()))
.append("\n")
.append("captured level = ")
.append_quoted(
pi.get_string_fragment(level_cap).to_string());
.append_quoted(level_cap->to_string());
errors.emplace_back(
lnav::console::user_message::error(
attr_line_t("invalid sample log message: ")
@ -2171,11 +2161,13 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
}
{
pcre_context_static<128> pc_full;
pcre_input pi_full(elf_sample.s_line.pp_value);
if (!pat.p_pcre->match(pc_full, pi_full)) {
attr_line_t regex_al = pat.p_pcre->get_pattern();
auto full_match_res
= pat.p_pcre.value->capture_from(elf_sample.s_line.pp_value)
.into(md)
.matches()
.ignore_error();
if (!full_match_res) {
attr_line_t regex_al = pat.p_pcre.value->get_pattern();
lnav::snippets::regex_highlighter(
regex_al, -1, line_range{0, (int) regex_al.length()});
errors.emplace_back(
@ -2194,14 +2186,14 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
.with_help(
attr_line_t("use ").append_quoted(".*").append(
" to match new-lines")));
} else if (static_cast<size_t>(pc_full.all()->length())
} else if (static_cast<size_t>(full_match_res->f_all.length())
!= elf_sample.s_line.pp_value.length())
{
attr_line_t regex_al = pat.p_pcre->get_pattern();
attr_line_t regex_al = pat.p_pcre.value->get_pattern();
lnav::snippets::regex_highlighter(
regex_al, -1, line_range{0, (int) regex_al.length()});
auto match_length
= static_cast<size_t>(pc_full.all()->length());
= static_cast<size_t>(full_match_res->f_all.length());
attr_line_t sample_al = elf_sample.s_line.pp_value;
sample_al.append("\n")
.append(match_length, ' ')
@ -2235,14 +2227,15 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
size_t max_name_width = 0;
for (const auto& pat_iter : this->elf_pattern_order) {
pattern& pat = *pat_iter;
auto& pat = *pat_iter;
if (!pat.p_pcre) {
if (!pat.p_pcre.value) {
continue;
}
partial_indexes.emplace_back(pat.p_pcre->match_partial(pi),
pat.p_name);
partial_indexes.emplace_back(
pat.p_pcre.value->match_partial(sample_lines[0]),
pat.p_name);
max_name_width = std::max(max_name_width, pat.p_name.size());
}
for (const auto& line_frag : sample_lines) {
@ -2273,7 +2266,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
attr_line_t regex_note;
for (const auto& pat_iter : this->elf_pattern_order) {
if (!pat_iter->p_pcre) {
if (!pat_iter->p_pcre.value) {
regex_note
.append(
lnav::roles::symbol(fmt::format(FMT_STRING("{:{}}"),
@ -2283,7 +2276,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
continue;
}
attr_line_t regex_al = pat_iter->p_pcre->get_pattern();
attr_line_t regex_al = pat_iter->p_pcre.value->get_pattern();
lnav::snippets::regex_highlighter(
regex_al, -1, line_range{0, (int) regex_al.length()});
@ -2444,20 +2437,13 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors)
attrs.ta_attrs |= A_BLINK;
}
if (hd.hd_pattern != nullptr) {
auto regex = pcrepp::shared_from_str(hd.hd_pattern->get_pattern(),
PCRE_CASELESS | PCRE_UTF8);
if (regex.isErr()) {
log_error("unable to recompile highlighter pattern");
} else {
this->lf_highlighters.emplace_back(regex.unwrap());
this->lf_highlighters.back()
.with_name(hd_pair.first.to_string())
.with_format_name(this->elf_name)
.with_color(fg, bg)
.with_attrs(attrs);
}
if (hd.hd_pattern.value != nullptr) {
this->lf_highlighters.emplace_back(hd.hd_pattern.value);
this->lf_highlighters.back()
.with_name(hd_pair.first.to_string())
.with_format_name(this->elf_name)
.with_color(fg, bg)
.with_attrs(attrs);
}
}
}
@ -2468,12 +2454,12 @@ external_log_format::register_vtabs(
std::vector<lnav::console::user_message>& errors)
{
for (auto& elf_search_table : this->elf_search_tables) {
if (elf_search_table.second.std_pattern == nullptr) {
if (elf_search_table.second.std_pattern.value == nullptr) {
continue;
}
auto lst = std::make_shared<log_search_table>(
*elf_search_table.second.std_pattern, elf_search_table.first);
elf_search_table.second.std_pattern.value, elf_search_table.first);
lst->lst_format = this;
lst->lst_log_path_glob = elf_search_table.second.std_glob;
if (elf_search_table.second.std_level != LEVEL_UNKNOWN) {
@ -2495,16 +2481,15 @@ external_log_format::match_samples(const std::vector<sample>& samples) const
{
for (const auto& sample_iter : samples) {
for (const auto& pat_iter : this->elf_pattern_order) {
pattern& pat = *pat_iter;
auto& pat = *pat_iter;
if (!pat.p_pcre) {
if (!pat.p_pcre.value) {
continue;
}
pcre_context_static<128> pc;
pcre_input pi(sample_iter.s_line.pp_value);
if (pat.p_pcre->match(pc, pi)) {
if (pat.p_pcre.value->find_in(sample_iter.s_line.pp_value)
.ignore_error())
{
return true;
}
}
@ -2522,7 +2507,7 @@ public:
void get_columns(std::vector<vtab_column>& cols) const override
{
const external_log_format& elf = this->elt_format;
const auto& elf = this->elt_format;
cols.resize(elf.elf_column_count);
for (const auto& vd : elf.elf_value_def_order) {
@ -2684,14 +2669,13 @@ external_log_format::specialized(int fmt_lock)
bool
external_log_format::match_name(const std::string& filename)
{
if (this->elf_file_pattern.empty()) {
if (this->elf_filename_pcre.value == nullptr) {
return true;
}
pcre_context_static<10> pc;
pcre_input pi(filename);
return this->elf_filename_pcre->match(pc, pi);
return this->elf_filename_pcre.value->find_in(filename)
.ignore_error()
.has_value();
}
bool
@ -2734,15 +2718,13 @@ external_log_format::value_line_count(const intern_string_t ist,
}
log_level_t
external_log_format::convert_level(const pcre_input& pi,
const pcre_context::capture_t* level_cap,
external_log_format::convert_level(string_fragment sf,
scan_batch_context* sbc) const
{
log_level_t retval = LEVEL_INFO;
if (level_cap != nullptr && level_cap->is_valid()) {
if (sf.is_valid()) {
if (sbc != nullptr && sbc->sbc_cached_level_count > 0) {
auto sf = pi.get_string_fragment(level_cap);
auto cached_level_iter
= std::find(std::begin(sbc->sbc_cached_level_strings),
std::begin(sbc->sbc_cached_level_strings)
@ -2765,16 +2747,14 @@ external_log_format::convert_level(const pcre_input& pi,
}
}
pcre_context_static<128> pc_level;
pcre_input pi_level(
pi.get_substr_start(level_cap), 0, level_cap->length());
if (this->elf_level_patterns.empty()) {
retval = string2level(pi_level.get_string(), level_cap->length());
retval = string2level(sf.data(), sf.length());
} else {
for (const auto& elf_level_pattern : this->elf_level_patterns) {
if (elf_level_pattern.second.lp_pcre->match(
pc_level, pi_level, PCRE_NO_UTF8_CHECK))
if (elf_level_pattern.second.lp_pcre.value
->find_in(sf, PCRE2_NO_UTF_CHECK)
.ignore_error()
.has_value())
{
retval = elf_level_pattern.first;
break;
@ -2782,7 +2762,7 @@ external_log_format::convert_level(const pcre_input& pi,
}
}
if (sbc != nullptr && level_cap->length() < 10) {
if (sbc != nullptr && sf.length() < 10) {
size_t cache_index;
if (sbc->sbc_cached_level_count == 4) {
@ -2791,8 +2771,7 @@ external_log_format::convert_level(const pcre_input& pi,
cache_index = sbc->sbc_cached_level_count;
sbc->sbc_cached_level_count += 1;
}
sbc->sbc_cached_level_strings[cache_index]
= std::string(pi_level.get_string(), pi_level.pi_length);
sbc->sbc_cached_level_strings[cache_index] = sf.to_string();
sbc->sbc_cached_level_values[cache_index] = retval;
}
}

@ -57,7 +57,7 @@
#include "log_format_fwd.hh"
#include "log_level.hh"
#include "optional.hpp"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "shared_buffer.hh"
struct sqlite3;
@ -522,29 +522,32 @@ protected:
static std::vector<std::shared_ptr<log_format>> lf_root_formats;
struct pcre_format {
explicit pcre_format(const char* regex)
: name(regex), pcre(regex),
pf_timestamp_index(this->pcre.name_index("timestamp"))
template<typename T, std::size_t N>
explicit pcre_format(const T (&regex)[N])
: name(regex),
pcre(lnav::pcre2pp::code::from_const(regex).to_shared()),
pf_timestamp_index(this->pcre->name_index("timestamp"))
{
}
pcre_format() : name(nullptr), pcre("") {}
pcre_format() = default;
const char* name;
pcrepp pcre;
const char* name{nullptr};
std::shared_ptr<lnav::pcre2pp::code> pcre;
int pf_timestamp_index{-1};
};
static bool next_format(pcre_format* fmt, int& index, int& locked_index);
const char* log_scanf(uint32_t line_number,
const char* line,
size_t len,
string_fragment line,
pcre_format* fmt,
const char* time_fmt[],
struct exttm* tm_out,
struct timeval* tv_out,
...);
string_fragment* ts_out,
nonstd::optional<string_fragment>* level_out);
};
#endif

@ -35,6 +35,7 @@
#include <unordered_map>
#include "log_format.hh"
#include "log_search_table_fwd.hh"
#include "yajlpp/yajlpp.hh"
class module_format;
@ -99,7 +100,9 @@ public:
struct pattern {
intern_string_t p_name;
std::string p_config_path;
std::shared_ptr<pcrepp_with_options<PCRE_DOTALL>> p_pcre;
factory_container<lnav::pcre2pp::code,
int>::with_default_args<PCRE2_DOTALL>
p_pcre;
std::vector<indexed_value_def> p_value_by_index;
std::vector<int> p_numeric_value_indexes;
int p_timestamp_field_index{-1};
@ -113,8 +116,7 @@ public:
};
struct level_pattern {
std::string lp_regex;
std::shared_ptr<pcrepp> lp_pcre;
factory_container<lnav::pcre2pp::code> lp_pcre;
};
struct yajl_handle_deleter {
@ -273,7 +275,7 @@ public:
};
struct highlighter_def {
std::shared_ptr<pcrepp> hd_pattern;
factory_container<lnav::pcre2pp::code> hd_pattern;
positioned_property<std::string> hd_color;
positioned_property<std::string> hd_background_color;
bool hd_underline{false};
@ -309,11 +311,10 @@ public:
return "";
}
int pat_index = this->pattern_index_for_line(line_number);
return this->elf_pattern_order[pat_index]->p_pcre->get_pattern();
return this->elf_pattern_order[pat_index]->p_pcre.value->get_pattern();
}
log_level_t convert_level(const pcre_input& pi,
const pcre_context::capture_t* level_cap,
log_level_t convert_level(string_fragment str,
scan_batch_context* sbc) const;
using mod_map_t = std::map<intern_string_t, module_format>;
@ -325,9 +326,8 @@ public:
std::vector<ghc::filesystem::path> elf_format_source_order;
std::map<intern_string_t, int> elf_format_sources;
std::list<intern_string_t> elf_collision;
std::string elf_file_pattern;
std::set<file_format_t> elf_mime_types;
std::shared_ptr<pcrepp> elf_filename_pcre;
factory_container<lnav::pcre2pp::code> elf_filename_pcre;
std::map<std::string, std::shared_ptr<pattern>> elf_patterns;
std::vector<std::shared_ptr<pattern>> elf_pattern_order;
std::vector<sample> elf_samples;
@ -338,7 +338,7 @@ public:
int elf_column_count{0};
double elf_timestamp_divisor{1.0};
intern_string_t elf_level_field;
pcrepp elf_level_pointer;
factory_container<lnav::pcre2pp::code> elf_level_pointer;
intern_string_t elf_body_field;
intern_string_t elf_module_id_field;
intern_string_t elf_opid_field;
@ -348,10 +348,12 @@ public:
bool elf_has_module_format{false};
bool elf_builtin_format{false};
using search_table_pcre2pp
= factory_container<lnav::pcre2pp::code, int>::with_default_args<
log_search_table_ns::PATTERN_OPTIONS>;
struct search_table_def {
std::shared_ptr<
pcrepp_with_options<PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL>>
std_pattern;
search_table_pcre2pp std_pattern;
std::string std_glob;
log_level_t std_level{LEVEL_UNKNOWN};
};
@ -410,8 +412,7 @@ public:
private:
const intern_string_t elf_name;
static uint8_t module_scan(const pcre_input& pi,
pcre_context::capture_t* body_cap,
static uint8_t module_scan(string_fragment body_cap,
const intern_string_t& mod_name);
};

@ -32,6 +32,8 @@
#ifndef lnav_log_format_fwd_hh
#define lnav_log_format_fwd_hh
#include <utility>
#include <sys/types.h>
#include "ArenaAlloc/arenaalloc.h"
@ -39,9 +41,10 @@
#include "base/string_attr_type.hh"
#include "byte_array.hh"
#include "log_level.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "ptimec.hh"
#include "robin_hood/robin_hood.h"
#include "yajlpp/yajlpp.hh"
class log_format;
@ -303,7 +306,7 @@ private:
};
struct format_tag_def {
format_tag_def(std::string name) : ftd_name(name) {}
explicit format_tag_def(std::string name) : ftd_name(std::move(name)) {}
struct path_restriction {
std::string p_glob;
@ -314,7 +317,8 @@ struct format_tag_def {
std::string ftd_name;
std::string ftd_description;
std::vector<path_restriction> ftd_paths;
std::shared_ptr<pcrepp_with_options<PCRE_DOTALL>> ftd_pattern;
factory_container<lnav::pcre2pp::code, int>::with_default_args<PCRE2_DOTALL>
ftd_pattern;
log_level_t ftd_level{LEVEL_UNKNOWN};
};

@ -41,59 +41,10 @@
#include "config.h"
#include "formats/logfmt/logfmt.parser.hh"
#include "log_vtab_impl.hh"
#include "pcrepp/pcrepp.hh"
#include "sql_util.hh"
#include "yajlpp/yajlpp.hh"
static const pcrepp RDNS_PATTERN(
"^(?:com|net|org|edu|[a-z][a-z])"
"(\\.\\w+)+(.+)");
/**
* Attempt to scrub a reverse-DNS string.
*
* @param str The string to scrub. If the string looks like a reverse-DNS
* string, the leading components of the name will be reduced to a single
* letter. For example, "com.example.foo" will be reduced to "c.e.foo".
* @return The scrubbed version of the input string or the original string
* if it is not a reverse-DNS string.
*/
static std::string
scrub_rdns(const std::string& str)
{
pcre_context_static<30> context;
pcre_input input(str);
std::string retval;
if (RDNS_PATTERN.match(context, input)) {
pcre_context::capture_t* cap;
cap = context.begin();
for (int index = 0; index < cap->c_begin; index++) {
if (index == 0 || str[index - 1] == '.') {
if (index > 0) {
retval.append(1, '.');
}
retval.append(1, str[index]);
}
}
retval += input.get_substr(cap);
retval += input.get_substr(cap + 1);
} else {
retval = str;
}
return retval;
}
class generic_log_format : public log_format {
static pcrepp& scrub_pattern()
{
static pcrepp SCRUB_PATTERN(
"\\d+-(\\d+-\\d+ \\d+:\\d+:\\d+(?:,\\d+)?:)\\w+:(.*)");
return SCRUB_PATTERN;
}
static pcre_format* get_pcre_log_formats()
{
static pcre_format log_fmt[] = {
@ -140,23 +91,6 @@ class generic_log_format : public log_format {
return intern_string::lookup("generic_log");
}
void scrub(std::string& line) override
{
pcre_context_static<30> context;
pcre_input pi(line);
std::string new_line;
if (scrub_pattern().match(context, pi)) {
pcre_context::capture_t* cap;
for (cap = context.begin(); cap != context.end(); cap++) {
new_line += scrub_rdns(pi.get_substr(cap));
}
line = new_line;
}
}
scan_result_t scan(logfile& lf,
std::vector<logline>& dst,
const line_info& li,
@ -165,12 +99,12 @@ class generic_log_format : public log_format {
{
struct exttm log_time;
struct timeval log_tv;
pcre_context::capture_t ts, level;
string_fragment ts;
nonstd::optional<string_fragment> level;
const char* last_pos;
if ((last_pos = this->log_scanf(dst.size(),
sbr.get_data(),
sbr.length(),
sbr.to_string_fragment(),
get_pcre_log_formats(),
nullptr,
&log_time,
@ -180,8 +114,10 @@ class generic_log_format : public log_format {
&level))
!= nullptr)
{
const char* level_str = &sbr.get_data()[level.c_begin];
log_level_t level_val = string2level(level_str, level.length());
log_level_t level_val = log_level_t::LEVEL_UNKNOWN;
if (level) {
level_val = string2level(level->data(), level->length());
}
if (!((log_time.et_flags & ETF_DAY_SET)
&& (log_time.et_flags & ETF_MONTH_SET)
@ -204,26 +140,28 @@ class generic_log_format : public log_format {
{
auto& line = values.lvv_sbr;
int pat_index = this->pattern_index_for_line(line_number);
pcre_format& fmt = get_pcre_log_formats()[pat_index];
struct line_range lr;
auto& fmt = get_pcre_log_formats()[pat_index];
int prefix_len = 0;
pcre_input pi(line.get_data(), 0, line.length());
pcre_context_static<30> pc;
if (!fmt.pcre.match(pc, pi)) {
auto md = fmt.pcre->create_match_data();
auto match_res = fmt.pcre->capture_from(line.to_string_fragment())
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
return;
}
lr.lr_start = pc[0]->c_begin;
lr.lr_end = pc[0]->c_end;
auto lr = to_line_range(md[fmt.pf_timestamp_index].value());
sa.emplace_back(lr, logline::L_TIMESTAMP.value());
const char* level = &line.get_data()[pc[1]->c_begin];
if (string2level(level, pc[1]->length(), true) == LEVEL_UNKNOWN) {
prefix_len = pc[0]->c_end;
} else {
prefix_len = pc[1]->c_end;
prefix_len = lr.lr_end;
auto level_cap = md[2];
if (level_cap) {
if (string2level(level_cap->data(), level_cap->length(), true)
!= LEVEL_UNKNOWN)
{
prefix_len = level_cap->sf_end;
}
}
lr.lr_start = 0;
@ -377,15 +315,9 @@ struct separated_string {
size_t index() const { return this->i_index; }
};
iterator begin()
{
return {*this, this->ss_str};
}
iterator begin() { return {*this, this->ss_str}; }
iterator end()
{
return {*this, this->ss_str + this->ss_len};
}
iterator end() { return {*this, this->ss_str + this->ss_len}; }
};
class bro_log_format : public log_format {
@ -472,7 +404,8 @@ public:
string_fragment sf = *iter;
if (this->lf_date_time.scan(
sf.data(), sf.length(), nullptr, &tm, tv)) {
sf.data(), sf.length(), nullptr, &tm, tv))
{
this->lf_timestamp_flags = tm.et_flags;
found_ts = true;
}
@ -527,18 +460,19 @@ public:
shared_buffer_ref& sbr,
scan_batch_context& sbc) override
{
static const pcrepp SEP_RE(R"(^#separator\s+(.+))");
static const auto SEP_RE
= lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
if (!this->blf_format_name.empty()) {
return this->scan_int(dst, li, sbr);
}
if (dst.empty() || dst.size() > 20 || sbr.empty()
|| sbr.get_data()[0] == '#') {
|| sbr.get_data()[0] == '#')
{
return SCAN_NO_MATCH;
}
pcre_context_static<20> pc;
auto line_iter = dst.begin();
auto read_result = lf.read_line(line_iter);
@ -547,16 +481,19 @@ public:
}
auto line = read_result.unwrap();
pcre_input pi(line.get_data(), 0, line.length());
auto md = SEP_RE.create_match_data();
if (!SEP_RE.match(pc, pi)) {
auto match_res = SEP_RE.capture_from(line.to_string_fragment())
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!match_res) {
return SCAN_NO_MATCH;
}
this->clear();
auto sep
= from_escaped_string(pi.get_substr_start(pc[0]), pc[0]->length());
auto sep = from_escaped_string(md[1]->data(), md[1]->length());
this->blf_separator = intern_string::lookup(sep);
for (++line_iter; line_iter != dst.end(); ++line_iter) {
@ -897,7 +834,8 @@ struct ws_separated_string {
this->i_pos = this->i_next_pos;
while (this->i_pos < (ss.ss_str + ss.ss_len)
&& isspace(*this->i_pos)) {
&& isspace(*this->i_pos))
{
this->i_pos += 1;
this->i_next_pos += 1;
}
@ -1070,7 +1008,8 @@ public:
sbr_sf.length(),
nullptr,
&tm,
tv)) {
tv))
{
this->lf_date_time.set_base_time(tv.tv_sec,
tm.et_tm);
this->wlf_time_scanner.set_base_time(tv.tv_sec,
@ -1088,7 +1027,8 @@ public:
|| F_DATE_UTC == fd.fd_name)
{
if (this->lf_date_time.scan(
sf.data(), sf.length(), nullptr, &date_tm, date_tv)) {
sf.data(), sf.length(), nullptr, &date_tm, date_tv))
{
this->lf_timestamp_flags |= date_tm.et_flags;
found_date = true;
}
@ -1173,7 +1113,8 @@ public:
}
if (dst.empty() || dst.size() > 20 || sbr.empty()
|| sbr.get_data()[0] == '#') {
|| sbr.get_data()[0] == '#')
{
return SCAN_NO_MATCH;
}
@ -1598,8 +1539,7 @@ struct logfmt_pair_handler {
date_time_scanner& lph_dt_scanner;
bool lph_found_time{false};
struct exttm lph_time_tm {
};
struct exttm lph_time_tm {};
struct timeval lph_tv {
0, 0
};

@ -57,9 +57,7 @@
#include "yajlpp/yajlpp.hh"
#include "yajlpp/yajlpp_def.hh"
static void extract_metadata(const char* contents,
size_t len,
struct script_metadata& meta_out);
static void extract_metadata(string_fragment, struct script_metadata& meta_out);
using log_formats_map_t
= std::map<intern_string_t, std::shared_ptr<external_log_format>>;
@ -276,30 +274,7 @@ read_format_field(yajlpp_parse_context* ypc,
leading_slash ? len - 1 : len);
auto field_name = ypc->get_path_fragment(1);
if (field_name == "file-pattern") {
try {
elf->elf_file_pattern = value;
elf->elf_filename_pcre
= std::make_shared<pcrepp>(elf->elf_file_pattern);
} catch (const pcrepp::error& e) {
pcrepp::compile_error ce;
ce.ce_msg = e.what();
ce.ce_offset = e.e_offset;
ypc->ypc_current_handler->report_regex_value_error(ypc, value, ce);
}
} else if (field_name == "level-pointer") {
auto pcre_res = pcrepp::from_str(value);
if (pcre_res.isErr()) {
auto pcre_error = pcre_res.unwrapErr();
ypc->ypc_current_handler->report_regex_value_error(
ypc, value, pcre_error);
} else {
elf->elf_level_pointer = pcre_res.unwrap();
}
} else if (field_name == "timestamp-format") {
if (field_name == "timestamp-format") {
elf->lf_timestamp_format.push_back(intern_string::lookup(value)->get());
} else if (field_name == "module-field") {
elf->elf_module_id_field = intern_string::lookup(value);
@ -321,17 +296,20 @@ read_levels(yajlpp_parse_context* ypc, const unsigned char* str, size_t len)
auto regex = std::string((const char*) str, len);
auto level_name_or_number = ypc->get_path_fragment(2);
log_level_t level = string2level(level_name_or_number.c_str());
elf->elf_level_patterns[level].lp_regex = regex;
try {
elf->elf_level_patterns[level].lp_pcre
= std::make_shared<pcrepp>(regex);
} catch (const pcrepp::error& e) {
pcrepp::compile_error ce;
ce.ce_msg = e.what();
ce.ce_offset = e.e_offset;
ypc->ypc_current_handler->report_regex_value_error(ypc, regex, ce);
auto value_frag = string_fragment::from_bytes(str, len);
auto compile_res = lnav::pcre2pp::code::from(value_frag);
if (compile_res.isErr()) {
static const intern_string_t PATTERN_SRC
= intern_string::lookup("pattern");
auto ce = compile_res.unwrapErr();
ypc->ypc_current_handler->report_error(
ypc,
value_frag.to_string(),
lnav::console::to_user_message(PATTERN_SRC, ce));
} else {
elf->elf_level_patterns[level].lp_pcre.value
= compile_res.unwrap().to_shared();
}
return 1;
@ -781,7 +759,9 @@ static struct json_path_container action_def_handlers = {
};
static struct json_path_container action_handlers = {
json_path_handler(pcrepp("(?<action_name>\\w+)"), read_action_def)
json_path_handler(
lnav::pcre2pp::code::from_const("(?<action_name>\\w+)").to_shared(),
read_action_def)
.with_children(action_def_handlers),
};
@ -849,9 +829,10 @@ struct json_path_container format_handlers = {
.with_synopsis("<number>")
.with_description(
"The value to divide a numeric timestamp by in a JSON log."),
json_path_handler("file-pattern", read_format_field)
json_path_handler("file-pattern")
.with_description("A regular expression that restricts this format to "
"log files with a matching name"),
"log files with a matching name")
.for_field(&external_log_format::elf_filename_pcre),
json_path_handler("mime-types#", read_format_field)
.with_description("A list of mime-types this format should be used for")
.with_enum_values(MIME_TYPE_ENUM),
@ -859,9 +840,10 @@ struct json_path_container format_handlers = {
.with_description(
"The name of the level field in the log message pattern")
.for_field(&external_log_format::elf_level_field),
json_path_handler("level-pointer", read_format_field)
json_path_handler("level-pointer")
.with_description("A regular-expression that matches the JSON-pointer "
"of the level property"),
"of the level property")
.for_field(&external_log_format::elf_level_pointer),
json_path_handler("timestamp-field", read_format_field)
.with_description(
"The name of the timestamp field in the log message pattern")
@ -876,7 +858,8 @@ struct json_path_container format_handlers = {
.with_description(
"The name of the body field in the log message pattern")
.for_field(&external_log_format::elf_body_field),
json_path_handler("url", pcrepp("^url#?"))
json_path_handler("url",
lnav::pcre2pp::code::from_const("^url#?").to_shared())
.add_cb(read_format_field)
.with_description("A URL with more information about this log format"),
json_path_handler("title", read_format_field)
@ -1037,7 +1020,7 @@ write_sample_file()
auto_fd script_fd;
struct stat st;
extract_metadata(sf.data(), sf.length(), meta);
extract_metadata(sf, meta);
auto path
= fmt::format(FMT_STRING("formats/default/{}.lnav"), meta.sm_name);
auto script_path = lnav::paths::dotlnav() / path;
@ -1346,23 +1329,24 @@ load_format_extra(sqlite3* db,
}
static void
extract_metadata(const char* contents,
size_t len,
struct script_metadata& meta_out)
extract_metadata(string_fragment contents, struct script_metadata& meta_out)
{
static const pcrepp SYNO_RE("^#\\s+@synopsis:(.*)$", PCRE_MULTILINE);
static const pcrepp DESC_RE("^#\\s+@description:(.*)$", PCRE_MULTILINE);
pcre_input pi(contents, 0, len);
pcre_context_static<16> pc;
pi.reset(contents, 0, len);
if (SYNO_RE.match(pc, pi)) {
meta_out.sm_synopsis = trim(pi.get_substr(pc[0]));
static const auto SYNO_RE = lnav::pcre2pp::code::from_const(
"^#\\s+@synopsis:(.*)$", PCRE2_MULTILINE);
static const auto DESC_RE = lnav::pcre2pp::code::from_const(
"^#\\s+@description:(.*)$", PCRE2_MULTILINE);
auto syno_md = SYNO_RE.create_match_data();
auto syno_match_res
= SYNO_RE.capture_from(contents).into(syno_md).matches().ignore_error();
if (syno_match_res) {
meta_out.sm_synopsis = syno_md[1]->trim().to_string();
}
pi.reset(contents, 0, len);
if (DESC_RE.match(pc, pi)) {
meta_out.sm_description = trim(pi.get_substr(pc[0]));
auto desc_md = DESC_RE.create_match_data();
auto desc_match_res
= DESC_RE.capture_from(contents).into(desc_md).matches().ignore_error();
if (desc_match_res) {
meta_out.sm_description = desc_md[1]->trim().to_string();
}
if (!meta_out.sm_synopsis.empty()) {
@ -1390,7 +1374,7 @@ extract_metadata_from_file(struct script_metadata& meta_inout)
size_t len;
len = fread(buffer, 1, sizeof(buffer), fp.in());
extract_metadata(buffer, len, meta_inout);
extract_metadata(string_fragment::from_bytes(buffer, len), meta_inout);
}
}

@ -36,8 +36,10 @@
const static std::string MATCH_INDEX = "match_index";
static auto match_index_name = intern_string::lookup("match_index");
log_search_table::log_search_table(pcrepp pattern, intern_string_t table_name)
: log_vtab_impl(table_name), lst_regex(std::move(pattern))
log_search_table::log_search_table(std::shared_ptr<lnav::pcre2pp::code> code,
intern_string_t table_name)
: log_vtab_impl(table_name), lst_regex(code),
lst_match_data(this->lst_regex->create_match_data())
{
}
@ -65,20 +67,18 @@ log_search_table::get_columns_int(std::vector<vtab_column>& cols) const
this->lst_column_metas.emplace_back(
match_index_name, value_kind_t::VALUE_INTEGER, cols.size());
cols.emplace_back(MATCH_INDEX, SQLITE_INTEGER);
for (int lpc = 0; lpc < this->lst_regex.get_capture_count(); lpc++) {
cn.add_column(string_fragment::from_const("__all__"));
auto captures = this->lst_regex->get_captures();
for (int lpc = 0; lpc < this->lst_regex->get_capture_count(); lpc++) {
std::string collator;
std::string colname;
int sqlite_type = SQLITE3_TEXT;
colname = cn.add_column(string_fragment::from_c_str(
this->lst_regex.name_for_capture(lpc)))
.to_string();
if (this->lst_regex.captures().size()
== (size_t) this->lst_regex.get_capture_count())
{
auto iter = this->lst_regex.cap_begin() + lpc;
auto cap_re = this->lst_regex.get_pattern().substr(iter->c_begin,
iter->length());
auto colname
= cn.add_column(string_fragment::from_c_str(
this->lst_regex->get_name_for_capture(lpc + 1)))
.to_string();
if (captures.size() == (size_t) this->lst_regex->get_capture_count()) {
auto cap_re = captures[lpc].to_string();
sqlite_type = guess_type_from_pcre(cap_re, collator);
switch (sqlite_type) {
case SQLITE_FLOAT:
@ -119,18 +119,24 @@ log_search_table::next(log_cursor& lc, logfile_sub_source& lss)
this->lst_line_values_cache.lvv_values.clear();
if (this->lst_match_index >= 0) {
if (this->lst_regex.match(
this->lst_match_context, this->lst_input, PCRE_NO_UTF8_CHECK))
{
auto match_res = this->lst_regex->capture_from(this->lst_content)
.at(this->lst_remaining)
.into(this->lst_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
#if 0
log_debug("matched within line: %d",
this->lst_match_context.get_count());
#endif
this->lst_remaining = match_res->f_remaining;
this->lst_match_index += 1;
return true;
}
// log_debug("done matching message");
this->lst_remaining.clear();
this->lst_match_index = -1;
return false;
}
@ -163,17 +169,20 @@ log_search_table::next(log_cursor& lc, logfile_sub_source& lss)
lf->read_full_message(lf_iter, this->lst_line_values_cache.lvv_sbr);
lf->get_format()->annotate(
cl, this->vi_attrs, this->lst_line_values_cache, false);
this->lst_input.reset(this->lst_line_values_cache.lvv_sbr.get_data(),
0,
this->lst_line_values_cache.lvv_sbr.length());
this->lst_content
= this->lst_line_values_cache.lvv_sbr.to_string_fragment();
auto match_res = this->lst_regex->capture_from(this->lst_content)
.into(this->lst_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (!this->lst_regex.match(
this->lst_match_context, this->lst_input, PCRE_NO_UTF8_CHECK))
{
if (!match_res) {
this->lst_mismatch_bitmap.set_bit(lc.lc_curr_line);
return false;
}
this->lst_remaining = match_res->f_remaining;
this->lst_match_index = 0;
return true;
@ -191,13 +200,13 @@ log_search_table::extract(logfile* lf,
values.lvv_values.emplace_back(
this->lst_column_metas[this->lst_format_column_count],
this->lst_match_index);
for (int lpc = 0; lpc < this->lst_regex.get_capture_count(); lpc++) {
const auto* cap = this->lst_match_context[lpc];
if (cap->is_valid()) {
for (int lpc = 0; lpc < this->lst_regex->get_capture_count(); lpc++) {
const auto cap = this->lst_match_data[lpc + 1];
if (cap) {
values.lvv_values.emplace_back(
this->lst_column_metas[this->lst_format_column_count + 1 + lpc],
line,
line_range{cap->c_begin, cap->c_end});
to_line_range(cap.value()));
} else {
values.lvv_values.emplace_back(
this->lst_column_metas[this->lst_format_column_count + 1

@ -36,14 +36,13 @@
#include <vector>
#include "log_vtab_impl.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "shared_buffer.hh"
class log_search_table : public log_vtab_impl {
public:
static int pattern_options() { return PCRE_CASELESS | PCRE_MULTILINE; }
log_search_table(pcrepp pattern, intern_string_t table_name);
log_search_table(std::shared_ptr<lnav::pcre2pp::code> code,
intern_string_t table_name);
void get_primary_keys(std::vector<std::string>& keys_out) const override;
@ -65,13 +64,14 @@ public:
uint64_t line_number,
logline_value_vector& values) override;
pcrepp lst_regex;
std::shared_ptr<lnav::pcre2pp::code> lst_regex;
lnav::pcre2pp::match_data lst_match_data;
string_fragment lst_content;
string_fragment lst_remaining;
log_format* lst_format{nullptr};
mutable size_t lst_format_column_count{0};
std::string lst_log_path_glob;
nonstd::optional<log_level_t> lst_log_level;
pcre_input lst_input{""};
pcre_context_static<128> lst_match_context;
mutable std::vector<logline_value_meta> lst_column_metas;
int64_t lst_match_index{-1};
mutable std::vector<vtab_column> lst_cols;

@ -0,0 +1,40 @@
/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lnav_log_search_table_fwd_hh
#define lnav_log_search_table_fwd_hh
#include "pcrepp/pcre2pp.hh"
// Constants for search tables, kept in their own namespace.
namespace log_search_table_ns {
// PCRE2 compile options: case-insensitive matching, '^'/'$' match at line
// boundaries, and '.' also matches newlines.
// NOTE(review): presumably used when compiling patterns for search tables --
// no use is visible here, confirm against the callers.
static constexpr int PATTERN_OPTIONS
= PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_DOTALL;
}
#endif

@ -1195,13 +1195,15 @@ log_cursor::string_constraint::string_constraint(unsigned char op,
: sc_op(op), sc_value(std::move(value))
{
if (op == SQLITE_INDEX_CONSTRAINT_REGEXP) {
try {
this->sc_pattern
= std::make_shared<pcrepp>(this->sc_value, PCRE_UTF8);
} catch (const pcrepp::error& err) {
auto compile_res = lnav::pcre2pp::code::from(value);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
log_error("unable to compile regexp constraint: %s -- %s",
this->sc_value.c_str(),
err.e_msg.c_str());
ce.get_message().c_str());
} else {
this->sc_pattern = compile_res.unwrap().to_shared();
}
}
}
@ -1230,10 +1232,9 @@ log_cursor::string_constraint::matches(const std::string& sf) const
return sqlite3_strglob(this->sc_value.c_str(), sf.data()) == 0;
case SQLITE_INDEX_CONSTRAINT_REGEXP: {
if (this->sc_pattern != nullptr) {
pcre_context_static<30> pc;
pcre_input pi(sf);
return this->sc_pattern->match(pc, pi, PCRE_NO_UTF8_CHECK);
return this->sc_pattern->find_in(sf, PCRE2_NO_UTF_CHECK)
.ignore_error()
.has_value();
}
// return true here so that the regexp is actually run and fails
return true;

@ -37,6 +37,7 @@
#include <sqlite3.h>
#include "logfile_sub_source.hh"
#include "pcrepp/pcre2pp.hh"
#include "robin_hood/robin_hood.h"
class textview_curses;
@ -65,7 +66,7 @@ struct log_cursor {
struct string_constraint {
unsigned char sc_op;
std::string sc_value;
std::shared_ptr<pcrepp> sc_pattern;
std::shared_ptr<lnav::pcre2pp::code> sc_pattern;
string_constraint(unsigned char op, std::string value);

@ -563,6 +563,12 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
.unwrapOr(text_format_t::TF_UNKNOWN);
log_debug("setting text format to %d", this->lf_text_format);
}
if (!li.li_valid_utf
&& this->lf_text_format != text_format_t::TF_MARKDOWN
&& this->lf_text_format != text_format_t::TF_LOG)
{
this->lf_text_format = text_format_t::TF_BINARY;
}
auto read_result
= this->lf_line_buffer.read_range(li.li_file_range);
@ -634,9 +640,10 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
continue;
}
pcre_context_static<30> pc;
pcre_input pi(sf);
if (td->ftd_pattern->match(pc, pi, PCRE_NO_UTF8_CHECK))
if (td->ftd_pattern.value
->find_in(sf, PCRE2_NO_UTF_CHECK)
.ignore_error()
.has_value())
{
curr_ll->set_mark(true);
while (curr_ll->is_continued()) {

@ -85,8 +85,9 @@ public:
pcre_filter(type_t type,
const std::string& id,
size_t index,
std::shared_ptr<pcrepp> code)
: text_filter(type, filter_lang_t::REGEX, id, index), pf_pcre(code)
std::shared_ptr<lnav::pcre2pp::code> code)
: text_filter(type, filter_lang_t::REGEX, id, index),
pf_pcre(std::move(code))
{
}
@ -96,10 +97,9 @@ public:
logfile::const_iterator ll,
shared_buffer_ref& line) override
{
pcre_context_static<30> pc;
pcre_input pi(line.get_data(), 0, line.length());
return this->pf_pcre->match(pc, pi);
return this->pf_pcre->find_in(line.to_string_fragment())
.ignore_error()
.has_value();
}
std::string to_command() const override
@ -110,7 +110,7 @@ public:
}
protected:
std::shared_ptr<pcrepp> pf_pcre;
std::shared_ptr<lnav::pcre2pp::code> pf_pcre;
};
class sql_filter : public text_filter {

@ -32,7 +32,7 @@
#include "base/attr_line.builder.hh"
#include "base/itertools.hh"
#include "base/lnav_log.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "pugixml/pugixml.hpp"
#include "readline_highlighters.hh"
#include "view_curses.hh"
@ -146,7 +146,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
last_block.append("\n");
}
if (this->ml_list_stack.empty()
&& !endswith(last_block.get_string(), "\n\n")) {
&& !endswith(last_block.get_string(), "\n\n"))
{
last_block.append("\n");
}
}
@ -200,7 +201,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
|| lang_sf.iequal(
string_fragment::from_const("shellsession")))
{
static const pcrepp SH_PROMPT(R"([^\$>#%]*[\$>#%]\s+)");
static const auto SH_PROMPT
= lnav::pcre2pp::code::from_const(R"([^\$>#%]*[\$>#%]\s+)");
attr_line_t new_block_text;
attr_line_t cmd_block;
@ -208,7 +210,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
for (auto line : block_text.split_lines()) {
if (!cmd_block.empty()
&& endswith(cmd_block.get_string(), "\\\n")) {
&& endswith(cmd_block.get_string(), "\\\n"))
{
cmd_block.append(line).append("\n");
continue;
}
@ -222,11 +225,11 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
cmd_block.clear();
}
pcre_context_static<10> pc;
pcre_input pi(line.get_string());
auto sh_find_res
= SH_PROMPT.find_in(line.get_string()).ignore_error();
if (SH_PROMPT.match(pc, pi)) {
prompt_size = pc.all()->length();
if (sh_find_res) {
prompt_size = sh_find_res->f_all.length();
line.with_attr(string_attr{
line_range{0, prompt_size},
VC_ROLE.value(role_t::VCR_LIST_GLYPH),
@ -360,7 +363,8 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
}
}
for (size_t line_index = 0; line_index < max_cell_lines;
line_index++) {
line_index++)
{
size_t col = 0;
for (const auto& cell : cells) {
block_text.append(" ");
@ -551,7 +555,8 @@ md2attr_line::text(MD_TEXTTYPE tt, const string_fragment& sf)
break;
}
default: {
static const pcrepp REPL_RE(R"(-{2,3}|:[^:\s]*(?:::[^:\s]*)*:)");
static const auto REPL_RE = lnav::pcre2pp::code::from_const(
R"(-{2,3}|:[^:\s]*(?:::[^:\s]*)*:)");
static const auto& emojis = md4cpp::get_emoji_map();
if (this->ml_code_depth > 0) {
@ -559,33 +564,35 @@ md2attr_line::text(MD_TEXTTYPE tt, const string_fragment& sf)
return Ok();
}
pcre_input pi(sf);
pcre_context_static<30> pc;
std::string span_text;
while (REPL_RE.match(pc, pi)) {
auto prev = pi.get_up_to(pc.all());
span_text.append(prev.data(), prev.length());
auto matched = pi.get_string_fragment(pc.all());
if (matched == "--") {
span_text.append("\u2013");
} else if (matched == "---") {
span_text.append("\u2014");
} else if (matched.startswith(":")) {
auto em_iter
= emojis.em_shortname2emoji.find(matched.to_string());
if (em_iter == emojis.em_shortname2emoji.end()) {
span_text.append(matched.data(), matched.length());
} else {
span_text.append(em_iter->second.get().e_value);
auto loop_res = REPL_RE.capture_from(sf).for_each(
[&span_text](lnav::pcre2pp::match_data& md) {
span_text += md.leading();
auto matched = *md[0];
if (matched == "--") {
span_text.append("\u2013");
} else if (matched == "---") {
span_text.append("\u2014");
} else if (matched.startswith(":")) {
auto em_iter = emojis.em_shortname2emoji.find(
matched.to_string());
if (em_iter == emojis.em_shortname2emoji.end()) {
span_text += matched;
} else {
span_text.append(em_iter->second.get().e_value);
}
}
}
}
});
auto last_frag = sf.substr(pi.pi_offset);
span_text.append(last_frag.data(), last_frag.length());
if (loop_res.isOk()) {
span_text += loop_res.unwrap();
} else {
log_error("span replacement regex failed: %d",
loop_res.unwrapErr().e_error_code);
}
text_wrap_settings tws
= {0, this->ml_blocks.size() == 1 ? 70 : 10000};

@ -1486,6 +1486,9 @@ public:
has_value_ = false;
}
template<typename F>
auto map(F func) -> optional<decltype(func(this->value()))>;
private:
void this_type_does_not_support_comparisons() const {}
@ -1747,6 +1750,17 @@ optional<T> make_optional( T const & value )
#endif // optional_CPP11_OR_GREATER
template<typename T>
template<typename F>
auto optional<T>::map(F func) -> optional<decltype(func(this->value()))>
{
if (this->has_value()) {
return make_optional(func(this->value()));
}
return nullopt;
}
} // namespace optional_lite
using optional_lite::optional;

@ -1,9 +1,16 @@
add_library(pcrepp STATIC ../config.h.in pcrepp.hh pcrepp.cc)
add_library(pcrepp STATIC
../config.h.in
pcre2pp.hh
pcre2pp.cc)
target_include_directories(pcrepp PUBLIC . .. ../third-party/scnlib/include
${CMAKE_CURRENT_BINARY_DIR}/..)
target_link_libraries(pcrepp cppfmt pcre::libpcre)
target_link_libraries(pcrepp cppfmt pcre::libpcre pcre2::pcre2)
add_executable(test_pcrepp test_pcrepp.cc)
target_link_libraries(test_pcrepp pcrepp)
add_test(NAME test_pcrepp COMMAND test_pcrepp)
add_executable(test_pcre2pp test_pcre2pp.cc)
target_include_directories(
test_pcre2pp
PUBLIC
../third-party/doctest-root)
target_link_libraries(test_pcre2pp pcrepp)
add_test(NAME test_pcre2pp COMMAND test_pcre2pp)

@ -16,18 +16,18 @@ AM_CXXFLAGS = $(CODE_COVERAGE_CXXFLAGS)
noinst_LIBRARIES = libpcrepp.a
noinst_HEADERS = \
pcrepp.hh
pcre2pp.hh
libpcrepp_a_SOURCES = \
pcrepp.cc
pcre2pp.cc
test_pcrepp_SOURCES = test_pcrepp.cc
test_pcrepp_LDADD = \
test_pcre2pp_SOURCES = test_pcre2pp.cc
test_pcre2pp_LDADD = \
libpcrepp.a \
$(PCRE_LIBS)
check_PROGRAMS = \
test_pcrepp
test_pcre2pp
TESTS = \
test_pcrepp
test_pcre2pp

@ -0,0 +1,458 @@
/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file pcre2pp.cc
*/
#include "pcre2pp.hh"
#include "config.h"
namespace lnav {
namespace pcre2pp {
/**
 * Escape a string so that it matches literally when embedded in a regular
 * expression.
 *
 * ASCII letters, digits and underscores are copied unchanged, as are bytes
 * with the high bit set (for example, the bytes of UTF-8 multi-byte
 * sequences).  Every other byte is prefixed with a backslash.
 *
 * @param unquoted The NUL-terminated string to escape.
 * @return The escaped copy of the input.
 */
std::string
quote(const char* unquoted)
{
    std::string retval;

    for (int lpc = 0; unquoted[lpc]; lpc++) {
        // The <cctype> classifiers have undefined behavior for negative
        // values other than EOF, so convert to unsigned char first and test
        // the high bit before calling isalnum().
        const auto ch = static_cast<unsigned char>(unquoted[lpc]);

        if ((ch & 0x80) || isalnum(ch) || ch == '_') {
            retval.push_back(unquoted[lpc]);
        } else {
            retval.push_back('\\');
            retval.push_back(unquoted[lpc]);
        }
    }

    return retval;
}
/**
 * Create a PCRE2 match-data block from this compiled pattern.
 *
 * @return A match_data wrapping the newly created block.
 */
match_data
code::create_match_data() const
{
    auto_mem<pcre2_match_data> block(pcre2_match_data_free);

    block = pcre2_match_data_create_from_pattern(this->p_code, nullptr);

    return match_data{std::move(block)};
}
/**
 * Compile a regular expression.
 *
 * PCRE2_UTF is always added to the given options.  After a successful
 * compile, JIT compilation is attempted; a JIT failure is ignored and the
 * pattern is still returned.
 *
 * @param sf The pattern text.
 * @param options PCRE2 compile options to use in addition to PCRE2_UTF.
 * @return The compiled code, or a compile_error carrying the PCRE2 error
 * code, the offset reported by pcre2_compile() and a copy of the pattern.
 */
Result<code, compile_error>
code::from(string_fragment sf, int options)
{
    compile_error failure;
    auto_mem<pcre2_code> compiled(pcre2_code_free);

    options |= PCRE2_UTF;
    compiled = pcre2_compile(sf.udata(),
                             sf.length(),
                             options,
                             &failure.ce_code,
                             &failure.ce_offset,
                             nullptr);
    if (compiled == nullptr) {
        failure.ce_pattern = sf.to_string();
        return Err(failure);
    }

    // JIT is an optimization only, so its result is deliberately not checked.
    (void) pcre2_jit_compile(compiled, PCRE2_JIT_COMPLETE);

    return Ok(code{std::move(compiled), sf.to_string()});
}
/**
 * Query the compiled pattern for its name table.
 *
 * @return A named_captures populated with the number of names, the size of
 * each table entry and a pointer to the table itself.
 */
code::named_captures
code::get_named_captures() const
{
    named_captures table;
    const auto compiled = this->p_code.in();

    pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &table.nc_count);
    pcre2_pattern_info(
        compiled, PCRE2_INFO_NAMEENTRYSIZE, &table.nc_entry_size);
    pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &table.nc_name_table);

    return table;
}
/**
 * Look for a partial match of this pattern in the given input.
 *
 * The input is matched with PCRE2_PARTIAL_HARD.  If that does not yield a
 * partial match, the attempt is repeated with the input shortened by one
 * byte at a time until a partial match is reported or the length reaches
 * zero.
 *
 * @param in The input to test.
 * @return The end offset of the partial match as reported in the ovector,
 * or zero if no attempt produced a partial match.
 */
size_t
code::match_partial(string_fragment in) const
{
    auto md = this->create_match_data();
    auto subject_len = in.length();

    while (true) {
        const auto rc = pcre2_match(this->p_code.in(),
                                    in.udata(),
                                    subject_len,
                                    0,
                                    PCRE2_PARTIAL_HARD,
                                    md.md_data.in(),
                                    nullptr);

        if (rc == PCRE2_ERROR_PARTIAL) {
            return md.md_ovector[1];
        }

        if (subject_len > 0) {
            subject_len -= 1;
        }
        if (!(subject_len > 0)) {
            break;
        }
    }

    return 0;
}
/**
 * Find the name of a capture group.
 *
 * @param index The capture number to look up in the pattern's name table.
 * @return A pointer to the name stored in the name table, or nullptr if no
 * entry in the table has the given index.
 */
const char*
code::get_name_for_capture(size_t index) const
{
    const auto names = this->get_named_captures();

    for (auto iter = names.begin(); iter != names.end(); ++iter) {
        const auto entry = *iter;

        if (entry.get_index() == index) {
            return entry.get_name().data();
        }
    }

    return nullptr;
}
/**
 * @return The number of capture groups in the pattern as reported by
 * PCRE2_INFO_CAPTURECOUNT, or zero if the query fails.
 */
size_t
code::get_capture_count() const
{
    // Initialized so that a failed pcre2_pattern_info() call, which leaves
    // the output untouched, cannot cause an indeterminate value to be
    // returned.
    uint32_t retval = 0;

    pcre2_pattern_info(this->p_code.in(), PCRE2_INFO_CAPTURECOUNT, &retval);

    return retval;
}
std::vector<string_fragment>
code::get_captures() const
{
bool in_class = false, in_escape = false, in_literal = false;
auto pat_frag = string_fragment::from_str(this->p_pattern);
std::vector<string_fragment> cap_in_progress;
std::vector<string_fragment> retval;
for (int lpc = 0; this->p_pattern[lpc]; lpc++) {
if (in_escape) {
in_escape = false;
if (this->p_pattern[lpc] == 'Q') {
in_literal = true;
}
} else if (in_class) {
if (this->p_pattern[lpc] == ']') {
in_class = false;
}
if (this->p_pattern[lpc] == '\\') {
in_escape = true;
}
} else if (in_literal) {
if (this->p_pattern[lpc] == '\\' && this->p_pattern[lpc + 1] == 'E')
{
in_literal = false;
lpc += 1;
}
} else {
switch (this->p_pattern[lpc]) {
case '\\':
in_escape = true;
break;
case '[':
in_class = true;
break;
case '(':
cap_in_progress.emplace_back(pat_frag.sub_range(lpc, lpc));
break;
case ')': {
if (!cap_in_progress.empty()) {
static const auto DEFINE_SF
= string_fragment::from_const("(?(DEFINE)");
auto& cap = cap_in_progress.back();
char first = '\0', second = '\0', third = '\0';
bool is_cap = false;
cap.sf_end = lpc + 1;
if (cap.length() >= 2) {
first = this->p_pattern[cap.sf_begin + 1];
}
if (cap.length() >= 3) {
second = this->p_pattern[cap.sf_begin + 2];
}
if (cap.length() >= 4) {
third = this->p_pattern[cap.sf_begin + 3];
}
if (cap.sf_begin >= 2) {
auto poss_define = string_fragment::from_str_range(
this->p_pattern, cap.sf_begin - 2, cap.sf_end);
if (poss_define == DEFINE_SF) {
cap_in_progress.pop_back();
continue;
}
}
if (first == '?') {
if (second == '\'') {
is_cap = true;
}
if (second == '<'
&& (isalpha(third) || third == '_'))
{
is_cap = true;
}
if (second == 'P' && third == '<') {
is_cap = true;
}
} else if (first != '*') {
is_cap = true;
}
if (is_cap) {
retval.emplace_back(cap);
}
cap_in_progress.pop_back();
}
break;
}
}
}
}
assert((size_t) this->get_capture_count() == retval.size());
return retval;
}
/**
 * Replace every match of this pattern in the input.
 *
 * The replacement string is copied for each match with the following
 * backslash escapes interpreted:
 *
 *   - "\N" (a single digit) inserts the text of capture N if it was set by
 *     the match.  If N is greater than the number of captures in the
 *     pattern, the two characters are copied as-is.
 *   - "\\" inserts a single backslash.
 *   - A backslash followed by any other character is copied as-is.
 *
 * @param str The input to search.
 * @param repl The NUL-terminated replacement template.
 * @return The input with each match replaced by the expanded template.
 */
std::string
code::replace(string_fragment str, const char* repl) const
{
    std::string retval;
    // Offset of the first input byte that has not been copied to retval yet.
    std::string::size_type start = 0;
    string_fragment remaining = str;

    auto md = this->create_match_data();
    while (remaining.is_valid()) {
        auto find_res = this->capture_from(str)
                            .at(remaining)
                            .into(md)
                            .matches()
                            .ignore_error();
        if (!find_res) {
            break;
        }
        auto all = find_res->f_all;
        remaining = find_res->f_remaining;
        bool in_escape = false;

        // Copy the unmatched text between the previous match and this one.
        retval.append(str.data(), start, (all.sf_begin - start));
        start = all.sf_end;
        for (int lpc = 0; repl[lpc]; lpc++) {
            auto ch = repl[lpc];

            if (in_escape) {
                if (isdigit(ch)) {
                    auto capture_index = (ch - '0');

                    if (capture_index < md.get_count()) {
                        auto cap = md[capture_index];
                        if (cap) {
                            retval.append(cap->data(), cap->length());
                        }
                    } else if (capture_index > this->get_capture_count()) {
                        retval.push_back('\\');
                        retval.push_back(ch);
                    }
                } else {
                    if (ch != '\\') {
                        retval.push_back('\\');
                    }
                    retval.push_back(ch);
                }
                in_escape = false;
            } else {
                switch (ch) {
                    case '\\':
                        in_escape = true;
                        break;
                    default:
                        retval.push_back(ch);
                        break;
                }
            }
        }
    }
    if (remaining.is_valid()) {
        // Copy the tail from the end of the last match.  After an empty
        // match, 'remaining' begins one byte past the match, so using its
        // start offset here would drop that byte when no later match is
        // found.
        retval.append(str.data(), start, std::string::npos);
    }

    return retval;
}
int
code::name_index(const char* name) const
{
return pcre2_substring_number_from_name(this->p_code.in(),
(PCRE2_SPTR) name);
}
/**
 * @return The capture number decoded from the first two bytes of the name
 * table entry, most significant byte first.
 */
size_t
code::named_capture::get_index() const
{
    const auto high_bits = this->nc_entry[0] << 8;
    const auto low_bits = this->nc_entry[1] & 0xff;

    return high_bits | low_bits;
}
/**
 * @return A fragment covering the NUL-terminated name that follows the
 * two-byte index in the name table entry.
 */
string_fragment
code::named_capture::get_name() const
{
    const auto name_start = &this->nc_entry[2];
    const auto name_len = strlen((const char*) name_start);

    return string_fragment::from_bytes(name_start, name_len);
}
/** @return A named_capture view of the entry the iterator points at. */
code::named_capture
code::named_captures::iterator::operator*() const
{
    code::named_capture current{this->i_entry};

    return current;
}
/** Advance to the next name table entry by stepping over one entry size. */
code::named_captures::iterator&
code::named_captures::iterator::operator++()
{
    this->i_entry = this->i_entry + this->i_entry_size;

    return *this;
}
/** @return True if both the entry pointer and the entry size are equal. */
bool
code::named_captures::iterator::operator==(const iterator& other) const
{
    if (this->i_entry != other.i_entry) {
        return false;
    }

    return this->i_entry_size == other.i_entry_size;
}
/** @return True if the entry pointer or the entry size differs. */
bool
code::named_captures::iterator::operator!=(const iterator& other) const
{
    const bool same_entry = this->i_entry == other.i_entry;
    const bool same_size = this->i_entry_size == other.i_entry_size;

    return !(same_entry && same_size);
}
/** @return An iterator positioned at the first entry of the name table. */
code::named_captures::iterator
code::named_captures::begin() const
{
    iterator first{this->nc_entry_size, this->nc_name_table};

    return first;
}
/** @return An iterator positioned just past the last name table entry. */
code::named_captures::iterator
code::named_captures::end() const
{
    const auto table_size = this->nc_count * this->nc_entry_size;
    iterator past_last{
        this->nc_entry_size,
        this->nc_name_table + table_size,
    };

    return past_last;
}
// Run the next match attempt against the input, starting at the offset
// recorded by the previous attempt.
//
// options is passed through to pcre2_match().  The result is 'found' with
// the overall match and the remaining input, 'not_found' when there is no
// (further) match, or 'error' with the PCRE2 return code otherwise.
matcher::matches_result
matcher::matches(uint32_t options)
{
this->mb_input.i_offset = this->mb_input.i_next_offset;
// A next offset of -1 marks the input as exhausted (set below after an empty
// match at the end of the input).
if (this->mb_input.i_offset == -1) {
return not_found{};
}
auto rc = pcre2_match(this->mb_code.p_code.in(),
this->mb_input.i_string.udata(),
this->mb_input.i_string.length(),
this->mb_input.i_offset,
options,
this->mb_match_data.md_data.in(),
nullptr);
if (rc > 0) {
this->mb_match_data.md_input = this->mb_input;
this->mb_match_data.md_code = &this->mb_code;
this->mb_match_data.md_capture_end = rc;
// Work out where the next attempt should start.  An empty match must not
// be retried at the same offset: stop if it was at the end of the input,
// otherwise step one byte past it.
// NOTE(review): the one-byte step is not UTF-8 aware -- confirm that this
// is acceptable for patterns compiled with PCRE2_UTF.
if (this->mb_match_data[0]->empty()
&& this->mb_match_data[0]->sf_end >= this->mb_input.i_string.sf_end)
{
this->mb_input.i_next_offset = -1;
} else if (this->mb_match_data[0]->empty()) {
this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end + 1;
} else {
this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end;
}
// Keep the match data's copy of the input in sync so that remaining()
// reflects the new next offset.
this->mb_match_data.md_input.i_next_offset
= this->mb_input.i_next_offset;
return found{
this->mb_match_data[0].value(),
this->mb_match_data.remaining(),
};
}
// No match or an error (this also covers rc == 0): record an empty range at
// the starting offset as capture zero.
this->mb_match_data.md_input = this->mb_input;
this->mb_match_data.md_ovector[0] = this->mb_input.i_offset;
this->mb_match_data.md_ovector[1] = this->mb_input.i_offset;
this->mb_match_data.md_capture_end = 1;
if (rc == PCRE2_ERROR_NOMATCH) {
return not_found{};
}
return error{&this->mb_code, rc};
}
/**
 * Called by ignore_error() when a match attempt failed with an error.
 *
 * The PCRE2 message for the error code is fetched into a local buffer, but
 * it is currently discarded since the logging call is disabled.
 */
void
matcher::matches_result::handle_error(matcher::error err)
{
    unsigned char msg[1024];

    pcre2_get_error_message(err.e_error_code, msg, sizeof(msg));
    // log_error("pcre2_match failure: %s", msg);
}
std::string
compile_error::get_message() const
{
unsigned char buffer[1024];
pcre2_get_error_message(this->ce_code, buffer, sizeof(buffer));
return {(const char*) buffer};
}
std::string
matcher::error::get_message()
{
unsigned char buffer[1024];
pcre2_get_error_message(this->e_error_code, buffer, sizeof(buffer));
return {(const char*) buffer};
}
} // namespace pcre2pp
} // namespace lnav

@ -0,0 +1,368 @@
/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lnav_pcre2pp_hh
#define lnav_pcre2pp_hh
#define PCRE2_CODE_UNIT_WIDTH 8
#include <memory>
#include <string>
#include <vector>
#include <pcre2.h>
#include "base/auto_mem.hh"
#include "base/intern_string.hh"
#include "base/result.h"
#include "mapbox/variant.hpp"
namespace lnav {
namespace pcre2pp {
std::string quote(const char* unquoted);
/**
 * Convenience overload that escapes the contents of a std::string by
 * forwarding its C string to quote(const char*).
 */
inline std::string
quote(const std::string& unquoted)
{
    const char* raw = unquoted.c_str();

    return quote(raw);
}
class code;
struct capture_builder;
class matcher;
// The subject of a match along with the matcher's position within it.
struct input {
// The text being matched against.
string_fragment i_string;
// Offset passed to pcre2_match() as the start of the current attempt.
int i_offset{0};
// Offset at which the next attempt will start; matcher::matches() sets this
// to -1 once no further attempt should be made.
int i_next_offset{0};
};
// Wraps a PCRE2 match-data block together with the input that was last
// matched, and exposes the captured ranges as string_fragments.
class match_data {
public:
// Create an object with no PCRE2 block attached: all members keep their
// default values, so md_ovector is null and the capture count is zero.
static match_data unitialized() { return match_data{}; }
// The input between the offset where the last attempt started and the start
// of the overall match.
string_fragment leading() const
{
return this->md_input.i_string.sub_range(this->md_input.i_offset,
this->md_ovector[0]);
}
// The input from the next-attempt offset to the end of the subject, or an
// invalid fragment if nothing has been matched yet or the input has been
// exhausted (next offset of -1).
string_fragment remaining() const
{
if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) {
return string_fragment::invalid();
}
return string_fragment::from_byte_range(
this->md_input.i_string.sf_string,
this->md_input.i_next_offset,
this->md_input.i_string.sf_end);
}
// Get a capture by number, where zero is the overall match.  Returns
// nullopt if the index is beyond the captures set by the last match or the
// group did not participate in the match.
nonstd::optional<string_fragment> operator[](size_t index) const
{
if (index >= this->md_capture_end) {
return nonstd::nullopt;
}
auto start = this->md_ovector[(index * 2)];
auto stop = this->md_ovector[(index * 2) + 1];
if (start == PCRE2_UNSET || stop == PCRE2_UNSET) {
return nonstd::nullopt;
}
return this->md_input.i_string.sub_range(start, stop);
}
// Get a capture by group name, given as a string literal.
template<typename T, std::size_t N>
nonstd::optional<string_fragment> operator[](const T (&name)[N]) const;
// The capture count recorded by the last match attempt (the pcre2_match()
// return value on success, one after a failed attempt, zero initially).
int get_count() const { return this->md_capture_end; }
private:
friend matcher;
friend code;
match_data() = default;
// Take ownership of a PCRE2 block and cache its ovector pointer and size.
explicit match_data(auto_mem<pcre2_match_data> dat)
: md_data(std::move(dat)),
md_ovector(pcre2_get_ovector_pointer(this->md_data.in())),
md_ovector_count(pcre2_get_ovector_count(this->md_data.in()))
{
}
// The PCRE2 match-data block.
auto_mem<pcre2_match_data> md_data;
// The pattern used for the last successful match; null until then.
const code* md_code{nullptr};
// Copy of the matcher's input as of the last attempt.
input md_input;
// Pointer to the offsets vector inside md_data.
PCRE2_SIZE* md_ovector{nullptr};
// Number of offset pairs in md_ovector.
uint32_t md_ovector_count{0};
// Number of entries of md_ovector that are meaningful; see get_count().
int md_capture_end{0};
};
// Performs match attempts for a pattern against an input, writing the
// results into a caller-supplied match_data.  Instances are created by
// capture_builder.
class matcher {
public:
// Result of a successful match.
struct found {
// The overall match (capture zero).
string_fragment f_all;
// The input following the match, as returned by match_data::remaining().
string_fragment f_remaining;
};
// Result when the pattern did not match.
struct not_found {};
// Result when pcre2_match() reported something other than a match or
// PCRE2_ERROR_NOMATCH.
struct error {
// The pattern that was being matched.
const code* e_code{nullptr};
// The value returned by pcre2_match().
int e_error_code{0};
// The PCRE2 message text for e_error_code.
std::string get_message();
};
// The outcome of matches(): one of found, not_found or error.
class matches_result
: public mapbox::util::variant<found, not_found, error> {
public:
using variant::variant;
// Collapse the result into an optional: the found value on success and
// nullopt otherwise.  An error is passed to handle_error() first.
nonstd::optional<found> ignore_error()
{
return this->match(
[](found fo) { return nonstd::make_optional(fo); },
[](not_found) { return nonstd::nullopt; },
[](error err) {
handle_error(err);
return nonstd::nullopt;
});
}
private:
static void handle_error(error err);
};
// Replace the input and set both the current and next offsets to
// next_offset.
matcher& reload_input(string_fragment sf, int next_offset)
{
this->mb_input = input{sf, next_offset, next_offset};
return *this;
}
// Run the next match attempt; options is passed to pcre2_match().
matches_result matches(uint32_t options = 0);
// The offset at which the next attempt will start, or -1 if exhausted.
int get_next_offset() const { return this->mb_input.i_next_offset; }
private:
friend capture_builder;
// Note that the input is copied while the code and match_data are held by
// reference, so they must outlive the matcher.
matcher(const code& co, input& in, match_data& md)
: mb_code(co), mb_input(in), mb_match_data(md)
{
}
const code& mb_code;
input mb_input;
match_data& mb_match_data;
};
// Intermediate object returned by code::capture_from() that collects the
// pieces needed to run a match: the pattern, the input and, optionally, a
// starting position.
struct capture_builder {
// The pattern to match with.
const code& mb_code;
// The input and offsets to match against.
input mb_input;
// Start matching at the beginning of the given fragment.  Only the
// fragment's sf_begin is used, as an offset into the original input.
capture_builder at(const string_fragment& remaining) &&
{
this->mb_input.i_offset = this->mb_input.i_next_offset
= remaining.sf_begin;
return *this;
}
// Create a matcher that stores its results in the given match_data, which
// is held by reference.
matcher into(match_data& md) &&
{
return matcher{
this->mb_code,
this->mb_input,
md,
};
}
// Invoke func for matches in the input using a match_data created
// internally; Options is passed to each match attempt.
// NOTE(review): the details of the returned fragment and error are defined
// with the template body -- confirm there.
template<uint32_t Options = 0, typename F>
Result<string_fragment, matcher::error> for_each(F func) &&;
};
// Describes a failure to compile a pattern in code::from().
struct compile_error {
// Copy of the pattern that failed to compile.
std::string ce_pattern;
// The error code reported by pcre2_compile().
int ce_code{0};
// The error offset reported by pcre2_compile().
size_t ce_offset{0};
// The PCRE2 message text for ce_code.
std::string get_message() const;
};
// A compiled PCRE2 pattern along with the text it was compiled from.
class code {
public:
// A view of a single entry in the pattern's name table.
class named_capture {
public:
// The capture number stored in the entry.
size_t get_index() const;
// The group name stored in the entry.
string_fragment get_name() const;
// Pointer to the start of the entry.
PCRE2_SPTR nc_entry;
};
// An iterable view of the pattern's name table.
class named_captures {
public:
struct iterator {
named_capture operator*() const;
iterator& operator++();
bool operator==(const iterator& other) const;
bool operator!=(const iterator& other) const;
// Size in bytes of each table entry; used as the step for operator++.
uint32_t i_entry_size;
// Pointer to the current entry.
PCRE2_SPTR i_entry;
};
iterator begin() const;
iterator end() const;
bool empty() const { return this->nc_count == 0; }
size_t size() const { return this->nc_count; }
private:
friend code;
named_captures() = default;
// Number of entries in the table.
uint32_t nc_count{0};
// Size in bytes of each entry.
uint32_t nc_entry_size{0};
// Pointer to the first entry.
PCRE2_SPTR nc_name_table{nullptr};
};
// Compile a pattern; PCRE2_UTF is always added to the given options.
static Result<code, compile_error> from(string_fragment sf,
int options = 0);
// Compile a pattern given as a string literal.
// NOTE(review): the result is unwrap()'d without checking for an error, so
// this presumably must only be used with patterns known to compile --
// confirm the behavior of Result::unwrap() on an error.
template<typename T, std::size_t N>
static code from_const(const T (&str)[N], int options = 0)
{
return from(string_fragment::from_const(str), options).unwrap();
}
// The text the pattern was compiled from.
const std::string& get_pattern() const { return this->p_pattern; }
named_captures get_named_captures() const;
// The name of the capture with the given number, or nullptr if it has none.
const char* get_name_for_capture(size_t index) const;
size_t get_capture_count() const;
// The capture number for the given group name, as returned by
// pcre2_substring_number_from_name().
int name_index(const char* name) const;
// The fragments of the pattern text that make up each capturing group.
std::vector<string_fragment> get_captures() const;
// Create a match-data block from this pattern.
match_data create_match_data() const;
// Begin building a match against the given input.
capture_builder capture_from(string_fragment in) const
{
return capture_builder{
*this,
input{in},
};
}
// Run a single match attempt against the input.
//
// The match_data used here is a function-local thread_local, so it is
// shared by every code object used on the thread.  It is re-created when
// its ovector is smaller than the one this pattern requires.
matcher::matches_result find_in(string_fragment in,
uint32_t options = 0) const
{
static thread_local match_data md = this->create_match_data();
if (md.md_ovector_count < this->p_match_proto.md_ovector_count) {
md = this->create_match_data();
}
return this->capture_from(in).into(md).matches(options);
}
size_t match_partial(string_fragment in) const;
// Replace every match in str with the expansion of repl.
std::string replace(string_fragment str, const char* repl) const;
// Move the compiled pattern and its text into a new shared code object.
std::shared_ptr<code> to_shared() &&
{
return std::make_shared<code>(std::move(this->p_code),
std::move(this->p_pattern));
}
code(auto_mem<pcre2_code> code, std::string pattern)
: p_code(std::move(code)), p_pattern(std::move(pattern)),
p_match_proto(this->create_match_data())
{
}
private:
friend matcher;
friend match_data;
// The compiled pattern.
auto_mem<pcre2_code> p_code;
// The source text of the pattern.
std::string p_pattern;
// A match_data created from this pattern at construction; find_in() reads
// its ovector count to size the shared thread_local match_data.
match_data p_match_proto;
};
/**
 * Look up a capture by the name given as a string literal.  The name is
 * resolved to a number with pcre2_substring_number_from_name() and the
 * lookup is then delegated to the index-based operator[].
 */
template<typename T, std::size_t N>
nonstd::optional<string_fragment>
match_data::operator[](const T (&name)[N]) const
{
    const auto* raw_name = reinterpret_cast<const unsigned char*>(name);
    auto index = pcre2_substring_number_from_name(this->md_code->p_code.in(),
                                                  raw_name);

    return this->operator[](index);
}
/**
 * Match the pattern repeatedly, invoking func with the match_data after
 * every successful match.  Iteration stops on the first non-match or error.
 *
 * @return Ok with md.remaining() when no error was recorded, otherwise Err
 *   with the matcher error.
 */
template<uint32_t Options, typename F>
Result<string_fragment, matcher::error>
capture_builder::for_each(F func) &&
{
    auto md = this->mb_code.create_match_data();
    auto mat = matcher{this->mb_code, this->mb_input, md};
    matcher::error last_error;

    for (bool finished = false; !finished;) {
        auto step = mat.matches(Options);

        finished = step.match(
            [mat, &func](matcher::found) {
                func(mat.mb_match_data);
                return false;
            },
            [](matcher::not_found) { return true; },
            [&last_error](matcher::error failure) {
                last_error = failure;
                return true;
            });
    }

    if (last_error.e_error_code != 0) {
        return Err(last_error);
    }
    return Ok(md.remaining());
}
} // namespace pcre2pp
} // namespace lnav
#endif

@ -1,453 +0,0 @@
/**
* Copyright (c) 2007-2012, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file pcrepp.cc
*/
#include "pcrepp.hh"
// Start and maximum sizes handed to pcre_jit_stack_alloc() when
// pcrepp::jit_stack() creates the JIT stack.
const int JIT_STACK_MIN_SIZE = 32 * 1024;
const int JIT_STACK_MAX_SIZE = 512 * 1024;
/**
 * Look up a capture by name using the pcrepp attached to this context.
 *
 * @return The capture slot for the name, or nullptr if the pattern has no
 *   group with that name.
 */
pcre_context::capture_t*
pcre_context::operator[](const char* name) const
{
    const auto index = this->pc_pcre->name_index(name);

    if (index == PCRE_ERROR_NOSUBSTRING) {
        return nullptr;
    }
    // Slot zero holds the whole match, so named groups start at one.
    return &this->pc_captures[index + 1];
}
/**
 * @return The first capture after the whole-match slot that is_valid(), or
 *   nullptr if there is none.
 */
pcre_context::capture_t*
pcre_context::first_valid() const
{
    int slot = 1;

    while (slot < this->pc_count) {
        auto& candidate = this->pc_captures[slot];

        if (candidate.is_valid()) {
            return &candidate;
        }
        ++slot;
    }
    return nullptr;
}
/**
 * Escape a string so it can be used literally inside a pattern.
 *
 * ASCII alphanumerics, '_' and any byte with the high bit set are copied
 * unchanged; every other byte is prefixed with a backslash.
 *
 * @param unquoted NUL-terminated text to escape.
 * @return The escaped text.
 */
std::string
pcrepp::quote(const char* unquoted)
{
    std::string retval;

    for (const char* cursor = unquoted; *cursor != '\0'; ++cursor) {
        // The <cctype> functions are only defined for values representable
        // as unsigned char (or EOF), so never pass a plain char that may be
        // negative.
        const auto uch = static_cast<unsigned char>(*cursor);
        const bool keep_as_is
            = (uch & 0x80) != 0 || isalnum(uch) || uch == '_';

        if (!keep_as_is) {
            retval.push_back('\\');
        }
        retval.push_back(*cursor);
    }

    return retval;
}
/**
 * Compile a pattern, always adding PCRE_UTF8 to the given options.
 *
 * @return Ok with the new pcrepp, or Err with the message and offset
 *   reported by pcre_compile().
 */
Result<pcrepp, pcrepp::compile_error>
pcrepp::from_str(std::string pattern, int options)
{
    const char* error_message = nullptr;
    int error_offset = 0;
    auto* compiled = pcre_compile(pattern.c_str(),
                                  options | PCRE_UTF8,
                                  &error_message,
                                  &error_offset,
                                  nullptr);

    if (compiled == nullptr) {
        return Err(compile_error{error_message, error_offset});
    }
    return Ok(pcrepp(std::move(pattern), compiled));
}
/**
 * Same as from_str(), but the compiled pattern is returned in a
 * std::shared_ptr.
 */
Result<std::shared_ptr<pcrepp>, pcrepp::compile_error>
pcrepp::shared_from_str(std::string pattern, int options)
{
    const char* error_message = nullptr;
    int error_offset = 0;
    auto* compiled = pcre_compile(pattern.c_str(),
                                  options | PCRE_UTF8,
                                  &error_message,
                                  &error_offset,
                                  nullptr);

    if (compiled == nullptr) {
        return Err(compile_error{error_message, error_offset});
    }
    return Ok(std::make_shared<pcrepp>(std::move(pattern), compiled));
}
/**
 * Scan the pattern text and record, in p_captures, the [begin, end) range
 * of every parenthesized group that is treated as a capture.  The scan
 * skips escaped characters, character classes and \Q...\E literals.
 *
 * @param pattern The NUL-terminated pattern text.
 */
void
pcrepp::find_captures(const char* pattern)
{
// Scanner state: inside [...], just after a backslash, inside \Q...\E.
bool in_class = false, in_escape = false, in_literal = false;
// Stack of groups whose '(' has been seen but not yet the matching ')'.
std::vector<pcre_context::capture_t> cap_in_progress;
for (int lpc = 0; pattern[lpc]; lpc++) {
if (in_escape) {
// The escaped character is consumed; only \Q changes state.
in_escape = false;
if (pattern[lpc] == 'Q') {
in_literal = true;
}
} else if (in_class) {
if (pattern[lpc] == ']') {
in_class = false;
}
if (pattern[lpc] == '\\') {
in_escape = true;
}
} else if (in_literal) {
// Everything up to \E is literal text.
if (pattern[lpc] == '\\' && pattern[lpc + 1] == 'E') {
in_literal = false;
lpc += 1;
}
} else {
switch (pattern[lpc]) {
case '\\':
in_escape = true;
break;
case '[':
in_class = true;
break;
case '(':
// Remember where the group opened; the end is filled in at ')'.
cap_in_progress.emplace_back(lpc, lpc);
break;
case ')': {
if (!cap_in_progress.empty()) {
static const auto DEFINE_SF
= string_fragment::from_const("(?(DEFINE)");
auto& cap = cap_in_progress.back();
// Up to three characters following the '(' decide the group kind.
char first = '\0', second = '\0', third = '\0';
bool is_cap = false;
cap.c_end = lpc + 1;
if (cap.length() >= 2) {
first = pattern[cap.c_begin + 1];
}
if (cap.length() >= 3) {
second = pattern[cap.c_begin + 2];
}
if (cap.length() >= 4) {
third = pattern[cap.c_begin + 3];
}
if (cap.c_begin >= 2) {
// A "(DEFINE)" condition preceded by "(?" is not a capture.
auto poss_define = string_fragment::from_byte_range(
pattern, cap.c_begin - 2, cap.c_end);
if (poss_define == DEFINE_SF) {
cap_in_progress.pop_back();
continue;
}
}
if (first == '?') {
// Named groups: (?'name'...), (?<name>...) and (?P<name>...).
if (second == '\'') {
is_cap = true;
}
if (second == '<'
&& (isalpha(third) || third == '_'))
{
is_cap = true;
}
if (second == 'P' && third == '<') {
is_cap = true;
}
} else if (first != '*') {
// Plain "(...)"; groups starting with "(*" are not counted.
is_cap = true;
}
if (is_cap) {
this->p_captures.push_back(cap);
}
cap_in_progress.pop_back();
}
break;
}
}
}
}
// The hand-rolled scan must agree with the count stored in p_capture_count.
assert((size_t) this->p_capture_count == this->p_captures.size());
}
/**
 * Run the pattern against the input starting at pi.pi_next_offset and store
 * the captures in pc.
 *
 * @param pc Receives the capture offsets and the capture count.
 * @param pi The input; pi_offset is set to the previous pi_next_offset and
 *   pi_next_offset is advanced past the match.
 * @param options pcre_exec() options.  PCRE_ANCHORED is handled here by
 *   matching against the tail of the string instead of passing it through.
 * @return true on a non-empty match or a partial match.
 */
bool
pcrepp::match(pcre_context& pc, pcre_input& pi, int options) const
{
int length, startoffset, filtered_options = options;
int count = pc.get_max_count();
const char* str;
int rc;
pc.set_pcrepp(this);
pi.pi_offset = pi.pi_next_offset;
str = pi.get_string();
if (filtered_options & PCRE_ANCHORED) {
// Emulate anchoring: strip the flag and match from the start of the
// remaining text, so offsets come back relative to pi_offset.
filtered_options &= ~PCRE_ANCHORED;
str = &str[pi.pi_offset];
startoffset = 0;
length = pi.pi_length - pi.pi_offset;
} else {
startoffset = pi.pi_offset;
length = pi.pi_length;
}
// Each capture_t is a begin/end pair of ints, so the capture array is
// passed as the output vector with two ints per capture.
rc = pcre_exec(this->p_code,
this->p_code_extra.in(),
str,
length,
startoffset,
filtered_options,
(int*) pc.all(),
count * 2);
if (rc < 0) {
switch (rc) {
case PCRE_ERROR_NOMATCH:
break;
case PCRE_ERROR_PARTIAL:
// A partial match is reported as success with one capture.
pc.set_count(1);
return true;
default:
break;
}
} else if (rc == 0) {
rc = 0;
} else if (pc.all()->c_begin == pc.all()->c_end) {
// An empty match is reported as a failure; step forward one byte
// (when possible) so the next call does not match here again.
rc = 0;
if (pi.pi_next_offset + 1 < pi.pi_length) {
pi.pi_next_offset += 1;
}
} else {
if (options & PCRE_ANCHORED) {
// Translate tail-relative offsets back to whole-string offsets,
// leaving unset captures (-1) alone.
for (int lpc = 0; lpc < rc; lpc++) {
if (pc.all()[lpc].c_begin == -1) {
continue;
}
pc.all()[lpc].c_begin += pi.pi_offset;
pc.all()[lpc].c_end += pi.pi_offset;
}
}
pi.pi_next_offset = pc.all()->c_end;
}
pc.set_count(rc);
return rc > 0;
}
/**
 * Replace the matches of this pattern in str with repl.
 *
 * In repl, a backslash followed by a digit N inserts capture N when the
 * match produced it, and is copied through as "\N" when N is greater than
 * the pattern's capture count.  "\\" yields a single backslash; a backslash
 * before any other character is copied through together with it.
 *
 * @param str The NUL-terminated text to search.
 * @param repl The NUL-terminated replacement template.
 * @return The text with replacements applied.
 */
std::string
pcrepp::replace(const char* str, const char* repl) const
{
pcre_context_static<30> pc;
pcre_input pi(str);
std::string retval;
// Offset in str of the first byte not yet copied to retval.
std::string::size_type start = 0;
while (pi.pi_offset < pi.pi_length) {
this->match(pc, pi);
auto all = pc.all();
bool in_escape = false;
// match() stores the negative pcre_exec() code when nothing matched.
if (pc.get_count() < 0) {
break;
}
// Copy the unmatched text that precedes this match.
retval.append(str, start, (all->c_begin - start));
start = all->c_end;
for (int lpc = 0; repl[lpc]; lpc++) {
auto ch = repl[lpc];
if (in_escape) {
if (isdigit(ch)) {
auto capture_index = (ch - '0');
if (capture_index < pc.get_count()) {
retval.append(pi.get_substr_start(&all[capture_index]),
pi.get_substr_len(&all[capture_index]));
} else if (capture_index > this->p_capture_count) {
// Not a capture of this pattern: keep the text as written.
retval.push_back('\\');
retval.push_back(ch);
}
} else {
if (ch != '\\') {
retval.push_back('\\');
}
retval.push_back(ch);
}
in_escape = false;
} else {
switch (ch) {
case '\\':
in_escape = true;
break;
default:
retval.push_back(ch);
break;
}
}
}
}
// Copy whatever follows the last match.
retval.append(str, start, std::string::npos);
return retval;
}
void
pcrepp::study()
{
const char* errptr;
this->p_code_extra = pcre_study(this->p_code,
#ifdef PCRE_STUDY_JIT_COMPILE
PCRE_STUDY_JIT_COMPILE,
#else
0,
#endif
&errptr);
if (!this->p_code_extra && errptr) {
// log_error("pcre_study error: %s", errptr);
}
if (this->p_code_extra != nullptr) {
pcre_extra* extra = this->p_code_extra;
extra->flags
|= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
extra->match_limit = 10000;
extra->match_limit_recursion = 500;
#ifdef PCRE_STUDY_JIT_COMPILE
// pcre_assign_jit_stack(extra, nullptr, jit_stack());
#endif
}
pcre_fullinfo(
this->p_code, this->p_code_extra, PCRE_INFO_OPTIONS, &this->p_options);
pcre_fullinfo(this->p_code,
this->p_code_extra,
PCRE_INFO_CAPTURECOUNT,
&this->p_capture_count);
pcre_fullinfo(this->p_code,
this->p_code_extra,
PCRE_INFO_NAMECOUNT,
&this->p_named_count);
pcre_fullinfo(this->p_code,
this->p_code_extra,
PCRE_INFO_NAMEENTRYSIZE,
&this->p_name_len);
pcre_fullinfo(this->p_code,
this->p_code_extra,
PCRE_INFO_NAMETABLE,
&this->p_named_entries);
}
#ifdef PCRE_STUDY_JIT_COMPILE
pcre_jit_stack*
pcrepp::jit_stack()
{
static pcre_jit_stack* retval = nullptr;
if (retval == nullptr) {
retval = pcre_jit_stack_alloc(JIT_STACK_MIN_SIZE, JIT_STACK_MAX_SIZE);
}
return retval;
}
size_t
pcrepp::match_partial(pcre_input& pi) const
{
size_t length = pi.pi_length;
int rc;
do {
rc = pcre_exec(this->p_code,
this->p_code_extra.in(),
pi.get_string(),
length,
pi.pi_offset,
PCRE_PARTIAL,
nullptr,
0);
switch (rc) {
case 0:
case PCRE_ERROR_PARTIAL:
return length;
}
if (length > 0) {
length -= 1;
}
} while (length > 0);
return length;
}
const char*
pcrepp::name_for_capture(int index) const
{
for (pcre_named_capture::iterator iter = this->named_begin();
iter != this->named_end();
++iter)
{
if (iter->index() == index) {
return iter->pnc_name;
}
}
return "";
}
int
pcrepp::name_index(const char* name) const
{
int retval = pcre_get_stringnumber(this->p_code, name);
if (retval == PCRE_ERROR_NOSUBSTRING) {
return retval;
}
return retval - 1;
}
#else
# warning "pcrejit is not available, search performance will be degraded"
void
pcrepp::pcre_free_study(pcre_extra* extra)
{
free(extra);
}
#endif
/**
 * Advance c_begin past any leading whitespace in the captured range.
 *
 * @param str The string that this capture's offsets refer to.
 */
void
pcre_context::capture_t::ltrim(const char* str)
{
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so convert before calling it to avoid undefined behavior on
    // bytes with the high bit set.
    while (this->c_begin < this->c_end
           && isspace(static_cast<unsigned char>(str[this->c_begin])))
    {
        this->c_begin += 1;
    }
}

@ -1,617 +0,0 @@
/**
* Copyright (c) 2007-2013, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file pcrepp.hh
*
* A C++ adapter for the pcre library. The interface provided here has a
* different focus than the pcrecpp.h file included in the pcre distribution.
* The standard pcrecpp.h interface is more concerned with regular expressions
* that are digesting data to be used within the program itself, whereas this
* interface deals with regular expressions entered by the user and with
* processing a series of matches on text files.
*/
#ifndef pcrepp_hh
#define pcrepp_hh
#include "config.h"
#ifdef HAVE_PCRE_H
# include <pcre.h>
#elif HAVE_PCRE_PCRE_H
# include <pcre/pcre.h>
#else
# error "pcre.h not found?"
#endif
#include <cassert>
#include <exception>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <stdio.h>
#include <string.h>
#include "base/auto_mem.hh"
#include "base/intern_string.hh"
#include "base/result.h"
#include "scn/util/string_view.h"
class pcrepp;
/**
* Context that tracks captures found during a match operation. This class is a
* base that defines iterator methods and fields, but does not allocate space
* for the capture array.
*/
class pcre_context {
public:
// A begin/end offset pair describing one captured range.
struct capture_t {
capture_t()
{ /* We don't initialize anything since it's a perf hit. */
}
capture_t(int begin, int end) : c_begin(begin), c_end(end)
{
assert(begin <= end);
}
int c_begin;
int c_end;
void ltrim(const char* str);
// True when pos lies in the half-open range [c_begin, c_end).
bool contains(int pos) const
{
return this->c_begin <= pos && pos < this->c_end;
}
// A begin offset of -1 marks a capture that was not set.
bool is_valid() const { return this->c_begin != -1; }
int length() const { return this->c_end - this->c_begin; }
bool empty() const { return this->c_begin == this->c_end; }
};
using iterator = capture_t*;
using const_iterator = const capture_t*;
/** @return The maximum number of strings this context can capture. */
int get_max_count() const { return this->pc_max_count; }
void set_count(int count) { this->pc_count = count; }
int get_count() const { return this->pc_count; }
void set_pcrepp(const pcrepp* src) { this->pc_pcre = src; }
/**
* @return a capture_t that covers all of the text that was matched.
*/
capture_t* all() const { return pc_captures; }
/** @return An iterator to the first capture. */
iterator begin() { return pc_captures + 1; }
/** @return An iterator that refers to the end of the capture array. */
iterator end() { return pc_captures + pc_count; };
// Zero-based capture lookup; slot zero of the array is the whole match, so
// the offset is shifted by one.  Only negative offsets are rejected here.
capture_t* operator[](int offset) const
{
if (offset < 0) {
return nullptr;
}
return &this->pc_captures[offset + 1];
}
// Lookup by group name (defined out-of-line).
capture_t* operator[](const char* name) const;
capture_t* operator[](const std::string& name) const
{
return (*this)[name.c_str()];
}
capture_t* first_valid() const;
protected:
// The capture storage is supplied by the derived class.
pcre_context(capture_t* captures, int max_count)
: pc_captures(captures), pc_max_count(max_count)
{
}
const pcrepp* pc_pcre{nullptr};
capture_t* pc_captures;
int pc_max_count;
int pc_count{0};
};
/**
 * Predicate that accepts captures whose begin offset differs from the value
 * given at construction.
 */
struct capture_if_not {
    capture_if_not(int begin) : cin_begin(begin) {}

    bool operator()(const pcre_context::capture_t& cap) const
    {
        const bool same_begin = (this->cin_begin == cap.c_begin);

        return !same_begin;
    }

    int cin_begin;
};
/**
* A pcre_context that allocates storage for the capture array within the object
* itself.  The buffer has MAX_COUNT + 1 entries: one for the whole match plus
* MAX_COUNT captures.
*/
template<size_t MAX_COUNT>
class pcre_context_static : public pcre_context {
public:
// Only the address of pc_match_buffer is handed to the base class here; the
// buffer itself is not read before it is constructed.
pcre_context_static()
: pcre_context(this->pc_match_buffer, MAX_COUNT + 1){};
private:
capture_t pc_match_buffer[MAX_COUNT + 1];
};
/**
 * A non-owning view of the text to match, together with the offsets that
 * pcrepp::match() reads and updates as it scans.  Construction from
 * temporaries is deleted because only a pointer to the text is stored.
 */
class pcre_input {
public:
    /**
     * @param str The text to match.
     * @param off The offset to start matching from.
     * @param len The length of the text, or -1 to use strlen(str).
     */
    pcre_input(const char* str, size_t off = 0, size_t len = -1)
        : pi_offset(off), pi_next_offset(off), pi_length(len), pi_string(str)
    {
        if (this->pi_length == (size_t) -1) {
            this->pi_length = strlen(str);
        }
    }

    pcre_input(const string_fragment& s)
        : pi_offset(0), pi_next_offset(0), pi_length(s.length()),
          pi_string(s.data())
    {
    }

    pcre_input(const intern_string_t& s)
        : pi_offset(0), pi_next_offset(0), pi_length(s.size()),
          pi_string(s.get())
    {
    }

    pcre_input(const string_fragment&&) = delete;

    pcre_input(const std::string& str, size_t off = 0)
        : pi_offset(off), pi_next_offset(off), pi_length(str.length()),
          pi_string(str.c_str())
    {
    }

    pcre_input(const std::string&&, size_t off = 0) = delete;

    const char* get_string() const { return this->pi_string; }

    const char* get_substr_start(pcre_context::const_iterator iter) const
    {
        return &this->pi_string[iter->c_begin];
    }

    size_t get_substr_len(pcre_context::const_iterator iter) const
    {
        return iter->length();
    }

    /** @return The captured text, or an empty string for an unset capture. */
    std::string get_substr(pcre_context::const_iterator iter) const
    {
        if (iter->c_begin == -1) {
            return "";
        }
        return std::string(&this->pi_string[iter->c_begin], iter->length());
    }

    intern_string_t get_substr_i(pcre_context::const_iterator iter) const
    {
        return intern_string::lookup(&this->pi_string[iter->c_begin],
                                     iter->length());
    }

    string_fragment get_string_fragment(pcre_context::const_iterator iter) const
    {
        return string_fragment::from_byte_range(
            this->pi_string, iter->c_begin, iter->c_end);
    }

    /** @return The text between the current offset and the capture. */
    string_fragment get_up_to(pcre_context::const_iterator iter) const
    {
        return string_fragment::from_byte_range(
            this->pi_string, this->pi_offset, iter->c_begin);
    }

    nonstd::optional<std::string> get_substr_opt(
        pcre_context::const_iterator iter) const
    {
        if (iter->is_valid()) {
            return std::string(&this->pi_string[iter->c_begin], iter->length());
        }
        return nonstd::nullopt;
    }

    scn::string_view to_string_view(pcre_context::const_iterator iter) const
    {
        return scn::string_view{
            &this->pi_string[iter->c_begin],
            &this->pi_string[iter->c_end],
        };
    }

    /**
     * Copy the captured text into dst and NUL-terminate it.  The caller
     * must provide room for iter->length() + 1 bytes.
     */
    void get_substr(pcre_context::const_iterator iter, char* dst) const
    {
        memcpy(dst, &this->pi_string[iter->c_begin], iter->length());
        dst[iter->length()] = '\0';
    }

    void reset_next_offset() { this->pi_next_offset = this->pi_offset; }

    /**
     * Point this input at a different string.
     *
     * @param str The new text to match.
     * @param off The offset to start matching from.
     * @param len The length of the text, or -1 to use strlen(str).
     */
    void reset(const char* str, size_t off = 0, size_t len = -1)
    {
        this->pi_string = str;
        this->pi_offset = off;
        this->pi_next_offset = off;
        // Decide based on the argument, not on the length left over from the
        // previous string; otherwise the -1 default would be stored as-is.
        this->pi_length = (len == (size_t) -1) ? strlen(str) : len;
    }

    void reset(const std::string& str, size_t off = 0)
    {
        this->reset(str.c_str(), off, str.length());
    }

    size_t pi_offset;
    size_t pi_next_offset;
    size_t pi_length;

private:
    const char* pi_string;
};
/**
 * Overlay for one entry of the pattern's name table: a two-byte group
 * number, most significant byte first, followed by the group name.
 */
struct pcre_named_capture {
    /** Steps through the table in strides of the entry size. */
    class iterator {
    public:
        iterator(pcre_named_capture* pnc, size_t name_len)
            : i_named_capture(pnc), i_name_len(name_len)
        {
        }

        iterator() : i_named_capture(nullptr), i_name_len(0) {}

        const pcre_named_capture& operator*() const
        {
            return *this->i_named_capture;
        }

        const pcre_named_capture* operator->() const
        {
            return this->i_named_capture;
        }

        bool operator!=(const iterator& rhs) const
        {
            return this->i_named_capture != rhs.i_named_capture;
        }

        iterator& operator++()
        {
            char* ptr = (char*) this->i_named_capture;

            ptr += this->i_name_len;
            this->i_named_capture = (pcre_named_capture*) ptr;
            return *this;
        }

    private:
        pcre_named_capture* i_named_capture;
        size_t i_name_len;
    };

    /** @return The zero-based index of the group this entry names. */
    int index() const
    {
        // Combine the bytes as unsigned values.  With a signed plain char, a
        // low byte of 0x80 or more would sign-extend and corrupt the result.
        const auto msb = static_cast<unsigned char>(this->pnc_index_msb);
        const auto lsb = static_cast<unsigned char>(this->pnc_index_lsb);

        return ((msb << 8) | lsb) - 1;
    }

    char pnc_index_msb;
    char pnc_index_lsb;
    char pnc_name[];
};
/**
 * Pairs a match context with its input so that captures can be fetched
 * directly by whatever key pcre_context::operator[] accepts.
 */
struct pcre_extractor {
    const pcre_context& pe_context;
    const pcre_input& pe_input;

    /** @return The capture selected by name, as an interned string. */
    template<typename T>
    intern_string_t get_substr_i(T name) const
    {
        auto* cap = this->pe_context[name];

        return this->pe_input.get_substr_i(cap);
    }

    /** @return The capture selected by name, as a std::string. */
    template<typename T>
    std::string get_substr(T name) const
    {
        auto* cap = this->pe_context[name];

        return this->pe_input.get_substr(cap);
    }
};
/**
 * Wrapper around a compiled pcre pattern.  The compiled code is shared
 * between copies through pcre_refcount(); every instance holds one
 * reference that clear() releases.
 */
class pcrepp {
public:
    /** Exception thrown by the constructors that compile a pattern. */
    class error : public std::exception {
    public:
        error(std::string msg, int offset = 0)
            : e_msg(std::move(msg)), e_offset(offset)
        {
        }

        const char* what() const noexcept override
        {
            return this->e_msg.c_str();
        }

        const std::string e_msg;
        int e_offset;
    };

    static std::string quote(const char* unquoted);

    static std::string quote(const std::string& unquoted)
    {
        return quote(unquoted.c_str());
    }

    /** Error value returned by from_str()/shared_from_str(). */
    struct compile_error {
        const char* ce_msg{nullptr};
        int ce_offset{0};
    };

    static Result<pcrepp, compile_error> from_str(std::string pattern,
                                                  int options = 0);

    static Result<std::shared_ptr<pcrepp>, compile_error> shared_from_str(
        std::string pattern, int options = 0);

    /** Adopt already-compiled code; no pattern text is recorded. */
    pcrepp(pcre* code) : p_code(code), p_code_extra(pcre_free_study)
    {
        pcre_refcount(this->p_code, 1);
        this->study();
    }

    pcrepp(std::string pattern, pcre* code)
        : p_code(code), p_pattern(std::move(pattern)),
          p_code_extra(pcre_free_study)
    {
        pcre_refcount(this->p_code, 1);
        this->study();
        this->find_captures(this->p_pattern.c_str());
    }

    /**
     * Compile the pattern with exactly the given options.
     *
     * @throws error if the pattern does not compile.
     */
    explicit pcrepp(const char* pattern, int options = 0)
        : p_pattern(pattern), p_code_extra(pcre_free_study)
    {
        const char* errptr;
        int eoff;

        if ((this->p_code
             = pcre_compile(pattern, options, &errptr, &eoff, nullptr))
            == nullptr)
        {
            throw error(errptr, eoff);
        }

        pcre_refcount(this->p_code, 1);
        this->study();
        this->find_captures(pattern);
    }

    /**
     * Compile the pattern with PCRE_UTF8 added to the given options.
     *
     * @throws error if the pattern does not compile.
     */
    explicit pcrepp(const std::string& pattern, int options = 0)
        : p_pattern(pattern), p_code_extra(pcre_free_study)
    {
        const char* errptr;
        int eoff;

        if ((this->p_code = pcre_compile(
                 pattern.c_str(), options | PCRE_UTF8, &errptr, &eoff, nullptr))
            == nullptr)
        {
            throw error(errptr, eoff);
        }

        pcre_refcount(this->p_code, 1);
        this->study();
        this->find_captures(pattern.c_str());
    }

    pcrepp() {}

    /** Share the compiled code with other and re-study it. */
    pcrepp(const pcrepp& other)
        : p_code(other.p_code), p_pattern(other.p_pattern),
          p_code_extra(pcre_free_study), p_captures(other.p_captures)
    {
        pcre_refcount(this->p_code, 1);
        this->study();
    }

    /**
     * Take over other's study data and cached details.  The compiled code
     * is shared, so other still releases its own reference when destroyed.
     */
    pcrepp(pcrepp&& other)
        : p_code(other.p_code), p_pattern(std::move(other.p_pattern)),
          p_code_extra(pcre_free_study), p_capture_count(other.p_capture_count),
          p_named_count(other.p_named_count), p_name_len(other.p_name_len),
          p_options(other.p_options), p_named_entries(other.p_named_entries),
          p_captures(std::move(other.p_captures))
    {
        pcre_refcount(this->p_code, 1);
        this->p_code_extra = std::move(other.p_code_extra);
    }

    virtual ~pcrepp() { this->clear(); }

    pcrepp& operator=(pcrepp&& other) noexcept
    {
        if (this == &other) {
            return *this;
        }

        // Release the reference to the code held so far; overwriting p_code
        // without doing so would leak the previously compiled pattern.
        this->clear();

        this->p_code = other.p_code;
        pcre_refcount(this->p_code, 1);
        this->p_pattern = std::move(other.p_pattern);
        this->p_code_extra = std::move(other.p_code_extra);
        this->p_capture_count = other.p_capture_count;
        this->p_named_count = other.p_named_count;
        this->p_name_len = other.p_name_len;
        this->p_options = other.p_options;
        this->p_named_entries = other.p_named_entries;
        this->p_captures = std::move(other.p_captures);

        return *this;
    }

    const std::string& get_pattern() const { return this->p_pattern; }

    bool empty() const { return this->p_pattern.empty(); }

    /**
     * Release this instance's reference to the compiled code and reset all
     * cached state.  Safe to call more than once.
     */
    void clear()
    {
        if (this->p_code != nullptr) {
            if (pcre_refcount(this->p_code, -1) == 0) {
                free(this->p_code);
            }
            // Always forget the pointer once our reference is dropped.  If it
            // stayed set while other copies keep the code alive, a later
            // clear() or the destructor would drop a second reference and
            // free the code out from under them.
            this->p_code = nullptr;
        }
        this->p_pattern.clear();
        this->p_code_extra.reset();
        this->p_capture_count = 0;
        this->p_named_count = 0;
        this->p_name_len = 0;
        this->p_options = 0;
        this->p_named_entries = nullptr;
        this->p_captures.clear();
    }

    pcre_named_capture::iterator named_begin() const
    {
        return {this->p_named_entries, static_cast<size_t>(this->p_name_len)};
    }

    pcre_named_capture::iterator named_end() const
    {
        char* ptr = (char*) this->p_named_entries;

        ptr += this->p_named_count * this->p_name_len;
        return {(pcre_named_capture*) ptr,
                static_cast<size_t>(this->p_name_len)};
    }

    /** @return The pattern-text ranges recorded by find_captures(). */
    const std::vector<pcre_context::capture_t>& captures() const
    {
        return this->p_captures;
    }

    std::vector<pcre_context::capture_t>::const_iterator cap_begin() const
    {
        return this->p_captures.begin();
    }

    std::vector<pcre_context::capture_t>::const_iterator cap_end() const
    {
        return this->p_captures.end();
    }

    int name_index(const std::string& name) const
    {
        return this->name_index(name.c_str());
    }

    int name_index(const char* name) const;

    const char* name_for_capture(int index) const;

    int get_capture_count() const { return this->p_capture_count; }

    bool match(pcre_context& pc, pcre_input& pi, int options = 0) const;

    /** Match into a freshly created context, returned only on success. */
    template<size_t MATCH_COUNT>
    nonstd::optional<pcre_context_static<MATCH_COUNT>> match(pcre_input& pi,
                                                             int options
                                                             = 0) const
    {
        pcre_context_static<MATCH_COUNT> pc;

        if (this->match(pc, pi, options)) {
            return pc;
        }
        return nonstd::nullopt;
    }

    std::string replace(const char* str, const char* repl) const;

    size_t match_partial(pcre_input& pi) const;

    /**
     * Hand the compiled code to the caller without dropping the reference
     * this instance held, then reset this instance.
     */
    pcre* release()
    {
        auto retval = std::exchange(this->p_code, nullptr);

        this->clear();
        return retval;
    }

// #undef PCRE_STUDY_JIT_COMPILE
#ifdef PCRE_STUDY_JIT_COMPILE
    static pcre_jit_stack* jit_stack();

#else
    static void pcre_free_study(pcre_extra*);
#endif

    void study();

    void find_captures(const char* pattern);

    pcre* p_code{nullptr};
    std::string p_pattern;
    auto_mem<pcre_extra> p_code_extra;
    int p_capture_count{0};
    int p_named_count{0};
    int p_name_len{0};
    unsigned long p_options{0};
    pcre_named_capture* p_named_entries{nullptr};
    std::vector<pcre_context::capture_t> p_captures;
};
// Convenience subclass that forwards its constructor arguments to pcrepp
// with the compile-time `options` value appended as the last argument.
template<int options = 0>
class pcrepp_with_options : public pcrepp {
public:
template<typename... Args>
pcrepp_with_options(Args... args) : pcrepp(args..., options)
{
}
};
#endif

@ -0,0 +1,246 @@
/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest/doctest.h"
#include "pcre2pp.hh"
// An unterminated character class must fail to compile and report the
// offset at the end of the pattern.
TEST_CASE("bad pattern")
{
    const auto pattern = string_fragment::from_const("[abc");
    auto compile_res = lnav::pcre2pp::code::from(pattern);

    CHECK(compile_res.isErr());

    auto ce = compile_res.unwrapErr();

    CHECK(ce.ce_offset == 4);
}
// The named-capture view must yield exactly the named groups of the
// pattern, each with its group number and name.
TEST_CASE("named captures")
{
    auto compile_res = lnav::pcre2pp::code::from(
        string_fragment::from_const("(?<abc>a)(b)(?<def>c)"));

    // unwrap() below is only meaningful when compilation succeeded.
    REQUIRE(compile_res.isOk());

    const std::vector<std::pair<size_t, string_fragment>> expected_caps = {
        {1, string_fragment::from_const("abc")},
        {3, string_fragment::from_const("def")},
    };

    size_t caps_index = 0;
    auto co = compile_res.unwrap();

    for (const auto cap : co.get_named_captures()) {
        // Stop before indexing past the expectations if too many are found.
        REQUIRE(caps_index < expected_caps.size());

        const auto& expected_cap = expected_caps[caps_index];

        CHECK(expected_cap.first == cap.get_index());
        CHECK(expected_cap.second == cap.get_name());
        caps_index += 1;
    }

    // Without this, the test would pass when no captures are yielded at all.
    CHECK(caps_index == expected_caps.size());
}
// Walk every key=value pair in the input and print what was captured.
TEST_CASE("match")
{
    static const char INPUT[] = "key1=1234;key2=5678;";

    const auto input_sf = string_fragment::from_const(INPUT);
    auto co
        = lnav::pcre2pp::code::from_const(R"((?<key>\w+)=(?<value>[^;]+);)");

    co.capture_from(input_sf).for_each([](lnav::pcre2pp::match_data& md) {
        printf("got '%s' %s = %s\n",
               md[0]->to_string().c_str(),
               md[1]->to_string().c_str(),
               md[2]->to_string().c_str());
    });
}
// match_partial() reports how much of the input could partially match.
TEST_CASE("partial")
{
    static const char INPUT[] = "key1=1234";

    const auto input_sf = string_fragment::from_const(INPUT);
    auto co = lnav::pcre2pp::code::from_const(R"([a-z]+=.*)");
    const auto matched = co.match_partial(input_sf);

    CHECK(matched == 3);
}
// A named group reports its name; an unnamed group reports nullptr.
TEST_CASE("capture_name")
{
    auto co = lnav::pcre2pp::code::from_const("(?<abc>def)(ghi)");

    CHECK(co.get_capture_count() == 2);

    const auto first_name
        = string_fragment::from_c_str(co.get_name_for_capture(1));

    CHECK(first_name == "abc");
    CHECK(co.get_name_for_capture(2) == nullptr);
}
// A plain "(DEFINE)" group is an ordinary capturing group.
TEST_CASE("get_capture_count")
{
    auto co = lnav::pcre2pp::code::from_const("(DEFINE)");
    const auto group_count = co.get_capture_count();

    CHECK(group_count == 1);
}
// get_captures() returns the pattern text of each capturing group.
TEST_CASE("get_captures")
{
    auto co = lnav::pcre2pp::code::from_const(R"((?<abc>\w+)-(def)-)");

    CHECK(co.get_capture_count() == 2);

    const auto caps = co.get_captures();

    CHECK(caps.size() == 2);
    CHECK(caps[0].to_string() == R"((?<abc>\w+))");
    CHECK(caps[1].to_string() == R"((def))");
}
// Every match, including the empty ones, is wrapped in braces.
TEST_CASE("replace")
{
    static const char INPUT[] = "test 1 2 3";

    const auto in = string_fragment::from_const(INPUT);
    auto co = lnav::pcre2pp::code::from_const(R"(\w*)");
    const auto res = co.replace(in, R"({\0})");

    CHECK(res == "{test}{} {1}{} {2}{} {3}{}");
}
// An empty input still produces one (empty) match to replace.
TEST_CASE("replace-empty")
{
    static const char INPUT[] = "";

    const auto in = string_fragment::from_const(INPUT);
    auto co = lnav::pcre2pp::code::from_const(R"(\w*)");
    const auto res = co.replace(in, R"({\0})");

    CHECK(res == "{}");
}
// Print the range of every match of a pattern that can match empty text.
TEST_CASE("for_each-all")
{
    static const char INPUT[] = "Hello, World!\n";

    const auto in = string_fragment::from_const(INPUT);
    auto co = lnav::pcre2pp::code::from_const(R"(.*)");

    co.capture_from(in).for_each([](lnav::pcre2pp::match_data& md) {
        printf("range %d:%d\n", md[0]->sf_begin, md[0]->sf_end);
    });
}
// Two parenthesized groups give a capture count of two.
TEST_CASE("capture_count")
{
    auto co = lnav::pcre2pp::code::from_const(R"(^(\w+)=([^;]+);)");
    const auto group_count = co.get_capture_count();

    CHECK(group_count == 2);
}
// None of these patterns contains a capturing group.
TEST_CASE("no-caps")
{
    const static std::string empty_cap_regexes[] = {
        "foo (?:bar)",
        "foo [(]",
        "foo \\Q(bar)\\E",
        "(?i)",
    };

    for (const auto& re : empty_cap_regexes) {
        auto co = lnav::pcre2pp::code::from(re).unwrap();
        const auto caps = co.get_captures();

        CHECK(caps.empty());
    }
}
// A pattern built on a (?(DEFINE)...) subroutine must match a dotted quad
// from the start of the input.
TEST_CASE("ipmatcher")
{
    const auto inp = string_fragment::from_const("192.168.1.1");
    auto co = lnav::pcre2pp::code::from_const(
        R"((?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)");
    auto find_res = co.find_in(inp).ignore_error();

    CHECK(find_res.has_value());
    CHECK(find_res->f_all.sf_begin == 0);
}
// A non-capturing group nested in a capturing one is not reported.
TEST_CASE("get_captures-nested")
{
    auto re = lnav::pcre2pp::code::from_const("foo (bar (?:baz)?)");
    const auto caps = re.get_captures();

    CHECK(caps.size() == 1);
    CHECK(caps[0].sf_begin == 4);
    CHECK(caps[0].sf_end == 18);
    CHECK(caps[0].length() == 14);
}
// Three adjacent groups are reported with their ranges in the pattern.
TEST_CASE("get_captures-basic")
{
    auto re = lnav::pcre2pp::code::from_const("(a)(b)(c)");
    const auto caps = re.get_captures();

    // doctest macros are used instead of assert() so that the checks are
    // still compiled under NDEBUG and failures are reported by the runner.
    REQUIRE(caps.size() == 3);
    CHECK(caps[0].sf_begin == 0);
    CHECK(caps[0].sf_end == 3);
    CHECK(caps[1].sf_begin == 3);
    CHECK(caps[1].sf_end == 6);
    CHECK(caps[2].sf_begin == 6);
    CHECK(caps[2].sf_end == 9);
}
// Escaped parentheses are literal text, not a capturing group.
TEST_CASE("get_captures-escape")
{
    auto re = lnav::pcre2pp::code::from_const("\\(a\\)(b)");
    const auto caps = re.get_captures();

    // doctest macros are used instead of assert() so that the checks are
    // still compiled under NDEBUG and failures are reported by the runner.
    REQUIRE(caps.size() == 1);
    CHECK(caps[0].sf_begin == 5);
    CHECK(caps[0].sf_end == 8);
}
// The (?<name>...) form is reported as a capture.
TEST_CASE("get_captures-named")
{
    auto re = lnav::pcre2pp::code::from_const("(?<named>b)");
    const auto caps = re.get_captures();

    // doctest macros are used instead of assert() so that the checks are
    // still compiled under NDEBUG and failures are reported by the runner.
    REQUIRE(caps.size() == 1);
    CHECK(caps[0].sf_begin == 0);
    CHECK(caps[0].sf_end == 11);
}
// The (?P<name>...) form is reported as a capture.
TEST_CASE("get_captures-namedP")
{
    auto re = lnav::pcre2pp::code::from_const("(?P<named>b)");
    const auto caps = re.get_captures();

    // doctest macros are used instead of assert() so that the checks are
    // still compiled under NDEBUG and failures are reported by the runner.
    REQUIRE(caps.size() == 1);
    CHECK(caps[0].sf_begin == 0);
    CHECK(caps[0].sf_end == 12);
}
// A named group written in the (?'name'...) form is reported as one
// capture that spans the entire group, including the name syntax.
TEST_CASE("get_captures-namedq")
{
    auto re = lnav::pcre2pp::code::from_const("(?'named'b)");
    const auto caps = re.get_captures();

    // REQUIRE/CHECK replace assert() so the checks survive NDEBUG builds
    // and a wrong count cannot lead to an out-of-range index.
    REQUIRE(caps.size() == 1);
    CHECK(caps[0].sf_begin == 0);
    CHECK(caps[0].sf_end == 11);
}

@ -1,192 +0,0 @@
/**
* Copyright (c) 2007-2012, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "pcrepp/pcrepp.hh"
// Assertion-driven checks of the legacy pcrepp wrapper.  Each brace-scoped
// section below exercises one behavior.  All checks use assert(), so they
// are compiled out when NDEBUG is defined.  retval is never modified after
// initialization, so the function always returns EXIT_SUCCESS when it
// reaches the end.
int
main(int argc, char* argv[])
{
// Match context shared by every section that calls match().
pcre_context_static<30> context;
int retval = EXIT_SUCCESS;
// A pattern using a (?(DEFINE)...) subroutine is expected to match a
// dotted-quad address with the overall match starting at offset 0.
{
pcrepp ipmatcher(
R"((?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)");
pcre_input pi("192.168.1.1");
assert(ipmatcher.match(context, pi));
assert(context.all()->c_begin == 0);
}
// A plain "(DEFINE)" group (no "(?" prefix) is expected to count as one
// capture.
{
pcrepp ipmatcher(R"((DEFINE))");
assert(ipmatcher.get_capture_count() == 1);
}
// A pattern that does not occur in the input is expected not to match.
{
pcrepp nomatch("nothing-to-match");
pcre_input pi("dummy");
assert(!nomatch.match(context, pi));
}
// Calling match() twice on the same input is expected to yield successive
// matches: first "a"/"1" (overall span 0..3), then "b"/"2".
{
pcrepp match1("(\\w*)=(\\d+)");
pcre_input pi("a=1 b=2");
pcre_context::capture_t* cap;
assert(match1.match(context, pi));
cap = context.all();
assert(cap->c_begin == 0);
assert(cap->c_end == 3);
// begin()/end() delimit the two capture groups of this pattern.
assert((context.end() - context.begin()) == 2);
assert(pi.get_substr(context.begin()) == "a");
assert(pi.get_substr(context.begin() + 1) == "1");
assert(pi.get_substr(context[1]) == "1");
assert(match1.match(context, pi));
assert((context.end() - context.begin()) == 2);
assert(pi.get_substr(context.begin()) == "b");
assert(pi.get_substr(context.begin() + 1) == "2");
}
// Constructs a pcrepp from an empty pattern; nothing is asserted here.
{
pcrepp match2("");
}
// Named captures: iteration is expected to visit "var1" then "var2",
// name_index("var2") is expected to be 1, and a capture can be looked up
// by name after a match.
{
pcrepp match3("(?<var1>\\d+)(?<var2>\\w+)");
pcre_named_capture::iterator iter;
const char* expected_names[] = {
"var1",
"var2",
};
int index = 0;
for (iter = match3.named_begin(); iter != match3.named_end();
++iter, index++)
{
assert(strcmp(iter->pnc_name, expected_names[index]) == 0);
}
assert(match3.name_index("var2") == 1);
pcre_input pi("123foo");
match3.match(context, pi);
assert(pi.get_substr(context["var1"]) == "123");
}
// get_substr() with a hand-built capture of (1, 4) over an input whose
// literal starts with a NUL byte is expected to return "foo".
{
pcre_context::capture_t cap(1, 4);
pcre_input pi("\0foo", 0, 4);
assert("foo" == pi.get_substr(&cap));
}
// Patterns containing parentheses that are not capturing groups: a
// non-capturing group, a paren in a character class, a \Q...\E quoted
// paren, and an inline option.  The nullptr entry terminates the loop.
const char* empty_cap_regexes[] = {
"foo (?:bar)",
"foo [(]",
"foo \\Q(bar)\\E",
"(?i)",
nullptr,
};
for (int lpc = 0; empty_cap_regexes[lpc]; lpc++) {
pcrepp re(empty_cap_regexes[lpc]);
assert(re.captures().empty());
}
// The remaining sections check captures(): the c_begin/c_end values
// asserted below correspond to positions of each group within the pattern
// text itself.
// A group containing a non-capturing group is one capture.
{
pcrepp re("foo (bar (?:baz)?)");
assert(re.captures().size() == 1);
assert(re.captures()[0].c_begin == 4);
assert(re.captures()[0].c_end == 18);
assert(re.captures()[0].length() == 14);
}
// Three adjacent groups are reported in order.
{
pcrepp re("(a)(b)(c)");
assert(re.captures().size() == 3);
assert(re.captures()[0].c_begin == 0);
assert(re.captures()[0].c_end == 3);
assert(re.captures()[1].c_begin == 3);
assert(re.captures()[1].c_end == 6);
assert(re.captures()[2].c_begin == 6);
assert(re.captures()[2].c_end == 9);
}
// Escaped parentheses are not captures; only "(b)" is reported.
{
pcrepp re("\\(a\\)(b)");
assert(re.captures().size() == 1);
assert(re.captures()[0].c_begin == 5);
assert(re.captures()[0].c_end == 8);
}
// Named group, (?<name>...) form.
{
pcrepp re("(?<named>b)");
assert(re.captures().size() == 1);
assert(re.captures()[0].c_begin == 0);
assert(re.captures()[0].c_end == 11);
}
// Named group, (?P<name>...) form.
{
pcrepp re("(?P<named>b)");
assert(re.captures().size() == 1);
assert(re.captures()[0].c_begin == 0);
assert(re.captures()[0].c_end == 12);
}
// Named group, (?'name'...) form.
{
pcrepp re("(?'named'b)");
assert(re.captures().size() == 1);
assert(re.captures()[0].c_begin == 0);
assert(re.captures()[0].c_end == 11);
}
return retval;
}

@ -35,25 +35,26 @@
void
pretty_printer::append_to(attr_line_t& al)
{
auto& pi = this->pp_scanner->get_input();
pcre_context_static<30> pc;
data_token_t dt;
this->pp_scanner->reset();
if (pi.pi_offset > 0) {
pcre_context::capture_t leading_cap = {
if (this->pp_scanner->get_init_offset() > 0) {
data_scanner::capture_t leading_cap = {
0,
static_cast<int>(pi.pi_offset),
this->pp_scanner->get_init_offset(),
};
// this->pp_stream << pi.get_substr(&leading_cap);
this->pp_values.emplace_back(DT_WORD, leading_cap);
}
while (this->pp_scanner->tokenize2(pc, dt)) {
element el(dt, pc);
this->pp_scanner->reset();
while (true) {
auto tok_res = this->pp_scanner->tokenize2();
if (!tok_res) {
break;
}
element el(tok_res->tr_token, tok_res->tr_capture);
switch (dt) {
switch (el.e_token) {
case DT_XML_DECL_TAG:
case DT_XML_EMPTY_TAG:
if (this->pp_is_xml && this->pp_line_length > 0) {
@ -71,7 +72,7 @@ pretty_printer::append_to(attr_line_t& al)
this->pp_interval_state.back().is_start
= this->pp_stream.tellp();
this->pp_interval_state.back().is_name
= pi.get_substr(&el.e_capture);
= tok_res->to_string();
this->descend();
} else {
this->pp_values.emplace_back(el);
@ -118,7 +119,8 @@ pretty_printer::append_to(attr_line_t& al)
break;
case DT_WHITE:
if (this->pp_values.empty() && this->pp_depth == 0
&& this->pp_line_length == 0) {
&& this->pp_line_length == 0)
{
this->pp_leading_indent = el.e_capture.length();
continue;
}
@ -181,16 +183,17 @@ pretty_printer::write_element(const pretty_printer::element& el)
}
return;
}
auto& pi = this->pp_scanner->get_input();
if (this->pp_line_length == 0) {
this->append_indent();
}
ssize_t start_size = this->pp_stream.tellp();
if (el.e_token == DT_QUOTED_STRING) {
auto_mem<char> unquoted_str((char*) malloc(el.e_capture.length() + 1));
const char* start = pi.get_substr_start(&el.e_capture);
unquote(unquoted_str.in(), start, el.e_capture.length());
data_scanner ds(unquoted_str.in());
const char* start
= this->pp_scanner->to_string_fragment(el.e_capture).data();
auto unq_len = unquote(unquoted_str.in(), start, el.e_capture.length());
data_scanner ds(
string_fragment::from_bytes(unquoted_str.in(), unq_len));
string_attrs_t sa;
pretty_printer str_pp(
&ds, sa, this->pp_leading_indent + this->pp_depth * 4);
@ -214,10 +217,11 @@ pretty_printer::write_element(const pretty_printer::element& el)
this->pp_stream << start[el.e_capture.length() - 1]
<< start[el.e_capture.length() - 1];
} else {
this->pp_stream << pi.get_substr(&el.e_capture);
this->pp_stream
<< this->pp_scanner->to_string_fragment(el.e_capture);
}
} else {
this->pp_stream << pi.get_substr(&el.e_capture);
this->pp_stream << this->pp_scanner->to_string_fragment(el.e_capture);
int shift_amount
= start_size - el.e_capture.c_begin - this->pp_shift_accum;
shift_string_attrs(this->pp_attrs, el.e_capture.c_begin, shift_amount);
@ -247,8 +251,7 @@ pretty_printer::append_indent()
bool
pretty_printer::flush_values(bool start_on_depth)
{
nonstd::optional<pcre_context::capture_t> last_key;
auto& pi = this->pp_scanner->get_input();
nonstd::optional<data_scanner::capture_t> last_key;
bool retval = false;
while (!this->pp_values.empty()) {
@ -266,7 +269,9 @@ pretty_printer::flush_values(bool start_on_depth)
case DT_EQUALS:
if (last_key) {
this->pp_interval_state.back().is_name
= pi.get_substr(&last_key.value());
= this->pp_scanner
->to_string_fragment(last_key.value())
.to_string();
if (!this->pp_interval_state.back().is_name.empty()) {
this->pp_interval_state.back().is_start
= static_cast<ssize_t>(this->pp_stream.tellp());
@ -278,7 +283,8 @@ pretty_printer::flush_values(bool start_on_depth)
break;
}
if (start_on_depth
&& (el.e_token == DT_LSQUARE || el.e_token == DT_LCURLY)) {
&& (el.e_token == DT_LSQUARE || el.e_token == DT_LCURLY))
{
if (this->pp_line_length > 0) {
this->pp_stream << std::endl;
}

@ -48,18 +48,13 @@
class pretty_printer {
public:
struct element {
element(data_token_t token, pcre_context& pc)
: e_token(token), e_capture(*pc.all())
{
}
element(data_token_t token, pcre_context::capture_t& cap)
element(data_token_t token, data_scanner::capture_t& cap)
: e_token(token), e_capture(cap)
{
}
data_token_t e_token;
pcre_context::capture_t e_capture;
data_scanner::capture_t e_capture;
};
pretty_printer(data_scanner* ds, string_attrs_t sa, int leading_indent = 0)
@ -67,13 +62,15 @@ public:
pp_attrs(std::move(sa))
{
this->pp_body_lines.push(0);
pcre_context_static<30> pc;
data_token_t dt;
this->pp_scanner->reset();
while (this->pp_scanner->tokenize2(pc, dt)) {
if (dt == DT_XML_CLOSE_TAG || dt == DT_XML_DECL_TAG) {
while (true) {
auto tok_res = this->pp_scanner->tokenize2();
if (!tok_res) {
break;
}
if (tok_res->tr_token == DT_XML_CLOSE_TAG
|| tok_res->tr_token == DT_XML_DECL_TAG)
{
pp_is_xml = true;
break;
}

@ -35,7 +35,7 @@
#include "base/snippet_highlighters.hh"
#include "base/string_util.hh"
#include "config.h"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "shlex.hh"
#include "sql_help.hh"
#include "sql_util.hh"
@ -145,19 +145,23 @@ readline_regex_highlighter(attr_line_t& al, int x)
void
readline_command_highlighter_int(attr_line_t& al, int x, line_range sub)
{
static const pcrepp RE_PREFIXES(
static const auto RE_PREFIXES = lnav::pcre2pp::code::from_const(
R"(^:(filter-in|filter-out|delete-filter|enable-filter|disable-filter|highlight|clear-highlight|create-search-table\s+[^\s]+\s+))");
static const pcrepp SH_PREFIXES(
static const auto SH_PREFIXES = lnav::pcre2pp::code::from_const(
"^:(eval|open|append-to|write-to|write-csv-to|write-json-to)");
static const pcrepp SQL_PREFIXES("^:(filter-expr|mark-expr)");
static const pcrepp IDENT_PREFIXES("^:(tag|untag|delete-tags)");
static const pcrepp COLOR_PREFIXES("^:(config)");
static const pcrepp COLOR_RE("(#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))");
static const auto SQL_PREFIXES
= lnav::pcre2pp::code::from_const("^:(filter-expr|mark-expr)");
static const auto IDENT_PREFIXES
= lnav::pcre2pp::code::from_const("^:(tag|untag|delete-tags)");
static const auto COLOR_PREFIXES
= lnav::pcre2pp::code::from_const("^:(config)");
static const auto COLOR_RE = lnav::pcre2pp::code::from_const(
"(#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))");
attr_line_builder alb(al);
const auto& line = al.get_string();
pcre_context_static<30> pc;
pcre_input pi(&line[sub.lr_start], 0, sub.length());
auto in_frag
= string_fragment::from_str_range(line, sub.lr_start, sub.lr_end);
size_t ws_index;
ws_index = line.find(' ', sub.lr_start);
@ -166,43 +170,37 @@ readline_command_highlighter_int(attr_line_t& al, int x, line_range sub)
alb.overlay_attr(line_range(sub.lr_start + 1, ws_index),
VC_ROLE.value(role_t::VCR_KEYWORD));
if (RE_PREFIXES.match(pc, pi)) {
if (RE_PREFIXES.find_in(in_frag).ignore_error()) {
lnav::snippets::regex_highlighter(
al, x, line_range{(int) ws_index, sub.lr_end});
}
pi.reset(&line[sub.lr_start], 0, sub.length());
if (SH_PREFIXES.match(pc, pi)) {
if (SH_PREFIXES.find_in(in_frag).ignore_error()) {
readline_shlex_highlighter_int(
al, x, line_range{(int) ws_index, sub.lr_end});
}
pi.reset(&line[sub.lr_start], 0, sub.length());
if (SQL_PREFIXES.match(pc, pi)) {
if (SQL_PREFIXES.find_in(in_frag).ignore_error()) {
readline_sqlite_highlighter_int(
al, x, line_range{(int) ws_index, sub.lr_end});
}
}
pi.reset(&line[sub.lr_start], 0, sub.length());
if (COLOR_PREFIXES.match(pc, pi)) {
pi.reset(&line[sub.lr_start], 0, sub.length());
if (COLOR_RE.match(pc, pi)) {
auto* cap = pc[0];
auto hash_color = pi.get_substr(cap);
styling::color_unit::from_str(hash_color)
.then([&](const auto& rgb_fg) {
auto color = view_colors::singleton().match_color(rgb_fg);
alb.template overlay_attr(
line_range{sub.lr_start + cap->c_begin,
sub.lr_start + cap->c_begin + 1},
VC_STYLE.value(text_attrs{
A_BOLD,
color,
}));
});
}
if (COLOR_PREFIXES.find_in(in_frag).ignore_error()) {
COLOR_RE.capture_from(in_frag).for_each(
[&alb](lnav::pcre2pp::match_data& md) {
styling::color_unit::from_str(md[0].value())
.then([&](const auto& rgb_fg) {
auto color
= view_colors::singleton().match_color(rgb_fg);
alb.template overlay_attr(to_line_range(md[0].value()),
VC_STYLE.value(text_attrs{
A_BOLD,
color,
}));
});
});
}
pi.reset(&line[sub.lr_start], 0, sub.length());
if (IDENT_PREFIXES.match(pc, pi) && ws_index != std::string::npos) {
if (IDENT_PREFIXES.find_in(in_frag).ignore_error()
&& ws_index != std::string::npos)
{
size_t start = ws_index, last;
do {
@ -308,7 +306,7 @@ readline_shlex_highlighter_int(attr_line_t& al, int x, line_range sub)
{
attr_line_builder alb(al);
const auto& str = al.get_string();
pcre_context::capture_t cap;
string_fragment cap;
shlex_token_t token;
nonstd::optional<int> quote_start;
shlex lexer(string_fragment{al.al_string.data(), sub.lr_start, sub.lr_end});
@ -316,49 +314,50 @@ readline_shlex_highlighter_int(attr_line_t& al, int x, line_range sub)
while (lexer.tokenize(cap, token)) {
switch (token) {
case shlex_token_t::ST_ERROR:
alb.overlay_attr(line_range(sub.lr_start + cap.c_begin,
sub.lr_start + cap.c_end),
alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin,
sub.lr_start + cap.sf_end),
VC_STYLE.value(text_attrs{A_REVERSE}));
alb.overlay_attr(line_range(sub.lr_start + cap.c_begin,
sub.lr_start + cap.c_end),
alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin,
sub.lr_start + cap.sf_end),
VC_ROLE.value(role_t::VCR_ERROR));
break;
case shlex_token_t::ST_TILDE:
case shlex_token_t::ST_ESCAPE:
alb.overlay_attr(line_range(sub.lr_start + cap.c_begin,
sub.lr_start + cap.c_end),
alb.overlay_attr(line_range(sub.lr_start + cap.sf_begin,
sub.lr_start + cap.sf_end),
VC_ROLE.value(role_t::VCR_SYMBOL));
break;
case shlex_token_t::ST_DOUBLE_QUOTE_START:
case shlex_token_t::ST_SINGLE_QUOTE_START:
quote_start = sub.lr_start + cap.c_begin;
quote_start = sub.lr_start + cap.sf_begin;
break;
case shlex_token_t::ST_DOUBLE_QUOTE_END:
case shlex_token_t::ST_SINGLE_QUOTE_END:
alb.overlay_attr(
line_range(quote_start.value(), sub.lr_start + cap.c_end),
line_range(quote_start.value(), sub.lr_start + cap.sf_end),
VC_ROLE.value(role_t::VCR_STRING));
quote_start = nonstd::nullopt;
break;
case shlex_token_t::ST_VARIABLE_REF:
case shlex_token_t::ST_QUOTED_VARIABLE_REF: {
int extra = token == shlex_token_t::ST_VARIABLE_REF ? 0 : 1;
auto ident = str.substr(sub.lr_start + cap.c_begin + 1 + extra,
auto ident = str.substr(sub.lr_start + cap.sf_begin + 1 + extra,
cap.length() - 1 - extra * 2);
alb.overlay_attr(
line_range(sub.lr_start + cap.c_begin,
sub.lr_start + cap.c_begin + 1 + extra),
line_range(sub.lr_start + cap.sf_begin,
sub.lr_start + cap.sf_begin + 1 + extra),
VC_ROLE.value(role_t::VCR_SYMBOL));
alb.overlay_attr(
line_range(sub.lr_start + cap.c_begin + 1 + extra,
sub.lr_start + cap.c_end - extra),
VC_ROLE.value(x == sub.lr_start + cap.c_end
|| cap.contains(x)
? role_t::VCR_SYMBOL
: role_t::VCR_IDENTIFIER));
line_range(sub.lr_start + cap.sf_begin + 1 + extra,
sub.lr_start + cap.sf_end - extra),
VC_ROLE.value(
x == sub.lr_start + cap.sf_end
|| (cap.sf_begin <= x && x < cap.sf_end)
? role_t::VCR_SYMBOL
: role_t::VCR_IDENTIFIER));
if (extra) {
alb.overlay_attr_for_char(
sub.lr_start + cap.c_end - 1,
sub.lr_start + cap.sf_end - 1,
VC_ROLE.value(role_t::VCR_SYMBOL));
}
break;
@ -412,7 +411,7 @@ readline_lnav_highlighter_int(attr_line_t& al, int x, line_range sub)
void
readline_lnav_highlighter(attr_line_t& al, int x)
{
static const pcrepp COMMENT_RE{R"(^\s*#)"};
static const auto COMMENT_RE = lnav::pcre2pp::code::from_const(R"(^\s*#)");
attr_line_builder alb(al);
size_t start = 0, lf_pos;
@ -426,10 +425,11 @@ readline_lnav_highlighter(attr_line_t& al, int x)
continue;
}
pcre_input pi(&al.al_string[line.lr_start], 0, line.length());
pcre_context_static<30> pc;
auto line_frag = string_fragment::from_str_range(
al.al_string, line.lr_start, line.lr_end);
if (COMMENT_RE.match(pc, pi)) {
auto find_res = COMMENT_RE.find_in(line_frag).ignore_error();
if (find_res.has_value()) {
if (section_start) {
readline_lnav_highlighter_int(al,
x,
@ -439,10 +439,8 @@ readline_lnav_highlighter(attr_line_t& al, int x)
});
section_start = nonstd::nullopt;
}
const auto* cap = pc.all();
alb.overlay_attr(
line_range{line.lr_start + cap->c_begin, (int) lf_pos},
VC_ROLE.value(role_t::VCR_COMMENT));
alb.overlay_attr(line_range{find_res->f_all.sf_begin, line.lr_end},
VC_ROLE.value(role_t::VCR_COMMENT));
} else {
switch (al.al_string[line.lr_start]) {
case ':':

@ -128,16 +128,19 @@ add_text_possibilities(readline_curses* rlc,
static const std::regex re_escape(R"(([.\^$*+?()\[\]{}\\|]))");
static const std::regex re_escape_no_dot(R"(([\^$*+?()\[\]{}\\|]))");
pcre_context_static<30> pc;
data_scanner ds(str);
data_token_t dt;
while (ds.tokenize2(pc, dt)) {
if (pc[0]->length() < 4) {
while (true) {
auto tok_res = ds.tokenize2();
if (!tok_res) {
break;
}
if (tok_res->tr_capture.length() < 4) {
continue;
}
switch (dt) {
switch (tok_res->tr_token) {
case DT_DATE:
case DT_TIME:
case DT_WHITE:
@ -148,7 +151,7 @@ add_text_possibilities(readline_curses* rlc,
switch (tq) {
case text_quoting::sql: {
auto token_value = ds.get_input().get_substr(pc.all());
auto token_value = tok_res->to_string();
auto_mem<char, sqlite3_free> quoted_token;
quoted_token = sqlite3_mprintf("%Q", token_value.c_str());
@ -156,12 +159,9 @@ add_text_possibilities(readline_curses* rlc,
break;
}
default: {
std::string token_value, token_value_no_dot;
token_value_no_dot = token_value
= ds.get_input().get_substr(pc.all());
token_value
= std::regex_replace(token_value, re_escape, R"(\\\1)");
auto token_value_no_dot = tok_res->to_string();
auto token_value = std::regex_replace(
token_value_no_dot, re_escape, R"(\\\1)");
token_value_no_dot = std::regex_replace(
token_value_no_dot, re_escape_no_dot, R"(\\\1)");
rlc->add_possibility(context, type, token_value);
@ -172,10 +172,15 @@ add_text_possibilities(readline_curses* rlc,
}
}
switch (dt) {
switch (tok_res->tr_token) {
case DT_QUOTED_STRING:
add_text_possibilities(
rlc, context, type, ds.get_input().get_substr(pc[0]), tq);
rlc,
context,
type,
ds.to_string_fragment(tok_res->tr_inner_capture)
.to_string(),
tq);
break;
default:
break;
@ -416,22 +421,21 @@ add_config_possibilities()
const std::string& path,
void* mem) {
if (jph.jph_children) {
if (!jph.jph_regex->p_named_count) {
const auto named_caps = jph.jph_regex->get_named_captures();
if (named_caps.empty()) {
rc->add_possibility(ln_mode_t::COMMAND, "config-option", path);
}
for (auto named_iter = jph.jph_regex->named_begin();
named_iter != jph.jph_regex->named_end();
++named_iter)
{
if (visited.count(named_iter->pnc_name) == 0) {
for (const auto named_cap : named_caps) {
if (visited.count(named_cap.get_name().to_string()) == 0) {
rc->clear_possibilities(ln_mode_t::COMMAND,
named_iter->pnc_name);
visited.insert(named_iter->pnc_name);
named_cap.get_name().to_string());
visited.insert(named_cap.get_name().to_string());
}
ghc::filesystem::path path_obj(path);
rc->add_possibility(ln_mode_t::COMMAND,
named_iter->pnc_name,
named_cap.get_name().to_string(),
path_obj.parent_path().filename().string());
}
} else {

@ -35,7 +35,7 @@
#include "lnav_config.hh"
#include "log_format.hh"
#include "log_format_ext.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "regex101.client.hh"
#include "session_data.hh"
#include "yajlpp/yajlpp.hh"
@ -47,8 +47,10 @@ regex101::import(const std::string& url,
const std::string& name,
const std::string& pat_name)
{
static const pcrepp USER_URL{R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)"};
static const pcrepp NAME_RE{R"(^\w+$)"};
static const auto USER_URL = lnav::pcre2pp::code::from_const(
R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)");
static thread_local auto URL_MATCH_DATA = USER_URL.create_match_data();
static const auto NAME_RE = lnav::pcre2pp::code::from_const(R"(^\w+$)");
if (url.empty()) {
return Err(lnav::console::user_message::error(
@ -76,11 +78,9 @@ regex101::import(const std::string& url,
}
}
pcre_context_static<30> pc_name;
pcre_input pi_name{name};
if (!NAME_RE.match(pc_name, pi_name)) {
auto partial_len = NAME_RE.match_partial(pi_name);
auto name_find_res = NAME_RE.find_in(name).ignore_error();
if (!name_find_res) {
auto partial_len = NAME_RE.match_partial(name);
return Err(
lnav::console::user_message::error(
attr_line_t("unable to import: ")
@ -95,11 +95,12 @@ regex101::import(const std::string& url,
.append("^ matched up to here"_comment)));
}
pcre_context_static<30> pc;
pcre_input pi{url};
if (!USER_URL.match(pc, pi)) {
auto partial_len = USER_URL.match_partial(pi);
auto user_find_res = USER_URL.capture_from(url)
.into(URL_MATCH_DATA)
.matches()
.ignore_error();
if (!user_find_res) {
auto partial_len = USER_URL.match_partial(url);
return Err(lnav::console::user_message::error(
attr_line_t("unrecognized regex101.com URL: ")
.append(lnav::roles::file(url)))
@ -112,7 +113,7 @@ regex101::import(const std::string& url,
.append("^ matched up to here"_comment)));
}
auto permalink = pi.get_substr(pc[0]);
auto permalink = URL_MATCH_DATA[1]->to_string();
auto format_filename = existing_format
? fmt::format(FMT_STRING("{}.regex101-{}.json"), name, permalink)
@ -155,7 +156,7 @@ regex101::import(const std::string& url,
.append(" flavor of regexes are supported")));
}
auto regex_res = pcrepp::from_str(entry.e_regex);
auto regex_res = lnav::pcre2pp::code::from(entry.e_regex);
if (regex_res.isErr()) {
auto parse_error = regex_res.unwrapErr();
return Err(lnav::console::user_message::error(
@ -163,7 +164,7 @@ regex101::import(const std::string& url,
.append_quoted(lnav::roles::symbol(entry.e_regex))
.append(" from ")
.append_quoted(lnav::roles::symbol(url)))
.with_reason(parse_error.ce_msg)
.with_reason(parse_error.get_message())
.with_help("fix the regex and try the import again"));
}
@ -205,16 +206,13 @@ regex101::import(const std::string& url,
{
yajlpp_map value_map(gen);
for (auto named_iter = regex.named_begin();
named_iter != regex.named_end();
++named_iter)
{
if (strcmp(named_iter->pnc_name, "body") == 0) {
for (auto named_cap : regex.get_named_captures()) {
if (named_cap.get_name() == "body") {
// don't need to add this as a value
continue;
}
value_map.gen(named_iter->pnc_name);
value_map.gen(named_cap.get_name());
{
yajlpp_map cap_map(gen);
@ -374,7 +372,7 @@ regex101::convert_format_pattern(
{
regex101::client::entry en;
en.e_regex = pattern->p_pcre->get_pattern();
en.e_regex = pattern->p_pcre.value->get_pattern();
for (const auto& sample : format->elf_samples) {
if (en.e_test_string.empty()) {
en.e_test_string = sample.s_line.pp_value;

@ -35,7 +35,7 @@
#include "column_namer.hh"
#include "config.h"
#include "lnav_util.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "scn/scn.h"
#include "sql_help.hh"
#include "sql_util.hh"
@ -75,31 +75,38 @@ CREATE TABLE regexp_capture (
struct cursor {
sqlite3_vtab_cursor base;
pcrepp c_pattern;
pcre_context_static<30> c_context;
std::unique_ptr<pcre_input> c_input;
std::shared_ptr<lnav::pcre2pp::code> c_pattern;
lnav::pcre2pp::match_data c_match_data{
lnav::pcre2pp::match_data::unitialized()};
std::string c_content;
string_fragment c_remaining;
bool c_content_as_blob{false};
int c_index{0};
bool c_matched{false};
int c_match_index{0};
sqlite3_int64 c_rowid{0};
cursor(sqlite3_vtab* vt) : base({vt}) { this->c_context.set_count(0); }
cursor(sqlite3_vtab* vt) : base({vt}) {}
int reset() { return SQLITE_OK; }
int next()
{
if (this->c_index >= (this->c_context.get_count() - 1)) {
this->c_input->pi_offset = this->c_input->pi_next_offset;
this->c_matched = this->c_pattern.match(
this->c_context, *(this->c_input), PCRE_NO_UTF8_CHECK);
if (this->c_index >= (this->c_match_data.get_count() - 1)) {
auto match_res = this->c_pattern->capture_from(this->c_content)
.at(this->c_remaining)
.into(this->c_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
this->c_remaining = match_res->f_remaining;
}
this->c_matched = match_res.has_value();
this->c_index = -1;
this->c_match_index += 1;
}
if (this->c_pattern.empty() || !this->c_matched) {
if (this->c_pattern == nullptr || !this->c_matched) {
return SQLITE_OK;
}
@ -108,7 +115,7 @@ CREATE TABLE regexp_capture (
return SQLITE_OK;
}
int eof() { return this->c_pattern.empty() || !this->c_matched; }
int eof() { return this->c_pattern == nullptr || !this->c_matched; }
int get_rowid(sqlite3_int64& rowid_out)
{
@ -120,7 +127,7 @@ CREATE TABLE regexp_capture (
int get_column(const cursor& vc, sqlite3_context* ctx, int col)
{
auto& cap = vc.c_context.all()[vc.c_index];
const auto cap = vc.c_match_data[vc.c_index];
switch (col) {
case RC_COL_MATCH_INDEX:
@ -133,28 +140,30 @@ CREATE TABLE regexp_capture (
if (vc.c_index == 0) {
sqlite3_result_null(ctx);
} else {
sqlite3_result_text(
ctx,
vc.c_pattern.name_for_capture(vc.c_index - 1),
-1,
SQLITE_TRANSIENT);
to_sqlite(ctx,
vc.c_pattern->get_name_for_capture(vc.c_index));
}
break;
case RC_COL_CAPTURE_COUNT:
sqlite3_result_int64(ctx, vc.c_context.get_count());
sqlite3_result_int64(ctx, vc.c_match_data.get_count());
break;
case RC_COL_RANGE_START:
sqlite3_result_int64(ctx, cap.c_begin + 1);
if (cap.has_value()) {
sqlite3_result_int64(ctx, cap->sf_begin + 1);
} else {
sqlite3_result_int64(ctx, 0);
}
break;
case RC_COL_RANGE_STOP:
sqlite3_result_int64(ctx, cap.c_end + 1);
if (cap.has_value()) {
sqlite3_result_int64(ctx, cap->sf_end + 1);
} else {
sqlite3_result_int64(ctx, 0);
}
break;
case RC_COL_CONTENT:
if (cap.is_valid()) {
sqlite3_result_text(ctx,
vc.c_input->get_substr_start(&cap),
cap.length(),
SQLITE_TRANSIENT);
if (cap.has_value()) {
to_sqlite(ctx, cap.value());
} else {
sqlite3_result_null(ctx);
}
@ -173,10 +182,7 @@ CREATE TABLE regexp_capture (
}
break;
case RC_COL_PATTERN: {
auto str = vc.c_pattern.get_pattern();
sqlite3_result_text(
ctx, str.c_str(), str.length(), SQLITE_TRANSIENT);
to_sqlite(ctx, vc.c_pattern->get_pattern());
break;
}
}
@ -219,7 +225,7 @@ rcFilter(sqlite3_vtab_cursor* pVtabCursor,
if (argc != 2) {
pCur->c_content.clear();
pCur->c_pattern.clear();
pCur->c_pattern.reset();
return SQLITE_OK;
}
@ -229,22 +235,29 @@ rcFilter(sqlite3_vtab_cursor* pVtabCursor,
pCur->c_content_as_blob = (sqlite3_value_type(argv[0]) == SQLITE_BLOB);
pCur->c_content.assign(blob, byte_count);
const char* pattern = (const char*) sqlite3_value_text(argv[1]);
auto re_res = pcrepp::from_str(pattern);
if (re_res.isErr()) {
pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
"Invalid regular expression: %s", re_res.unwrapErr().ce_msg);
auto pattern = from_sqlite<string_fragment>()(argc, argv, 1);
auto compile_res = lnav::pcre2pp::code::from(pattern);
if (compile_res.isErr()) {
pVtabCursor->pVtab->zErrMsg
= sqlite3_mprintf("Invalid regular expression: %s",
compile_res.unwrapErr().get_message().c_str());
return SQLITE_ERROR;
}
pCur->c_pattern = re_res.unwrap();
pCur->c_pattern = compile_res.unwrap().to_shared();
pCur->c_index = 0;
pCur->c_context.set_count(0);
pCur->c_input = std::make_unique<pcre_input>(pCur->c_content);
pCur->c_matched = pCur->c_pattern.match(
pCur->c_context, *(pCur->c_input), PCRE_NO_UTF8_CHECK);
pCur->c_match_data = pCur->c_pattern->create_match_data();
pCur->c_remaining.clear();
auto match_res = pCur->c_pattern->capture_from(pCur->c_content)
.into(pCur->c_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
pCur->c_remaining = match_res->f_remaining;
}
pCur->c_matched = match_res.has_value();
pCur->c_match_index = 0;
return SQLITE_OK;
@ -286,11 +299,12 @@ CREATE TABLE regexp_capture_into_json (
struct cursor {
sqlite3_vtab_cursor base;
pcrepp c_pattern;
pcre_context_static<30> c_context;
std::unique_ptr<pcre_input> c_input;
std::shared_ptr<lnav::pcre2pp::code> c_pattern;
lnav::pcre2pp::match_data c_match_data{
lnav::pcre2pp::match_data::unitialized()};
std::unique_ptr<column_namer> c_namer;
std::string c_content;
string_fragment c_remaining;
bool c_content_as_blob{false};
bool c_matched{false};
size_t c_match_index{0};
@ -298,25 +312,31 @@ CREATE TABLE regexp_capture_into_json (
std::string c_flags_string;
nonstd::optional<regexp_capture_flags> c_flags;
cursor(sqlite3_vtab* vt) : base({vt}) { this->c_context.set_count(0); }
cursor(sqlite3_vtab* vt) : base({vt}) {}
int reset() { return SQLITE_OK; }
int next()
{
this->c_input->pi_offset = this->c_input->pi_next_offset;
this->c_matched = this->c_pattern.match(
this->c_context, *(this->c_input), PCRE_NO_UTF8_CHECK);
auto match_res = this->c_pattern->capture_from(this->c_content)
.at(this->c_remaining)
.into(this->c_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
this->c_remaining = match_res->f_remaining;
}
this->c_matched = match_res.has_value();
this->c_match_index += 1;
if (this->c_pattern.empty() || !this->c_matched) {
if (this->c_pattern == nullptr || !this->c_matched) {
return SQLITE_OK;
}
return SQLITE_OK;
}
int eof() { return this->c_pattern.empty() || !this->c_matched; }
int eof() { return this->c_pattern == nullptr || !this->c_matched; }
int get_rowid(sqlite3_int64& rowid_out)
{
@ -339,18 +359,19 @@ CREATE TABLE regexp_capture_into_json (
{
yajlpp_map root_map(gen);
for (int lpc = 0; lpc < vc.c_pattern.get_capture_count();
lpc++)
for (int lpc = 1; lpc < vc.c_match_data.get_count(); lpc++)
{
const auto& colname = vc.c_namer->cn_names[lpc];
const auto* cap = vc.c_context[lpc];
const auto cap = vc.c_match_data[lpc];
if (!cap) {
continue;
}
yajl_gen_pstring(gen, colname.data(), colname.length());
if (!cap->is_valid()) {
yajl_gen_null(gen);
} else if (!vc.c_flags || vc.c_flags->convert_numbers) {
auto cap_view = vc.c_input->to_string_view(cap);
if (!vc.c_flags || vc.c_flags->convert_numbers) {
auto cap_view = cap->to_string_view();
auto scan_int_res
= scn::scan_value<int64_t>(cap_view);
@ -372,9 +393,7 @@ CREATE TABLE regexp_capture_into_json (
yajl_gen_pstring(
gen, cap_view.data(), cap_view.length());
} else {
yajl_gen_pstring(gen,
vc.c_input->get_substr_start(cap),
cap->length());
yajl_gen_pstring(gen, cap->data(), cap->length());
}
}
}
@ -399,10 +418,7 @@ CREATE TABLE regexp_capture_into_json (
}
break;
case RCJ_COL_PATTERN: {
auto str = vc.c_pattern.get_pattern();
sqlite3_result_text(
ctx, str.c_str(), str.length(), SQLITE_TRANSIENT);
to_sqlite(ctx, vc.c_pattern->get_pattern());
break;
}
case RCJ_COL_FLAGS: {
@ -454,7 +470,7 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor,
if (argc < 2 || argc > 3) {
pCur->c_content.clear();
pCur->c_pattern.clear();
pCur->c_pattern.reset();
pCur->c_flags_string.clear();
pCur->c_flags = nonstd::nullopt;
return SQLITE_OK;
@ -466,11 +482,12 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor,
pCur->c_content_as_blob = (sqlite3_value_type(argv[0]) == SQLITE_BLOB);
pCur->c_content.assign(blob, byte_count);
const char* pattern = (const char*) sqlite3_value_text(argv[1]);
auto re_res = pcrepp::from_str(pattern);
if (re_res.isErr()) {
pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf(
"Invalid regular expression: %s", re_res.unwrapErr().ce_msg);
auto pattern = from_sqlite<string_fragment>()(argc, argv, 1);
auto compile_res = lnav::pcre2pp::code::from(pattern);
if (compile_res.isErr()) {
pVtabCursor->pVtab->zErrMsg
= sqlite3_mprintf("Invalid regular expression: %s",
compile_res.unwrapErr().get_message().c_str());
return SQLITE_ERROR;
}
@ -500,19 +517,25 @@ rcjFilter(sqlite3_vtab_cursor* pVtabCursor,
}
}
pCur->c_pattern = re_res.unwrap();
pCur->c_pattern = compile_res.unwrap().to_shared();
pCur->c_namer
= std::make_unique<column_namer>(column_namer::language::JSON);
for (int lpc = 0; lpc < pCur->c_pattern.get_capture_count(); lpc++) {
pCur->c_namer->add_column(
string_fragment{pCur->c_pattern.name_for_capture(lpc)});
pCur->c_namer->add_column(string_fragment::from_const("__all__"));
for (int lpc = 1; lpc <= pCur->c_pattern->get_capture_count(); lpc++) {
pCur->c_namer->add_column(string_fragment::from_c_str(
pCur->c_pattern->get_name_for_capture(lpc)));
}
pCur->c_context.set_count(0);
pCur->c_input = std::make_unique<pcre_input>(pCur->c_content);
pCur->c_matched = pCur->c_pattern.match(
pCur->c_context, *(pCur->c_input), PCRE_NO_UTF8_CHECK);
pCur->c_match_data = pCur->c_pattern->create_match_data();
pCur->c_remaining.clear();
auto match_res = pCur->c_pattern->capture_from(pCur->c_content)
.into(pCur->c_match_data)
.matches(PCRE2_NO_UTF_CHECK)
.ignore_error();
if (match_res) {
pCur->c_remaining = match_res->f_remaining;
}
pCur->c_matched = match_res.has_value();
pCur->c_match_index = 0;
return SQLITE_OK;

@ -33,55 +33,172 @@
#include "base/time_util.hh"
#include "config.h"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "scn/scn.h"
using namespace std::chrono_literals;
static const struct {
const char* name;
pcrepp pcre;
lnav::pcre2pp::code pcre;
} MATCHERS[relative_time::RTT__MAX] = {
{"ws", pcrepp("\\A\\s+\\b")},
{"am", pcrepp("\\Aam|a\\.m\\.\\b")},
{"pm", pcrepp("\\Apm|p\\.m\\.\\b")},
{"a", pcrepp("\\Aa\\b")},
{"an", pcrepp("\\Aan\\b")},
{"at", pcrepp("\\Aat\\b")},
{"time", pcrepp("\\A(\\d{1,2}):(\\d{2})(?::(\\d{2})(?:\\.(\\d{3,6}))?)?")},
{"num", pcrepp("\\A((?:-|\\+)?\\d+)")},
{"sun", pcrepp("\\Asun(days?)?\\b")},
{"mon", pcrepp("\\Amon(days?)?\\b")},
{"tue", pcrepp("\\Atue(s(days?)?)?\\b")},
{"wed", pcrepp("\\Awed(nesdays?)?\\b")},
{"thu", pcrepp("\\Athu(rsdays?)?\\b")},
{"fri", pcrepp("\\Afri(days?)?\\b")},
{"sat", pcrepp("\\Asat(urdays?)?\\b")},
{"us", pcrepp("\\A(?:micros(?:econds?)?|us(?![a-zA-Z]))")},
{"ms", pcrepp("\\A(?:millis(?:econds?)?|ms(?![a-zA-Z]))")},
{"sec", pcrepp("\\As(?:ec(?:onds?)?)?(?![a-zA-Z])")},
{"min", pcrepp("\\Am(?:in(?:utes?)?)?(?![a-zA-Z])")},
{"h", pcrepp("\\Ah(?:ours?)?(?![a-zA-Z])")},
{"day", pcrepp("\\Ad(?:ays?)?(?![a-zA-Z])")},
{"week", pcrepp("\\Aw(?:eeks?)?(?![a-zA-Z])")},
{"mon", pcrepp("\\Amon(?:ths?)?(?![a-zA-Z])")},
{"year", pcrepp("\\Ay(?:ears?)?(?![a-zA-Z])")},
{"today", pcrepp("\\Atoday\\b")},
{"yest", pcrepp("\\Ayesterday\\b")},
{"tomo", pcrepp("\\Atomorrow\\b")},
{"noon", pcrepp("\\Anoon\\b")},
{"and", pcrepp("\\Aand\\b")},
{"the", pcrepp("\\Athe\\b")},
{"ago", pcrepp("\\Aago\\b")},
{"lter", pcrepp("\\Alater\\b")},
{"bfor", pcrepp("\\Abefore\\b")},
{"aft", pcrepp("\\Aafter\\b")},
{"now", pcrepp("\\Anow\\b")},
{"here", pcrepp("\\Ahere\\b")},
{"next", pcrepp("\\Anext\\b")},
{"previous", pcrepp("\\A(?:previous\\b|last\\b)")},
{
"ws",
lnav::pcre2pp::code::from_const("\\A\\s+\\b"),
},
{
"am",
lnav::pcre2pp::code::from_const("\\Aam|a\\.m\\.\\b"),
},
{
"pm",
lnav::pcre2pp::code::from_const("\\Apm|p\\.m\\.\\b"),
},
{
"a",
lnav::pcre2pp::code::from_const("\\Aa\\b"),
},
{
"an",
lnav::pcre2pp::code::from_const("\\Aan\\b"),
},
{
"at",
lnav::pcre2pp::code::from_const("\\Aat\\b"),
},
{
"time",
lnav::pcre2pp::code::from_const(
"\\A(\\d{1,2}):(\\d{2})(?::(\\d{2})(?:\\.(\\d{3,6}))?)?"),
},
{
"num",
lnav::pcre2pp::code::from_const("\\A((?:-|\\+)?\\d+)"),
},
{
"sun",
lnav::pcre2pp::code::from_const("\\Asun(days?)?\\b"),
},
{
"mon",
lnav::pcre2pp::code::from_const("\\Amon(days?)?\\b"),
},
{
"tue",
lnav::pcre2pp::code::from_const("\\Atue(s(days?)?)?\\b"),
},
{
"wed",
lnav::pcre2pp::code::from_const("\\Awed(nesdays?)?\\b"),
},
{
"thu",
lnav::pcre2pp::code::from_const("\\Athu(rsdays?)?\\b"),
},
{
"fri",
lnav::pcre2pp::code::from_const("\\Afri(days?)?\\b"),
},
{
"sat",
lnav::pcre2pp::code::from_const("\\Asat(urdays?)?\\b"),
},
{
"us",
lnav::pcre2pp::code::from_const(
"\\A(?:micros(?:econds?)?|us(?![a-zA-Z]))"),
},
{
"ms",
lnav::pcre2pp::code::from_const(
"\\A(?:millis(?:econds?)?|ms(?![a-zA-Z]))"),
},
{
"sec",
lnav::pcre2pp::code::from_const("\\As(?:ec(?:onds?)?)?(?![a-zA-Z])"),
},
{
"min",
lnav::pcre2pp::code::from_const("\\Am(?:in(?:utes?)?)?(?![a-zA-Z])"),
},
{
"h",
lnav::pcre2pp::code::from_const("\\Ah(?:ours?)?(?![a-zA-Z])"),
},
{
"day",
lnav::pcre2pp::code::from_const("\\Ad(?:ays?)?(?![a-zA-Z])"),
},
{
"week",
lnav::pcre2pp::code::from_const("\\Aw(?:eeks?)?(?![a-zA-Z])"),
},
{
"mon",
lnav::pcre2pp::code::from_const("\\Amon(?:ths?)?(?![a-zA-Z])"),
},
{
"year",
lnav::pcre2pp::code::from_const("\\Ay(?:ears?)?(?![a-zA-Z])"),
},
{
"today",
lnav::pcre2pp::code::from_const("\\Atoday\\b"),
},
{
"yest",
lnav::pcre2pp::code::from_const("\\Ayesterday\\b"),
},
{
"tomo",
lnav::pcre2pp::code::from_const("\\Atomorrow\\b"),
},
{
"noon",
lnav::pcre2pp::code::from_const("\\Anoon\\b"),
},
{
"and",
lnav::pcre2pp::code::from_const("\\Aand\\b"),
},
{
"the",
lnav::pcre2pp::code::from_const("\\Athe\\b"),
},
{
"ago",
lnav::pcre2pp::code::from_const("\\Aago\\b"),
},
{
"lter",
lnav::pcre2pp::code::from_const("\\Alater\\b"),
},
{
"bfor",
lnav::pcre2pp::code::from_const("\\Abefore\\b"),
},
{
"aft",
lnav::pcre2pp::code::from_const("\\Aafter\\b"),
},
{
"now",
lnav::pcre2pp::code::from_const("\\Anow\\b"),
},
{
"here",
lnav::pcre2pp::code::from_const("\\Ahere\\b"),
},
{
"next",
lnav::pcre2pp::code::from_const("\\Anext\\b"),
},
{
"previous",
lnav::pcre2pp::code::from_const("\\A(?:previous\\b|last\\b)"),
},
};
static int64_t TIME_SCALES[] = {
@ -102,10 +219,8 @@ const char relative_time::FIELD_CHARS[] = {
};
Result<relative_time, relative_time::parse_error>
relative_time::from_str(const char* str, size_t len)
relative_time::from_str(string_fragment str)
{
pcre_input pi(str, 0, len);
pcre_context_static<30> pc;
int64_t number = 0;
bool number_set = false, number_was_set = false;
bool next_set = false;
@ -118,10 +233,11 @@ relative_time::from_str(const char* str, size_t len)
pe_out.pe_column = 0;
pe_out.pe_msg.clear();
auto remaining = str;
while (true) {
rt_field_type curr_field_type = RTF__MAX;
if (pi.pi_next_offset >= pi.pi_length) {
if (remaining.empty()) {
if (number_set) {
if (number > 1970 && number < 2050) {
retval.rt_field[RTF_YEARS] = number - 1900;
@ -179,11 +295,18 @@ relative_time::from_str(const char* str, size_t len)
bool found = false;
for (int lpc = 0; lpc < RTT__MAX && !found; lpc++) {
token_t token = (token_t) lpc;
if (!MATCHERS[lpc].pcre.match(pc, pi, PCRE_ANCHORED)) {
auto md = MATCHERS[lpc].pcre.create_match_data();
auto match_res = MATCHERS[lpc]
.pcre.capture_from(remaining)
.into(md)
.matches()
.ignore_error();
if (!match_res) {
continue;
}
pe_out.pe_column = pc.all()->c_begin;
remaining = match_res->f_remaining;
pe_out.pe_column = match_res->f_all.sf_begin;
found = true;
if (RTT_MICROS <= token && token <= RTT_YEARS) {
if (!number_set) {
@ -333,15 +456,15 @@ relative_time::from_str(const char* str, size_t len)
case RTT_AT:
break;
case RTT_TIME: {
const auto hstr = pi.get_substr(pc[0]);
const auto mstr = pi.get_substr(pc[1]);
const auto hstr = md[1]->to_string();
const auto mstr = md[2]->to_string();
retval.rt_field[RTF_HOURS] = atoi(hstr.c_str());
retval.rt_field[RTF_MINUTES] = atoi(mstr.c_str());
if (pc[2]->is_valid()) {
const auto sstr = pi.get_substr(pc[2]);
if (md[3]) {
const auto sstr = md[3]->to_string();
retval.rt_field[RTF_SECONDS] = atoi(sstr.c_str());
if (pc[3]->is_valid()) {
const auto substr = pi.get_substr(pc[3]);
if (md[4]) {
const auto substr = md[4]->to_string();
switch (substr.length()) {
case 3:
@ -373,12 +496,11 @@ relative_time::from_str(const char* str, size_t len)
}
auto num_scan_res
= scn::scan_value<int64_t>(pi.to_string_view(pc[0]));
= scn::scan_value<int64_t>(md[0]->to_string_view());
if (!num_scan_res) {
pe_out.pe_msg
= fmt::format(FMT_STRING("Invalid number: {}"),
pi.get_substr(pc[0]));
pe_out.pe_msg = fmt::format(
FMT_STRING("Invalid number: {}"), md[0].value());
return Err(pe_out);
}
number = num_scan_res.value();

@ -40,6 +40,7 @@
#include <inttypes.h>
#include "base/intern_string.hh"
#include "base/result.h"
#include "ptimec.hh"
@ -109,13 +110,7 @@ public:
std::string pe_msg;
};
static Result<relative_time, parse_error> from_str(const char* str,
size_t len);
static Result<relative_time, parse_error> from_str(const std::string& str)
{
return from_str(str.c_str(), str.length());
}
static Result<relative_time, parse_error> from_str(string_fragment str);
static relative_time from_timeval(const struct timeval& tv);

@ -37,19 +37,19 @@
#include "shlex.hh"
bool
shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out)
{
while (this->s_index < this->s_len) {
switch (this->s_str[this->s_index]) {
case '\\':
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
if (this->s_index + 1 < this->s_len) {
token_out = shlex_token_t::ST_ESCAPE;
this->s_index += 2;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
} else {
this->s_index += 1;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_ERROR;
}
return true;
@ -57,16 +57,16 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
if (!this->s_ignore_quotes) {
switch (this->s_state) {
case state_t::STATE_NORMAL:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_DOUBLE_QUOTE_START;
this->s_state = state_t::STATE_IN_DOUBLE_QUOTE;
return true;
case state_t::STATE_IN_DOUBLE_QUOTE:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_DOUBLE_QUOTE_END;
this->s_state = state_t::STATE_NORMAL;
return true;
@ -79,16 +79,16 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
if (!this->s_ignore_quotes) {
switch (this->s_state) {
case state_t::STATE_NORMAL:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_SINGLE_QUOTE_START;
this->s_state = state_t::STATE_IN_SINGLE_QUOTE;
return true;
case state_t::STATE_IN_SINGLE_QUOTE:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_SINGLE_QUOTE_END;
this->s_state = state_t::STATE_NORMAL;
return true;
@ -110,7 +110,7 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
case '~':
switch (this->s_state) {
case state_t::STATE_NORMAL:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
while (this->s_index < this->s_len
&& (isalnum(this->s_str[this->s_index])
@ -119,7 +119,7 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
{
this->s_index += 1;
}
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_TILDE;
return true;
default:
@ -130,11 +130,11 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
case '\t':
switch (this->s_state) {
case state_t::STATE_NORMAL:
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
while (isspace(this->s_str[this->s_index])) {
this->s_index += 1;
}
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_WHITESPACE;
return true;
default:
@ -152,13 +152,12 @@ shlex::tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out)
}
void
shlex::scan_variable_ref(pcre_context::capture_t& cap_out,
shlex_token_t& token_out)
shlex::scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out)
{
cap_out.c_begin = this->s_index;
cap_out.sf_begin = this->s_index;
this->s_index += 1;
if (this->s_index >= this->s_len) {
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
token_out = shlex_token_t::ST_ERROR;
return;
}
@ -189,32 +188,31 @@ shlex::scan_variable_ref(pcre_context::capture_t& cap_out,
}
}
cap_out.c_end = this->s_index;
cap_out.sf_end = this->s_index;
if (token_out == shlex_token_t::ST_QUOTED_VARIABLE_REF
&& this->s_str[this->s_index - 1] != '}')
{
cap_out.c_begin += 1;
cap_out.c_end = cap_out.c_begin + 1;
cap_out.sf_begin += 1;
cap_out.sf_end = cap_out.sf_begin + 1;
token_out = shlex_token_t::ST_ERROR;
}
}
void
shlex::resolve_home_dir(std::string& result,
const pcre_context::capture_t cap) const
shlex::resolve_home_dir(std::string& result, string_fragment cap) const
{
if (cap.length() == 1) {
result.append(getenv_opt("HOME").value_or("~"));
} else {
auto username = (char*) alloca(cap.length());
memcpy(username, &this->s_str[cap.c_begin + 1], cap.length() - 1);
memcpy(username, &this->s_str[cap.sf_begin + 1], cap.length() - 1);
username[cap.length() - 1] = '\0';
auto pw = getpwnam(username);
if (pw != nullptr) {
result.append(pw->pw_dir);
} else {
result.append(&this->s_str[cap.c_begin], cap.length());
result.append(&this->s_str[cap.sf_begin], cap.length());
}
}
}

@ -38,8 +38,8 @@
#include <pwd.h>
#include "base/intern_string.hh"
#include "base/opt_util.hh"
#include "pcrepp/pcrepp.hh"
#include "shlex.resolver.hh"
enum class shlex_token_t {
@ -73,32 +73,32 @@ public:
return *this;
}
bool tokenize(pcre_context::capture_t& cap_out, shlex_token_t& token_out);
bool tokenize(string_fragment& cap_out, shlex_token_t& token_out);
template<typename Resolver = scoped_resolver>
bool eval(std::string& result, const Resolver& vars)
{
result.clear();
pcre_context::capture_t cap;
string_fragment cap;
shlex_token_t token;
int last_index = 0;
while (this->tokenize(cap, token)) {
result.append(&this->s_str[last_index], cap.c_begin - last_index);
result.append(&this->s_str[last_index], cap.sf_begin - last_index);
switch (token) {
case shlex_token_t::ST_ERROR:
return false;
case shlex_token_t::ST_ESCAPE:
result.append(1, this->s_str[cap.c_begin + 1]);
result.append(1, this->s_str[cap.sf_begin + 1]);
break;
case shlex_token_t::ST_WHITESPACE:
result.append(&this->s_str[cap.c_begin], cap.length());
result.append(&this->s_str[cap.sf_begin], cap.length());
break;
case shlex_token_t::ST_VARIABLE_REF:
case shlex_token_t::ST_QUOTED_VARIABLE_REF: {
int extra = token == shlex_token_t::ST_VARIABLE_REF ? 0 : 1;
std::string var_name(&this->s_str[cap.c_begin + 1 + extra],
std::string var_name(&this->s_str[cap.sf_begin + 1 + extra],
cap.length() - 1 - extra * 2);
auto local_var = vars.find(var_name);
const char* var_value = getenv(var_name.c_str());
@ -124,7 +124,7 @@ public:
default:
break;
}
last_index = cap.c_end;
last_index = cap.sf_end;
}
result.append(&this->s_str[last_index], this->s_len - last_index);
@ -137,7 +137,7 @@ public:
{
result.clear();
pcre_context::capture_t cap;
string_fragment cap;
shlex_token_t token;
int last_index = 0;
bool start_new = true;
@ -151,12 +151,12 @@ public:
start_new = false;
}
result.back().append(&this->s_str[last_index],
cap.c_begin - last_index);
cap.sf_begin - last_index);
switch (token) {
case shlex_token_t::ST_ERROR:
return false;
case shlex_token_t::ST_ESCAPE:
result.back().append(1, this->s_str[cap.c_begin + 1]);
result.back().append(1, this->s_str[cap.sf_begin + 1]);
break;
case shlex_token_t::ST_WHITESPACE:
start_new = true;
@ -164,7 +164,7 @@ public:
case shlex_token_t::ST_VARIABLE_REF:
case shlex_token_t::ST_QUOTED_VARIABLE_REF: {
int extra = token == shlex_token_t::ST_VARIABLE_REF ? 0 : 1;
std::string var_name(&this->s_str[cap.c_begin + 1 + extra],
std::string var_name(&this->s_str[cap.sf_begin + 1 + extra],
cap.length() - 1 - extra * 2);
auto local_var = vars.find(var_name);
const char* var_value = getenv(var_name.c_str());
@ -182,7 +182,7 @@ public:
default:
break;
}
last_index = cap.c_end;
last_index = cap.sf_end;
}
if (last_index < this->s_len) {
@ -202,11 +202,9 @@ public:
this->s_state = state_t::STATE_NORMAL;
}
void scan_variable_ref(pcre_context::capture_t& cap_out,
shlex_token_t& token_out);
void scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out);
void resolve_home_dir(std::string& result,
const pcre_context::capture_t cap) const;
void resolve_home_dir(std::string& result, string_fragment cap) const;
enum class state_t {
STATE_NORMAL,

@ -47,7 +47,7 @@
#include "bound_tags.hh"
#include "config.h"
#include "lnav_util.hh"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "readline_context.hh"
#include "readline_highlighters.hh"
#include "shlex.resolver.hh"
@ -902,40 +902,42 @@ static struct {
int
guess_type_from_pcre(const std::string& pattern, std::string& collator)
{
try {
static const std::vector<int> number_matches = {1, 2};
static const std::vector<int> number_matches = {1, 2};
pcrepp re(pattern);
std::vector<int> matches;
int retval = SQLITE3_TEXT;
int index = 0;
collator.clear();
for (const auto& test_value : TYPE_TEST_VALUE) {
pcre_context_static<30> pc;
pcre_input pi(test_value.sample);
if (re.match(pc, pi, PCRE_ANCHORED) && pc[0]->c_begin == 0
&& pc[0]->length() == (int) pi.pi_length)
{
matches.push_back(index);
}
auto compile_res = lnav::pcre2pp::code::from(pattern);
if (compile_res.isErr()) {
return SQLITE3_TEXT;
}
index += 1;
auto re = compile_res.unwrap();
std::vector<int> matches;
int retval = SQLITE3_TEXT;
int index = 0;
collator.clear();
for (const auto& test_value : TYPE_TEST_VALUE) {
auto find_res
= re.find_in(string_fragment::from_c_str(test_value.sample),
PCRE2_ANCHORED)
.ignore_error();
if (find_res && find_res->f_all.sf_begin == 0
&& find_res->f_remaining.empty())
{
matches.push_back(index);
}
if (matches.size() == 1) {
retval = TYPE_TEST_VALUE[matches.front()].sqlite_type;
collator = TYPE_TEST_VALUE[matches.front()].collator;
} else if (matches == number_matches) {
retval = SQLITE_FLOAT;
collator = "";
}
index += 1;
}
return retval;
} catch (pcrepp::error& e) {
return SQLITE3_TEXT;
if (matches.size() == 1) {
retval = TYPE_TEST_VALUE[matches.front()].sqlite_type;
collator = TYPE_TEST_VALUE[matches.front()].collator;
} else if (matches == number_matches) {
retval = SQLITE_FLOAT;
collator = "";
}
return retval;
}
const char*
@ -1038,50 +1040,79 @@ annotate_sql_statement(attr_line_t& al)
static const std::string keyword_re_str = R"(\A)" + sql_keyword_re();
static const struct {
pcrepp re;
lnav::pcre2pp::code re;
string_attr_type<void>* type;
} PATTERNS[] = {
{pcrepp{R"(\A,)"}, &SQL_COMMA_ATTR},
{pcrepp{R"(\A\(|\A\))"}, &SQL_PAREN_ATTR},
{pcrepp{keyword_re_str, PCRE_CASELESS}, &SQL_KEYWORD_ATTR},
{pcrepp{R"(\A'[^']*('(?:'[^']*')*|$))"}, &SQL_STRING_ATTR},
{
pcrepp{R"(\A-?\d+(?:\.\d*(?:[eE][\-\+]?\d+)?)?|0x[0-9a-fA-F]+$)"},
lnav::pcre2pp::code::from_const(R"(\A,)"),
&SQL_COMMA_ATTR,
},
{
lnav::pcre2pp::code::from_const(R"(\A\(|\A\))"),
&SQL_PAREN_ATTR,
},
{
lnav::pcre2pp::code::from(keyword_re_str, PCRE2_CASELESS).unwrap(),
&SQL_KEYWORD_ATTR,
},
{
lnav::pcre2pp::code::from_const(R"(\A'[^']*('(?:'[^']*')*|$))"),
&SQL_STRING_ATTR,
},
{
lnav::pcre2pp::code::from_const(
R"(\A-?\d+(?:\.\d*(?:[eE][\-\+]?\d+)?)?|0x[0-9a-fA-F]+$)"),
&SQL_NUMBER_ATTR,
},
{pcrepp{R"(\A(((\$|:|@)?\b[a-z_]\w*)|\"([^\"]+)\"|\[([^\]]+)]))",
PCRE_CASELESS},
&SQL_IDENTIFIER_ATTR},
{pcrepp{R"(\A--.*)"}, &SQL_COMMENT_ATTR},
{pcrepp{R"(\A(\*|<|>|=|!|\-|\+|\|\|))"}, &SQL_OPERATOR_ATTR},
{pcrepp{R"(\A.)"}, &SQL_GARBAGE_ATTR},
{
lnav::pcre2pp::code::from_const(
R"(\A(((\$|:|@)?\b[a-z_]\w*)|\"([^\"]+)\"|\[([^\]]+)]))",
PCRE2_CASELESS),
&SQL_IDENTIFIER_ATTR,
},
{
lnav::pcre2pp::code::from_const(R"(\A--.*)"),
&SQL_COMMENT_ATTR,
},
{
lnav::pcre2pp::code::from_const(R"(\A(\*|<|>|=|!|\-|\+|\|\|))"),
&SQL_OPERATOR_ATTR,
},
{
lnav::pcre2pp::code::from_const(R"(\A.)"),
&SQL_GARBAGE_ATTR,
},
};
static const pcrepp cmd_pattern{R"(^(\.\w+))"};
static const pcrepp ws_pattern(R"(\A\s+)");
static const auto cmd_pattern
= lnav::pcre2pp::code::from_const(R"(^(\.\w+))");
static const auto ws_pattern = lnav::pcre2pp::code::from_const(R"(\A\s+)");
pcre_context_static<30> pc;
pcre_input pi(al.get_string());
auto& line = al.get_string();
auto& sa = al.get_attrs();
if (cmd_pattern.match(pc, pi, PCRE_ANCHORED)) {
auto* cap = pc.all();
sa.emplace_back(line_range(cap->c_begin, cap->c_end),
auto cmd_find_res
= cmd_pattern.find_in(line, PCRE2_ANCHORED).ignore_error();
if (cmd_find_res) {
auto cap = cmd_find_res->f_all;
sa.emplace_back(line_range(cap.sf_begin, cap.sf_end),
SQL_COMMAND_ATTR.value());
return;
}
while (pi.pi_next_offset < line.length()) {
if (ws_pattern.match(pc, pi, PCRE_ANCHORED)) {
auto remaining = string_fragment::from_str(line);
while (!remaining.empty()) {
auto ws_find_res = ws_pattern.find_in(remaining).ignore_error();
if (ws_find_res) {
remaining = ws_find_res->f_remaining;
continue;
}
for (const auto& pat : PATTERNS) {
if (pat.re.match(pc, pi, PCRE_ANCHORED)) {
auto* cap = pc.all();
struct line_range lr(cap->c_begin, cap->c_end);
sa.emplace_back(lr, pat.type->value());
auto pat_find_res = pat.re.find_in(remaining).ignore_error();
if (pat_find_res) {
sa.emplace_back(to_line_range(pat_find_res->f_all),
pat.type->value());
remaining = pat_find_res->f_remaining;
break;
}
}

@ -29,7 +29,7 @@
#include "libbase64.h"
#include "mapbox/variant.hpp"
#include "optional.hpp"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "safe/safe.h"
#include "scn/scn.h"
#include "spookyhash/SpookyV2.h"
@ -47,7 +47,7 @@
using namespace mapbox;
struct cache_entry {
std::shared_ptr<pcrepp> re2;
std::shared_ptr<lnav::pcre2pp::code> re2;
std::shared_ptr<column_namer> cn{
std::make_shared<column_namer>(column_namer::language::JSON)};
};
@ -61,15 +61,22 @@ find_re(string_fragment re)
auto iter = cache.find(re);
if (iter == cache.end()) {
auto compile_res = lnav::pcre2pp::code::from(re);
if (compile_res.isErr()) {
const static intern_string_t SRC = intern_string::lookup("arg");
throw lnav::console::to_user_message(SRC, compile_res.unwrapErr());
}
cache_entry c;
c.re2 = std::make_shared<pcrepp>(re.to_string());
c.re2 = compile_res.unwrap().to_shared();
auto pair = cache.insert(
std::make_pair(string_fragment::from_str(c.re2->get_pattern()), c));
for (int lpc = 0; lpc < c.re2->get_capture_count(); lpc++) {
c.cn->add_column(
string_fragment::from_c_str(c.re2->name_for_capture(lpc)));
c.cn->add_column(string_fragment::from_c_str(
c.re2->get_name_for_capture(lpc + 1)));
}
iter = pair.first;
@ -81,90 +88,78 @@ find_re(string_fragment re)
static bool
regexp(string_fragment re, string_fragment str)
{
cache_entry* reobj = find_re(re);
pcre_context_static<30> pc;
pcre_input pi(str);
auto* reobj = find_re(re);
return reobj->re2->match(pc, pi);
return reobj->re2->find_in(str).ignore_error().has_value();
}
static util::variant<int64_t, double, const char*, string_fragment, json_string>
regexp_match(string_fragment re, const char* str)
regexp_match(string_fragment re, string_fragment str)
{
cache_entry* reobj = find_re(re);
pcre_context_static<30> pc;
pcre_input pi(str);
pcrepp& extractor = *reobj->re2;
auto* reobj = find_re(re);
auto& extractor = *reobj->re2;
if (extractor.get_capture_count() == 0) {
throw pcrepp::error("regular expression does not have any captures");
throw std::runtime_error(
"regular expression does not have any captures");
}
if (!extractor.match(pc, pi, PCRE_NO_UTF8_CHECK)) {
auto md = extractor.create_match_data();
auto match_res = extractor.capture_from(str).into(md).matches();
if (match_res.is<lnav::pcre2pp::matcher::not_found>()) {
return static_cast<const char*>(nullptr);
}
if (match_res.is<lnav::pcre2pp::matcher::error>()) {
auto err = match_res.get<lnav::pcre2pp::matcher::error>();
throw std::runtime_error(err.get_message());
}
yajlpp_gen gen;
yajl_gen_config(gen, yajl_gen_beautify, false);
if (extractor.get_capture_count() == 1) {
pcre_context::capture_t* cap = pc[0];
const char* cap_start = pi.get_substr_start(cap);
auto cap = md[1];
if (!cap->is_valid()) {
if (!cap) {
return static_cast<const char*>(nullptr);
}
char* cap_copy = (char*) alloca(cap->length() + 1);
long long int i_value;
double d_value;
int end_index;
memcpy(cap_copy, cap_start, cap->length());
cap_copy[cap->length()] = '\0';
if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1
&& (end_index == cap->length()))
{
return (int64_t) i_value;
auto scan_int_res = scn::scan_value<int64_t>(cap->to_string_view());
if (scan_int_res && scan_int_res.empty()) {
return scan_int_res.value();
}
if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1
&& (end_index == cap->length()))
{
return d_value;
auto scan_float_res = scn::scan_value<double>(cap->to_string_view());
if (scan_float_res && scan_float_res.empty()) {
return scan_float_res.value();
}
return string_fragment(str, cap->c_begin, cap->c_end);
return cap.value();
} else {
yajlpp_map root_map(gen);
for (int lpc = 0; lpc < extractor.get_capture_count(); lpc++) {
const auto& colname = reobj->cn->cn_names[lpc];
const auto* cap = pc[lpc];
const auto cap = md[lpc + 1];
yajl_gen_pstring(gen, colname.data(), colname.length());
if (!cap->is_valid()) {
if (!cap) {
yajl_gen_null(gen);
} else {
const char* cap_start = pi.get_substr_start(cap);
char* cap_copy = (char*) alloca(cap->length() + 1);
long long int i_value;
double d_value;
int end_index;
memcpy(cap_copy, cap_start, cap->length());
cap_copy[cap->length()] = '\0';
if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1
&& (end_index == cap->length()))
{
yajl_gen_integer(gen, i_value);
} else if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1
&& (end_index == cap->length()))
{
yajl_gen_number(gen, cap_start, cap->length());
auto scan_int_res
= scn::scan_value<int64_t>(cap->to_string_view());
if (scan_int_res && scan_int_res.empty()) {
yajl_gen_integer(gen, scan_int_res.value());
} else {
yajl_gen_pstring(gen, cap_start, cap->length());
auto scan_float_res
= scn::scan_value<double>(cap->to_string_view());
if (scan_float_res && scan_float_res.empty()) {
yajl_gen_number(gen, cap->data(), cap->length());
} else {
yajl_gen_pstring(gen, cap->data(), cap->length());
}
}
}
}
@ -263,9 +258,9 @@ logfmt2json(string_fragment line)
}
static std::string
regexp_replace(const char* str, string_fragment re, const char* repl)
regexp_replace(string_fragment str, string_fragment re, const char* repl)
{
cache_entry* reobj = find_re(re);
auto* reobj = find_re(re);
return reobj->re2->replace(str, repl);
}

@ -32,7 +32,7 @@
#include "text_format.hh"
#include "config.h"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "yajl/api/yajl_parse.h"
text_format_t
@ -44,56 +44,57 @@ detect_text_format(string_fragment sf,
static const auto MD_EXT = ghc::filesystem::path(".md");
static const auto MARKDOWN_EXT = ghc::filesystem::path(".markdown");
static const pcrepp MAN_MATCHERS
= pcrepp(R"(^[A-Z]+\(\d\)\s+)", PCRE_MULTILINE);
static const auto MAN_MATCHERS = lnav::pcre2pp::code::from_const(
R"(^[A-Z]+\(\d\)\s+)", PCRE2_MULTILINE);
// XXX This is a pretty crude way of detecting format...
static const pcrepp PYTHON_MATCHERS = pcrepp(
static const auto PYTHON_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"^\\s*def\\s+\\w+\\([^)]*\\):[^\\n]*$|"
"^\\s*try:[^\\n]*$"
")",
PCRE_MULTILINE);
PCRE2_MULTILINE);
static const pcrepp RUST_MATCHERS = pcrepp(R"(
static const auto RUST_MATCHERS
= lnav::pcre2pp::code::from_const(R"(
(?:
^\s*use\s+[\w+:\{\}]+;$|
^\s*(?:pub)?\s+(?:const|enum|fn)\s+\w+.*$|
^\s*impl\s+\w+.*$
)
)",
PCRE_MULTILINE);
PCRE2_MULTILINE);
static const pcrepp JAVA_MATCHERS = pcrepp(
static const auto JAVA_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"^package\\s+|"
"^import\\s+|"
"^\\s*(?:public)?\\s*class\\s*(\\w+\\s+)*\\s*{"
")",
PCRE_MULTILINE);
PCRE2_MULTILINE);
static const pcrepp C_LIKE_MATCHERS = pcrepp(
static const auto C_LIKE_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"^#\\s*include\\s+|"
"^#\\s*define\\s+|"
"^\\s*if\\s+\\([^)]+\\)[^\\n]*$|"
"^\\s*(?:\\w+\\s+)*class \\w+ {"
")",
PCRE_MULTILINE);
PCRE2_MULTILINE);
static const pcrepp SQL_MATCHERS = pcrepp(
static const auto SQL_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"select\\s+.+\\s+from\\s+|"
"insert\\s+into\\s+.+\\s+values"
")",
PCRE_MULTILINE | PCRE_CASELESS);
PCRE2_MULTILINE | PCRE2_CASELESS);
static const pcrepp XML_MATCHERS = pcrepp(
static const auto XML_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
R"(<\?xml(\s+\w+\s*=\s*"[^"]*")*\?>|)"
R"(</?\w+(\s+\w+\s*=\s*"[^"]*")*\s*>)"
")",
PCRE_MULTILINE | PCRE_CASELESS);
PCRE2_MULTILINE | PCRE2_CASELESS);
text_format_t retval = text_format_t::TF_UNKNOWN;
@ -110,45 +111,40 @@ detect_text_format(string_fragment sf,
}
}
pcre_input pi(sf);
pcre_context_static<30> pc;
{
auto_mem<yajl_handle_t> jhandle(yajl_free);
jhandle = yajl_alloc(nullptr, nullptr, nullptr);
if (yajl_parse(jhandle, (unsigned char*) sf.data(), sf.length())
== yajl_status_ok)
{
if (yajl_parse(jhandle, sf.udata(), sf.length()) == yajl_status_ok) {
return text_format_t::TF_JSON;
}
}
if (MAN_MATCHERS.match(pc, pi)) {
if (MAN_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_MAN;
}
if (PYTHON_MATCHERS.match(pc, pi)) {
if (PYTHON_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_PYTHON;
}
if (RUST_MATCHERS.match(pc, pi)) {
if (RUST_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_RUST;
}
if (JAVA_MATCHERS.match(pc, pi)) {
if (JAVA_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_JAVA;
}
if (C_LIKE_MATCHERS.match(pc, pi)) {
if (C_LIKE_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_C_LIKE;
}
if (SQL_MATCHERS.match(pc, pi)) {
if (SQL_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_SQL;
}
if (XML_MATCHERS.match(pc, pi)) {
if (XML_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_XML;
}

@ -42,6 +42,7 @@
enum class text_format_t {
TF_UNKNOWN,
TF_BINARY,
TF_C_LIKE,
TF_JAVA,
TF_JSON,
@ -64,6 +65,9 @@ struct formatter<text_format_t> : formatter<string_view> {
string_view name = "unknown";
switch (tf) {
case text_format_t::TF_UNKNOWN:
name = "text/plain";
break;
case text_format_t::TF_BINARY:
name = "application/octet-stream";
break;
case text_format_t::TF_LOG:

@ -33,21 +33,11 @@
#include "config.h"
static std::shared_ptr<pcrepp>
xpcre_compile(const char* pattern, int options = 0)
template<typename T, std::size_t N>
static std::shared_ptr<lnav::pcre2pp::code>
xpcre_compile(const T (&pattern)[N], int options = 0)
{
auto compile_res = pcrepp::shared_from_str(pattern, options);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
fprintf(stderr, "internal error: failed to compile -- %s\n", pattern);
fprintf(stderr, "internal error: %s\n", ce.ce_msg);
exit(1);
}
return compile_res.unwrap();
return lnav::pcre2pp::code::from_const(pattern, options).to_shared();
}
void
@ -382,7 +372,7 @@ setup_highlights(highlight_map_t& hm)
"\\bWITH\\b|"
"\\bWITHOUT\\b"
")",
PCRE_CASELESS))
PCRE2_CASELESS))
.with_nestable(false)
.with_text_format(text_format_t::TF_SQL)
.with_role(role_t::VCR_KEYWORD);

@ -570,7 +570,9 @@ textfile_sub_source::rescan_files(
continue;
}
if (!retval && lf->is_indexing()) {
if (!retval && lf->is_indexing()
&& lf->get_text_format() != text_format_t::TF_BINARY)
{
auto ms_iter = this->tss_doc_metadata.find(lf->get_filename());
if (ms_iter != this->tss_doc_metadata.end()) {

@ -175,15 +175,15 @@ textview_curses::reload_config(error_reporter& reporter)
continue;
}
auto regex = pcrepp::shared_from_str(hl_pair.second.hc_regex);
auto regex = lnav::pcre2pp::code::from(hl_pair.second.hc_regex);
if (regex.isErr()) {
const static intern_string_t PATTERN_SRC
= intern_string::lookup("pattern");
auto ce = regex.unwrapErr();
reporter(&hl_pair.second.hc_regex,
lnav::console::user_message::error(fmt::format(
FMT_STRING("invalid highlight regex: {} at {}"),
ce.ce_msg,
ce.ce_offset)));
lnav::console::to_user_message(PATTERN_SRC, ce));
continue;
}
@ -228,7 +228,7 @@ textview_curses::reload_config(error_reporter& reporter)
attrs.ta_attrs |= A_UNDERLINE;
}
this->tc_highlights[{highlight_source_t::THEME, hl_pair.first}]
= highlighter(regex.unwrap())
= highlighter(regex.unwrap().to_shared())
.with_attrs(attrs)
.with_color(fg, bg)
.with_nestable(false);
@ -557,7 +557,7 @@ void
textview_curses::execute_search(const std::string& regex_orig)
{
std::string regex = regex_orig;
std::shared_ptr<pcrepp> code;
std::shared_ptr<lnav::pcre2pp::code> code;
if ((this->tc_search_child == nullptr)
|| (regex != this->tc_current_search))
@ -571,27 +571,26 @@ textview_curses::execute_search(const std::string& regex_orig)
if (regex.empty()) {
} else {
auto compile_res
= pcrepp::shared_from_str(regex, PCRE_CASELESS | PCRE_UTF8);
auto compile_res = lnav::pcre2pp::code::from(regex, PCRE2_CASELESS);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
regex = pcrepp::quote(regex);
regex = lnav::pcre2pp::quote(regex);
log_info("invalid search regex (%s), using quoted: %s",
ce.ce_msg,
ce.get_message().c_str(),
regex.c_str());
auto compile_quote_res
= pcrepp::shared_from_str(regex, PCRE_CASELESS | PCRE_UTF8);
= lnav::pcre2pp::code::from(regex, PCRE2_CASELESS);
if (compile_quote_res.isErr()) {
log_error("Unable to compile quoted regex: %s",
regex.c_str());
} else {
code = compile_quote_res.unwrap();
code = compile_quote_res.unwrap().to_shared();
}
} else {
code = compile_res.unwrap();
code = compile_res.unwrap().to_shared();
}
}
@ -604,7 +603,7 @@ textview_curses::execute_search(const std::string& regex_orig)
hm[{highlight_source_t::PREVIEW, "search"}] = hl;
auto gp = injector::get<std::shared_ptr<grep_proc<vis_line_t>>>(
code->p_code, *this);
code, *this);
gp->set_sink(this);
auto top = this->get_top();
@ -626,7 +625,7 @@ textview_curses::execute_search(const std::string& regex_orig)
this->tc_sub_source->get_grepper() | [this, code](auto pair) {
auto sgp
= injector::get<std::shared_ptr<grep_proc<vis_line_t>>>(
code->p_code, *pair.first);
code, *pair.first);
sgp->set_sink(pair.second);
sgp->queue_request(0_vl);
@ -661,15 +660,15 @@ textview_curses::horiz_shift(vis_line_t start, vis_line_t end, int off_start)
this->listview_value_for_rows(*this, start, rows);
const auto& str = rows[0].get_string();
pcre_context_static<60> pc;
pcre_input pi(str);
while (hl_iter->second.h_regex->match(pc, pi)) {
if (pc.all()->c_begin < off_start) {
prev_hit = std::max(prev_hit, pc.all()->c_begin);
} else if (pc.all()->c_begin > off_start) {
next_hit = std::min(next_hit, pc.all()->c_begin);
}
}
hl_iter->second.h_regex->capture_from(str).for_each(
[&](lnav::pcre2pp::match_data& md) {
auto cap = md[0].value();
if (cap.sf_begin < off_start) {
prev_hit = std::max(prev_hit, cap.sf_begin);
} else if (cap.sf_begin > off_start) {
next_hit = std::min(next_hit, cap.sf_begin);
}
});
}
if (prev_hit == -1 && next_hit == INT_MAX) {
@ -1127,8 +1126,7 @@ logfile_filter_state::content_line_to_vis_line(uint32_t line)
// Build an anchor string from `raw`: the input is passed through tolower(),
// handed to ANCHOR_RE.replace() with "-" as the replacement text, and the
// result is prefixed with '#'.
// NOTE: the hunk interleaves the pcrepp (earlier) and pcre2pp (replacement)
// versions of the regex declaration and of the return statement.
std::string
text_anchors::to_anchor_string(const std::string& raw)
{
// The pattern matches runs of one or more characters that are not `\w`.
static const pcrepp ANCHOR_RE(R"([^\w]+)");
static const auto ANCHOR_RE = lnav::pcre2pp::code::from_const(R"([^\w]+)");
// Earlier call: replace() received a C string via c_str().
return fmt::format(FMT_STRING("#{}"),
ANCHOR_RE.replace(tolower(raw).c_str(), "-"));
// Replacement call: the lowered string is passed directly.
return fmt::format(FMT_STRING("#{}"), ANCHOR_RE.replace(tolower(raw), "-"));
}

@ -50,14 +50,15 @@ timeslice(sqlite3_value* time_in, nonstd::optional<const char*> slice_in_opt)
std::string c_slice_str;
relative_time c_rel_time;
} cache;
const auto slice_in = string_fragment(slice_in_opt.value_or("15m"));
const auto slice_in
= string_fragment::from_c_str(slice_in_opt.value_or("15m"));
if (slice_in.empty()) {
throw sqlite_func_error("no time slice value given");
}
if (slice_in != cache.c_slice_str.c_str()) {
auto parse_res = relative_time::from_str(slice_in.data());
auto parse_res = relative_time::from_str(slice_in);
if (parse_res.isErr()) {
throw sqlite_func_error(
"unable to parse time slice value: {} -- {}",
@ -145,22 +146,26 @@ timeslice(sqlite3_value* time_in, nonstd::optional<const char*> slice_in_opt)
}
static nonstd::optional<double>
sql_timediff(const char* time1, const char* time2)
sql_timediff(string_fragment time1, string_fragment time2)
{
struct timeval tv1, tv2, retval;
date_time_scanner dts1, dts2;
auto parse_res1 = relative_time::from_str(time1, -1);
auto parse_res1 = relative_time::from_str(time1);
if (parse_res1.isOk()) {
tv1 = parse_res1.unwrap().adjust_now().to_timeval();
} else if (!dts1.convert_to_timeval(time1, -1, nullptr, tv1)) {
} else if (!dts1.convert_to_timeval(
time1.data(), time1.length(), nullptr, tv1))
{
return nonstd::nullopt;
}
auto parse_res2 = relative_time::from_str(time2, -1);
auto parse_res2 = relative_time::from_str(time2);
if (parse_res2.isOk()) {
tv2 = parse_res2.unwrap().adjust_now().to_timeval();
} else if (!dts2.convert_to_timeval(time2, -1, nullptr, tv2)) {
} else if (!dts2.convert_to_timeval(
time2.data(), time2.length(), nullptr, tv2))
{
return nonstd::nullopt;
}

@ -97,10 +97,10 @@ struct from_sqlite<filter_lang_t> {
};
// from_sqlite specialization that converts a SQLite argument into a compiled,
// shared regular expression.
// NOTE: both the pcrepp (earlier) and lnav::pcre2pp::code (replacement)
// variants of the struct header, the compile call and the return are
// interleaved below; a hunk boundary also hides the condition that guards the
// "non-empty pattern" throw.
template<>
struct from_sqlite<std::shared_ptr<pcrepp>> {
inline std::shared_ptr<pcrepp> operator()(int argc,
sqlite3_value** val,
int argi)
struct from_sqlite<std::shared_ptr<lnav::pcre2pp::code>> {
inline std::shared_ptr<lnav::pcre2pp::code> operator()(int argc,
sqlite3_value** val,
int argi)
{
// Read the argument at index `argi` as text.
const char* pattern = (const char*) sqlite3_value_text(val[argi]);
@ -108,18 +108,18 @@ struct from_sqlite<std::shared_ptr<pcrepp>> {
throw sqlite_func_error("Expecting a non-empty pattern value");
}
// Earlier compile call: case-insensitive with PCRE_UTF8.
auto compile_res
= pcrepp::shared_from_str(pattern, PCRE_CASELESS | PCRE_UTF8);
// Replacement compile call: only PCRE2_CASELESS is passed here.
// NOTE(review): no UTF flag appears at this call site; whether UTF handling
// is enabled inside code::from() is not visible -- confirm.
auto compile_res = lnav::pcre2pp::code::from(
string_fragment::from_c_str(pattern), PCRE2_CASELESS);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
// Report the compiler's message and the error offset to the SQL caller.
throw sqlite_func_error(
"Invalid regular expression for pattern: {} at offset {}",
ce.ce_msg,
ce.get_message().c_str(),
ce.ce_offset);
}
return compile_res.unwrap();
// Replacement return: the unwrapped code object is converted with to_shared().
return compile_res.unwrap().to_shared();
}
};
@ -683,8 +683,9 @@ CREATE TABLE lnav_view_filters (
std::shared_ptr<text_filter> tf;
switch (lang.value_or(filter_lang_t::REGEX)) {
case filter_lang_t::REGEX: {
auto pattern = from_sqlite<std::shared_ptr<pcrepp>>()(
1, &pattern_str, 0);
auto pattern
= from_sqlite<std::shared_ptr<lnav::pcre2pp::code>>()(
1, &pattern_str, 0);
auto pf = std::make_shared<pcre_filter>(
type.value_or(text_filter::type_t::EXCLUDE),
pattern->get_pattern(),
@ -874,8 +875,8 @@ CREATE TABLE lnav_view_filters (
tf->lf_deleted = true;
tss->text_filters_changed();
auto pattern
= from_sqlite<std::shared_ptr<pcrepp>>()(1, &pattern_val, 0);
auto pattern = from_sqlite<std::shared_ptr<lnav::pcre2pp::code>>()(
1, &pattern_val, 0);
auto pf = std::make_shared<pcre_filter>(
type, pattern->get_pattern(), tf->get_index(), pattern);
auto conflict_mode = sqlite3_vtab_on_conflict(mod_vt->v_db);

@ -145,8 +145,10 @@ json_path_handler_base::json_path_handler_base(const std::string& property)
: jph_property(property.back() == '#'
? property.substr(0, property.size() - 1)
: property),
jph_regex(
std::make_shared<pcrepp>(pcrepp::quote(property), PCRE_ANCHORED)),
jph_regex(lnav::pcre2pp::code::from(lnav::pcre2pp::quote(property),
PCRE2_ANCHORED)
.unwrap()
.to_shared()),
jph_is_array(property.back() == '#')
{
memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks));
@ -160,28 +162,20 @@ scrub_pattern(const std::string& pattern)
return std::regex_replace(pattern, CAPTURE, "(");
}
// Constructors that build a handler from an already-compiled regex.
// NOTE: this hunk interleaves the pcrepp-based constructors (earlier) with
// the std::shared_ptr<const lnav::pcre2pp::code> ones (replacement). Every
// variant zeroes jph_callbacks in its body.
//
// Earlier: pattern-only constructor taking a pcrepp by reference. The regex
// is copied into a new shared_ptr, jph_is_array is true only when the pattern
// ends in '#', and jph_is_pattern_property is always true.
json_path_handler_base::json_path_handler_base(const pcrepp& property)
: jph_property(scrub_pattern(property.p_pattern)),
jph_regex(std::make_shared<pcrepp>(property)),
jph_is_array(property.p_pattern.back() == '#'),
jph_is_pattern_property(true)
{
memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks));
}
// Earlier: (name, pcrepp reference) constructor; only its signature and
// initializer list are present in this hunk.
json_path_handler_base::json_path_handler_base(std::string property,
const pcrepp& property_re)
: jph_property(std::move(property)),
jph_regex(std::make_shared<pcrepp>(property_re)),
jph_is_array(property_re.p_pattern.find('#') != std::string::npos)
// Replacement: pattern-only constructor taking a shared regex. The property
// name is the scrubbed pattern text, jph_is_array is true when the pattern
// contains '#' anywhere, and jph_is_pattern_property is true only when the
// regex reports at least one capture.
json_path_handler_base::json_path_handler_base(
const std::shared_ptr<const lnav::pcre2pp::code>& property)
: jph_property(scrub_pattern(property->get_pattern())), jph_regex(property),
jph_is_array(property->get_pattern().find('#') != std::string::npos),
jph_is_pattern_property(property->get_capture_count() > 0)
{
memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks));
}
// (name, shared regex) constructor: the explicit name is moved into
// jph_property and the shared regex is stored as-is. The first parameter
// list below is the earlier pcrepp form, the second is the replacement.
json_path_handler_base::json_path_handler_base(
std::string property, const std::shared_ptr<pcrepp>& property_re)
std::string property,
const std::shared_ptr<const lnav::pcre2pp::code>& property_re)
: jph_property(std::move(property)), jph_regex(property_re),
// jph_is_array is true when the pattern text contains '#' (earlier accessor
// first, replacement accessor second).
jph_is_array(property_re->p_pattern.find('#') != std::string::npos)
jph_is_array(property_re->get_pattern().find('#') != std::string::npos)
{
memset(&this->jph_callbacks, 0, sizeof(this->jph_callbacks));
}
@ -191,14 +185,12 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const
{
if (this->jph_is_array) {
auto size = this->jph_size_provider(ygc.ygc_obj_stack.top());
auto md = lnav::pcre2pp::match_data::unitialized();
yajl_gen_string(handle, this->jph_property);
yajl_gen_array_open(handle);
for (size_t index = 0; index < size; index++) {
pcre_context_static<30> pc;
pcre_input pi("");
yajlpp_provider_context ypc{{pc, pi}, index};
yajlpp_provider_context ypc{&md, index};
yajlpp_gen_context elem_ygc(handle, *this->jph_children);
elem_ygc.ygc_depth = 1;
elem_ygc.ygc_obj_stack.push(
@ -232,16 +224,17 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const
ygc.ygc_depth += 1;
if (this->jph_obj_provider) {
pcre_context_static<30> pc;
pcre_input pi(full_path);
auto md = this->jph_regex->create_match_data();
auto find_res = this->jph_regex->capture_from(full_path)
.into(md)
.matches();
this->jph_regex->match(pc, pi);
ygc.ygc_obj_stack.push(this->jph_obj_provider(
{{pc, pi}, yajlpp_provider_context::nindex},
{&md, yajlpp_provider_context::nindex},
ygc.ygc_obj_stack.top()));
if (!ygc.ygc_default_stack.empty()) {
ygc.ygc_default_stack.push(this->jph_obj_provider(
{{pc, pi}, yajlpp_provider_context::nindex},
{&md, yajlpp_provider_context::nindex},
ygc.ygc_default_stack.top()));
}
}
@ -301,8 +294,9 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const
schema.gen(this->jph_description);
}
if (this->jph_is_pattern_property) {
ygc.ygc_path.emplace_back(fmt::format(
FMT_STRING("<{}>"), this->jph_regex->name_for_capture(0)));
ygc.ygc_path.emplace_back(
fmt::format(FMT_STRING("<{}>"),
this->jph_regex->get_name_for_capture(1)));
} else {
ygc.ygc_path.emplace_back(this->jph_property);
}
@ -312,7 +306,7 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const
fmt::join(ygc.ygc_path, "/")));
schema.gen("type");
if (this->jph_is_array) {
if (this->jph_regex->p_pattern.find("#?")
if (this->jph_regex->get_pattern().find("#?")
== std::string::npos)
{
schema.gen("array");
@ -349,7 +343,7 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const
if (this->jph_is_pattern_property) {
ygc.ygc_path.emplace_back(fmt::format(
FMT_STRING("<{}>"), this->jph_regex->name_for_capture(0)));
FMT_STRING("<{}>"), this->jph_regex->get_name_for_capture(1)));
} else {
ygc.ygc_path.emplace_back(this->jph_property);
}
@ -365,7 +359,8 @@ json_path_handler_base::gen_schema(yajlpp_gen_context& ygc) const
schema.gen("type");
if (this->jph_is_array) {
if (this->jph_regex->p_pattern.find("#?") == std::string::npos) {
if (this->jph_regex->get_pattern().find("#?") == std::string::npos)
{
schema.gen("array");
} else {
yajlpp_array type_array(ygc.ygc_handle);
@ -493,7 +488,7 @@ json_path_handler_base::walk(
if (this->jph_children) {
for (const auto& lpath : local_paths) {
for (const auto& jph : this->jph_children->jpc_children) {
static const auto POSS_SRC
static const intern_string_t POSS_SRC
= intern_string::lookup("possibilities");
std::string full_path = base + lpath;
@ -509,16 +504,18 @@ json_path_handler_base::walk(
ypc.set_path(full_path).with_obj(root).update_callbacks();
if (this->jph_obj_provider) {
auto md = this->jph_regex->create_match_data();
std::string full_path = lpath + "/";
pcre_input pi(full_path);
if (!this->jph_regex->match(ypc.ypc_pcre_context, pi)) {
if (!this->jph_regex->capture_from(full_path)
.into(md)
.matches()
.ignore_error())
{
ensure(false);
}
child_root = this->jph_obj_provider(
{{ypc.ypc_pcre_context, pi},
yajlpp_provider_context::nindex},
root);
{&md, yajlpp_provider_context::nindex}, root);
}
jph.walk(cb, child_root, full_path);
@ -683,8 +680,6 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers,
}
this->ypc_sibling_handlers = orig_handlers;
pcre_input pi(&this->ypc_path[0], 0, this->ypc_path.size() - 1);
this->ypc_callbacks = DEFAULT_CALLBACKS;
if (handlers == nullptr) {
@ -709,12 +704,16 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers,
}
}
auto path_frag = string_fragment::from_byte_range(
this->ypc_path.data(), 1 + child_start, this->ypc_path.size() - 1);
for (const auto& jph : handlers->jpc_children) {
pi.reset(&this->ypc_path[1 + child_start],
0,
this->ypc_path.size() - 2 - child_start);
if (jph.jph_regex->match(this->ypc_pcre_context, pi)) {
pcre_context::capture_t* cap = this->ypc_pcre_context.all();
auto md = jph.jph_regex->create_match_data();
if (jph.jph_regex->capture_from(path_frag)
.into(md)
.matches()
.ignore_error())
{
auto cap = md[0].value();
if (jph.jph_is_array) {
this->ypc_array_handler_count += 1;
@ -724,31 +723,24 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers,
? static_cast<size_t>(-1)
: this->ypc_array_index[this->ypc_array_handler_count - 1];
if ((1 + child_start + cap->c_end
!= (int) this->ypc_path.size() - 1)
if ((cap.sf_end != (int) this->ypc_path.size() - 1)
&& (!jph.is_array()
|| index != yajlpp_provider_context::nindex))
{
this->ypc_obj_stack.push(jph.jph_obj_provider(
{{this->ypc_pcre_context, pi}, index, this},
this->ypc_obj_stack.top()));
{&md, index, this}, this->ypc_obj_stack.top()));
}
}
if (jph.jph_children) {
this->ypc_handler_stack.emplace_back(&jph);
if (1 + child_start + cap->c_end
!= (int) this->ypc_path.size() - 1)
{
this->update_callbacks(jph.jph_children,
1 + child_start + cap->c_end);
if (cap.sf_end != (int) this->ypc_path.size() - 1) {
this->update_callbacks(jph.jph_children, cap.sf_end);
return;
}
} else {
if (1 + child_start + cap->c_end
!= (int) this->ypc_path.size() - 1)
{
if (cap.sf_end != (int) this->ypc_path.size() - 1) {
continue;
}
@ -953,13 +945,16 @@ yajlpp_parse_context::handle_unused_or_delete(void* ctx)
if (!ypc->ypc_handler_stack.empty()
&& ypc->ypc_handler_stack.back()->jph_obj_deleter)
{
pcre_context_static<30> pc;
auto& jph = ypc->ypc_handler_stack.back();
auto md = jph->jph_regex->create_match_data();
auto key_start = ypc->ypc_path_index_stack.back();
pcre_input pi(&ypc->ypc_path[key_start + 1],
0,
ypc->ypc_path.size() - key_start - 2);
yajlpp_provider_context provider_ctx{{pc, pi}, static_cast<size_t>(-1)};
ypc->ypc_handler_stack.back()->jph_regex->match(pc, pi);
auto path_frag = string_fragment::from_byte_range(
ypc->ypc_path.data(), key_start + 1, ypc->ypc_path.size() - 1);
yajlpp_provider_context provider_ctx{&md, static_cast<size_t>(-1)};
ypc->ypc_handler_stack.back()
->jph_regex->capture_from(path_frag)
.into(md)
.matches();
ypc->ypc_handler_stack.back()->jph_obj_deleter(
provider_ctx, ypc->ypc_obj_stack.top());
@ -1426,35 +1421,11 @@ json_path_handler_base::report_enum_error(yajlpp_parse_context* ypc,
}
// Report a value error for the property currently being parsed.
// NOTE: this hunk interleaves the earlier report_regex_value_error(), which
// built the whole message from a pcrepp::compile_error, with the replacement
// report_error(), which receives a ready-made user_message.
void
// Earlier signature.
json_path_handler_base::report_regex_value_error(
yajlpp_parse_context* ypc,
const std::string& value,
const pcrepp::compile_error& pcre_error) const
// Replacement signature. `value` is not referenced by the replacement body
// lines visible in this hunk.
json_path_handler_base::report_error(yajlpp_parse_context* ypc,
const std::string& value,
lnav::console::user_message um) const
{
// Earlier body: highlight the offending regex text, then append a second
// line with a "^ " marker indented by ce_offset spaces followed by the
// compiler's message.
attr_line_t pcre_error_content{value};
lnav::snippets::regex_highlighter(pcre_error_content,
pcre_error_content.length(),
line_range{
0,
(int) pcre_error_content.length(),
});
pcre_error_content.append("\n")
.append(pcre_error.ce_offset, ' ')
.append(lnav::roles::error("^ "))
.append(lnav::roles::error(pcre_error.ce_msg))
.with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE));
// Earlier body: build the error message, its reason and two snippets (the
// parser's current snippet and the highlighted regex).
ypc->report_error(lnav::console::user_message::error(
attr_line_t()
.append_quoted(value)
.append(" is not a valid regular expression for "
"property ")
.append_quoted(lnav::roles::symbol(
ypc->get_full_path().to_string())))
.with_reason(pcre_error.ce_msg)
.with_snippet(ypc->get_snippet())
.with_snippet(lnav::console::snippet::from(
ypc->get_full_path(), pcre_error_content))
// Replacement body: attach the parser's current snippet to the supplied
// message. The help text is attached by the shared tail of both versions.
ypc->report_error(um.with_snippet(ypc->get_snippet())
.with_help(this->get_help_text(ypc)));
}

@ -48,10 +48,11 @@
#include "base/file_range.hh"
#include "base/intern_string.hh"
#include "base/lnav.console.hh"
#include "base/lnav.console.into.hh"
#include "base/lnav_log.hh"
#include "json_ptr.hh"
#include "optional.hpp"
#include "pcrepp/pcrepp.hh"
#include "pcrepp/pcre2pp.hh"
#include "relative_time.hh"
#include "yajl/api/yajl_gen.h"
#include "yajl/api/yajl_parse.h"
@ -88,38 +89,70 @@ struct positioned_property {
}
};
// Holder for a shared T that is built through T's own `from()` factory.
// A failure from T::from() is translated into a lnav::console::user_message
// tagged with the `src` location supplied by the caller.
template<typename T, typename... Types>
struct factory_container {
    // Variant of the container that appends the compile-time DefaultArgs
    // values after the caller's arguments when invoking T::from().
    template<Types... DefaultArgs>
    struct with_default_args {
        std::shared_ptr<T> value;

        // Build the value from `args...` followed by `DefaultArgs...`.
        template<typename... Args>
        static Result<with_default_args, lnav::console::user_message> from(
            intern_string_t src, Args... args)
        {
            auto built = T::from(args..., DefaultArgs...);

            if (!built.isOk()) {
                return Err(
                    lnav::console::to_user_message(src, built.unwrapErr()));
            }
            return Ok(with_default_args{built.unwrap().to_shared()});
        }
    };

    std::shared_ptr<T> value;

    // Build the value by forwarding `args...` unchanged to T::from().
    template<typename... Args>
    static Result<factory_container, lnav::console::user_message> from(
        intern_string_t src, Args... args)
    {
        auto built = T::from(args...);

        if (!built.isOk()) {
            return Err(
                lnav::console::to_user_message(src, built.unwrapErr()));
        }
        return Ok(factory_container{built.unwrap().to_shared()});
    }
};
class yajlpp_gen_context;
class yajlpp_parse_context;
struct yajlpp_provider_context {
pcre_extractor ypc_extractor;
lnav::pcre2pp::match_data* ypc_extractor;
size_t ypc_index{0};
yajlpp_parse_context* ypc_parse_context;
static constexpr size_t nindex = static_cast<size_t>(-1);
// Return the capture identified by `name`, passed through json_ptr::decode()
// and interned.
// NOTE: this hunk interleaves the earlier pcre_extractor implementation with
// the replacement that reads from the lnav::pcre2pp::match_data pointer.
template<typename T>
// Earlier signature (by value), then replacement signature (forwarding ref).
intern_string_t get_substr_i(T name) const
intern_string_t get_substr_i(T&& name) const
{
// Earlier body: look the capture up in the pcre context and decode from the
// input's substring start.
pcre_context::iterator cap = this->ypc_extractor.pe_context[name];
char path[cap->length() + 1];
size_t len = json_ptr::decode(
path,
this->ypc_extractor.pe_input.get_substr_start(cap),
cap->length());
// Replacement body: .value() is called without a prior check, so the
// capture is assumed to be present.
auto cap = (*this->ypc_extractor)[std::forward<T>(name)].value();
// The buffer size is a runtime value, i.e. a variable-length array, which is
// a compiler extension in C++.
char path[cap.length() + 1];
size_t len = json_ptr::decode(path, cap.data(), cap.length());
return intern_string::lookup(path, len);
}
// Return the capture identified by `name`, passed through json_ptr::decode(),
// as a std::string.
// NOTE: this hunk interleaves the earlier pcre_extractor implementation with
// the replacement that reads from the lnav::pcre2pp::match_data pointer.
template<typename T>
// Earlier signature (by value), then replacement signature (forwarding ref).
std::string get_substr(T name) const
std::string get_substr(T&& name) const
{
// Earlier body: look the capture up in the pcre context and decode from the
// input's substring start.
pcre_context::iterator cap = this->ypc_extractor.pe_context[name];
char path[cap->length() + 1];
size_t len = json_ptr::decode(
path,
this->ypc_extractor.pe_input.get_substr_start(cap),
cap->length());
// Replacement body: .value() is called without a prior check, so the
// capture is assumed to be present.
auto cap = (*this->ypc_extractor)[std::forward<T>(name)].value();
// The buffer size is a runtime value, i.e. a variable-length array, which is
// a compiler extension in C++.
char path[cap.length() + 1];
size_t len = json_ptr::decode(path, cap.data(), cap.length());
// Construct the result from the first `len` bytes of the decoded buffer.
return {path, len};
}
@ -158,14 +191,14 @@ struct json_path_handler_base {
static const enum_value_t ENUM_TERMINATOR;
json_path_handler_base(const std::string& property);
explicit json_path_handler_base(const pcrepp& property);
explicit json_path_handler_base(const std::string& property);
json_path_handler_base(std::string property, const pcrepp& property_re);
explicit json_path_handler_base(
const std::shared_ptr<const lnav::pcre2pp::code>& property_re);
json_path_handler_base(std::string property,
const std::shared_ptr<pcrepp>& property_re);
json_path_handler_base(
std::string property,
const std::shared_ptr<const lnav::pcre2pp::code>& property_re);
bool is_array() const { return this->jph_is_array; }
@ -194,7 +227,7 @@ struct json_path_handler_base {
std::vector<schema_type_t> get_types() const;
std::string jph_property;
std::shared_ptr<pcrepp> jph_regex;
std::shared_ptr<const lnav::pcre2pp::code> jph_regex;
yajl_callbacks jph_callbacks{};
std::function<yajl_gen_status(
yajlpp_gen_context&, const json_path_handler_base&, yajl_gen)>
@ -214,7 +247,7 @@ struct json_path_handler_base {
const char* jph_synopsis{""};
const char* jph_description{""};
const json_path_container* jph_children{nullptr};
std::shared_ptr<pcrepp> jph_pattern;
std::shared_ptr<const lnav::pcre2pp::code> jph_pattern;
const char* jph_pattern_re{nullptr};
std::function<void(const string_fragment&)> jph_string_validator;
size_t jph_min_length{0};
@ -241,9 +274,9 @@ struct json_path_handler_base {
const relative_time::parse_error& pe) const;
void report_enum_error(yajlpp_parse_context* ypc,
const std::string& value_str) const;
void report_regex_value_error(yajlpp_parse_context* ypc,
const std::string& value_str,
const pcrepp::compile_error& ce) const;
void report_error(yajlpp_parse_context* ypc,
const std::string& value_str,
lnav::console::user_message um) const;
attr_line_t get_help_text(const std::string& full_path) const;
attr_line_t get_help_text(yajlpp_parse_context* ypc) const;
@ -410,7 +443,6 @@ public:
std::vector<size_t> ypc_array_index;
std::vector<const json_path_handler_base*> ypc_handler_stack;
size_t ypc_array_handler_count{0};
pcre_context_static<30> ypc_pcre_context;
bool ypc_ignore_unused{false};
const struct json_path_container* ypc_sibling_handlers{nullptr};
const struct json_path_handler_base* ypc_current_handler{nullptr};

@ -104,6 +104,11 @@ struct json_path_handler : public json_path_handler_base {
this->jph_callbacks.yajl_double = (int (*)(void*, double)) double_func;
}
// Construct a handler with no parse callbacks of its own: `path` (taken by
// value) is passed straight to whichever json_path_handler_base constructor
// accepts a P.
template<typename P>
json_path_handler(P path) : json_path_handler_base(path)
{
}
template<typename P>
json_path_handler(P path,
int (*str_func)(yajlpp_parse_context*,
@ -115,18 +120,8 @@ struct json_path_handler : public json_path_handler_base {
= (int (*)(void*, const unsigned char*, size_t)) str_func;
}
template<typename P>
json_path_handler(P path) : json_path_handler_base(path)
{
}
json_path_handler(const std::string& path, const pcrepp& re)
: json_path_handler_base(path, re)
{
}
json_path_handler(const std::string& path,
const std::shared_ptr<pcrepp>& re)
const std::shared_ptr<const lnav::pcre2pp::code>& re)
: json_path_handler_base(path, re)
{
}
@ -194,10 +189,11 @@ struct json_path_handler : public json_path_handler_base {
return *this;
}
// Attach a validation regex to this handler and return *this for chaining.
// NOTE: the hunk interleaves the earlier `const char*` signature and pcrepp
// construction with the replacement array-reference signature and
// pcre2pp::code::from_const() construction.
json_path_handler& with_pattern(const char* re)
// Replacement signature: binds only to arrays (e.g. string literals).
template<typename T, std::size_t N>
json_path_handler& with_pattern(const T (&re)[N])
{
// Stores the pointer to the caller's pattern text, not a copy of it.
this->jph_pattern_re = re;
// Earlier: compile with pcrepp.
this->jph_pattern = std::make_shared<pcrepp>(re);
// Replacement: compile with from_const() and convert with to_shared().
this->jph_pattern = lnav::pcre2pp::code::from_const(re).to_shared();
return *this;
}
@ -341,10 +337,8 @@ struct json_path_handler : public json_path_handler_base {
if (jph.jph_pattern) {
auto sf = to_string_fragment(field_ptr);
pcre_input pi(sf);
pcre_context_static<30> pc;
if (!jph.jph_pattern->match(pc, pi)) {
if (!jph.jph_pattern->find_in(sf).ignore_error()) {
jph.report_pattern_error(&ypc, sf.to_string());
}
}
@ -833,10 +827,7 @@ struct json_path_handler : public json_path_handler_base {
auto jph = ypc->ypc_current_handler;
if (jph->jph_pattern) {
pcre_input pi(value_str);
pcre_context_static<30> pc;
if (!jph->jph_pattern->match(pc, pi)) {
if (!jph->jph_pattern->find_in(value_str).ignore_error()) {
jph->report_pattern_error(ypc, value_str);
}
}
@ -891,10 +882,7 @@ struct json_path_handler : public json_path_handler_base {
auto jph = ypc->ypc_current_handler;
if (jph->jph_pattern) {
pcre_input pi(value_str);
pcre_context_static<30> pc;
if (!jph->jph_pattern->match(pc, pi)) {
if (!jph->jph_pattern->find_in(value_str).ignore_error()) {
jph->report_pattern_error(ypc, value_str);
}
}
@ -953,10 +941,7 @@ struct json_path_handler : public json_path_handler_base {
auto jph = ypc->ypc_current_handler;
if (jph->jph_pattern) {
pcre_input pi(value_str);
pcre_context_static<30> pc;
if (!jph->jph_pattern->match(pc, pi)) {
if (!jph->jph_pattern->find_in(value_str).ignore_error()) {
jph->report_pattern_error(ypc, value_str);
}
}
@ -1010,10 +995,7 @@ struct json_path_handler : public json_path_handler_base {
auto jph = ypc->ypc_current_handler;
if (jph->jph_pattern) {
pcre_input pi(value_str);
pcre_context_static<30> pc;
if (!jph->jph_pattern->match(pc, pi)) {
if (!jph->jph_pattern->find_in(value_str).ignore_error()) {
jph->report_pattern_error(ypc, value_str);
}
}
@ -1065,10 +1047,7 @@ struct json_path_handler : public json_path_handler_base {
auto jph = ypc->ypc_current_handler;
if (jph->jph_pattern) {
pcre_input pi(value_str);
pcre_context_static<30> pc;
if (!jph->jph_pattern->match(pc, pi)) {
if (!jph->jph_pattern->find_in(value_str).ignore_error()) {
jph->report_pattern_error(ypc, value_str);
}
}
@ -1107,55 +1086,35 @@ struct json_path_handler : public json_path_handler_base {
return *this;
}
// Bind a string JSON property to a member whose value is produced by a
// factory, and return *this for chaining.
// NOTE: this hunk interleaves two versions. The earlier one targeted a
// std::shared_ptr<T> member and built it with std::make_shared<T>(string),
// catching pcrepp::error. The replacement targets a T member and builds it
// with T::from(), which returns a result that is checked with isErr().
//
// Earlier signature.
template<typename C, typename T, typename... Args>
json_path_handler& for_field(Args... args, std::shared_ptr<T> C::*ptr_arg)
// Helper for the replacement signature: maps any type to `int`, so that a
// decltype expression can be used as a defaulted non-type template parameter.
template<typename>
struct int_ {
typedef int type;
};
// Replacement signature: the third template parameter is only well-formed
// when the expression T::from(intern_string_t{}) compiles, which removes this
// overload from consideration for other member types.
template<typename C,
typename T,
typename int_<decltype(T::from(intern_string_t{}))>::type = 0,
typename... Args>
json_path_handler& for_field(Args... args, T C::*ptr_arg)
{
this->add_cb(str_field_cb2);
// String callback: invoked with the raw bytes of the JSON string value.
this->jph_str_cb = [args..., ptr_arg](yajlpp_parse_context* ypc,
const unsigned char* str,
size_t len) {
// Earlier locals (value copied into a std::string).
auto obj = ypc->ypc_obj_stack.top();
auto value_str = std::string((const char*) str, len);
auto jph = ypc->ypc_current_handler;
// Replacement locals (value wrapped in a string_fragment).
auto* obj = ypc->ypc_obj_stack.top();
auto value_frag = string_fragment::from_bytes(str, len);
const auto* jph = ypc->ypc_current_handler;
// Earlier: construction could throw, hence the try block.
try {
auto re = std::make_shared<T>(value_str);
// Replacement: the factory receives the property's full path as the source
// tag plus the value; a failure is reported through report_error().
auto from_res = T::from(ypc->get_full_path(), value_frag);
if (from_res.isErr()) {
jph->report_error(
ypc, value_frag.to_string(), from_res.unwrapErr());
} else {
// Assignment target, used by both versions; the right-hand side follows
// (earlier: std::move(re); replacement: from_res.unwrap()).
json_path_handler::get_field(obj, args..., ptr_arg)
= std::move(re);
// Earlier: convert the thrown pcrepp::error into a compile_error report.
} catch (const pcrepp::error& e) {
pcrepp::compile_error ce;
ce.ce_msg = e.what();
ce.ce_offset = e.e_offset;
jph->report_regex_value_error(ypc, value_str, ce);
= from_res.unwrap();
}
// Returns 1 whether or not the value was accepted.
return 1;
};
// Generator callback: emits the field's pattern text unless it equals the
// value found on the default stack.
// NOTE(review): the hunk's line counts suggest this callback belongs to the
// earlier version only -- confirm against the repository.
this->jph_gen_callback
= [args..., ptr_arg](yajlpp_gen_context& ygc,
const json_path_handler_base& jph,
yajl_gen handle) {
const auto& field = json_path_handler::get_field(
ygc.ygc_obj_stack.top(), args..., ptr_arg);
if (!ygc.ygc_default_stack.empty()) {
const auto& field_def = json_path_handler::get_field(
ygc.ygc_default_stack.top(), args..., ptr_arg);
// Skip output when the field matches its default.
if (field == field_def) {
return yajl_gen_status_ok;
}
}
// The property name is written only when ygc_depth is non-zero.
if (ygc.ygc_depth) {
yajl_gen_string(handle, jph.jph_property);
}
yajlpp_generator gen(handle);
return gen(field->get_pattern());
};
return *this;
}
@ -1225,7 +1184,8 @@ struct json_path_handler : public json_path_handler_base {
size_t len) {
auto obj = ypc->ypc_obj_stack.top();
auto handler = ypc->ypc_current_handler;
auto parse_res = relative_time::from_str((const char*) str, len);
auto parse_res = relative_time::from_str(
string_fragment::from_bytes(str, len));
if (parse_res.isErr()) {
auto parse_error = parse_res.unwrapErr();
@ -1495,10 +1455,11 @@ property_handler(const std::string& path)
return {path};
}
// Create a json_path_handler whose path is a regular expression.
// NOTE: the hunk interleaves the earlier `const std::string&` signature and
// pcrepp construction with the replacement array-reference signature, which
// binds only to arrays (e.g. string literals) and compiles through
// pcre2pp::code::from_const().
template<typename T, std::size_t N>
inline json_path_handler
pattern_property_handler(const std::string& path)
pattern_property_handler(const T (&path)[N])
{
// Earlier: wrap the path in a pcrepp object.
return {pcrepp(path)};
// Replacement: pass a shared pcre2pp::code to the handler constructor.
return {lnav::pcre2pp::code::from_const(path).to_shared()};
}
} // namespace yajlpp

@ -162,12 +162,14 @@ main(int argc, char* argv[])
scan_batch_context sbc{allocator};
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter) {
++iter)
{
line_info li = {{13}};
(*iter)->clear();
if ((*iter)->scan(*lf, index, li, sbr, sbc)
== log_format::SCAN_MATCH) {
== log_format::SCAN_MATCH)
{
format = (*iter)->specialized();
found = true;
}
@ -188,7 +190,7 @@ main(int argc, char* argv[])
data_parser::TRACE_FILE = fopen("scanned.dpt", "w");
data_scanner ds(sub_line, body.lr_start, sub_line.length());
data_scanner ds(sub_line, body.lr_start);
data_parser dp(&ds);
std::string msg_format;
@ -200,8 +202,7 @@ main(int argc, char* argv[])
fprintf(out, "format :%s\n", msg_format.c_str());
if (pretty_print) {
data_scanner ds2(
sub_line, body.lr_start, sub_line.length());
data_scanner ds2(sub_line, body.lr_start);
pretty_printer pp(&ds2, sa);
attr_line_t pretty_out;

@ -44,10 +44,7 @@ using namespace std;
class my_source : public grep_proc_source<vis_line_t> {
public:
// Construct the test source by handing `fd` to ms_buffer via set_fd().
// NOTE: the hunk shows the same constructor twice, first in its multi-line
// layout and then in the single-line layout; the tokens are identical. The
// ';' after the body is an empty declaration.
my_source(auto_fd& fd)
{
this->ms_buffer.set_fd(fd);
};
my_source(auto_fd& fd) { this->ms_buffer.set_fd(fd); };
bool grep_value_for_line(vis_line_t line_number, string& value_out)
{
@ -117,7 +114,6 @@ main(int argc, char* argv[])
int retval = EXIT_SUCCESS;
const char* errptr;
auto_fd fd;
pcre* code;
int eoff;
if (argc < 3) {
@ -126,29 +122,35 @@ main(int argc, char* argv[])
} else if ((fd = open(argv[2], O_RDONLY)) == -1) {
perror("open");
retval = EXIT_FAILURE;
} else if ((code
= pcre_compile(argv[1], PCRE_CASELESS, &errptr, &eoff, NULL))
== NULL)
{
fprintf(stderr, "error: invalid pattern -- %s\n", errptr);
} else {
auto psuperv = std::make_shared<pollable_supervisor>();
my_source ms(fd);
my_sink msink;
auto compile_res = lnav::pcre2pp::code::from(
string_fragment::from_c_str(argv[1]), PCRE2_CASELESS);
grep_proc<vis_line_t> gp(code, ms, psuperv);
if (compile_res.isErr()) {
auto ce = compile_res.unwrapErr();
fprintf(stderr,
"error: invalid pattern -- %s\n",
ce.get_message().c_str());
} else {
auto co = compile_res.unwrap().to_shared();
auto psuperv = std::make_shared<pollable_supervisor>();
my_source ms(fd);
my_sink msink;
grep_proc<vis_line_t> gp(co, ms, psuperv);
gp.set_sink(&msink);
gp.queue_request();
gp.start();
gp.set_sink(&msink);
gp.queue_request();
gp.start();
while (!msink.ms_finished) {
vector<struct pollfd> pollfds;
while (!msink.ms_finished) {
vector<struct pollfd> pollfds;
psuperv->update_poll_set(pollfds);
poll(&pollfds[0], pollfds.size(), -1);
psuperv->update_poll_set(pollfds);
poll(&pollfds[0], pollfds.size(), -1);
psuperv->check_poll_set(pollfds);
psuperv->check_poll_set(pollfds);
}
}
}

@ -43,7 +43,6 @@
#include "config.h"
#include "logfile.hh"
#include "pcrepp/pcrepp.hh"
#include "sequence_matcher.hh"
#include "sequence_sink.hh"
#include "textview_curses.hh"
@ -52,10 +51,7 @@ using namespace std;
class my_source : public grep_proc_source<vis_line_t> {
public:
// Construct the test source: ms_offset starts at 0 and `fd` is handed to
// ms_buffer via set_fd().
// NOTE: the hunk shows the same constructor twice, first in its multi-line
// layout and then in the single-line layout; the tokens are identical. The
// ';' after the body is an empty declaration.
my_source(auto_fd& fd) : ms_offset(0)
{
this->ms_buffer.set_fd(fd);
};
my_source(auto_fd& fd) : ms_offset(0) { this->ms_buffer.set_fd(fd); };
bool grep_value_for_line(vis_line_t line_number, string& value_out)
{

@ -56,7 +56,7 @@ main(int argc, char* argv[])
}
shlex lexer(argv[1], strlen(argv[1]));
pcre_context::capture_t cap;
string_fragment cap;
shlex_token_t token;
printf(" %s\n", argv[1]);
@ -64,12 +64,12 @@ main(int argc, char* argv[])
int lpc;
printf("%s ", ST_TOKEN_NAMES[(int) token]);
for (lpc = 0; lpc < cap.c_end; lpc++) {
if (lpc == cap.c_begin) {
for (lpc = 0; lpc < cap.sf_end; lpc++) {
if (lpc == cap.sf_begin) {
fputc('^', stdout);
} else if (lpc == (cap.c_end - 1)) {
} else if (lpc == (cap.sf_end - 1)) {
fputc('^', stdout);
} else if (lpc > cap.c_begin) {
} else if (lpc > cap.sf_begin) {
fputc('-', stdout);
} else {
fputc(' ', stdout);

@ -132,8 +132,6 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_cmds.sh_968dac54dc80d91a5da2322890c6c26dfa0d8462.out \
$(srcdir)/%reldir%/test_cmds.sh_a00943ef715598c7554b85de8502454e41bb9e28.err \
$(srcdir)/%reldir%/test_cmds.sh_a00943ef715598c7554b85de8502454e41bb9e28.out \
$(srcdir)/%reldir%/test_cmds.sh_a0e6214b2a85c90d31aee12efde850441cca7eb3.err \
$(srcdir)/%reldir%/test_cmds.sh_a0e6214b2a85c90d31aee12efde850441cca7eb3.out \
$(srcdir)/%reldir%/test_cmds.sh_a1123427c31c022433d66d05ee5d5e1c8ab415e4.err \
$(srcdir)/%reldir%/test_cmds.sh_a1123427c31c022433d66d05ee5d5e1c8ab415e4.out \
$(srcdir)/%reldir%/test_cmds.sh_a190bfc279fa046a823864f1484f899d27d22953.err \
@ -348,58 +346,6 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_pretty_print.sh_cd361eeca7e91bfab942b75d6c3422c7a456a111.out \
$(srcdir)/%reldir%/test_pretty_print.sh_f8feb52a321026d9562b271eb37a2c56dfaed329.err \
$(srcdir)/%reldir%/test_pretty_print.sh_f8feb52a321026d9562b271eb37a2c56dfaed329.out \
$(srcdir)/%reldir%/test_regex101.sh_0fa3663a45aca6a328cb728872af7ed7ee896f1c.err \
$(srcdir)/%reldir%/test_regex101.sh_0fa3663a45aca6a328cb728872af7ed7ee896f1c.out \
$(srcdir)/%reldir%/test_regex101.sh_182ae9244db314a953af2bee969726e381bc5a32.err \
$(srcdir)/%reldir%/test_regex101.sh_182ae9244db314a953af2bee969726e381bc5a32.out \
$(srcdir)/%reldir%/test_regex101.sh_2158f1f011ba8e1b152396c072790c076fdb8ce8.err \
$(srcdir)/%reldir%/test_regex101.sh_2158f1f011ba8e1b152396c072790c076fdb8ce8.out \
$(srcdir)/%reldir%/test_regex101.sh_281af24141680330791db7f7c5fa70833ce08a6b.err \
$(srcdir)/%reldir%/test_regex101.sh_281af24141680330791db7f7c5fa70833ce08a6b.out \
$(srcdir)/%reldir%/test_regex101.sh_35703b13990785632cca82123fb3883797959c0b.err \
$(srcdir)/%reldir%/test_regex101.sh_35703b13990785632cca82123fb3883797959c0b.out \
$(srcdir)/%reldir%/test_regex101.sh_366730cac50b4a09b7de4b84641791470b1cb9a3.err \
$(srcdir)/%reldir%/test_regex101.sh_366730cac50b4a09b7de4b84641791470b1cb9a3.out \
$(srcdir)/%reldir%/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.err \
$(srcdir)/%reldir%/test_regex101.sh_3d18474a3e472fff6e23e0c41337ec9188fee591.out \
$(srcdir)/%reldir%/test_regex101.sh_442cc58676590a3604d5c2183f5fe0a75c98351a.err \
$(srcdir)/%reldir%/test_regex101.sh_442cc58676590a3604d5c2183f5fe0a75c98351a.out \
$(srcdir)/%reldir%/test_regex101.sh_566fd88d216a44bc1c6e23f2d6f2d0caf99d42f9.err \
$(srcdir)/%reldir%/test_regex101.sh_566fd88d216a44bc1c6e23f2d6f2d0caf99d42f9.out \
$(srcdir)/%reldir%/test_regex101.sh_5f2f7ecb6ab9cbec4b41385b91bd038906b8a7b2.err \
$(srcdir)/%reldir%/test_regex101.sh_5f2f7ecb6ab9cbec4b41385b91bd038906b8a7b2.out \
$(srcdir)/%reldir%/test_regex101.sh_629bde30483e0a6461076e9058f3a5eb81ae0425.err \
$(srcdir)/%reldir%/test_regex101.sh_629bde30483e0a6461076e9058f3a5eb81ae0425.out \
$(srcdir)/%reldir%/test_regex101.sh_630db454054cf92ec9bd0f4e3e83300047f583ff.err \
$(srcdir)/%reldir%/test_regex101.sh_630db454054cf92ec9bd0f4e3e83300047f583ff.out \
$(srcdir)/%reldir%/test_regex101.sh_771af6f3d29b8350542d5c6e98bdbf4c223cd531.err \
$(srcdir)/%reldir%/test_regex101.sh_771af6f3d29b8350542d5c6e98bdbf4c223cd531.out \
$(srcdir)/%reldir%/test_regex101.sh_7991a5b617867cf37c9f7baa85ffa425f7d455a2.err \
$(srcdir)/%reldir%/test_regex101.sh_7991a5b617867cf37c9f7baa85ffa425f7d455a2.out \
$(srcdir)/%reldir%/test_regex101.sh_79ee3f5fe71ccec97b2619d8c1f74ca97ffd2243.err \
$(srcdir)/%reldir%/test_regex101.sh_79ee3f5fe71ccec97b2619d8c1f74ca97ffd2243.out \
$(srcdir)/%reldir%/test_regex101.sh_7de76c174c58d67bf93e8f01d6d55ebb6a023f10.err \
$(srcdir)/%reldir%/test_regex101.sh_7de76c174c58d67bf93e8f01d6d55ebb6a023f10.out \
$(srcdir)/%reldir%/test_regex101.sh_8a43e6657d4f60e68d31eb8302542ca28e80d077.err \
$(srcdir)/%reldir%/test_regex101.sh_8a43e6657d4f60e68d31eb8302542ca28e80d077.out \
$(srcdir)/%reldir%/test_regex101.sh_8e93a3b6b941847c71409a297779fbb0a6666a51.err \
$(srcdir)/%reldir%/test_regex101.sh_8e93a3b6b941847c71409a297779fbb0a6666a51.out \
$(srcdir)/%reldir%/test_regex101.sh_95c56a9d146ec9a7c2196559d316f928b2ae6ae9.err \
$(srcdir)/%reldir%/test_regex101.sh_95c56a9d146ec9a7c2196559d316f928b2ae6ae9.out \
$(srcdir)/%reldir%/test_regex101.sh_9d101ee29c45cdb8c0f117ad736c9a5dd5da5839.err \
$(srcdir)/%reldir%/test_regex101.sh_9d101ee29c45cdb8c0f117ad736c9a5dd5da5839.out \
$(srcdir)/%reldir%/test_regex101.sh_c43e07df9b3068696fdc8759c7561135db981b38.err \
$(srcdir)/%reldir%/test_regex101.sh_c43e07df9b3068696fdc8759c7561135db981b38.out \
$(srcdir)/%reldir%/test_regex101.sh_cbd859487e4ea011cd6e0f0f114d70158bfd8b43.err \
$(srcdir)/%reldir%/test_regex101.sh_cbd859487e4ea011cd6e0f0f114d70158bfd8b43.out \
$(srcdir)/%reldir%/test_regex101.sh_cf6c0a9f0f04e24ce1fae7a0a434830b14447f83.err \
$(srcdir)/%reldir%/test_regex101.sh_cf6c0a9f0f04e24ce1fae7a0a434830b14447f83.out \
$(srcdir)/%reldir%/test_regex101.sh_d84597760285c3964b258726341e018f6cd49954.err \
$(srcdir)/%reldir%/test_regex101.sh_d84597760285c3964b258726341e018f6cd49954.out \
$(srcdir)/%reldir%/test_regex101.sh_f23e393dbf23d0d8e276e9b7610c7b74d79980f8.err \
$(srcdir)/%reldir%/test_regex101.sh_f23e393dbf23d0d8e276e9b7610c7b74d79980f8.out \
$(srcdir)/%reldir%/test_regex101.sh_fc41b6ee90cbf038620151f16d164b361acf82dd.err \
$(srcdir)/%reldir%/test_regex101.sh_fc41b6ee90cbf038620151f16d164b361acf82dd.out \
$(srcdir)/%reldir%/test_sessions.sh_0300a1391c33b1c45ddfa90198a6bd0a5404a77f.err \
$(srcdir)/%reldir%/test_sessions.sh_0300a1391c33b1c45ddfa90198a6bd0a5404a77f.out \
$(srcdir)/%reldir%/test_sessions.sh_17b85654b929b2a8fc1705a170ced544783292fa.err \
@ -762,6 +708,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_json_func.sh_f34205b59e04f261897ad89f659595c743a18ca9.out \
$(srcdir)/%reldir%/test_sql_json_func.sh_f34f5dfa938a1ac7721f924beb16bbceec127a1b.err \
$(srcdir)/%reldir%/test_sql_json_func.sh_f34f5dfa938a1ac7721f924beb16bbceec127a1b.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_03257c56e85558aa0cc925b68d3af962afc25125.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_51293df041b6969ccecc60204dce3676d0fb006d.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_51293df041b6969ccecc60204dce3676d0fb006d.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_b841a0c09601e2419eeb99e85f7e286c889e4801.err \
@ -770,6 +718,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_regexp.sh_bbd1128cf61a9af8f9dc937b46217443f42e1a7a.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_d42e1fcfe6d42394f79da84be2d37e62c4c0ea63.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_d61af17ff19d640ddfc879460910991825eedd05.out \
$(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.err \
$(srcdir)/%reldir%/test_sql_regexp.sh_ed6e9f13f178def009ee58c2aeea8c3c70fdb580.out \
$(srcdir)/%reldir%/test_sql_search_table.sh_1a0d872ebc492fcecb2e79a0993170d5fc771a5b.err \
@ -850,6 +800,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_str_func.sh_8cef54f0617960320b5d3615068eb27333dcf6a3.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_8f4f0ed74c4dc6b821e02a44552b694614cd9353.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_8f4f0ed74c4dc6b821e02a44552b694614cd9353.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_949ffd5b2ef9fbcbe17f2e61ef7750f7038f6fd6.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_a4d84a0082a7df34c95c2e6e070bbf6effaa5594.err \
$(srcdir)/%reldir%/test_sql_str_func.sh_a4d84a0082a7df34c95c2e6e070bbf6effaa5594.out \
$(srcdir)/%reldir%/test_sql_str_func.sh_a65d2fb2f841578619528ca10168ca4d650218e9.err \
@ -986,8 +938,6 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \
$(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err \
$(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out \
$(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \
$(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \
$(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \

@ -833,7 +833,7 @@ For support questions, email:
:current-time
:current-time
══════════════════════════════════════════════════════════════════════
Print the current time in human-readable form and seconds since the
epoch

@ -1,56 +1,56 @@
✘ error: “invalid(abc” is not a valid regular expression for property “/invalid_props_log/tags/badtag3/pattern”
reason: missing )
 --> {test_dir}/bad-config/formats/invalid-properties/format.json:35
 |  "pattern": "invalid(abc"
✘ error: “invalid(abc” is not a valid regular expression
reason: missing closing parenthesis
 --> /invalid_props_log/tags/badtag3/pattern
 | invalid(abc 
 |  ^ missing ) 
 |  ^ missing closing parenthesis
 --> {test_dir}/bad-config/formats/invalid-properties/format.json:35
 |  "pattern": "invalid(abc"
 = help: Property Synopsis
/invalid_props_log/tags/badtag3/pattern <regex>
Description
The regular expression to match against the body of the log message
Example
\w+ is down
✘ error: “abc(def” is not a valid regular expression for property “/invalid_props_log/search-table/bad_table_regex/pattern”
reason: missing )
 --> {test_dir}/bad-config/formats/invalid-properties/format.json:40
 |  "pattern": "abc(def" 
✘ error: “abc(def” is not a valid regular expression
reason: missing closing parenthesis
 --> /invalid_props_log/search-table/bad_table_regex/pattern
 | abc(def 
 |  ^ missing ) 
 |  ^ missing closing parenthesis 
 --> {test_dir}/bad-config/formats/invalid-properties/format.json:40
 |  "pattern": "abc(def" 
 = help: Property Synopsis
/invalid_props_log/search-table/bad_table_regex/pattern <regex>
Description
The regular expression for this search table.
✘ error: “^(?<timestamp>\d+: (?<body>.*)$” is not a valid regular expression for property “/bad_regex_log/regex/std/pattern”
reason: missing )
✘ error: “^(?<timestamp>\d+: (?<body>.*)$” is not a valid regular expression
reason: missing closing parenthesis
 --> /bad_regex_log/regex/std/pattern
 | ^(?<timestamp>\d+: (?<body>.*)$ 
 |  ^ missing closing parenthesis
 --> {test_dir}/bad-config/formats/invalid-regex/format.json:6
 |  "pattern": "^(?<timestamp>\\d+: (?<body>.*)$"
 --> /bad_regex_log/regex/std/pattern
 | ^(?<timestamp>\d+: (?<body>.*)$ 
 |  ^ missing )
 = help: Property Synopsis
/bad_regex_log/regex/std/pattern <message-regex>
Description
The regular expression to match a log message and capture fields.
✘ error: “(foo” is not a valid regular expression for property “/bad_regex_log/level/error”
reason: missing )
✘ error: “(foo” is not a valid regular expression
reason: missing closing parenthesis
 --> pattern
 | (foo 
 |  ^ missing closing parenthesis 
 --> {test_dir}/bad-config/formats/invalid-regex/format.json:13
 |  "error": "(foo" 
 --> /bad_regex_log/level/error
 | (foo 
 |  ^ missing ) 
 = help: Property Synopsis
/bad_regex_log/level/error <pattern|integer>
Description
The regular expression used to match the log text for this level. For JSON logs with numeric levels, this should be the number for the corresponding level.
✘ error: “abc(” is not a valid regular expression for property “/bad_regex_log/highlights/foobar/pattern”
reason: missing )
 --> {test_dir}/bad-config/formats/invalid-regex/format.json:25
 |  "pattern": "abc(" 
✘ error: “abc(” is not a valid regular expression
reason: missing closing parenthesis
 --> /bad_regex_log/highlights/foobar/pattern
 | abc( 
 |  ^ missing ) 
 |  ^ missing closing parenthesis 
 --> {test_dir}/bad-config/formats/invalid-regex/format.json:25
 |  "pattern": "abc(" 
 = help: Property Synopsis
/bad_regex_log/highlights/foobar/pattern <regex>
Description
@ -153,8 +153,11 @@
 | CREATE TALE invalid (x y z); 
 |  ^ near "TALE": syntax error 
✘ error: failed to execute SQL statement
reason: ✘ error: call to regexp_match(re, str) failed
 |  reason: missing )
reason: ✘ error: “abc(” is not a valid regular expression
 |  reason: missing closing parenthesis
 |   --> arg
 |   | abc( 
 |   |  ^ missing closing parenthesis
 --> {test_dir}/bad-config/formats/invalid-sql/init2.sql
 | SELECT regexp_match('abc(', '123') 
 | FROM sqlite_master; 

@ -4,24 +4,24 @@
 |  ar_log": { "abc" } }
 |  (right here) ------^
 | 
✘ error: “abc(” is not a valid regular expression for property “/invalid_key_log/level-pointer”
reason: missing )
 --> {test_dir}/bad-config-json/formats/invalid-key/format.json:4
 |  "level-pointer": "abc(", 
✘ error: “abc(” is not a valid regular expression
reason: missing closing parenthesis
 --> /invalid_key_log/level-pointer
 | abc( 
 |  ^ missing ) 
 |  ^ missing closing parenthesis 
 --> {test_dir}/bad-config-json/formats/invalid-key/format.json:4
 |  "level-pointer": "abc(", 
 = help: Property Synopsis
/invalid_key_log/level-pointer
Description
A regular-expression that matches the JSON-pointer of the level property
✘ error: “def[ghi” is not a valid regular expression for property “/invalid_key_log/file-pattern”
✘ error: “def[ghi” is not a valid regular expression
reason: missing terminating ] for character class
 --> {test_dir}/bad-config-json/formats/invalid-key/format.json:5
 |  "file-pattern": "def[ghi", 
 --> /invalid_key_log/file-pattern
 | def[ghi 
 |  ^ missing terminating ] for character class
 --> {test_dir}/bad-config-json/formats/invalid-key/format.json:5
 |  "file-pattern": "def[ghi", 
 = help: Property Synopsis
/invalid_key_log/file-pattern
Description

@ -1,14 +1,34 @@
✘ error: invalid value “/unit_test_log/value/jobserver”
reason: no patterns have a capture named “jobserver”
 = note: the following captures are available:
 = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name
✘ error: invalid value “/unit_test_log/value/processid”
reason: no patterns have a capture named “processid”
 = note: the following captures are available:
 = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name
✘ error: invalid value “/unit_test_log/value/timestamp”
reason: no patterns have a capture named “timestamp”
 = note: the following captures are available:
 = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name
✘ error: invalid value “/unit_test_log/value/workqueue”
reason: no patterns have a capture named “workqueue”
 = note: the following captures are available:
 = help: values are populated from captures in patterns, so at least one pattern must have a capture with this value name
✘ error: invalid sample log message: "[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {\"ELAPSED\":\"0.011\",\"LEVEL\":\"info\",\"MESSAGE\":\"finished in 0.011\\n\",\"PREFIX\":\"YFgyWQriCmsAAofJAAAAHg\",\"ROUTINGKEY\":\"EXAMPLE1366.Example.Events._Publish\"}"
reason: sample does not match any patterns
 --> regex101-home/.lnav/formats/installed/unit_test_log.json:26
 = note: the following shows how each pattern matched this sample:
[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}
[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}
 = note: std = “”
✘ error: invalid sample log message: "[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {\"ELAPSED\":\"0.011\",\"LEVEL\":\"info\",\"MESSAGE\":\"finished in 0.011\\n\",\"PREFIX\":\"YFgyWQriCmsAAofJAAAAHg\",\"ROUTINGKEY\":\"EXAMPLE1366.Example.Events._Publish\"}"
reason: sample does not match any patterns
 --> regex101-home/.lnav/formats/installed/unit_test_log.json:30
 = note: the following shows how each pattern matched this sample:
[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}
[03/22/2021 02:00:02 job1074.example.com db.db81.example_events 54026] {"ELAPSED":"0.011","LEVEL":"info","MESSAGE":"finished in 0.011\n","PREFIX":"YFgyWQriCmsAAofJAAAAHg","ROUTINGKEY":"EXAMPLE1366.Example.Events._Publish"}
 = note: std = “”

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save