mirror of https://github.com/tstack/lnav
parent
ca4e61ba02
commit
5a63ece31d
@ -0,0 +1,163 @@
|
||||
# ===========================================================================
|
||||
# https://www.gnu.org/software/autoconf-archive/ax_check_pcre2.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_CHECK_PCRE2([bits], [action-if-found], [action-if-not-found])
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Search for an installed libpcre2-8 library. If nothing was specified
|
||||
# when calling configure, it searches first in /usr/local and then in
|
||||
# /usr, /opt/local and /sw. If the --with-pcre2=DIR is specified, it will
|
||||
# try to find it in DIR/include/pcre2.h and DIR/lib/libpcre2-8. If
|
||||
# --without-pcre2 is specified, the library is not searched at all.
|
||||
#
|
||||
# If 'bits' is empty or '8', PCRE2 8-bit character support is checked
|
||||
# only. If 'bits' contains '16', PCRE2 8-bit and 16-bit character support
|
||||
# are checked. If 'bits' contains '32', PCRE2 8-bit and 32-bit character
|
||||
# support are checked. When 'bits' contains both '16' and '32', PCRE2
|
||||
# 8-bit, 16-bit, and 32-bit character support is checked.
|
||||
#
|
||||
# If either the header file (pcre2.h), or the library (libpcre2-8) is not
|
||||
# found, or the specified PCRE2 character bit width is not supported,
|
||||
# shell commands 'action-if-not-found' is run. If 'action-if-not-found' is
|
||||
# not specified, the configuration exits on error, asking for a valid
|
||||
# PCRE2 installation directory or --without-pcre2.
|
||||
#
|
||||
# If both header file and library are found, and the specified PCRE2 bit
|
||||
# widths are supported, shell commands 'action-if-found' is run. If
|
||||
# 'action-if-found' is not specified, the default action appends
|
||||
# '-I${PCRE2_HOME}/include' to CPPFLAGS, appends '-L${PCRE2_HOME}/lib' to
|
||||
# LDFLAGS, prepends '-lpcre2-8' to LIBS, and calls AC_DEFINE(HAVE_PCRE2).
|
||||
# You should use autoheader to include a definition for this symbol in a
|
||||
# config.h file. Sample usage in a C/C++ source is as follows:
|
||||
#
|
||||
# #ifdef HAVE_PCRE2
|
||||
# #define PCRE2_CODE_UNIT_WIDTH 8
|
||||
# #include <pcre2.h>
|
||||
# #endif /* HAVE_PCRE2 */
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2020 Robert van Engelen <engelen@acm.org>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 2
|
||||
|
||||
AC_DEFUN([AX_CHECK_PCRE2],
#
# Handle user hints
#
[AC_MSG_CHECKING(if PCRE2 is wanted)
pcre2_places="/usr/local /usr /opt/local /sw"
AC_ARG_WITH([pcre2],
[ --with-pcre2=DIR root directory path of PCRE2 installation @<:@defaults to
/usr/local or /usr if not found in /usr/local@:>@
--without-pcre2 to disable PCRE2 usage completely],
[if test "$withval" != "no" ; then
  AC_MSG_RESULT(yes)
  if test -d "$withval"
  then
    pcre2_places="$withval $pcre2_places"
  else
    AC_MSG_WARN([Sorry, $withval does not exist, checking usual places])
  fi
else
  pcre2_places=""
  AC_MSG_RESULT(no)
fi],
[AC_MSG_RESULT(yes)])
#
# Locate PCRE2, if wanted
#
if test -n "${pcre2_places}"
then
  # check the user supplied or any other more or less 'standard' place:
  #   Most UNIX systems      : /usr/local and /usr
  #   MacPorts / Fink on OSX : /opt/local respectively /sw
  # PCRE2_HOME is left empty when none of the places has include/pcre2.h.
  for PCRE2_HOME in ${pcre2_places} ; do
    if test -f "${PCRE2_HOME}/include/pcre2.h"; then break; fi
    PCRE2_HOME=""
  done

  # Remember the flags so they can be put back after the probes below.
  PCRE2_OLD_LDFLAGS=$LDFLAGS
  PCRE2_OLD_CPPFLAGS=$CPPFLAGS
  if test -n "${PCRE2_HOME}"; then
    LDFLAGS="$LDFLAGS -L${PCRE2_HOME}/lib"
    CPPFLAGS="$CPPFLAGS -I${PCRE2_HOME}/include"
  fi
  AC_LANG_PUSH([C])
  AC_CHECK_LIB([pcre2-8], [pcre2_compile_8], [pcre2_cv_libpcre2=yes], [pcre2_cv_libpcre2=no])
  AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_h=yes], [pcre2_cv_pcre2_h=no], [#define PCRE2_CODE_UNIT_WIDTH 8])
  # A missing 16-bit or 32-bit variant, when requested, downgrades the
  # overall library result to "no".
  case "$1" in
    *16*)
      AC_CHECK_LIB([pcre2-16], [pcre2_compile_16], [pcre2_cv_libpcre2_16=yes], [pcre2_cv_libpcre2_16=no])
      AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_16_h=yes], [pcre2_cv_pcre2_16_h=no], [#define PCRE2_CODE_UNIT_WIDTH 16])
      if test "$pcre2_cv_libpcre2_16" = "no" || test "$pcre2_cv_pcre2_16_h" = "no"; then
        pcre2_cv_libpcre2=no
      fi
      ;;
  esac
  case "$1" in
    *32*)
      AC_CHECK_LIB([pcre2-32], [pcre2_compile_32], [pcre2_cv_libpcre2_32=yes], [pcre2_cv_libpcre2_32=no])
      AC_CHECK_HEADER([pcre2.h], [pcre2_cv_pcre2_32_h=yes], [pcre2_cv_pcre2_32_h=no], [#define PCRE2_CODE_UNIT_WIDTH 32])
      if test "$pcre2_cv_libpcre2_32" = "no" || test "$pcre2_cv_pcre2_32_h" = "no"; then
        pcre2_cv_libpcre2=no
      fi
  esac
  AC_LANG_POP([C])
  if test "$pcre2_cv_libpcre2" = "yes" && test "$pcre2_cv_pcre2_h" = "yes"
  then
    #
    # If both library and header were found, action-if-found
    #
    m4_ifblank([$2],[
                # Rebuild the flags from the saved values so the search path
                # is added exactly once, and only when a PCRE2 home directory
                # was actually located.  Without this the options were
                # appended a second time, or became -I/include and -L/lib
                # when the header was found on the default search path.
                CPPFLAGS="$PCRE2_OLD_CPPFLAGS"
                LDFLAGS="$PCRE2_OLD_LDFLAGS"
                if test -n "${PCRE2_HOME}"; then
                  CPPFLAGS="$CPPFLAGS -I${PCRE2_HOME}/include"
                  LDFLAGS="$LDFLAGS -L${PCRE2_HOME}/lib"
                fi
                LIBS="-lpcre2-8 $LIBS"
                AC_DEFINE([HAVE_PCRE2], [1],
                          [Define to 1 if you have `PCRE2' library (-lpcre2-$1)])
               ],[
                # Restore variables
                LDFLAGS="$PCRE2_OLD_LDFLAGS"
                CPPFLAGS="$PCRE2_OLD_CPPFLAGS"
                $2
               ])
  else
    #
    # If either header or library was not found, action-if-not-found
    #
    # Put the saved flags back first so a caller-supplied fallback does not
    # inherit search paths that point at an unusable installation.
    LDFLAGS="$PCRE2_OLD_LDFLAGS"
    CPPFLAGS="$PCRE2_OLD_CPPFLAGS"
    m4_default([$3],[
                AC_MSG_ERROR([either specify a valid PCRE2 installation with --with-pcre2=DIR or disable PCRE2 usage with --without-pcre2])
                ])
  fi
fi
])
|
@ -1,74 +0,0 @@
|
||||
dnl
|
||||
dnl Copyright (c) 2007-2015, Timothy Stack
|
||||
dnl
|
||||
dnl All rights reserved.
|
||||
dnl
|
||||
dnl Redistribution and use in source and binary forms, with or without
|
||||
dnl modification, are permitted provided that the following conditions are met:
|
||||
dnl
|
||||
dnl dnl Redistributions of source code must retain the above copyright notice, this
|
||||
dnl list of conditions and the following disclaimer.
|
||||
dnl dnl Redistributions in binary form must reproduce the above copyright notice,
|
||||
dnl this list of conditions and the following disclaimer in the documentation
|
||||
dnl and/or other materials provided with the distribution.
|
||||
dnl dnl Neither the name of Timothy Stack nor the names of its contributors
|
||||
dnl may be used to endorse or promote products derived from this software
|
||||
dnl without specific prior written permission.
|
||||
dnl
|
||||
dnl THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
dnl EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
dnl WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
dnl DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
dnl DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
dnl (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
dnl ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
dnl (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
dnl SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
dnl
|
||||
dnl @file lnav_with_pcre.m4
|
||||
dnl
|
||||
dnl AX_PATH_LIB_PCRE(action-if-found, action-if-not-found)
dnl
dnl Probe for the original PCRE library and substitute PCRE_LIBS and
dnl PCRE_CFLAGS into the output files.
dnl
dnl   * --with-pcre=no / --without-pcre: reports "disabled" and runs the
dnl     second argument when one was given.
dnl   * When the option is not given at all, with_pcre is set to "yes".
dnl   * with_pcre empty and the cached pcre_study check already "yes": uses
dnl     plain -lpcre and runs the first argument.
dnl   * Otherwise with_pcre is treated as an installation prefix: LDFLAGS and
dnl     CPPFLAGS are extended with its lib/ and include/ directories for the
dnl     duration of the checks and then restored.  On success PCRE_LDFLAGS,
dnl     PCRE_LIBS and, if the include directory exists, PCRE_CFLAGS are set
dnl     and the first argument runs; on failure the second argument runs.
dnl
dnl NOTE(review): saved_LIBS is set but never read in this macro, and
dnl PCRE_LDFLAGS is assigned but not passed to AC_SUBST - confirm whether
dnl either is intentional.
AC_DEFUN([AX_PATH_LIB_PCRE],[dnl
AC_MSG_CHECKING([lib pcre])
AC_ARG_WITH(pcre,
[ --with-pcre[[=prefix]]],,
with_pcre="yes")
if test ".$with_pcre" = ".no" ; then
AC_MSG_RESULT([disabled])
m4_ifval($2,$2)
else
AC_MSG_RESULT([(testing)])
AS_VAR_SET(saved_LIBS, $LIBS)
if test ".$with_pcre" = "." && test "$ac_cv_lib_pcre_pcre_study" = "yes" ; then
PCRE_LIBS="-lpcre"
AC_MSG_CHECKING([lib pcre])
AC_CHECK_LIB(pcre, pcre_study)
AC_CHECK_HEADERS(pcre.h pcre/pcre.h)
AC_MSG_RESULT([$PCRE_LIBS])
m4_ifval($1,$1)
else
OLDLDFLAGS="$LDFLAGS" ; LDFLAGS="$LDFLAGS -L$with_pcre/lib"
OLDCPPFLAGS="$CPPFLAGS" ; CPPFLAGS="$CPPFLAGS -I$with_pcre/include"
AC_CHECK_LIB(pcre, pcre_compile)
AC_CHECK_HEADERS(pcre.h pcre/pcre.h)
CPPFLAGS="$OLDCPPFLAGS"
LDFLAGS="$OLDLDFLAGS"
if test "$ac_cv_lib_pcre_pcre_compile" = "yes" ; then
AC_MSG_RESULT(.setting PCRE_LIBS -L$with_pcre/lib -lpcre)
PCRE_LDFLAGS="-L$with_pcre/lib"
PCRE_LIBS="-lpcre"
test -d "$with_pcre/include" && PCRE_CFLAGS="-I$with_pcre/include"
AC_MSG_CHECKING([lib pcre])
AC_MSG_RESULT([$PCRE_LIBS])
m4_ifval($1,$1)
else
AC_MSG_CHECKING([lib pcre])
AC_MSG_RESULT([[no, (WARNING)]])
m4_ifval($2,$2)
fi
fi
fi
AC_SUBST([PCRE_LIBS])
AC_SUBST([PCRE_CFLAGS])
])
|
||||
|
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright (c) 2022, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lnav_console_into_hh
|
||||
#define lnav_console_into_hh
|
||||
|
||||
#include "intern_string.hh"
|
||||
#include "lnav.console.hh"
|
||||
|
||||
namespace lnav {
namespace pcre2pp {

// Forward declaration only; the full definition is not needed to declare
// the conversion function below.
struct compile_error;

}

namespace console {

/**
 * Convert a regular-expression compile error into a console user_message.
 *
 * @param src Presumably identifies where the failing pattern came from --
 *   TODO confirm against the definition.
 * @param ce The compile error to report.
 * @return The message built from the error.
 */
user_message to_user_message(intern_string_t src,
                             const pcre2pp::compile_error& ce);

}
}  // namespace lnav
|
||||
|
||||
#endif
|
@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Copyright (c) 2022, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lnav_log_search_table_fwd_hh
|
||||
#define lnav_log_search_table_fwd_hh
|
||||
|
||||
#include "pcrepp/pcre2pp.hh"
|
||||
|
||||
namespace log_search_table_ns {
// PCRE2 compile options: case-insensitive matching, '^'/'$' match at line
// boundaries, and '.' also matches newlines.
// NOTE(review): presumably the options used when compiling search-table
// patterns -- confirm against the users of this constant.
static constexpr int PATTERN_OPTIONS
    = PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_DOTALL;
}
|
||||
|
||||
#endif
|
@ -1,9 +1,16 @@
|
||||
# NOTE(review): this span looks like a rendered diff hunk in which the old and
# the new lines are interleaved (add_library and target_link_libraries for
# pcrepp each appear twice) -- confirm which lines belong in the final file.

# Static library "pcrepp": config.h.in plus one header/source pair.
add_library(pcrepp STATIC ../config.h.in pcrepp.hh pcrepp.cc)
add_library(pcrepp STATIC
../config.h.in
pcre2pp.hh
pcre2pp.cc)

# PUBLIC include directories, so targets linking pcrepp inherit them.
target_include_directories(pcrepp PUBLIC . .. ../third-party/scnlib/include
${CMAKE_CURRENT_BINARY_DIR}/..)
target_link_libraries(pcrepp cppfmt pcre::libpcre)
target_link_libraries(pcrepp cppfmt pcre::libpcre pcre2::pcre2)

# Test executables, each registered with CTest under its own name.
add_executable(test_pcrepp test_pcrepp.cc)
target_link_libraries(test_pcrepp pcrepp)
add_test(NAME test_pcrepp COMMAND test_pcrepp)
add_executable(test_pcre2pp test_pcre2pp.cc)
target_include_directories(
test_pcre2pp
PUBLIC
../third-party/doctest-root)
target_link_libraries(test_pcre2pp pcrepp)
add_test(NAME test_pcre2pp COMMAND test_pcre2pp)
|
||||
|
@ -0,0 +1,458 @@
|
||||
/**
|
||||
* Copyright (c) 2022, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
 * @file pcre2pp.cc
|
||||
*/
|
||||
|
||||
#include "pcre2pp.hh"
|
||||
|
||||
#include "config.h"
|
||||
|
||||
namespace lnav {
|
||||
namespace pcre2pp {
|
||||
|
||||
/**
 * Escape a string so that it matches literally when used in a pattern.
 *
 * ASCII letters, digits, '_' and bytes with the high bit set (UTF-8
 * lead/continuation bytes) are copied unchanged; every other byte is
 * prefixed with a backslash.
 *
 * @param unquoted NUL-terminated text to escape; must not be null.
 * @return The escaped copy.
 */
std::string
quote(const char* unquoted)
{
    std::string retval;

    for (const char* cursor = unquoted; *cursor != '\0'; ++cursor) {
        // The <cctype> classifiers are only defined for values representable
        // as unsigned char (or EOF).  Plain char may be signed, so a UTF-8
        // byte would be passed as a negative value without this conversion.
        const auto byte = static_cast<unsigned char>(*cursor);
        const bool keep_as_is
            = isalnum(byte) || byte == '_' || (byte & 0x80) != 0;

        if (!keep_as_is) {
            retval.push_back('\\');
        }
        retval.push_back(*cursor);
    }

    return retval;
}
|
||||
|
||||
/**
 * Allocate a PCRE2 match-data block sized for this pattern and wrap it in a
 * match_data object.  The block is released with pcre2_match_data_free.
 */
match_data
code::create_match_data() const
{
    auto_mem<pcre2_match_data> storage(pcre2_match_data_free);

    storage = pcre2_match_data_create_from_pattern(this->p_code, nullptr);
    return match_data{std::move(storage)};
}
|
||||
|
||||
/**
 * Compile a pattern into a code object.
 *
 * PCRE2_UTF is always added to the caller's options.  On failure the
 * returned compile_error carries the PCRE2 error code, the error offset
 * and a copy of the pattern.  JIT compilation is attempted afterwards and
 * a JIT failure is ignored.
 *
 * @param sf The pattern text.
 * @param options PCRE2 compile option bits.
 * @return The compiled code, or the compile error.
 */
Result<code, compile_error>
code::from(string_fragment sf, int options)
{
    compile_error failure;
    auto_mem<pcre2_code> compiled(pcre2_code_free);

    options |= PCRE2_UTF;
    compiled = pcre2_compile(sf.udata(),
                             sf.length(),
                             options,
                             &failure.ce_code,
                             &failure.ce_offset,
                             nullptr);
    if (compiled == nullptr) {
        failure.ce_pattern = sf.to_string();
        return Err(failure);
    }

    if (pcre2_jit_compile(compiled, PCRE2_JIT_COMPLETE) < 0) {
        // Best effort only: the pattern remains usable without JIT.
    }

    return Ok(code{std::move(compiled), sf.to_string()});
}
|
||||
|
||||
/**
 * Query PCRE2 for the pattern's name table: the number of entries, the
 * size of each entry and the table pointer.
 *
 * @return A named_captures view filled from pcre2_pattern_info.
 */
code::named_captures
code::get_named_captures() const
{
    named_captures table;
    const auto compiled = this->p_code.in();

    pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &table.nc_count);
    pcre2_pattern_info(
        compiled, PCRE2_INFO_NAMEENTRYSIZE, &table.nc_entry_size);
    pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &table.nc_name_table);

    return table;
}
|
||||
|
||||
size_t
|
||||
code::match_partial(string_fragment in) const
|
||||
{
|
||||
auto md = this->create_match_data();
|
||||
auto length = in.length();
|
||||
|
||||
do {
|
||||
auto rc = pcre2_match(this->p_code.in(),
|
||||
in.udata(),
|
||||
length,
|
||||
0,
|
||||
PCRE2_PARTIAL_HARD,
|
||||
md.md_data.in(),
|
||||
nullptr);
|
||||
|
||||
if (rc == PCRE2_ERROR_PARTIAL) {
|
||||
return md.md_ovector[1];
|
||||
}
|
||||
|
||||
if (length > 0) {
|
||||
length -= 1;
|
||||
}
|
||||
} while (length > 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char*
|
||||
code::get_name_for_capture(size_t index) const
|
||||
{
|
||||
for (const auto cap : this->get_named_captures()) {
|
||||
if (cap.get_index() == index) {
|
||||
return cap.get_name().data();
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t
|
||||
code::get_capture_count() const
|
||||
{
|
||||
uint32_t retval;
|
||||
|
||||
pcre2_pattern_info(this->p_code.in(), PCRE2_INFO_CAPTURECOUNT, &retval);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
std::vector<string_fragment>
|
||||
code::get_captures() const
|
||||
{
|
||||
bool in_class = false, in_escape = false, in_literal = false;
|
||||
auto pat_frag = string_fragment::from_str(this->p_pattern);
|
||||
std::vector<string_fragment> cap_in_progress;
|
||||
std::vector<string_fragment> retval;
|
||||
|
||||
for (int lpc = 0; this->p_pattern[lpc]; lpc++) {
|
||||
if (in_escape) {
|
||||
in_escape = false;
|
||||
if (this->p_pattern[lpc] == 'Q') {
|
||||
in_literal = true;
|
||||
}
|
||||
} else if (in_class) {
|
||||
if (this->p_pattern[lpc] == ']') {
|
||||
in_class = false;
|
||||
}
|
||||
if (this->p_pattern[lpc] == '\\') {
|
||||
in_escape = true;
|
||||
}
|
||||
} else if (in_literal) {
|
||||
if (this->p_pattern[lpc] == '\\' && this->p_pattern[lpc + 1] == 'E')
|
||||
{
|
||||
in_literal = false;
|
||||
lpc += 1;
|
||||
}
|
||||
} else {
|
||||
switch (this->p_pattern[lpc]) {
|
||||
case '\\':
|
||||
in_escape = true;
|
||||
break;
|
||||
case '[':
|
||||
in_class = true;
|
||||
break;
|
||||
case '(':
|
||||
cap_in_progress.emplace_back(pat_frag.sub_range(lpc, lpc));
|
||||
break;
|
||||
case ')': {
|
||||
if (!cap_in_progress.empty()) {
|
||||
static const auto DEFINE_SF
|
||||
= string_fragment::from_const("(?(DEFINE)");
|
||||
|
||||
auto& cap = cap_in_progress.back();
|
||||
char first = '\0', second = '\0', third = '\0';
|
||||
bool is_cap = false;
|
||||
|
||||
cap.sf_end = lpc + 1;
|
||||
if (cap.length() >= 2) {
|
||||
first = this->p_pattern[cap.sf_begin + 1];
|
||||
}
|
||||
if (cap.length() >= 3) {
|
||||
second = this->p_pattern[cap.sf_begin + 2];
|
||||
}
|
||||
if (cap.length() >= 4) {
|
||||
third = this->p_pattern[cap.sf_begin + 3];
|
||||
}
|
||||
if (cap.sf_begin >= 2) {
|
||||
auto poss_define = string_fragment::from_str_range(
|
||||
this->p_pattern, cap.sf_begin - 2, cap.sf_end);
|
||||
if (poss_define == DEFINE_SF) {
|
||||
cap_in_progress.pop_back();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (first == '?') {
|
||||
if (second == '\'') {
|
||||
is_cap = true;
|
||||
}
|
||||
if (second == '<'
|
||||
&& (isalpha(third) || third == '_'))
|
||||
{
|
||||
is_cap = true;
|
||||
}
|
||||
if (second == 'P' && third == '<') {
|
||||
is_cap = true;
|
||||
}
|
||||
} else if (first != '*') {
|
||||
is_cap = true;
|
||||
}
|
||||
if (is_cap) {
|
||||
retval.emplace_back(cap);
|
||||
}
|
||||
cap_in_progress.pop_back();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert((size_t) this->get_capture_count() == retval.size());
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
std::string
|
||||
code::replace(string_fragment str, const char* repl) const
|
||||
{
|
||||
std::string retval;
|
||||
std::string::size_type start = 0;
|
||||
string_fragment remaining = str;
|
||||
|
||||
auto md = this->create_match_data();
|
||||
while (remaining.is_valid()) {
|
||||
auto find_res = this->capture_from(str)
|
||||
.at(remaining)
|
||||
.into(md)
|
||||
.matches()
|
||||
.ignore_error();
|
||||
if (!find_res) {
|
||||
break;
|
||||
}
|
||||
auto all = find_res->f_all;
|
||||
remaining = find_res->f_remaining;
|
||||
bool in_escape = false;
|
||||
|
||||
retval.append(str.data(), start, (all.sf_begin - start));
|
||||
start = all.sf_end;
|
||||
for (int lpc = 0; repl[lpc]; lpc++) {
|
||||
auto ch = repl[lpc];
|
||||
|
||||
if (in_escape) {
|
||||
if (isdigit(ch)) {
|
||||
auto capture_index = (ch - '0');
|
||||
|
||||
if (capture_index < md.get_count()) {
|
||||
auto cap = md[capture_index];
|
||||
if (cap) {
|
||||
retval.append(cap->data(), cap->length());
|
||||
}
|
||||
} else if (capture_index > this->get_capture_count()) {
|
||||
retval.push_back('\\');
|
||||
retval.push_back(ch);
|
||||
}
|
||||
} else {
|
||||
if (ch != '\\') {
|
||||
retval.push_back('\\');
|
||||
}
|
||||
retval.push_back(ch);
|
||||
}
|
||||
in_escape = false;
|
||||
} else {
|
||||
switch (ch) {
|
||||
case '\\':
|
||||
in_escape = true;
|
||||
break;
|
||||
default:
|
||||
retval.push_back(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (remaining.is_valid()) {
|
||||
retval.append(str.data(), remaining.sf_begin, std::string::npos);
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int
|
||||
code::name_index(const char* name) const
|
||||
{
|
||||
return pcre2_substring_number_from_name(this->p_code.in(),
|
||||
(PCRE2_SPTR) name);
|
||||
}
|
||||
|
||||
size_t
|
||||
code::named_capture::get_index() const
|
||||
{
|
||||
return (this->nc_entry[0] << 8) | (this->nc_entry[1] & 0xff);
|
||||
}
|
||||
|
||||
/**
 * Return the capture name, which starts at the third byte of the name
 * table entry and is measured with strlen().
 */
string_fragment
code::named_capture::get_name() const
{
    const auto name_start = &this->nc_entry[2];
    const auto name_len = strlen((const char*) name_start);

    return string_fragment::from_bytes(name_start, name_len);
}
|
||||
|
||||
/** Produce a named_capture view of the entry the iterator points at. */
code::named_capture
code::named_captures::iterator::operator*() const
{
    code::named_capture current{this->i_entry};

    return current;
}
|
||||
|
||||
/** Advance by one name table entry (i_entry_size units). */
code::named_captures::iterator&
code::named_captures::iterator::operator++()
{
    this->i_entry = this->i_entry + this->i_entry_size;
    return *this;
}
|
||||
|
||||
bool
|
||||
code::named_captures::iterator::operator==(const iterator& other) const
|
||||
{
|
||||
return this->i_entry == other.i_entry
|
||||
&& this->i_entry_size == other.i_entry_size;
|
||||
}
|
||||
|
||||
bool
|
||||
code::named_captures::iterator::operator!=(const iterator& other) const
|
||||
{
|
||||
return this->i_entry != other.i_entry
|
||||
|| this->i_entry_size != other.i_entry_size;
|
||||
}
|
||||
|
||||
/** Iterator positioned on the first entry of the name table. */
code::named_captures::iterator
code::named_captures::begin() const
{
    iterator first{this->nc_entry_size, this->nc_name_table};

    return first;
}
|
||||
|
||||
/** Iterator positioned one past the last entry of the name table. */
code::named_captures::iterator
code::named_captures::end() const
{
    const auto table_size = this->nc_count * this->nc_entry_size;

    return iterator{this->nc_entry_size, this->nc_name_table + table_size};
}
|
||||
|
||||
/**
 * Run the next match attempt against the input.
 *
 * Each call starts at the offset recorded by the previous call, so
 * calling this repeatedly walks through successive matches.
 *
 * @param options Option bits passed straight to pcre2_match().
 * @return found (whole match plus the remaining input) when pcre2_match()
 *   returns a positive count, not_found when the input is exhausted or
 *   PCRE2 reports PCRE2_ERROR_NOMATCH, and error for any other result
 *   (including a return value of 0).
 */
matcher::matches_result
matcher::matches(uint32_t options)
{
    this->mb_input.i_offset = this->mb_input.i_next_offset;

    // -1 marks an input that a previous call already consumed completely.
    if (this->mb_input.i_offset == -1) {
        return not_found{};
    }

    auto rc = pcre2_match(this->mb_code.p_code.in(),
                          this->mb_input.i_string.udata(),
                          this->mb_input.i_string.length(),
                          this->mb_input.i_offset,
                          options,
                          this->mb_match_data.md_data.in(),
                          nullptr);

    if (rc > 0) {
        // md_input and md_capture_end must be set before mb_match_data[0]
        // is used below, since operator[] reads both.
        this->mb_match_data.md_input = this->mb_input;
        this->mb_match_data.md_code = &this->mb_code;
        this->mb_match_data.md_capture_end = rc;
        if (this->mb_match_data[0]->empty()
            && this->mb_match_data[0]->sf_end >= this->mb_input.i_string.sf_end)
        {
            // Empty match at the end of the input: nothing left to search.
            this->mb_input.i_next_offset = -1;
        } else if (this->mb_match_data[0]->empty()) {
            // Empty match elsewhere: step forward so the next call does not
            // find the same empty match again.
            // NOTE(review): advances by one code unit; with PCRE2_UTF this
            // may land inside a multi-byte character -- confirm.
            this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end + 1;
        } else {
            this->mb_input.i_next_offset = this->mb_match_data[0]->sf_end;
        }
        // Keep the copy inside the match data in sync; remaining() reads it.
        this->mb_match_data.md_input.i_next_offset
            = this->mb_input.i_next_offset;
        return found{
            this->mb_match_data[0].value(),
            this->mb_match_data.remaining(),
        };
    }

    // No match: record an empty range at the starting offset as capture 0.
    this->mb_match_data.md_input = this->mb_input;
    this->mb_match_data.md_ovector[0] = this->mb_input.i_offset;
    this->mb_match_data.md_ovector[1] = this->mb_input.i_offset;
    this->mb_match_data.md_capture_end = 1;
    if (rc == PCRE2_ERROR_NOMATCH) {
        return not_found{};
    }

    return error{&this->mb_code, rc};
}
|
||||
|
||||
void
|
||||
matcher::matches_result::handle_error(matcher::error err)
|
||||
{
|
||||
unsigned char buffer[1024];
|
||||
|
||||
pcre2_get_error_message(err.e_error_code, buffer, sizeof(buffer));
|
||||
// log_error("pcre2_match failure: %s", buffer);
|
||||
}
|
||||
|
||||
std::string
|
||||
compile_error::get_message() const
|
||||
{
|
||||
unsigned char buffer[1024];
|
||||
|
||||
pcre2_get_error_message(this->ce_code, buffer, sizeof(buffer));
|
||||
|
||||
return {(const char*) buffer};
|
||||
}
|
||||
|
||||
std::string
|
||||
matcher::error::get_message()
|
||||
{
|
||||
unsigned char buffer[1024];
|
||||
|
||||
pcre2_get_error_message(this->e_error_code, buffer, sizeof(buffer));
|
||||
|
||||
return {(const char*) buffer};
|
||||
}
|
||||
|
||||
} // namespace pcre2pp
|
||||
} // namespace lnav
|
@ -0,0 +1,368 @@
|
||||
/**
|
||||
* Copyright (c) 2022, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lnav_pcre2pp_hh
|
||||
#define lnav_pcre2pp_hh
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <pcre2.h>
|
||||
|
||||
#include "base/auto_mem.hh"
|
||||
#include "base/intern_string.hh"
|
||||
#include "base/result.h"
|
||||
#include "mapbox/variant.hpp"
|
||||
|
||||
namespace lnav {
|
||||
namespace pcre2pp {
|
||||
|
||||
std::string quote(const char* unquoted);
|
||||
|
||||
/** Convenience overload: escape the contents of a std::string. */
inline std::string
quote(const std::string& unquoted)
{
    const char* raw = unquoted.c_str();

    return quote(raw);
}
|
||||
|
||||
class code;
|
||||
struct capture_builder;
|
||||
class matcher;
|
||||
|
||||
struct input {
|
||||
string_fragment i_string;
|
||||
int i_offset{0};
|
||||
int i_next_offset{0};
|
||||
};
|
||||
|
||||
/**
 * Holds a PCRE2 match-data block together with the input it was last
 * matched against, and exposes the captured ranges as string fragments.
 * Instances with a PCRE2 block are built by the friend classes.
 */
class match_data {
public:
    /** Create an object with no PCRE2 block and no captures. */
    static match_data unitialized() { return match_data{}; }

    /**
     * @return The input between the offset where the match attempt
     *   started and the start of the overall match.
     */
    string_fragment leading() const
    {
        return this->md_input.i_string.sub_range(this->md_input.i_offset,
                                                 this->md_ovector[0]);
    }

    /**
     * @return The input from the next search offset to the end of the
     *   input string, or an invalid fragment when there are no captures
     *   or the next offset is -1.
     */
    string_fragment remaining() const
    {
        if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) {
            return string_fragment::invalid();
        }

        return string_fragment::from_byte_range(
            this->md_input.i_string.sf_string,
            this->md_input.i_next_offset,
            this->md_input.i_string.sf_end);
    }

    /**
     * Access a capture by number (0 is the overall match).
     *
     * @return The captured fragment, or nullopt when the index is not
     *   below the capture count of the last match or the group is unset.
     */
    nonstd::optional<string_fragment> operator[](size_t index) const
    {
        if (index >= this->md_capture_end) {
            return nonstd::nullopt;
        }

        // The ovector stores a start/end pair per capture.
        auto start = this->md_ovector[(index * 2)];
        auto stop = this->md_ovector[(index * 2) + 1];
        if (start == PCRE2_UNSET || stop == PCRE2_UNSET) {
            return nonstd::nullopt;
        }

        return this->md_input.i_string.sub_range(start, stop);
    }

    /** Access a capture by name given as a string literal (defined elsewhere). */
    template<typename T, std::size_t N>
    nonstd::optional<string_fragment> operator[](const T (&name)[N]) const;

    /** @return The capture count recorded for the last match attempt. */
    int get_count() const { return this->md_capture_end; }

private:
    friend matcher;
    friend code;

    match_data() = default;

    // md_data is declared before md_ovector and md_ovector_count, so it is
    // already initialized when the later initializers read it.
    explicit match_data(auto_mem<pcre2_match_data> dat)
        : md_data(std::move(dat)),
          md_ovector(pcre2_get_ovector_pointer(this->md_data.in())),
          md_ovector_count(pcre2_get_ovector_count(this->md_data.in()))
    {
    }

    // Owning handle for the PCRE2 match-data block.
    auto_mem<pcre2_match_data> md_data;
    // Pattern that produced the current contents; set by matcher::matches().
    const code* md_code{nullptr};
    // Copy of the input state used for the last match attempt.
    input md_input;
    // Offset vector obtained from md_data.
    PCRE2_SIZE* md_ovector{nullptr};
    // Number of offset pairs in md_ovector, as reported by PCRE2.
    uint32_t md_ovector_count{0};
    // Number of captures usable through operator[]; 0 before any match.
    int md_capture_end{0};
};
|
||||
|
||||
class matcher {
|
||||
public:
|
||||
struct found {
|
||||
string_fragment f_all;
|
||||
string_fragment f_remaining;
|
||||
};
|
||||
struct not_found {};
|
||||
struct error {
|
||||
const code* e_code{nullptr};
|
||||
int e_error_code{0};
|
||||
std::string get_message();
|
||||
};
|
||||
|
||||
class matches_result
|
||||
: public mapbox::util::variant<found, not_found, error> {
|
||||
public:
|
||||
using variant::variant;
|
||||
|
||||
nonstd::optional<found> ignore_error()
|
||||
{
|
||||
return this->match(
|
||||
[](found fo) { return nonstd::make_optional(fo); },
|
||||
[](not_found) { return nonstd::nullopt; },
|
||||
[](error err) {
|
||||
handle_error(err);
|
||||
return nonstd::nullopt;
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
static void handle_error(error err);
|
||||
};
|
||||
|
||||
matcher& reload_input(string_fragment sf, int next_offset)
|
||||
{
|
||||
this->mb_input = input{sf, next_offset, next_offset};
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
matches_result matches(uint32_t options = 0);
|
||||
|
||||
int get_next_offset() const { return this->mb_input.i_next_offset; }
|
||||
|
||||
private:
|
||||
friend capture_builder;
|
||||
|
||||
matcher(const code& co, input& in, match_data& md)
|
||||
: mb_code(co), mb_input(in), mb_match_data(md)
|
||||
{
|
||||
}
|
||||
|
||||
const code& mb_code;
|
||||
input mb_input;
|
||||
match_data& mb_match_data;
|
||||
};
|
||||
|
||||
struct capture_builder {
|
||||
const code& mb_code;
|
||||
input mb_input;
|
||||
|
||||
capture_builder at(const string_fragment& remaining) &&
|
||||
{
|
||||
this->mb_input.i_offset = this->mb_input.i_next_offset
|
||||
= remaining.sf_begin;
|
||||
return *this;
|
||||
}
|
||||
|
||||
matcher into(match_data& md) &&
|
||||
{
|
||||
return matcher{
|
||||
this->mb_code,
|
||||
this->mb_input,
|
||||
md,
|
||||
};
|
||||
}
|
||||
|
||||
template<uint32_t Options = 0, typename F>
|
||||
Result<string_fragment, matcher::error> for_each(F func) &&;
|
||||
};
|
||||
|
||||
/** Details of a pattern that could not be compiled. */
struct compile_error {
    /** The pattern text. */
    std::string ce_pattern;
    /** The numeric error code. */
    int ce_code = 0;
    /** The offset associated with the error. */
    size_t ce_offset = 0;

    std::string get_message() const;
};
|
||||
|
||||
class code {
|
||||
public:
|
||||
class named_capture {
|
||||
public:
|
||||
size_t get_index() const;
|
||||
string_fragment get_name() const;
|
||||
|
||||
PCRE2_SPTR nc_entry;
|
||||
};
|
||||
|
||||
class named_captures {
|
||||
public:
|
||||
struct iterator {
|
||||
named_capture operator*() const;
|
||||
iterator& operator++();
|
||||
bool operator==(const iterator& other) const;
|
||||
bool operator!=(const iterator& other) const;
|
||||
|
||||
uint32_t i_entry_size;
|
||||
PCRE2_SPTR i_entry;
|
||||
};
|
||||
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
bool empty() const { return this->nc_count == 0; }
|
||||
size_t size() const { return this->nc_count; }
|
||||
|
||||
private:
|
||||
friend code;
|
||||
|
||||
named_captures() = default;
|
||||
|
||||
uint32_t nc_count{0};
|
||||
uint32_t nc_entry_size{0};
|
||||
PCRE2_SPTR nc_name_table{nullptr};
|
||||
};
|
||||
|
||||
static Result<code, compile_error> from(string_fragment sf,
|
||||
int options = 0);
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
static code from_const(const T (&str)[N], int options = 0)
|
||||
{
|
||||
return from(string_fragment::from_const(str), options).unwrap();
|
||||
}
|
||||
|
||||
const std::string& get_pattern() const { return this->p_pattern; }
|
||||
|
||||
named_captures get_named_captures() const;
|
||||
|
||||
const char* get_name_for_capture(size_t index) const;
|
||||
|
||||
size_t get_capture_count() const;
|
||||
|
||||
int name_index(const char* name) const;
|
||||
|
||||
std::vector<string_fragment> get_captures() const;
|
||||
|
||||
match_data create_match_data() const;
|
||||
|
||||
capture_builder capture_from(string_fragment in) const
|
||||
{
|
||||
return capture_builder{
|
||||
*this,
|
||||
input{in},
|
||||
};
|
||||
}
|
||||
|
||||
matcher::matches_result find_in(string_fragment in,
|
||||
uint32_t options = 0) const
|
||||
{
|
||||
static thread_local match_data md = this->create_match_data();
|
||||
|
||||
if (md.md_ovector_count < this->p_match_proto.md_ovector_count) {
|
||||
md = this->create_match_data();
|
||||
}
|
||||
|
||||
return this->capture_from(in).into(md).matches(options);
|
||||
}
|
||||
|
||||
size_t match_partial(string_fragment in) const;
|
||||
|
||||
std::string replace(string_fragment str, const char* repl) const;
|
||||
|
||||
std::shared_ptr<code> to_shared() &&
|
||||
{
|
||||
return std::make_shared<code>(std::move(this->p_code),
|
||||
std::move(this->p_pattern));
|
||||
}
|
||||
|
||||
code(auto_mem<pcre2_code> code, std::string pattern)
|
||||
: p_code(std::move(code)), p_pattern(std::move(pattern)),
|
||||
p_match_proto(this->create_match_data())
|
||||
{
|
||||
}
|
||||
|
||||
private:
|
||||
friend matcher;
|
||||
friend match_data;
|
||||
|
||||
auto_mem<pcre2_code> p_code;
|
||||
std::string p_pattern;
|
||||
match_data p_match_proto;
|
||||
};
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
nonstd::optional<string_fragment>
|
||||
match_data::operator[](const T (&name)[N]) const
|
||||
{
|
||||
auto index = pcre2_substring_number_from_name(
|
||||
this->md_code->p_code.in(),
|
||||
reinterpret_cast<const unsigned char*>(name));
|
||||
|
||||
return this->operator[](index);
|
||||
}
|
||||
|
||||
template<uint32_t Options, typename F>
|
||||
Result<string_fragment, matcher::error>
|
||||
capture_builder::for_each(F func) &&
|
||||
{
|
||||
auto md = this->mb_code.create_match_data();
|
||||
auto mat = matcher{this->mb_code, this->mb_input, md};
|
||||
|
||||
bool done = false;
|
||||
matcher::error eret;
|
||||
|
||||
while (!done) {
|
||||
auto match_res = mat.matches(Options);
|
||||
done = match_res.match(
|
||||
[mat, &func](matcher::found) {
|
||||
func(mat.mb_match_data);
|
||||
return false;
|
||||
},
|
||||
[](matcher::not_found) { return true; },
|
||||
[&eret](matcher::error err) {
|
||||
eret = err;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
if (eret.e_error_code == 0) {
|
||||
return Ok(md.remaining());
|
||||
}
|
||||
return Err(eret);
|
||||
}
|
||||
|
||||
} // namespace pcre2pp
|
||||
} // namespace lnav
|
||||
|
||||
#endif
|
@ -1,453 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2007-2012, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* @file pcrepp.cc
|
||||
*/
|
||||
|
||||
#include "pcrepp.hh"
|
||||
|
||||
// Sizes passed to pcre_jit_stack_alloc() when the JIT stack is created.
constexpr int JIT_STACK_MIN_SIZE = 32 * 1024;
constexpr int JIT_STACK_MAX_SIZE = 512 * 1024;
|
||||
|
||||
/**
 * Look up a capture by group name.
 *
 * @return The capture slot for the named group, or nullptr when the
 * pattern reports PCRE_ERROR_NOSUBSTRING for the name.
 */
pcre_context::capture_t*
pcre_context::operator[](const char* name) const
{
    const auto index = this->pc_pcre->name_index(name);

    if (index == PCRE_ERROR_NOSUBSTRING) {
        return nullptr;
    }

    // Slot zero holds the overall match, so named groups are shifted by one.
    return &this->pc_captures[index + 1];
}
|
||||
|
||||
/**
 * @return The first capture after the overall-match slot that is valid,
 * or nullptr if there is none.
 */
pcre_context::capture_t*
pcre_context::first_valid() const
{
    int slot = 1;

    while (slot < this->pc_count) {
        auto* candidate = &this->pc_captures[slot];

        if (candidate->is_valid()) {
            return candidate;
        }
        slot += 1;
    }

    return nullptr;
}
|
||||
|
||||
std::string
|
||||
pcrepp::quote(const char* unquoted)
|
||||
{
|
||||
std::string retval;
|
||||
|
||||
for (int lpc = 0; unquoted[lpc]; lpc++) {
|
||||
if (isalnum(unquoted[lpc]) || unquoted[lpc] == '_'
|
||||
|| unquoted[lpc] & 0x80)
|
||||
{
|
||||
retval.push_back(unquoted[lpc]);
|
||||
} else {
|
||||
retval.push_back('\\');
|
||||
retval.push_back(unquoted[lpc]);
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
 * Compile a pattern with PCRE_UTF8 added to the given options.
 *
 * @return Ok with the compiled pattern, or Err with the message and
 * offset reported by pcre_compile().
 */
Result<pcrepp, pcrepp::compile_error>
pcrepp::from_str(std::string pattern, int options)
{
    const char* err_msg = nullptr;
    int err_offset = 0;
    auto* compiled = pcre_compile(pattern.c_str(),
                                  options | PCRE_UTF8,
                                  &err_msg,
                                  &err_offset,
                                  nullptr);

    if (compiled != nullptr) {
        return Ok(pcrepp(std::move(pattern), compiled));
    }

    return Err(compile_error{err_msg, err_offset});
}
|
||||
|
||||
/**
 * Compile a pattern with PCRE_UTF8 added to the given options and place
 * the result in a shared_ptr.
 *
 * @return Ok with the shared pattern, or Err with the message and offset
 * reported by pcre_compile().
 */
Result<std::shared_ptr<pcrepp>, pcrepp::compile_error>
pcrepp::shared_from_str(std::string pattern, int options)
{
    const char* err_msg = nullptr;
    int err_offset = 0;
    auto* compiled = pcre_compile(pattern.c_str(),
                                  options | PCRE_UTF8,
                                  &err_msg,
                                  &err_offset,
                                  nullptr);

    if (compiled != nullptr) {
        return Ok(std::make_shared<pcrepp>(std::move(pattern), compiled));
    }

    return Err(compile_error{err_msg, err_offset});
}
|
||||
|
||||
void
|
||||
pcrepp::find_captures(const char* pattern)
|
||||
{
|
||||
bool in_class = false, in_escape = false, in_literal = false;
|
||||
std::vector<pcre_context::capture_t> cap_in_progress;
|
||||
|
||||
for (int lpc = 0; pattern[lpc]; lpc++) {
|
||||
if (in_escape) {
|
||||
in_escape = false;
|
||||
if (pattern[lpc] == 'Q') {
|
||||
in_literal = true;
|
||||
}
|
||||
} else if (in_class) {
|
||||
if (pattern[lpc] == ']') {
|
||||
in_class = false;
|
||||
}
|
||||
if (pattern[lpc] == '\\') {
|
||||
in_escape = true;
|
||||
}
|
||||
} else if (in_literal) {
|
||||
if (pattern[lpc] == '\\' && pattern[lpc + 1] == 'E') {
|
||||
in_literal = false;
|
||||
lpc += 1;
|
||||
}
|
||||
} else {
|
||||
switch (pattern[lpc]) {
|
||||
case '\\':
|
||||
in_escape = true;
|
||||
break;
|
||||
case '[':
|
||||
in_class = true;
|
||||
break;
|
||||
case '(':
|
||||
cap_in_progress.emplace_back(lpc, lpc);
|
||||
break;
|
||||
case ')': {
|
||||
if (!cap_in_progress.empty()) {
|
||||
static const auto DEFINE_SF
|
||||
= string_fragment::from_const("(?(DEFINE)");
|
||||
|
||||
auto& cap = cap_in_progress.back();
|
||||
char first = '\0', second = '\0', third = '\0';
|
||||
bool is_cap = false;
|
||||
|
||||
cap.c_end = lpc + 1;
|
||||
if (cap.length() >= 2) {
|
||||
first = pattern[cap.c_begin + 1];
|
||||
}
|
||||
if (cap.length() >= 3) {
|
||||
second = pattern[cap.c_begin + 2];
|
||||
}
|
||||
if (cap.length() >= 4) {
|
||||
third = pattern[cap.c_begin + 3];
|
||||
}
|
||||
if (cap.c_begin >= 2) {
|
||||
auto poss_define = string_fragment::from_byte_range(
|
||||
pattern, cap.c_begin - 2, cap.c_end);
|
||||
if (poss_define == DEFINE_SF) {
|
||||
cap_in_progress.pop_back();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (first == '?') {
|
||||
if (second == '\'') {
|
||||
is_cap = true;
|
||||
}
|
||||
if (second == '<'
|
||||
&& (isalpha(third) || third == '_'))
|
||||
{
|
||||
is_cap = true;
|
||||
}
|
||||
if (second == 'P' && third == '<') {
|
||||
is_cap = true;
|
||||
}
|
||||
} else if (first != '*') {
|
||||
is_cap = true;
|
||||
}
|
||||
if (is_cap) {
|
||||
this->p_captures.push_back(cap);
|
||||
}
|
||||
cap_in_progress.pop_back();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert((size_t) this->p_capture_count == this->p_captures.size());
|
||||
}
|
||||
|
||||
bool
|
||||
pcrepp::match(pcre_context& pc, pcre_input& pi, int options) const
|
||||
{
|
||||
int length, startoffset, filtered_options = options;
|
||||
int count = pc.get_max_count();
|
||||
const char* str;
|
||||
int rc;
|
||||
|
||||
pc.set_pcrepp(this);
|
||||
pi.pi_offset = pi.pi_next_offset;
|
||||
|
||||
str = pi.get_string();
|
||||
if (filtered_options & PCRE_ANCHORED) {
|
||||
filtered_options &= ~PCRE_ANCHORED;
|
||||
str = &str[pi.pi_offset];
|
||||
startoffset = 0;
|
||||
length = pi.pi_length - pi.pi_offset;
|
||||
} else {
|
||||
startoffset = pi.pi_offset;
|
||||
length = pi.pi_length;
|
||||
}
|
||||
rc = pcre_exec(this->p_code,
|
||||
this->p_code_extra.in(),
|
||||
str,
|
||||
length,
|
||||
startoffset,
|
||||
filtered_options,
|
||||
(int*) pc.all(),
|
||||
count * 2);
|
||||
|
||||
if (rc < 0) {
|
||||
switch (rc) {
|
||||
case PCRE_ERROR_NOMATCH:
|
||||
break;
|
||||
case PCRE_ERROR_PARTIAL:
|
||||
pc.set_count(1);
|
||||
return true;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (rc == 0) {
|
||||
rc = 0;
|
||||
} else if (pc.all()->c_begin == pc.all()->c_end) {
|
||||
rc = 0;
|
||||
if (pi.pi_next_offset + 1 < pi.pi_length) {
|
||||
pi.pi_next_offset += 1;
|
||||
}
|
||||
} else {
|
||||
if (options & PCRE_ANCHORED) {
|
||||
for (int lpc = 0; lpc < rc; lpc++) {
|
||||
if (pc.all()[lpc].c_begin == -1) {
|
||||
continue;
|
||||
}
|
||||
pc.all()[lpc].c_begin += pi.pi_offset;
|
||||
pc.all()[lpc].c_end += pi.pi_offset;
|
||||
}
|
||||
}
|
||||
pi.pi_next_offset = pc.all()->c_end;
|
||||
}
|
||||
|
||||
pc.set_count(rc);
|
||||
|
||||
return rc > 0;
|
||||
}
|
||||
|
||||
std::string
|
||||
pcrepp::replace(const char* str, const char* repl) const
|
||||
{
|
||||
pcre_context_static<30> pc;
|
||||
pcre_input pi(str);
|
||||
std::string retval;
|
||||
std::string::size_type start = 0;
|
||||
|
||||
while (pi.pi_offset < pi.pi_length) {
|
||||
this->match(pc, pi);
|
||||
auto all = pc.all();
|
||||
bool in_escape = false;
|
||||
|
||||
if (pc.get_count() < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
retval.append(str, start, (all->c_begin - start));
|
||||
start = all->c_end;
|
||||
for (int lpc = 0; repl[lpc]; lpc++) {
|
||||
auto ch = repl[lpc];
|
||||
|
||||
if (in_escape) {
|
||||
if (isdigit(ch)) {
|
||||
auto capture_index = (ch - '0');
|
||||
|
||||
if (capture_index < pc.get_count()) {
|
||||
retval.append(pi.get_substr_start(&all[capture_index]),
|
||||
pi.get_substr_len(&all[capture_index]));
|
||||
} else if (capture_index > this->p_capture_count) {
|
||||
retval.push_back('\\');
|
||||
retval.push_back(ch);
|
||||
}
|
||||
} else {
|
||||
if (ch != '\\') {
|
||||
retval.push_back('\\');
|
||||
}
|
||||
retval.push_back(ch);
|
||||
}
|
||||
in_escape = false;
|
||||
} else {
|
||||
switch (ch) {
|
||||
case '\\':
|
||||
in_escape = true;
|
||||
break;
|
||||
default:
|
||||
retval.push_back(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
retval.append(str, start, std::string::npos);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
void
|
||||
pcrepp::study()
|
||||
{
|
||||
const char* errptr;
|
||||
|
||||
this->p_code_extra = pcre_study(this->p_code,
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
PCRE_STUDY_JIT_COMPILE,
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
&errptr);
|
||||
if (!this->p_code_extra && errptr) {
|
||||
// log_error("pcre_study error: %s", errptr);
|
||||
}
|
||||
if (this->p_code_extra != nullptr) {
|
||||
pcre_extra* extra = this->p_code_extra;
|
||||
|
||||
extra->flags
|
||||
|= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
|
||||
extra->match_limit = 10000;
|
||||
extra->match_limit_recursion = 500;
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
// pcre_assign_jit_stack(extra, nullptr, jit_stack());
|
||||
#endif
|
||||
}
|
||||
pcre_fullinfo(
|
||||
this->p_code, this->p_code_extra, PCRE_INFO_OPTIONS, &this->p_options);
|
||||
pcre_fullinfo(this->p_code,
|
||||
this->p_code_extra,
|
||||
PCRE_INFO_CAPTURECOUNT,
|
||||
&this->p_capture_count);
|
||||
pcre_fullinfo(this->p_code,
|
||||
this->p_code_extra,
|
||||
PCRE_INFO_NAMECOUNT,
|
||||
&this->p_named_count);
|
||||
pcre_fullinfo(this->p_code,
|
||||
this->p_code_extra,
|
||||
PCRE_INFO_NAMEENTRYSIZE,
|
||||
&this->p_name_len);
|
||||
pcre_fullinfo(this->p_code,
|
||||
this->p_code_extra,
|
||||
PCRE_INFO_NAMETABLE,
|
||||
&this->p_named_entries);
|
||||
}
|
||||
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
pcre_jit_stack*
|
||||
pcrepp::jit_stack()
|
||||
{
|
||||
static pcre_jit_stack* retval = nullptr;
|
||||
|
||||
if (retval == nullptr) {
|
||||
retval = pcre_jit_stack_alloc(JIT_STACK_MIN_SIZE, JIT_STACK_MAX_SIZE);
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
size_t
|
||||
pcrepp::match_partial(pcre_input& pi) const
|
||||
{
|
||||
size_t length = pi.pi_length;
|
||||
int rc;
|
||||
|
||||
do {
|
||||
rc = pcre_exec(this->p_code,
|
||||
this->p_code_extra.in(),
|
||||
pi.get_string(),
|
||||
length,
|
||||
pi.pi_offset,
|
||||
PCRE_PARTIAL,
|
||||
nullptr,
|
||||
0);
|
||||
switch (rc) {
|
||||
case 0:
|
||||
case PCRE_ERROR_PARTIAL:
|
||||
return length;
|
||||
}
|
||||
if (length > 0) {
|
||||
length -= 1;
|
||||
}
|
||||
} while (length > 0);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
const char*
|
||||
pcrepp::name_for_capture(int index) const
|
||||
{
|
||||
for (pcre_named_capture::iterator iter = this->named_begin();
|
||||
iter != this->named_end();
|
||||
++iter)
|
||||
{
|
||||
if (iter->index() == index) {
|
||||
return iter->pnc_name;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
int
|
||||
pcrepp::name_index(const char* name) const
|
||||
{
|
||||
int retval = pcre_get_stringnumber(this->p_code, name);
|
||||
|
||||
if (retval == PCRE_ERROR_NOSUBSTRING) {
|
||||
return retval;
|
||||
}
|
||||
|
||||
return retval - 1;
|
||||
}
|
||||
|
||||
#else
|
||||
# warning "pcrejit is not available, search performance will be degraded"
|
||||
|
||||
void
|
||||
pcrepp::pcre_free_study(pcre_extra* extra)
|
||||
{
|
||||
free(extra);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
pcre_context::capture_t::ltrim(const char* str)
|
||||
{
|
||||
while (this->c_begin < this->c_end && isspace(str[this->c_begin])) {
|
||||
this->c_begin += 1;
|
||||
}
|
||||
}
|
@ -1,617 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2007-2013, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* @file pcrepp.hh
|
||||
*
|
||||
* A C++ adapter for the pcre library. The interface provided here has a
|
||||
* different focus than the pcrecpp.h file included in the pcre distribution.
|
||||
* The standard pcrecpp.h interface is more concerned with regular expressions
|
||||
* that are digesting data to be used within the program itself. Whereas this
|
||||
* interface is dealing with regular expressions entered by the user and
|
||||
* processing a series of matches on text files.
|
||||
*/
|
||||
|
||||
#ifndef pcrepp_hh
|
||||
#define pcrepp_hh
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef HAVE_PCRE_H
|
||||
# include <pcre.h>
|
||||
#elif HAVE_PCRE_PCRE_H
|
||||
# include <pcre/pcre.h>
|
||||
#else
|
||||
# error "pcre.h not found?"
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "base/auto_mem.hh"
|
||||
#include "base/intern_string.hh"
|
||||
#include "base/result.h"
|
||||
#include "scn/util/string_view.h"
|
||||
|
||||
class pcrepp;
|
||||
|
||||
/**
|
||||
* Context that tracks captures found during a match operation. This class is a
|
||||
* base that defines iterator methods and fields, but does not allocate space
|
||||
* for the capture array.
|
||||
*/
|
||||
class pcre_context {
|
||||
public:
|
||||
struct capture_t {
|
||||
capture_t()
|
||||
{ /* We don't initialize anything since it's a perf hit. */
|
||||
}
|
||||
|
||||
capture_t(int begin, int end) : c_begin(begin), c_end(end)
|
||||
{
|
||||
assert(begin <= end);
|
||||
}
|
||||
|
||||
int c_begin;
|
||||
int c_end;
|
||||
|
||||
void ltrim(const char* str);
|
||||
|
||||
bool contains(int pos) const
|
||||
{
|
||||
return this->c_begin <= pos && pos < this->c_end;
|
||||
}
|
||||
|
||||
bool is_valid() const { return this->c_begin != -1; }
|
||||
|
||||
int length() const { return this->c_end - this->c_begin; }
|
||||
|
||||
bool empty() const { return this->c_begin == this->c_end; }
|
||||
};
|
||||
using iterator = capture_t*;
|
||||
using const_iterator = const capture_t*;
|
||||
|
||||
/** @return The maximum number of strings this context can capture. */
|
||||
int get_max_count() const { return this->pc_max_count; }
|
||||
|
||||
void set_count(int count) { this->pc_count = count; }
|
||||
|
||||
int get_count() const { return this->pc_count; }
|
||||
|
||||
void set_pcrepp(const pcrepp* src) { this->pc_pcre = src; }
|
||||
|
||||
/**
|
||||
* @return a capture_t that covers all of the text that was matched.
|
||||
*/
|
||||
capture_t* all() const { return pc_captures; }
|
||||
|
||||
/** @return An iterator to the first capture. */
|
||||
iterator begin() { return pc_captures + 1; }
|
||||
/** @return An iterator that refers to the end of the capture array. */
|
||||
iterator end() { return pc_captures + pc_count; };
|
||||
|
||||
capture_t* operator[](int offset) const
|
||||
{
|
||||
if (offset < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return &this->pc_captures[offset + 1];
|
||||
}
|
||||
|
||||
capture_t* operator[](const char* name) const;
|
||||
|
||||
capture_t* operator[](const std::string& name) const
|
||||
{
|
||||
return (*this)[name.c_str()];
|
||||
}
|
||||
|
||||
capture_t* first_valid() const;
|
||||
|
||||
protected:
|
||||
pcre_context(capture_t* captures, int max_count)
|
||||
: pc_captures(captures), pc_max_count(max_count)
|
||||
{
|
||||
}
|
||||
|
||||
const pcrepp* pc_pcre{nullptr};
|
||||
capture_t* pc_captures;
|
||||
int pc_max_count;
|
||||
int pc_count{0};
|
||||
};
|
||||
|
||||
struct capture_if_not {
|
||||
capture_if_not(int begin) : cin_begin(begin) {}
|
||||
|
||||
bool operator()(const pcre_context::capture_t& cap) const
|
||||
{
|
||||
return cap.c_begin != this->cin_begin;
|
||||
}
|
||||
|
||||
int cin_begin;
|
||||
};
|
||||
|
||||
/**
|
||||
* A pcre_context that allocates storage for the capture array within the object
|
||||
* itself.
|
||||
*/
|
||||
template<size_t MAX_COUNT>
|
||||
class pcre_context_static : public pcre_context {
|
||||
public:
|
||||
pcre_context_static()
|
||||
: pcre_context(this->pc_match_buffer, MAX_COUNT + 1){};
|
||||
|
||||
private:
|
||||
capture_t pc_match_buffer[MAX_COUNT + 1];
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
class pcre_input {
|
||||
public:
|
||||
pcre_input(const char* str, size_t off = 0, size_t len = -1)
|
||||
: pi_offset(off), pi_next_offset(off), pi_length(len), pi_string(str)
|
||||
{
|
||||
if (this->pi_length == (size_t) -1) {
|
||||
this->pi_length = strlen(str);
|
||||
}
|
||||
}
|
||||
|
||||
pcre_input(const string_fragment& s)
|
||||
: pi_offset(0), pi_next_offset(0), pi_length(s.length()),
|
||||
pi_string(s.data())
|
||||
{
|
||||
}
|
||||
|
||||
pcre_input(const intern_string_t& s)
|
||||
: pi_offset(0), pi_next_offset(0), pi_length(s.size()),
|
||||
pi_string(s.get())
|
||||
{
|
||||
}
|
||||
|
||||
pcre_input(const string_fragment&&) = delete;
|
||||
|
||||
pcre_input(const std::string& str, size_t off = 0)
|
||||
: pi_offset(off), pi_next_offset(off), pi_length(str.length()),
|
||||
pi_string(str.c_str())
|
||||
{
|
||||
}
|
||||
|
||||
pcre_input(const std::string&&, size_t off = 0) = delete;
|
||||
|
||||
const char* get_string() const { return this->pi_string; }
|
||||
|
||||
const char* get_substr_start(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return &this->pi_string[iter->c_begin];
|
||||
}
|
||||
|
||||
size_t get_substr_len(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return iter->length();
|
||||
}
|
||||
|
||||
std::string get_substr(pcre_context::const_iterator iter) const
|
||||
{
|
||||
if (iter->c_begin == -1) {
|
||||
return "";
|
||||
}
|
||||
return std::string(&this->pi_string[iter->c_begin], iter->length());
|
||||
}
|
||||
|
||||
intern_string_t get_substr_i(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return intern_string::lookup(&this->pi_string[iter->c_begin],
|
||||
iter->length());
|
||||
}
|
||||
|
||||
string_fragment get_string_fragment(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return string_fragment::from_byte_range(
|
||||
this->pi_string, iter->c_begin, iter->c_end);
|
||||
}
|
||||
|
||||
string_fragment get_up_to(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return string_fragment::from_byte_range(
|
||||
this->pi_string, this->pi_offset, iter->c_begin);
|
||||
}
|
||||
|
||||
nonstd::optional<std::string> get_substr_opt(
|
||||
pcre_context::const_iterator iter) const
|
||||
{
|
||||
if (iter->is_valid()) {
|
||||
return std::string(&this->pi_string[iter->c_begin], iter->length());
|
||||
}
|
||||
|
||||
return nonstd::nullopt;
|
||||
}
|
||||
|
||||
scn::string_view to_string_view(pcre_context::const_iterator iter) const
|
||||
{
|
||||
return scn::string_view{
|
||||
&this->pi_string[iter->c_begin],
|
||||
&this->pi_string[iter->c_end],
|
||||
};
|
||||
}
|
||||
|
||||
void get_substr(pcre_context::const_iterator iter, char* dst) const
|
||||
{
|
||||
memcpy(dst, &this->pi_string[iter->c_begin], iter->length());
|
||||
dst[iter->length()] = '\0';
|
||||
}
|
||||
|
||||
void reset_next_offset() { this->pi_next_offset = this->pi_offset; }
|
||||
|
||||
void reset(const char* str, size_t off = 0, size_t len = -1)
|
||||
{
|
||||
this->pi_string = str;
|
||||
this->pi_offset = off;
|
||||
this->pi_next_offset = off;
|
||||
if (this->pi_length == (size_t) -1) {
|
||||
this->pi_length = strlen(str);
|
||||
} else {
|
||||
this->pi_length = len;
|
||||
}
|
||||
}
|
||||
|
||||
void reset(const std::string& str, size_t off = 0)
|
||||
{
|
||||
this->reset(str.c_str(), off, str.length());
|
||||
}
|
||||
|
||||
size_t pi_offset;
|
||||
size_t pi_next_offset;
|
||||
size_t pi_length;
|
||||
|
||||
private:
|
||||
const char* pi_string;
|
||||
};
|
||||
|
||||
/**
 * Overlay for one entry of a PCRE name table: two bytes holding the group
 * number, most significant first, followed by the group name.
 */
struct pcre_named_capture {
    /** Steps through name-table entries that are name_len bytes apart. */
    class iterator {
    public:
        iterator(pcre_named_capture* pnc, size_t name_len)
            : i_named_capture(pnc), i_name_len(name_len)
        {
        }

        iterator() : i_named_capture(nullptr), i_name_len(0) {}

        const pcre_named_capture& operator*() const
        {
            return *this->i_named_capture;
        }

        const pcre_named_capture* operator->() const
        {
            return this->i_named_capture;
        }

        bool operator!=(const iterator& rhs) const
        {
            return this->i_named_capture != rhs.i_named_capture;
        }

        iterator& operator++()
        {
            auto* ptr = reinterpret_cast<char*>(this->i_named_capture);

            ptr += this->i_name_len;
            this->i_named_capture = reinterpret_cast<pcre_named_capture*>(ptr);
            return *this;
        }

    private:
        pcre_named_capture* i_named_capture;
        size_t i_name_len;
    };

    /** @return The zero-based index of the group this entry names. */
    int index() const
    {
        // The bytes are stored as plain char; go through unsigned char so
        // values of 0x80 and above are not sign-extended.
        const int msb = static_cast<unsigned char>(this->pnc_index_msb);
        const int lsb = static_cast<unsigned char>(this->pnc_index_lsb);

        return ((msb << 8) | lsb) - 1;
    }

    char pnc_index_msb;
    char pnc_index_lsb;
    char pnc_name[];
};
|
||||
|
||||
struct pcre_extractor {
|
||||
const pcre_context& pe_context;
|
||||
const pcre_input& pe_input;
|
||||
|
||||
template<typename T>
|
||||
intern_string_t get_substr_i(T name) const
|
||||
{
|
||||
return this->pe_input.get_substr_i(this->pe_context[name]);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::string get_substr(T name) const
|
||||
{
|
||||
return this->pe_input.get_substr(this->pe_context[name]);
|
||||
}
|
||||
};
|
||||
|
||||
class pcrepp {
|
||||
public:
|
||||
class error : public std::exception {
|
||||
public:
|
||||
error(std::string msg, int offset = 0)
|
||||
: e_msg(std::move(msg)), e_offset(offset)
|
||||
{
|
||||
}
|
||||
|
||||
const char* what() const noexcept override
|
||||
{
|
||||
return this->e_msg.c_str();
|
||||
}
|
||||
|
||||
const std::string e_msg;
|
||||
int e_offset;
|
||||
};
|
||||
|
||||
static std::string quote(const char* unquoted);
|
||||
|
||||
static std::string quote(const std::string& unquoted)
|
||||
{
|
||||
return quote(unquoted.c_str());
|
||||
}
|
||||
|
||||
struct compile_error {
|
||||
const char* ce_msg{nullptr};
|
||||
int ce_offset{0};
|
||||
};
|
||||
|
||||
static Result<pcrepp, compile_error> from_str(std::string pattern,
|
||||
int options = 0);
|
||||
|
||||
static Result<std::shared_ptr<pcrepp>, compile_error> shared_from_str(
|
||||
std::string pattern, int options = 0);
|
||||
|
||||
pcrepp(pcre* code) : p_code(code), p_code_extra(pcre_free_study)
|
||||
{
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->study();
|
||||
}
|
||||
|
||||
pcrepp(std::string pattern, pcre* code)
|
||||
: p_code(code), p_pattern(std::move(pattern)),
|
||||
p_code_extra(pcre_free_study)
|
||||
{
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->study();
|
||||
this->find_captures(this->p_pattern.c_str());
|
||||
}
|
||||
|
||||
explicit pcrepp(const char* pattern, int options = 0)
|
||||
: p_pattern(pattern), p_code_extra(pcre_free_study)
|
||||
{
|
||||
const char* errptr;
|
||||
int eoff;
|
||||
|
||||
if ((this->p_code
|
||||
= pcre_compile(pattern, options, &errptr, &eoff, nullptr))
|
||||
== nullptr)
|
||||
{
|
||||
throw error(errptr, eoff);
|
||||
}
|
||||
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->study();
|
||||
this->find_captures(pattern);
|
||||
}
|
||||
|
||||
explicit pcrepp(const std::string& pattern, int options = 0)
|
||||
: p_pattern(pattern), p_code_extra(pcre_free_study)
|
||||
{
|
||||
const char* errptr;
|
||||
int eoff;
|
||||
|
||||
if ((this->p_code = pcre_compile(
|
||||
pattern.c_str(), options | PCRE_UTF8, &errptr, &eoff, nullptr))
|
||||
== nullptr)
|
||||
{
|
||||
throw error(errptr, eoff);
|
||||
}
|
||||
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->study();
|
||||
this->find_captures(pattern.c_str());
|
||||
}
|
||||
|
||||
pcrepp() {}
|
||||
|
||||
pcrepp(const pcrepp& other)
|
||||
: p_code(other.p_code), p_pattern(other.p_pattern),
|
||||
p_code_extra(pcre_free_study), p_captures(other.p_captures)
|
||||
{
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->study();
|
||||
}
|
||||
|
||||
pcrepp(pcrepp&& other)
|
||||
: p_code(other.p_code), p_pattern(std::move(other.p_pattern)),
|
||||
p_code_extra(pcre_free_study), p_capture_count(other.p_capture_count),
|
||||
p_named_count(other.p_named_count), p_name_len(other.p_name_len),
|
||||
p_options(other.p_options), p_named_entries(other.p_named_entries),
|
||||
p_captures(std::move(other.p_captures))
|
||||
{
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->p_code_extra = std::move(other.p_code_extra);
|
||||
}
|
||||
|
||||
virtual ~pcrepp() { this->clear(); }
|
||||
|
||||
pcrepp& operator=(pcrepp&& other) noexcept
|
||||
{
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
this->p_code = other.p_code;
|
||||
pcre_refcount(this->p_code, 1);
|
||||
this->p_pattern = std::move(other.p_pattern);
|
||||
this->p_code_extra = std::move(other.p_code_extra);
|
||||
this->p_capture_count = other.p_capture_count;
|
||||
this->p_named_count = other.p_named_count;
|
||||
this->p_name_len = other.p_name_len;
|
||||
this->p_options = other.p_options;
|
||||
this->p_named_entries = other.p_named_entries;
|
||||
this->p_captures = std::move(other.p_captures);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
const std::string& get_pattern() const { return this->p_pattern; }
|
||||
|
||||
bool empty() const { return this->p_pattern.empty(); }
|
||||
|
||||
void clear()
|
||||
{
|
||||
if (this->p_code && pcre_refcount(this->p_code, -1) == 0) {
|
||||
free(this->p_code);
|
||||
this->p_code = nullptr;
|
||||
}
|
||||
this->p_pattern.clear();
|
||||
this->p_code_extra.reset();
|
||||
this->p_capture_count = 0;
|
||||
this->p_named_count = 0;
|
||||
this->p_name_len = 0;
|
||||
this->p_options = 0;
|
||||
this->p_named_entries = nullptr;
|
||||
this->p_captures.clear();
|
||||
}
|
||||
|
||||
pcre_named_capture::iterator named_begin() const
|
||||
{
|
||||
return {this->p_named_entries, static_cast<size_t>(this->p_name_len)};
|
||||
}
|
||||
|
||||
pcre_named_capture::iterator named_end() const
|
||||
{
|
||||
char* ptr = (char*) this->p_named_entries;
|
||||
|
||||
ptr += this->p_named_count * this->p_name_len;
|
||||
return {(pcre_named_capture*) ptr,
|
||||
static_cast<size_t>(this->p_name_len)};
|
||||
}
|
||||
|
||||
const std::vector<pcre_context::capture_t>& captures() const
|
||||
{
|
||||
return this->p_captures;
|
||||
}
|
||||
|
||||
std::vector<pcre_context::capture_t>::const_iterator cap_begin() const
|
||||
{
|
||||
return this->p_captures.begin();
|
||||
}
|
||||
|
||||
std::vector<pcre_context::capture_t>::const_iterator cap_end() const
|
||||
{
|
||||
return this->p_captures.end();
|
||||
}
|
||||
|
||||
int name_index(const std::string& name) const
|
||||
{
|
||||
return this->name_index(name.c_str());
|
||||
}
|
||||
|
||||
int name_index(const char* name) const;
|
||||
|
||||
const char* name_for_capture(int index) const;
|
||||
|
||||
int get_capture_count() const { return this->p_capture_count; }
|
||||
|
||||
bool match(pcre_context& pc, pcre_input& pi, int options = 0) const;
|
||||
|
||||
template<size_t MATCH_COUNT>
|
||||
nonstd::optional<pcre_context_static<MATCH_COUNT>> match(pcre_input& pi,
|
||||
int options
|
||||
= 0) const
|
||||
{
|
||||
pcre_context_static<MATCH_COUNT> pc;
|
||||
|
||||
if (this->match(pc, pi, options)) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
return nonstd::nullopt;
|
||||
}
|
||||
|
||||
std::string replace(const char* str, const char* repl) const;
|
||||
|
||||
size_t match_partial(pcre_input& pi) const;
|
||||
|
||||
pcre* release() {
|
||||
auto retval = std::exchange(this->p_code, nullptr);
|
||||
this->clear();
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// #undef PCRE_STUDY_JIT_COMPILE
|
||||
#ifdef PCRE_STUDY_JIT_COMPILE
|
||||
static pcre_jit_stack* jit_stack();
|
||||
|
||||
#else
|
||||
static void pcre_free_study(pcre_extra*);
|
||||
#endif
|
||||
|
||||
void study();
|
||||
|
||||
void find_captures(const char* pattern);
|
||||
|
||||
pcre* p_code{nullptr};
|
||||
std::string p_pattern;
|
||||
auto_mem<pcre_extra> p_code_extra;
|
||||
int p_capture_count{0};
|
||||
int p_named_count{0};
|
||||
int p_name_len{0};
|
||||
unsigned long p_options{0};
|
||||
pcre_named_capture* p_named_entries{nullptr};
|
||||
std::vector<pcre_context::capture_t> p_captures;
|
||||
};
|
||||
|
||||
template<int options = 0>
|
||||
class pcrepp_with_options : public pcrepp {
|
||||
public:
|
||||
template<typename... Args>
|
||||
pcrepp_with_options(Args... args) : pcrepp(args..., options)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,246 @@
|
||||
/**
|
||||
* Copyright (c) 2022, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
|
||||
#include "doctest/doctest.h"
|
||||
#include "pcre2pp.hh"
|
||||
|
||||
// Compiling a pattern with an unterminated character class must fail and
// report the error at offset 4.
TEST_CASE("bad pattern")
{
    auto compile_res
        = lnav::pcre2pp::code::from(string_fragment::from_const("[abc"));

    // Abort the test case on failure, unwrapErr() is only valid for an
    // error result.
    REQUIRE(compile_res.isErr());
    auto ce = compile_res.unwrapErr();
    CHECK(ce.ce_offset == 4);
}
|
||||
|
||||
// The named captures of a pattern must be reported, in order, with their
// capture index and name; the unnamed group in the middle is skipped.
TEST_CASE("named captures")
{
    auto compile_res = lnav::pcre2pp::code::from(
        string_fragment::from_const("(?<abc>a)(b)(?<def>c)"));

    // unwrap() below is only valid for a successful result.
    REQUIRE(compile_res.isOk());

    const std::vector<std::pair<size_t, string_fragment>> expected_caps = {
        {1, string_fragment::from_const("abc")},
        {3, string_fragment::from_const("def")},
    };

    size_t caps_index = 0;
    auto co = compile_res.unwrap();
    for (const auto cap : co.get_named_captures()) {
        // Do not index past the expectations if too many are reported.
        REQUIRE(caps_index < expected_caps.size());
        const auto& expected_cap = expected_caps[caps_index];

        CHECK(expected_cap.first == cap.get_index());
        CHECK(expected_cap.second == cap.get_name());
        caps_index += 1;
    }
    // Catch the case where fewer captures than expected were reported.
    CHECK(caps_index == expected_caps.size());
}
|
||||
|
||||
// Iterating over a "key=value;" list must visit both pairs and expose the
// whole match and the two capture groups of each.
TEST_CASE("match")
{
    static const char INPUT[] = "key1=1234;key2=5678;";
    // Whole match, first group and second group expected for each visit.
    static const char* const EXPECTED[][3] = {
        {"key1=1234;", "key1", "1234"},
        {"key2=5678;", "key2", "5678"},
    };
    const size_t expected_count = sizeof(EXPECTED) / sizeof(EXPECTED[0]);

    auto co
        = lnav::pcre2pp::code::from_const(R"((?<key>\w+)=(?<value>[^;]+);)");

    size_t match_count = 0;
    co.capture_from(string_fragment::from_const(INPUT))
        .for_each([&](lnav::pcre2pp::match_data& md) {
            printf("got '%s' %s = %s\n",
                   md[0]->to_string().c_str(),
                   md[1]->to_string().c_str(),
                   md[2]->to_string().c_str());
            // Guard the table lookup; a surplus match is reported by the
            // count check after the loop.
            if (match_count < expected_count) {
                CHECK(md[0]->to_string() == EXPECTED[match_count][0]);
                CHECK(md[1]->to_string() == EXPECTED[match_count][1]);
                CHECK(md[2]->to_string() == EXPECTED[match_count][2]);
            }
            match_count += 1;
        });
    CHECK(match_count == expected_count);
}
|
||||
|
||||
// match_partial() is expected to return 3 for an input where only the
// leading "key" can be consumed by the pattern.
TEST_CASE("partial")
{
    static const char INPUT[] = "key1=1234";

    auto re = lnav::pcre2pp::code::from_const(R"([a-z]+=.*)");
    auto partial_len = re.match_partial(string_fragment::from_const(INPUT));

    CHECK(partial_len == 3);
}
|
||||
|
||||
// A named group reports its name; an unnamed group reports a null name.
TEST_CASE("capture_name")
{
    auto re = lnav::pcre2pp::code::from_const("(?<abc>def)(ghi)");

    CHECK(re.get_capture_count() == 2);

    const auto first_name
        = string_fragment::from_c_str(re.get_name_for_capture(1));
    CHECK(first_name == "abc");
    CHECK(re.get_name_for_capture(2) == nullptr);
}
|
||||
|
||||
// A plain "(DEFINE)" group is counted as one capture.
TEST_CASE("get_capture_count")
{
    auto re = lnav::pcre2pp::code::from_const("(DEFINE)");
    const auto group_count = re.get_capture_count();

    CHECK(group_count == 1);
}
|
||||
|
||||
// get_captures() must return the pattern text of each capture group.
TEST_CASE("get_captures")
{
    auto co = lnav::pcre2pp::code::from_const(R"((?<abc>\w+)-(def)-)");

    CHECK(co.get_capture_count() == 2);
    const auto& caps = co.get_captures();
    // The elements are indexed below, so a wrong size must end the test.
    REQUIRE(caps.size() == 2);
    CHECK(caps[0].to_string() == R"((?<abc>\w+))");
    CHECK(caps[1].to_string() == R"((def))");
}
|
||||
|
||||
// Wrapping every match of "\w*" in braces, including the empty matches.
TEST_CASE("replace")
{
    static const char INPUT[] = "test 1 2 3";

    auto re = lnav::pcre2pp::code::from_const(R"(\w*)");
    auto subject = string_fragment::from_const(INPUT);
    auto replaced = re.replace(subject, R"({\0})");

    CHECK(replaced == "{test}{} {1}{} {2}{} {3}{}");
}
|
||||
|
||||
// An empty input still yields a single (empty) replacement.
TEST_CASE("replace-empty")
{
    static const char INPUT[] = "";

    auto re = lnav::pcre2pp::code::from_const(R"(\w*)");
    auto subject = string_fragment::from_const(INPUT);
    auto replaced = re.replace(subject, R"({\0})");

    CHECK(replaced == "{}");
}
|
||||
|
||||
// Prints the range of every match of ".*" over a newline-terminated line.
TEST_CASE("for_each-all")
{
    static const char INPUT[] = "Hello, World!\n";

    auto re = lnav::pcre2pp::code::from_const(R"(.*)");
    auto subject = string_fragment::from_const(INPUT);

    re.capture_from(subject).for_each([](lnav::pcre2pp::match_data& found) {
        printf("range %d:%d\n", found[0]->sf_begin, found[0]->sf_end);
    });
}
|
||||
|
||||
// Two unnamed groups are counted as two captures.
TEST_CASE("capture_count")
{
    auto re = lnav::pcre2pp::code::from_const(R"(^(\w+)=([^;]+);)");
    const auto group_count = re.get_capture_count();

    CHECK(group_count == 2);
}
|
||||
|
||||
// None of these patterns contains a capturing group: the parentheses are
// non-capturing, inside a character class, quoted, or an option setting.
TEST_CASE("no-caps")
{
    const static std::string empty_cap_regexes[] = {
        "foo (?:bar)",
        "foo [(]",
        "foo \\Q(bar)\\E",
        "(?i)",
    };

    for (auto pattern : empty_cap_regexes) {
        auto compiled = lnav::pcre2pp::code::from(pattern).unwrap();

        CHECK(compiled.get_captures().empty());
    }
}
|
||||
|
||||
// A pattern using a DEFINE group and subroutine calls must find the IPv4
// address at the start of the input.
TEST_CASE("ipmatcher")
{
    auto co = lnav::pcre2pp::code::from_const(
        R"((?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)");
    auto inp = string_fragment::from_const("192.168.1.1");

    auto find_res = co.find_in(inp).ignore_error();
    // The result is dereferenced below, so an empty one must end the test.
    REQUIRE(find_res.has_value());
    CHECK(find_res->f_all.sf_begin == 0);
}
|
||||
|
||||
// A non-capturing group nested in a capture must not be reported and the
// outer capture must span through its closing parenthesis.
TEST_CASE("get_captures-nested")
{
    auto re = lnav::pcre2pp::code::from_const("foo (bar (?:baz)?)");

    // Element 0 is accessed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 1);
    CHECK(re.get_captures()[0].sf_begin == 4);
    CHECK(re.get_captures()[0].sf_end == 18);
    CHECK(re.get_captures()[0].length() == 14);
}
|
||||
|
||||
// Three adjacent groups are reported with back-to-back pattern ranges.
// doctest assertions are used instead of assert() so that failures are
// reported by the test runner and are not compiled out by NDEBUG.
TEST_CASE("get_captures-basic")
{
    auto re = lnav::pcre2pp::code::from_const("(a)(b)(c)");

    // The elements are indexed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 3);
    CHECK(re.get_captures()[0].sf_begin == 0);
    CHECK(re.get_captures()[0].sf_end == 3);
    CHECK(re.get_captures()[1].sf_begin == 3);
    CHECK(re.get_captures()[1].sf_end == 6);
    CHECK(re.get_captures()[2].sf_begin == 6);
    CHECK(re.get_captures()[2].sf_end == 9);
}
|
||||
|
||||
// Escaped parentheses are literals, only the trailing group is a capture.
// doctest assertions are used instead of assert() so that failures are
// reported by the test runner and are not compiled out by NDEBUG.
TEST_CASE("get_captures-escape")
{
    auto re = lnav::pcre2pp::code::from_const("\\(a\\)(b)");

    // Element 0 is accessed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 1);
    CHECK(re.get_captures()[0].sf_begin == 5);
    CHECK(re.get_captures()[0].sf_end == 8);
}
|
||||
|
||||
// A "(?<name>...)" group is reported as a capture spanning the whole group.
// doctest assertions are used instead of assert() so that failures are
// reported by the test runner and are not compiled out by NDEBUG.
TEST_CASE("get_captures-named")
{
    auto re = lnav::pcre2pp::code::from_const("(?<named>b)");

    // Element 0 is accessed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 1);
    CHECK(re.get_captures()[0].sf_begin == 0);
    CHECK(re.get_captures()[0].sf_end == 11);
}
|
||||
|
||||
// A "(?P<name>...)" group is reported as a capture spanning the whole
// group.  doctest assertions are used instead of assert() so that failures
// are reported by the test runner and are not compiled out by NDEBUG.
TEST_CASE("get_captures-namedP")
{
    auto re = lnav::pcre2pp::code::from_const("(?P<named>b)");

    // Element 0 is accessed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 1);
    CHECK(re.get_captures()[0].sf_begin == 0);
    CHECK(re.get_captures()[0].sf_end == 12);
}
|
||||
|
||||
// A "(?'name'...)" group is reported as a capture spanning the whole group.
// doctest assertions are used instead of assert() so that failures are
// reported by the test runner and are not compiled out by NDEBUG.
TEST_CASE("get_captures-namedq")
{
    auto re = lnav::pcre2pp::code::from_const("(?'named'b)");

    // Element 0 is accessed below, so a wrong size must end the test.
    REQUIRE(re.get_captures().size() == 1);
    CHECK(re.get_captures()[0].sf_begin == 0);
    CHECK(re.get_captures()[0].sf_end == 11);
}
|
@ -1,192 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2007-2012, Timothy Stack
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* * Neither the name of Timothy Stack nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "pcrepp/pcrepp.hh"
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
pcre_context_static<30> context;
|
||||
int retval = EXIT_SUCCESS;
|
||||
|
||||
{
|
||||
pcrepp ipmatcher(
|
||||
R"((?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}\b)");
|
||||
pcre_input pi("192.168.1.1");
|
||||
|
||||
assert(ipmatcher.match(context, pi));
|
||||
assert(context.all()->c_begin == 0);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp ipmatcher(R"((DEFINE))");
|
||||
|
||||
assert(ipmatcher.get_capture_count() == 1);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp nomatch("nothing-to-match");
|
||||
pcre_input pi("dummy");
|
||||
|
||||
assert(!nomatch.match(context, pi));
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp match1("(\\w*)=(\\d+)");
|
||||
pcre_input pi("a=1 b=2");
|
||||
pcre_context::capture_t* cap;
|
||||
|
||||
assert(match1.match(context, pi));
|
||||
|
||||
cap = context.all();
|
||||
assert(cap->c_begin == 0);
|
||||
assert(cap->c_end == 3);
|
||||
|
||||
assert((context.end() - context.begin()) == 2);
|
||||
assert(pi.get_substr(context.begin()) == "a");
|
||||
assert(pi.get_substr(context.begin() + 1) == "1");
|
||||
assert(pi.get_substr(context[1]) == "1");
|
||||
|
||||
assert(match1.match(context, pi));
|
||||
assert((context.end() - context.begin()) == 2);
|
||||
assert(pi.get_substr(context.begin()) == "b");
|
||||
assert(pi.get_substr(context.begin() + 1) == "2");
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp match2("");
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp match3("(?<var1>\\d+)(?<var2>\\w+)");
|
||||
pcre_named_capture::iterator iter;
|
||||
const char* expected_names[] = {
|
||||
"var1",
|
||||
"var2",
|
||||
};
|
||||
int index = 0;
|
||||
|
||||
for (iter = match3.named_begin(); iter != match3.named_end();
|
||||
++iter, index++)
|
||||
{
|
||||
assert(strcmp(iter->pnc_name, expected_names[index]) == 0);
|
||||
}
|
||||
|
||||
assert(match3.name_index("var2") == 1);
|
||||
|
||||
pcre_input pi("123foo");
|
||||
|
||||
match3.match(context, pi);
|
||||
assert(pi.get_substr(context["var1"]) == "123");
|
||||
}
|
||||
|
||||
{
|
||||
pcre_context::capture_t cap(1, 4);
|
||||
pcre_input pi("\0foo", 0, 4);
|
||||
|
||||
assert("foo" == pi.get_substr(&cap));
|
||||
}
|
||||
|
||||
const char* empty_cap_regexes[] = {
|
||||
"foo (?:bar)",
|
||||
"foo [(]",
|
||||
"foo \\Q(bar)\\E",
|
||||
"(?i)",
|
||||
|
||||
nullptr,
|
||||
};
|
||||
|
||||
for (int lpc = 0; empty_cap_regexes[lpc]; lpc++) {
|
||||
pcrepp re(empty_cap_regexes[lpc]);
|
||||
|
||||
assert(re.captures().empty());
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("foo (bar (?:baz)?)");
|
||||
|
||||
assert(re.captures().size() == 1);
|
||||
assert(re.captures()[0].c_begin == 4);
|
||||
assert(re.captures()[0].c_end == 18);
|
||||
assert(re.captures()[0].length() == 14);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("(a)(b)(c)");
|
||||
|
||||
assert(re.captures().size() == 3);
|
||||
assert(re.captures()[0].c_begin == 0);
|
||||
assert(re.captures()[0].c_end == 3);
|
||||
assert(re.captures()[1].c_begin == 3);
|
||||
assert(re.captures()[1].c_end == 6);
|
||||
assert(re.captures()[2].c_begin == 6);
|
||||
assert(re.captures()[2].c_end == 9);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("\\(a\\)(b)");
|
||||
|
||||
assert(re.captures().size() == 1);
|
||||
assert(re.captures()[0].c_begin == 5);
|
||||
assert(re.captures()[0].c_end == 8);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("(?<named>b)");
|
||||
|
||||
assert(re.captures().size() == 1);
|
||||
assert(re.captures()[0].c_begin == 0);
|
||||
assert(re.captures()[0].c_end == 11);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("(?P<named>b)");
|
||||
|
||||
assert(re.captures().size() == 1);
|
||||
assert(re.captures()[0].c_begin == 0);
|
||||
assert(re.captures()[0].c_end == 12);
|
||||
}
|
||||
|
||||
{
|
||||
pcrepp re("(?'named'b)");
|
||||
|
||||
assert(re.captures().size() == 1);
|
||||
assert(re.captures()[0].c_begin == 0);
|
||||
assert(re.captures()[0].c_end == 11);
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue