You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lnav/src/pcrepp/pcre2pp.hh

369 lines
9.6 KiB
C++

/**
* Copyright (c) 2022, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lnav_pcre2pp_hh
#define lnav_pcre2pp_hh
#define PCRE2_CODE_UNIT_WIDTH 8
#include <memory>
#include <string>
#include <vector>
#include <pcre2.h>
#include "base/auto_mem.hh"
#include "base/intern_string.hh"
#include "base/result.h"
#include "mapbox/variant.hpp"
namespace lnav {
namespace pcre2pp {
std::string quote(const char* unquoted);
inline std::string
quote(const std::string& unquoted)
{
return quote(unquoted.c_str());
}
class code;
struct capture_builder;
class matcher;
struct input {
string_fragment i_string;
int i_offset{0};
int i_next_offset{0};
};
class match_data {
public:
static match_data unitialized() { return match_data{}; }
string_fragment leading() const
{
return this->md_input.i_string.sub_range(this->md_input.i_offset,
this->md_ovector[0]);
}
string_fragment remaining() const
{
if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) {
return string_fragment::invalid();
}
return string_fragment::from_byte_range(
this->md_input.i_string.sf_string,
this->md_input.i_next_offset,
this->md_input.i_string.sf_end);
}
nonstd::optional<string_fragment> operator[](size_t index) const
{
if (index >= this->md_capture_end) {
return nonstd::nullopt;
}
auto start = this->md_ovector[(index * 2)];
auto stop = this->md_ovector[(index * 2) + 1];
if (start == PCRE2_UNSET || stop == PCRE2_UNSET) {
return nonstd::nullopt;
}
return this->md_input.i_string.sub_range(start, stop);
}
template<typename T, std::size_t N>
nonstd::optional<string_fragment> operator[](const T (&name)[N]) const;
int get_count() const { return this->md_capture_end; }
private:
friend matcher;
friend code;
match_data() = default;
explicit match_data(auto_mem<pcre2_match_data> dat)
: md_data(std::move(dat)),
md_ovector(pcre2_get_ovector_pointer(this->md_data.in())),
md_ovector_count(pcre2_get_ovector_count(this->md_data.in()))
{
}
auto_mem<pcre2_match_data> md_data;
const code* md_code{nullptr};
input md_input;
PCRE2_SIZE* md_ovector{nullptr};
uint32_t md_ovector_count{0};
int md_capture_end{0};
};
class matcher {
public:
struct found {
string_fragment f_all;
string_fragment f_remaining;
};
struct not_found {};
struct error {
const code* e_code{nullptr};
int e_error_code{0};
std::string get_message();
};
class matches_result
: public mapbox::util::variant<found, not_found, error> {
public:
using variant::variant;
nonstd::optional<found> ignore_error()
{
return this->match(
[](found fo) { return nonstd::make_optional(fo); },
[](not_found) { return nonstd::nullopt; },
[](error err) {
handle_error(err);
return nonstd::nullopt;
});
}
private:
static void handle_error(error err);
};
matcher& reload_input(string_fragment sf, int next_offset)
{
this->mb_input = input{sf, next_offset, next_offset};
return *this;
}
matches_result matches(uint32_t options = 0);
int get_next_offset() const { return this->mb_input.i_next_offset; }
private:
friend capture_builder;
matcher(const code& co, input& in, match_data& md)
: mb_code(co), mb_input(in), mb_match_data(md)
{
}
const code& mb_code;
input mb_input;
match_data& mb_match_data;
};
struct capture_builder {
const code& mb_code;
input mb_input;
capture_builder at(const string_fragment& remaining) &&
{
this->mb_input.i_offset = this->mb_input.i_next_offset
= remaining.sf_begin;
return *this;
}
matcher into(match_data& md) &&
{
return matcher{
this->mb_code,
this->mb_input,
md,
};
}
template<uint32_t Options = 0, typename F>
Result<string_fragment, matcher::error> for_each(F func) &&;
};
struct compile_error {
std::string ce_pattern;
int ce_code{0};
size_t ce_offset{0};
std::string get_message() const;
};
class code {
public:
class named_capture {
public:
size_t get_index() const;
string_fragment get_name() const;
PCRE2_SPTR nc_entry;
};
class named_captures {
public:
struct iterator {
named_capture operator*() const;
iterator& operator++();
bool operator==(const iterator& other) const;
bool operator!=(const iterator& other) const;
uint32_t i_entry_size;
PCRE2_SPTR i_entry;
};
iterator begin() const;
iterator end() const;
bool empty() const { return this->nc_count == 0; }
size_t size() const { return this->nc_count; }
private:
friend code;
named_captures() = default;
uint32_t nc_count{0};
uint32_t nc_entry_size{0};
PCRE2_SPTR nc_name_table{nullptr};
};
static Result<code, compile_error> from(string_fragment sf,
int options = 0);
template<typename T, std::size_t N>
static code from_const(const T (&str)[N], int options = 0)
{
return from(string_fragment::from_const(str), options).unwrap();
}
const std::string& get_pattern() const { return this->p_pattern; }
named_captures get_named_captures() const;
const char* get_name_for_capture(size_t index) const;
size_t get_capture_count() const;
int name_index(const char* name) const;
std::vector<string_fragment> get_captures() const;
match_data create_match_data() const;
capture_builder capture_from(string_fragment in) const
{
return capture_builder{
*this,
input{in},
};
}
matcher::matches_result find_in(string_fragment in,
uint32_t options = 0) const
{
static thread_local match_data md = this->create_match_data();
if (md.md_ovector_count < this->p_match_proto.md_ovector_count) {
md = this->create_match_data();
}
return this->capture_from(in).into(md).matches(options);
}
size_t match_partial(string_fragment in) const;
std::string replace(string_fragment str, const char* repl) const;
std::shared_ptr<code> to_shared() &&
{
return std::make_shared<code>(std::move(this->p_code),
std::move(this->p_pattern));
}
code(auto_mem<pcre2_code> code, std::string pattern)
: p_code(std::move(code)), p_pattern(std::move(pattern)),
p_match_proto(this->create_match_data())
{
}
private:
friend matcher;
friend match_data;
auto_mem<pcre2_code> p_code;
std::string p_pattern;
match_data p_match_proto;
};
template<typename T, std::size_t N>
nonstd::optional<string_fragment>
match_data::operator[](const T (&name)[N]) const
{
auto index = pcre2_substring_number_from_name(
this->md_code->p_code.in(),
reinterpret_cast<const unsigned char*>(name));
return this->operator[](index);
}
template<uint32_t Options, typename F>
Result<string_fragment, matcher::error>
capture_builder::for_each(F func) &&
{
auto md = this->mb_code.create_match_data();
auto mat = matcher{this->mb_code, this->mb_input, md};
bool done = false;
matcher::error eret;
while (!done) {
auto match_res = mat.matches(Options);
done = match_res.match(
[mat, &func](matcher::found) {
func(mat.mb_match_data);
return false;
},
[](matcher::not_found) { return true; },
[&eret](matcher::error err) {
eret = err;
return true;
});
}
if (eret.e_error_code == 0) {
return Ok(md.remaining());
}
return Err(eret);
}
} // namespace pcre2pp
} // namespace lnav
#endif