[pcre2pp] allocate match_data on the stack

pull/1063/head
Tim Stack 2 years ago
parent d7e79b014d
commit a437d9fcc9

@ -51,8 +51,9 @@ ansi_regex()
size_t
erase_ansi_escapes(string_fragment input)
{
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
const auto& regex = ansi_regex();
auto md = regex.create_match_data();
auto matcher = regex.capture_from(input).into(md);
while (true) {
@ -113,8 +114,8 @@ erase_ansi_escapes(string_fragment input)
void
scrub_ansi_string(std::string& str, string_attrs_t* sa)
{
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
const auto& regex = ansi_regex();
auto md = regex.create_match_data();
int64_t origin_offset = 0;
int last_origin_offset_end = 0;

@ -312,7 +312,8 @@ log_format::log_scanf(uint32_t line_number,
int pat_index = this->last_pattern_index();
while (!done && next_format(fmt, curr_fmt, pat_index)) {
auto md = fmt[curr_fmt].pcre->create_match_data();
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
auto match_res = fmt[curr_fmt]
.pcre->capture_from(line)
.into(md)
@ -802,6 +803,8 @@ external_log_format::scan(logfile& lf,
auto line_sf = sbr.to_string_fragment();
while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) {
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
auto* fpat = this->elf_pattern_order[curr_fmt].get();
auto* pat = fpat->p_pcre.pp_value.get();
@ -809,7 +812,6 @@ external_log_format::scan(logfile& lf,
continue;
}
auto md = pat->create_match_data();
auto match_res = pat->capture_from(line_sf)
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
@ -907,13 +909,15 @@ external_log_format::scan(logfile& lf,
mod_iter->second.mf_mod_format);
if (mod_elf) {
static thread_local auto mod_md
= lnav::pcre2pp::match_data::unitialized();
shared_buffer_ref body_ref;
body_cap->trim();
int mod_pat_index = mod_elf->last_pattern_index();
auto& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index];
auto mod_md = mod_pat.p_pcre.pp_value->create_match_data();
auto match_res = mod_pat.p_pcre.pp_value
->capture_from(body_cap.value())
.into(mod_md)
@ -1014,6 +1018,8 @@ external_log_format::module_scan(string_fragment body_cap,
int curr_fmt = -1, fmt_lock = -1;
while (::next_format(elf->elf_pattern_order, curr_fmt, fmt_lock)) {
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
auto& fpat = elf->elf_pattern_order[curr_fmt];
auto& pat = fpat->p_pcre;
@ -1021,7 +1027,6 @@ external_log_format::module_scan(string_fragment body_cap,
continue;
}
auto md = pat.pp_value->create_match_data();
auto match_res = pat.pp_value->capture_from(body_cap)
.into(md)
.matches(PCRE2_NO_UTF_CHECK)
@ -1054,6 +1059,8 @@ external_log_format::annotate(uint64_t line_number,
logline_value_vector& values,
bool annotate_module) const
{
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
auto& line = values.lvv_sbr;
struct line_range lr;
@ -1074,7 +1081,6 @@ external_log_format::annotate(uint64_t line_number,
auto& pat = *this->elf_pattern_order[pat_index];
sa.reserve(pat.p_pcre.pp_value->get_capture_count());
auto md = pat.p_pcre.pp_value->create_match_data();
auto match_res
= pat.p_pcre.pp_value->capture_from(line.to_string_fragment())
.into(md)

@ -55,6 +55,20 @@ quote(const char* unquoted)
return retval;
}
matcher
capture_builder::into(lnav::pcre2pp::match_data& md) &&
{
    // The caller may hand in a match_data whose capacity is smaller than
    // what this pattern reports needing; in that case swap in one created
    // by the pattern itself.  Otherwise the caller's object is reused as-is.
    const auto required = this->mb_code.get_match_data_capacity();
    if (required > md.get_capacity()) {
        md = this->mb_code.create_match_data();
    }

    // Tie together the compiled pattern, the input, and the match_data.
    return matcher{this->mb_code, this->mb_input, md};
}
match_data
code::create_match_data() const
{

@ -106,6 +106,8 @@ public:
int get_count() const
{
    // Reports the value currently held in md_capture_end.
    return this->md_capture_end;
}
uint32_t get_capacity() const
{
    // Reports the ovector count recorded for this match_data.
    return this->md_ovector_count;
}
private:
friend matcher;
friend code;
@ -195,14 +197,7 @@ struct capture_builder {
return *this;
}
matcher into(match_data& md) &&
{
    // Build a matcher from this builder's pattern and input, using the
    // caller-provided match_data for the results.
    return matcher{this->mb_code, this->mb_input, md};
}
matcher into(match_data& md) &&;
template<uint32_t Options = 0, typename F>
Result<string_fragment, matcher::error> for_each(F func) &&;
@ -274,6 +269,10 @@ public:
std::vector<string_fragment> get_captures() const;
uint32_t get_match_data_capacity() const
{
    // Reports the ovector count recorded in this object's p_match_proto.
    const auto& proto = this->p_match_proto;
    return proto.md_ovector_count;
}
match_data create_match_data() const;
capture_builder capture_from(string_fragment in) const

@ -54,7 +54,7 @@ regex101::import(const std::string& url,
{
static const auto USER_URL = lnav::pcre2pp::code::from_const(
R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)");
static thread_local auto URL_MATCH_DATA = USER_URL.create_match_data();
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
static const auto NAME_RE = lnav::pcre2pp::code::from_const(R"(^\w+$)");
if (url.empty()) {
@ -100,10 +100,8 @@ regex101::import(const std::string& url,
.append("^ matched up to here"_comment)));
}
auto user_find_res = USER_URL.capture_from(url)
.into(URL_MATCH_DATA)
.matches()
.ignore_error();
auto user_find_res
= USER_URL.capture_from(url).into(md).matches().ignore_error();
if (!user_find_res) {
auto partial_len = USER_URL.match_partial(url);
return Err(lnav::console::user_message::error(
@ -118,7 +116,7 @@ regex101::import(const std::string& url,
.append("^ matched up to here"_comment)));
}
auto permalink = URL_MATCH_DATA[1]->to_string();
auto permalink = md[1]->to_string();
auto format_filename = existing_format
? fmt::format(FMT_STRING("{}.regex101-{}.json"), name, permalink)

@ -294,8 +294,9 @@ relative_time::from_str(string_fragment str)
bool found = false;
for (int lpc = 0; lpc < RTT__MAX && !found; lpc++) {
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
token_t token = (token_t) lpc;
auto md = MATCHERS[lpc].pcre.create_match_data();
auto match_res = MATCHERS[lpc]
.pcre.capture_from(remaining)
.into(md)

@ -467,9 +467,11 @@ text_anonymizer::next(string_fragment line)
} else {
static const auto ATTR_RE
= lnav::pcre2pp::code::from_const(R"([\w\-]+=)");
static thread_local auto md
= lnav::pcre2pp::match_data::unitialized();
auto remaining = string_fragment::from_str_range(
open_tag, space_index, open_tag.size());
auto md = ATTR_RE.create_match_data();
retval += open_tag.substr(0, space_index + 1);
while (!remaining.empty()) {

@ -224,7 +224,9 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const
ygc.ygc_depth += 1;
if (this->jph_obj_provider) {
auto md = this->jph_regex->create_match_data();
static thread_local auto md
= lnav::pcre2pp::match_data::unitialized();
auto find_res = this->jph_regex->capture_from(full_path)
.into(md)
.matches();
@ -504,7 +506,9 @@ json_path_handler_base::walk(
ypc.set_path(full_path).with_obj(root).update_callbacks();
if (this->jph_obj_provider) {
auto md = this->jph_regex->create_match_data();
static thread_local auto md
= lnav::pcre2pp::match_data::unitialized();
std::string full_path = lpath + "/";
if (!this->jph_regex->capture_from(full_path)
@ -707,7 +711,8 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers,
auto path_frag = string_fragment::from_byte_range(
this->ypc_path.data(), 1 + child_start, this->ypc_path.size() - 1);
for (const auto& jph : handlers->jpc_children) {
auto md = jph.jph_regex->create_match_data();
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
if (jph.jph_regex->capture_from(path_frag)
.into(md)
.matches()
@ -945,8 +950,8 @@ yajlpp_parse_context::handle_unused_or_delete(void* ctx)
if (!ypc->ypc_handler_stack.empty()
&& ypc->ypc_handler_stack.back()->jph_obj_deleter)
{
auto& jph = ypc->ypc_handler_stack.back();
auto md = jph->jph_regex->create_match_data();
static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
auto key_start = ypc->ypc_path_index_stack.back();
auto path_frag = string_fragment::from_byte_range(
ypc->ypc_path.data(), key_start + 1, ypc->ypc_path.size() - 1);

@ -164,6 +164,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_cmds.sh_be1d9628fc447b6f17121d9457ea1602afe8f3f3.out \
$(srcdir)/%reldir%/test_cmds.sh_be3b7c5874b5f4d86cc230bd2f9802c98909e148.err \
$(srcdir)/%reldir%/test_cmds.sh_be3b7c5874b5f4d86cc230bd2f9802c98909e148.out \
$(srcdir)/%reldir%/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.err \
$(srcdir)/%reldir%/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.out \
$(srcdir)/%reldir%/test_cmds.sh_c01e10f7cae8d36fa79ae03be887cb5477025f6d.err \
$(srcdir)/%reldir%/test_cmds.sh_c01e10f7cae8d36fa79ae03be887cb5477025f6d.out \
$(srcdir)/%reldir%/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err \
@ -974,6 +976,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err \
$(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.out \
$(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \
$(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \
$(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \

@ -0,0 +1,7 @@
✘ error: invalid timestamp: 2022-06-16Tabc
reason: the leading part of the timestamp was matched, however, the trailing text “Tabc” was not
 --> command-option:1
 | :goto 2022-06-16Tabc 
 |  ^--^ unrecognized input 
 = note: input matched time format “%Y-%m-%d”
 = help: fix the timestamp or remove the trailing text

@ -0,0 +1,4 @@
✘ error: failed to parse YAML content
reason: closing ] not found
 --> command-option:1
 | ;SELECT yaml_to_json('[abc') 
Loading…
Cancel
Save