diff --git a/src/base/ansi_scrubber.cc b/src/base/ansi_scrubber.cc index a7bb77c7..ee216330 100644 --- a/src/base/ansi_scrubber.cc +++ b/src/base/ansi_scrubber.cc @@ -51,8 +51,9 @@ ansi_regex() size_t erase_ansi_escapes(string_fragment input) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + const auto& regex = ansi_regex(); - auto md = regex.create_match_data(); auto matcher = regex.capture_from(input).into(md); while (true) { @@ -113,8 +114,8 @@ erase_ansi_escapes(string_fragment input) void scrub_ansi_string(std::string& str, string_attrs_t* sa) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); const auto& regex = ansi_regex(); - auto md = regex.create_match_data(); int64_t origin_offset = 0; int last_origin_offset_end = 0; diff --git a/src/log_format.cc b/src/log_format.cc index 2ce78b46..3d40ebba 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -312,7 +312,8 @@ log_format::log_scanf(uint32_t line_number, int pat_index = this->last_pattern_index(); while (!done && next_format(fmt, curr_fmt, pat_index)) { - auto md = fmt[curr_fmt].pcre->create_match_data(); + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + auto match_res = fmt[curr_fmt] .pcre->capture_from(line) .into(md) @@ -802,6 +803,8 @@ external_log_format::scan(logfile& lf, auto line_sf = sbr.to_string_fragment(); while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + auto* fpat = this->elf_pattern_order[curr_fmt].get(); auto* pat = fpat->p_pcre.pp_value.get(); @@ -809,7 +812,6 @@ external_log_format::scan(logfile& lf, continue; } - auto md = pat->create_match_data(); auto match_res = pat->capture_from(line_sf) .into(md) .matches(PCRE2_NO_UTF_CHECK) @@ -907,13 +909,15 @@ external_log_format::scan(logfile& lf, mod_iter->second.mf_mod_format); if (mod_elf) { + static thread_local auto mod_md + = lnav::pcre2pp::match_data::unitialized(); + shared_buffer_ref body_ref; body_cap->trim(); int mod_pat_index = mod_elf->last_pattern_index(); auto& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index]; - auto mod_md = mod_pat.p_pcre.pp_value->create_match_data(); auto match_res = mod_pat.p_pcre.pp_value ->capture_from(body_cap.value()) .into(mod_md) @@ -1014,6 +1018,8 @@ external_log_format::module_scan(string_fragment body_cap, int curr_fmt = -1, fmt_lock = -1; while (::next_format(elf->elf_pattern_order, curr_fmt, fmt_lock)) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + auto& fpat = elf->elf_pattern_order[curr_fmt]; auto& pat = fpat->p_pcre; @@ -1021,7 +1027,6 @@ external_log_format::module_scan(string_fragment body_cap, continue; } - auto md = pat.pp_value->create_match_data(); auto match_res = pat.pp_value->capture_from(body_cap) .into(md) .matches(PCRE2_NO_UTF_CHECK) @@ -1054,6 +1059,8 @@ external_log_format::annotate(uint64_t line_number, logline_value_vector& values, bool annotate_module) const { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + auto& line = values.lvv_sbr; struct line_range lr; @@ -1074,7 +1081,6 @@ external_log_format::annotate(uint64_t line_number, auto& pat = *this->elf_pattern_order[pat_index]; sa.reserve(pat.p_pcre.pp_value->get_capture_count()); - auto md = pat.p_pcre.pp_value->create_match_data(); auto match_res = pat.p_pcre.pp_value->capture_from(line.to_string_fragment()) .into(md) diff --git a/src/pcrepp/pcre2pp.cc b/src/pcrepp/pcre2pp.cc index 7565595b..9e5c6bff 100644 --- a/src/pcrepp/pcre2pp.cc +++ b/src/pcrepp/pcre2pp.cc @@ -55,6 +55,20 @@ quote(const char* unquoted) return retval; } +matcher +capture_builder::into(lnav::pcre2pp::match_data& md) && +{ + if (md.get_capacity() < this->mb_code.get_match_data_capacity()) { + md = this->mb_code.create_match_data(); + } + + return matcher{ + this->mb_code, + this->mb_input, + md, + }; +} + match_data code::create_match_data() const { diff --git a/src/pcrepp/pcre2pp.hh b/src/pcrepp/pcre2pp.hh index 8099d387..a40d26cb 100644 --- a/src/pcrepp/pcre2pp.hh +++ b/src/pcrepp/pcre2pp.hh @@ -106,6 +106,8 @@ public: int get_count() const { return this->md_capture_end; } + uint32_t get_capacity() const { return this->md_ovector_count; } + private: friend matcher; friend code; @@ -195,14 +197,7 @@ struct capture_builder { return *this; } - matcher into(match_data& md) && - { - return matcher{ - this->mb_code, - this->mb_input, - md, - }; - } + matcher into(match_data& md) &&; template Result for_each(F func) &&; @@ -274,6 +269,10 @@ public: std::vector get_captures() const; + uint32_t get_match_data_capacity() const { + return this->p_match_proto.md_ovector_count; + } + match_data create_match_data() const; capture_builder capture_from(string_fragment in) const diff --git a/src/regex101.import.cc b/src/regex101.import.cc index e21078ad..c0e29845 100644 --- a/src/regex101.import.cc +++ b/src/regex101.import.cc @@ -54,7 +54,7 @@ regex101::import(const std::string& url, { static const auto USER_URL = lnav::pcre2pp::code::from_const( R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)"); - static thread_local auto URL_MATCH_DATA = USER_URL.create_match_data(); + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); static const auto NAME_RE = lnav::pcre2pp::code::from_const(R"(^\w+$)"); if (url.empty()) { @@ -100,10 +100,8 @@ regex101::import(const std::string& url, .append("^ matched up to here"_comment))); } - auto user_find_res = USER_URL.capture_from(url) - .into(URL_MATCH_DATA) - .matches() - .ignore_error(); + auto user_find_res + = USER_URL.capture_from(url).into(md).matches().ignore_error(); if (!user_find_res) { auto partial_len = USER_URL.match_partial(url); return Err(lnav::console::user_message::error( @@ -118,7 +116,7 @@ regex101::import(const std::string& url, .append("^ matched up to here"_comment))); } - auto permalink = URL_MATCH_DATA[1]->to_string(); + auto permalink = md[1]->to_string(); auto format_filename = existing_format ? fmt::format(FMT_STRING("{}.regex101-{}.json"), name, permalink) diff --git a/src/relative_time.cc b/src/relative_time.cc index ef1d9e02..e6f11180 100644 --- a/src/relative_time.cc +++ b/src/relative_time.cc @@ -294,8 +294,9 @@ relative_time::from_str(string_fragment str) bool found = false; for (int lpc = 0; lpc < RTT__MAX && !found; lpc++) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + token_t token = (token_t) lpc; - auto md = MATCHERS[lpc].pcre.create_match_data(); auto match_res = MATCHERS[lpc] .pcre.capture_from(remaining) .into(md) diff --git a/src/text_anonymizer.cc b/src/text_anonymizer.cc index 0a2d37a8..32a7a130 100644 --- a/src/text_anonymizer.cc +++ b/src/text_anonymizer.cc @@ -467,9 +467,11 @@ text_anonymizer::next(string_fragment line) } else { static const auto ATTR_RE = lnav::pcre2pp::code::from_const(R"([\w\-]+=)"); + static thread_local auto md + = lnav::pcre2pp::match_data::unitialized(); + auto remaining = string_fragment::from_str_range( open_tag, space_index, open_tag.size()); - auto md = ATTR_RE.create_match_data(); retval += open_tag.substr(0, space_index + 1); while (!remaining.empty()) { diff --git a/src/yajlpp/yajlpp.cc b/src/yajlpp/yajlpp.cc index 14a31b70..486aabf7 100644 --- a/src/yajlpp/yajlpp.cc +++ b/src/yajlpp/yajlpp.cc @@ -224,7 +224,9 @@ json_path_handler_base::gen(yajlpp_gen_context& ygc, yajl_gen handle) const ygc.ygc_depth += 1; if (this->jph_obj_provider) { - auto md = this->jph_regex->create_match_data(); + static thread_local auto md + = lnav::pcre2pp::match_data::unitialized(); + auto find_res = this->jph_regex->capture_from(full_path) .into(md) .matches(); @@ -504,7 +506,9 @@ json_path_handler_base::walk( ypc.set_path(full_path).with_obj(root).update_callbacks(); if (this->jph_obj_provider) { - auto md = this->jph_regex->create_match_data(); + static thread_local auto md + = lnav::pcre2pp::match_data::unitialized(); + std::string full_path = lpath + "/"; if (!this->jph_regex->capture_from(full_path) @@ -707,7 +711,8 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, auto path_frag = string_fragment::from_byte_range( this->ypc_path.data(), 1 + child_start, this->ypc_path.size() - 1); for (const auto& jph : handlers->jpc_children) { - auto md = jph.jph_regex->create_match_data(); + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + if (jph.jph_regex->capture_from(path_frag) .into(md) .matches() @@ -945,8 +950,8 @@ yajlpp_parse_context::handle_unused_or_delete(void* ctx) if (!ypc->ypc_handler_stack.empty() && ypc->ypc_handler_stack.back()->jph_obj_deleter) { - auto& jph = ypc->ypc_handler_stack.back(); - auto md = jph->jph_regex->create_match_data(); + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + auto key_start = ypc->ypc_path_index_stack.back(); auto path_frag = string_fragment::from_byte_range( ypc->ypc_path.data(), key_start + 1, ypc->ypc_path.size() - 1); diff --git a/test/expected/expected.am b/test/expected/expected.am index 4068e539..c64b5784 100644 --- a/test/expected/expected.am +++ b/test/expected/expected.am @@ -164,6 +164,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_cmds.sh_be1d9628fc447b6f17121d9457ea1602afe8f3f3.out \ $(srcdir)/%reldir%/test_cmds.sh_be3b7c5874b5f4d86cc230bd2f9802c98909e148.err \ $(srcdir)/%reldir%/test_cmds.sh_be3b7c5874b5f4d86cc230bd2f9802c98909e148.out \ + $(srcdir)/%reldir%/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.err \ + $(srcdir)/%reldir%/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.out \ $(srcdir)/%reldir%/test_cmds.sh_c01e10f7cae8d36fa79ae03be887cb5477025f6d.err \ $(srcdir)/%reldir%/test_cmds.sh_c01e10f7cae8d36fa79ae03be887cb5477025f6d.out \ $(srcdir)/%reldir%/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err \ @@ -974,6 +976,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \ $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \ + $(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err \ + $(srcdir)/%reldir%/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.out \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \ $(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \ diff --git a/test/expected/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.err b/test/expected/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.err new file mode 100644 index 00000000..07df1b2b --- /dev/null +++ b/test/expected/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.err @@ -0,0 +1,7 @@ +✘ error: invalid timestamp: 2022-06-16Tabc + reason: the leading part of the timestamp was matched, however, the trailing text “Tabc” was not + --> command-option:1 + | :goto 2022-06-16Tabc  + |  ^--^ unrecognized input  + = note: input matched time format “%Y-%m-%d” + = help: fix the timestamp or remove the trailing text diff --git a/test/expected/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.out b/test/expected/test_cmds.sh_bf4e7fad67e281beaa11b6e2b03a00b419c7c9b0.out new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err b/test/expected/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err new file mode 100644 index 00000000..b7f3a79d --- /dev/null +++ b/test/expected/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.err @@ -0,0 +1,4 @@ +✘ error: failed to parse YAML content + reason: closing ] not found + --> command-option:1 + | ;SELECT yaml_to_json('[abc')  diff --git a/test/expected/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.out b/test/expected/test_sql_yaml_func.sh_dc189d02e8979b7ed245d5d750f68b9965984699.out new file mode 100644 index 00000000..e69de29b