mirror of
https://github.com/tstack/lnav
synced 2024-11-03 23:15:38 +00:00
[ansi_scrubber] handle unicode in overstrike code
This commit is contained in:
parent
9c8cc04a99
commit
5abd483029
@ -42,8 +42,8 @@
|
||||
static const pcrepp&
|
||||
ansi_regex()
|
||||
{
|
||||
static const pcrepp retval(
|
||||
"\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:[^\x08]\x08[^\x08])+");
|
||||
static const pcrepp retval("\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+",
|
||||
PCRE_UTF8);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@ -61,28 +61,35 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
|
||||
while (regex.match(context, pi, PCRE_NO_UTF8_CHECK)) {
|
||||
auto* caps = context.all();
|
||||
const auto sf = pi.get_string_fragment(caps);
|
||||
auto bs_index_res = sf.codepoint_to_byte_index(1);
|
||||
|
||||
if (sf.length() >= 3 && sf[1] == '\b') {
|
||||
if (sf.length() >= 3 && bs_index_res.isOk()
|
||||
&& sf[bs_index_res.unwrap()] == '\b')
|
||||
{
|
||||
ssize_t fill_index = sf.sf_begin;
|
||||
ssize_t erased_size = (sf.length() / 3) * 2;
|
||||
ssize_t output_size = sf.length() / 3;
|
||||
line_range bold_range;
|
||||
line_range ul_range;
|
||||
auto sub_sf = sf;
|
||||
|
||||
if (sa != nullptr) {
|
||||
shift_string_attrs(
|
||||
*sa, caps->c_begin + sf.length() / 3, -erased_size);
|
||||
sa->emplace_back(line_range{last_origin_offset_end,
|
||||
caps->c_begin + (int) output_size},
|
||||
SA_ORIGIN_OFFSET.value(origin_offset));
|
||||
}
|
||||
for (ssize_t triple_index = 0; triple_index < output_size;
|
||||
triple_index++)
|
||||
{
|
||||
char lhs = sf[triple_index * 3];
|
||||
char rhs = sf[triple_index * 3 + 2];
|
||||
while (!sub_sf.empty()) {
|
||||
auto lhs_opt = sub_sf.consume_codepoint();
|
||||
if (!lhs_opt) {
|
||||
break;
|
||||
}
|
||||
auto lhs_pair = lhs_opt.value();
|
||||
auto mid_opt = lhs_pair.second.consume_codepoint();
|
||||
if (!mid_opt) {
|
||||
break;
|
||||
}
|
||||
auto mid_pair = mid_opt.value();
|
||||
auto rhs_opt = mid_pair.second.consume_codepoint();
|
||||
if (!rhs_opt) {
|
||||
break;
|
||||
}
|
||||
auto rhs_pair = rhs_opt.value();
|
||||
sub_sf = rhs_pair.second;
|
||||
|
||||
if (lhs == '_' || rhs == '_') {
|
||||
if (lhs_pair.first == '_' || rhs_pair.first == '_') {
|
||||
if (sa != nullptr && bold_range.is_valid()) {
|
||||
sa->emplace_back(bold_range,
|
||||
VC_STYLE.value(text_attrs{A_BOLD}));
|
||||
@ -94,7 +101,11 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
|
||||
ul_range.lr_start = fill_index;
|
||||
ul_range.lr_end = fill_index + 1;
|
||||
}
|
||||
str[fill_index++] = lhs == '_' ? rhs : lhs;
|
||||
auto cp = lhs_pair.first == '_' ? rhs_pair.first
|
||||
: lhs_pair.first;
|
||||
ww898::utf::utf8::write(cp, [&str, &fill_index](auto ch) {
|
||||
str[fill_index++] = ch;
|
||||
});
|
||||
} else {
|
||||
if (sa != nullptr && ul_range.is_valid()) {
|
||||
sa->emplace_back(
|
||||
@ -107,10 +118,26 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
|
||||
bold_range.lr_start = fill_index;
|
||||
bold_range.lr_end = fill_index + 1;
|
||||
}
|
||||
str[fill_index++] = rhs;
|
||||
ww898::utf::utf8::write(lhs_pair.first,
|
||||
[&str, &fill_index](auto ch) {
|
||||
str[fill_index++] = ch;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
auto output_size = fill_index - sf.sf_begin;
|
||||
auto erased_size = sf.length() - output_size;
|
||||
|
||||
if (sa != nullptr) {
|
||||
#if 0
|
||||
shift_string_attrs(
|
||||
*sa, caps->c_begin + sf.length() / 3, -erased_size);
|
||||
#endif
|
||||
sa->emplace_back(line_range{last_origin_offset_end,
|
||||
caps->c_begin + (int) output_size},
|
||||
SA_ORIGIN_OFFSET.value(origin_offset));
|
||||
}
|
||||
|
||||
if (sa != nullptr && ul_range.is_valid()) {
|
||||
sa->emplace_back(ul_range,
|
||||
VC_STYLE.value(text_attrs{A_UNDERLINE}));
|
||||
|
@ -132,6 +132,17 @@ struct string_fragment {
|
||||
|
||||
char front() const { return this->sf_string[this->sf_begin]; }
|
||||
|
||||
uint32_t front_codepoint() const
|
||||
{
|
||||
size_t index = 0;
|
||||
try {
|
||||
return ww898::utf::utf8::read(
|
||||
[this, &index]() { return this->data()[index++]; });
|
||||
} catch (const std::runtime_error& e) {
|
||||
return this->data()[0];
|
||||
}
|
||||
}
|
||||
|
||||
char back() const { return this->sf_string[this->sf_end - 1]; }
|
||||
|
||||
iterator begin() const { return &this->sf_string[this->sf_begin]; }
|
||||
@ -140,6 +151,26 @@ struct string_fragment {
|
||||
|
||||
bool empty() const { return !this->is_valid() || length() == 0; }
|
||||
|
||||
/**
 * Convert a codepoint index into a byte offset relative to the start
 * of this fragment by walking the UTF-8 sequences one at a time.
 *
 * @param cp_index The number of codepoints to step over.  A value of
 * zero or less yields a byte offset of zero.
 * @return The byte offset of the requested codepoint, or an error
 * message if the walk reaches the end of the fragment before
 * cp_index codepoints have been stepped over.
 * NOTE(review): TRY presumably returns early with the error produced
 * by char_size() for malformed input -- confirm against the macro.
 */
Result<ssize_t, const char*> codepoint_to_byte_index(ssize_t cp_index) const
{
    ssize_t byte_offset = 0;

    for (ssize_t remaining = cp_index; remaining > 0; --remaining) {
        if (byte_offset >= this->length()) {
            return Err("index is beyond the end of the string");
        }

        // Hand the sizing routine the lead byte together with the
        // count of bytes that follow it in the fragment.
        const auto cp_size
            = TRY(ww898::utf::utf8::char_size([this, byte_offset]() {
                  return std::make_pair(this->data()[byte_offset],
                                        this->length() - byte_offset - 1);
              }));

        byte_offset += cp_size;
    }

    return Ok(byte_offset);
}
|
||||
|
||||
char operator[](int index) const
|
||||
{
|
||||
return this->sf_string[sf_begin + index];
|
||||
@ -276,6 +307,19 @@ struct string_fragment {
|
||||
.find_right_boundary(0, predicate);
|
||||
}
|
||||
|
||||
/**
 * Split the leading codepoint off of this fragment.
 *
 * @return A pair holding the first codepoint and the fragment that
 * follows it, or nullopt if codepoint_to_byte_index() reports an
 * error for the first codepoint (which includes the case where this
 * fragment is empty).
 */
nonstd::optional<std::pair<uint32_t, string_fragment>> consume_codepoint()
    const
{
    // Size the leading codepoint before decoding it.  The lookup
    // fails for an empty fragment, so bailing out here keeps
    // front_codepoint() from touching data()[0] when there are no
    // bytes to read.
    auto index_res = this->codepoint_to_byte_index(1);

    if (index_res.isErr()) {
        return nonstd::nullopt;
    }

    auto cp = this->front_codepoint();

    return std::make_pair(cp, this->substr(index_res.unwrap()));
}
|
||||
|
||||
template<typename P>
|
||||
nonstd::optional<string_fragment> consume(P predicate) const
|
||||
{
|
||||
|
@ -184,6 +184,8 @@ EXPECTED_FILES = \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d76d77ad95b9f120825417a6a8220c13df9541fc.out \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d7eebacdcf2cb194f25fa4ef97b7b5376b442467.err \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d7eebacdcf2cb194f25fa4ef97b7b5376b442467.out \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d836c84398c831c976df46f46fe3bf5983c44c37.err \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d836c84398c831c976df46f46fe3bf5983c44c37.out \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d8eeef53a58bdeddbc1028d7c525413e3ca1c8df.err \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_d8eeef53a58bdeddbc1028d7c525413e3ca1c8df.out \
|
||||
$(srcdir)/%reldir%/test_cmds.sh_dbdd62995fdefc8318053af05a32416eccfa79fc.err \
|
||||
|
@ -0,0 +1,2 @@
|
||||
[1m[4mlog_top_line() [0m
|
||||
51
|
@ -46,12 +46,15 @@ int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
{
|
||||
std::string boldish = "h\bhe\bel\blo\bo _\ba_\bb_\bc a\b_ b";
|
||||
std::string boldish
|
||||
= "\u2022\b\u2022\u2023\b\u2023 h\bhe\bel\blo\bo _\ba_\bb_\bc a\b_ "
|
||||
"b";
|
||||
string_attrs_t sa;
|
||||
|
||||
sa.clear();
|
||||
scrub_ansi_string(boldish, &sa);
|
||||
assert(boldish == "helo abc a b");
|
||||
printf("boldish %s\n", boldish.c_str());
|
||||
assert(boldish == "\u2022\u2023 helo abc a b");
|
||||
for (const auto& attr : sa) {
|
||||
printf("attr %d:%d %s\n",
|
||||
attr.sa_range.lr_start,
|
||||
|
@ -6,8 +6,8 @@ run_cap_test ${lnav_test} -n \
|
||||
-c ":switch-to-view help" \
|
||||
${test_dir}/logfile_access_log.0
|
||||
|
||||
run_cap_test ${lnav_test} -n \
|
||||
-c ":goto 2011-11-02 17:19:39" \
|
||||
run_cap_test env TZ=UTC ${lnav_test} -n \
|
||||
-c ":goto 2011-11-03 00:19:39" \
|
||||
-c ";SELECT log_top_line()" \
|
||||
${test_dir}/logfile_bro_http.log.0
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user