mirror of
https://github.com/tstack/lnav
synced 2024-11-01 21:40:34 +00:00
[logfile] handle logs with ANSI escapes
This is a partial fix for log messages that contain ANSI escape sequences, which prevented regexes from matching. More work remains to be done. Related to #1057
This commit is contained in:
parent
8613ad4d47
commit
740b827901
@ -59,21 +59,26 @@
|
||||
`faulty_bytes` the number of actually existing bytes taking part in this
|
||||
error.
|
||||
*/
|
||||
ssize_t
|
||||
utf8_scan_result
|
||||
is_utf8(const unsigned char* str,
|
||||
size_t len,
|
||||
const char** message,
|
||||
int* faulty_bytes,
|
||||
nonstd::optional<unsigned char> terminator)
|
||||
{
|
||||
size_t i = 0;
|
||||
bool has_ansi = false;
|
||||
ssize_t i = 0;
|
||||
|
||||
*message = nullptr;
|
||||
*faulty_bytes = 0;
|
||||
while (i < len) {
|
||||
if (str[i] == '\x1b') {
|
||||
has_ansi = true;
|
||||
}
|
||||
|
||||
if (terminator && str[i] == terminator.value()) {
|
||||
*message = nullptr;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
|
||||
if (str[i] <= 0x7F) /* 00..7F */ {
|
||||
@ -85,14 +90,14 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte between C2 and DF, expecting a "
|
||||
"2nd byte between 80 and BF";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte between C2 and DF, expecting a 2nd "
|
||||
"byte.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 2;
|
||||
} else if (str[i] == 0xE0) /* E0 A0..BF 80..BF */ {
|
||||
@ -102,21 +107,21 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte of E0, expecting a 2nd byte "
|
||||
"between A0 and BF.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of E0, expecting a 3nd byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte of E0, expecting two following "
|
||||
"bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 3;
|
||||
} else if (str[i] >= 0xE1 && str[i] <= 0xEC) /* E1..EC 80..BF 80..BF */
|
||||
@ -127,21 +132,21 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte between E1 and EC, expecting the "
|
||||
"2nd byte between 80 and BF.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte between E1 and EC, expecting the "
|
||||
"3rd byte between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte between E1 and EC, expecting two "
|
||||
"following bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 3;
|
||||
} else if (str[i] == 0xED) /* ED 80..9F 80..BF */ {
|
||||
@ -151,21 +156,21 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte of ED, expecting 2nd byte "
|
||||
"between 80 and 9F.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of ED, expecting 3rd byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte of ED, expecting two following "
|
||||
"bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 3;
|
||||
} else if (str[i] >= 0xEE && str[i] <= 0xEF) /* EE..EF 80..BF 80..BF */
|
||||
@ -176,21 +181,21 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte between EE and EF, expecting 2nd "
|
||||
"byte between 80 and BF.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte between EE and EF, expecting 3rd "
|
||||
"byte between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte between EE and EF, two following "
|
||||
"bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 3;
|
||||
} else if (str[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */ {
|
||||
@ -200,60 +205,61 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte of F0, expecting 2nd byte "
|
||||
"between 90 and BF.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F0, expecting 3rd byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F0, expecting 4th byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 4;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte of F0, expecting three following "
|
||||
"bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 4;
|
||||
} else if (str[i] >= 0xF1
|
||||
&& str[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */ {
|
||||
&& str[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */
|
||||
{
|
||||
if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ {
|
||||
if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F1, F2, or F3, expecting a "
|
||||
"2nd byte between 80 and BF.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F1, F2, or F3, expecting a "
|
||||
"3rd byte between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F1, F2, or F3, expecting a "
|
||||
"4th byte between 80 and BF.";
|
||||
*faulty_bytes = 4;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte of F1, F2, or F3, expecting three "
|
||||
"following bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 4;
|
||||
} else if (str[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */ {
|
||||
@ -263,36 +269,36 @@ is_utf8(const unsigned char* str,
|
||||
= "After a first byte of F4, expecting 2nd byte "
|
||||
"between 80 and 8F.";
|
||||
*faulty_bytes = 2;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F4, expecting 3rd byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 3;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
|
||||
*message
|
||||
= "After a first byte of F4, expecting 4th byte "
|
||||
"between 80 and BF.";
|
||||
*faulty_bytes = 4;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
} else {
|
||||
*message
|
||||
= "After a first byte of F4, expecting three following "
|
||||
"bytes.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
i += 4;
|
||||
} else {
|
||||
*message
|
||||
= "Expecting bytes in the following ranges: 00..7F C2..F4.";
|
||||
*faulty_bytes = 1;
|
||||
return i;
|
||||
return {i, has_ansi};
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
return {-1, has_ansi};
|
||||
}
|
||||
|
@ -33,10 +33,16 @@
|
||||
|
||||
#include "optional.hpp"
|
||||
|
||||
ssize_t is_utf8(const unsigned char* str,
|
||||
/**
 * Result of an is_utf8() scan: where the scan stopped and whether an
 * escape byte was seen along the way.
 */
struct utf8_scan_result {
|
||||
/* Offset at which is_utf8() stopped: the index of the terminator byte or
 * of the first faulty byte.  is_utf8() returns -1 here when it consumed
 * the whole buffer without stopping early.  Defaults to 0. */
ssize_t usr_end{0};
|
||||
/* True if is_utf8() saw an ESC (0x1b) byte before it stopped.
 * Defaults to false. */
bool usr_has_ansi{false};
|
||||
};
|
||||
|
||||
utf8_scan_result is_utf8(const unsigned char* str,
|
||||
size_t len,
|
||||
const char** message,
|
||||
int* faulty_bytes,
|
||||
nonstd::optional<unsigned char> terminator = nonstd::nullopt);
|
||||
nonstd::optional<unsigned char> terminator
|
||||
= nonstd::nullopt);
|
||||
|
||||
#endif /* _IS_UTF8_H */
|
||||
|
@ -45,14 +45,14 @@ scrub_to_utf8(char* buffer, size_t length)
|
||||
int faulty_bytes;
|
||||
|
||||
while (true) {
|
||||
ssize_t utf8_end
|
||||
auto scan_res
|
||||
= is_utf8((unsigned char*) buffer, length, &msg, &faulty_bytes);
|
||||
|
||||
if (msg == nullptr) {
|
||||
break;
|
||||
}
|
||||
for (int lpc = 0; lpc < faulty_bytes; lpc++) {
|
||||
buffer[utf8_end + lpc] = '?';
|
||||
buffer[scan_res.usr_end + lpc] = '?';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#define HAVE_LIBCURL
|
||||
|
||||
#cmakedefine SIZEOF_OFF_T @SIZEOF_OFF_T @
|
||||
#cmakedefine SIZEOF_OFF_T @SIZEOF_OFF_T@
|
||||
|
||||
#cmakedefine VCS_PACKAGE_STRING "@VCS_PACKAGE_STRING@"
|
||||
|
||||
|
@ -90,7 +90,7 @@ data_parser::pairup(data_parser::schema_id_t* schema,
|
||||
} else if (iter->e_token == in_list.el_format.df_qualifier) {
|
||||
value.SPLICE(
|
||||
value.end(), key_comps, key_comps.begin(), key_comps.end());
|
||||
strip(value, element_if(DT_WHITE));
|
||||
strip(value, element_is_space{});
|
||||
if (!value.empty()) {
|
||||
el_stack.PUSH_BACK(element(value, DNT_VALUE));
|
||||
}
|
||||
@ -119,7 +119,7 @@ data_parser::pairup(data_parser::schema_id_t* schema,
|
||||
key_comps.begin(),
|
||||
key_iter);
|
||||
key_comps.POP_FRONT();
|
||||
strip(key_comps, element_if(DT_WHITE));
|
||||
strip(key_comps, element_is_space{});
|
||||
if (key_comps.empty()) {
|
||||
key_iter = key_comps.end();
|
||||
} else {
|
||||
@ -160,12 +160,12 @@ data_parser::pairup(data_parser::schema_id_t* schema,
|
||||
key_comps.resize(1);
|
||||
}
|
||||
|
||||
strip(value, element_if(DT_WHITE));
|
||||
strip(value, element_is_space{});
|
||||
value.remove_if(element_if(DT_COMMA));
|
||||
if (!value.empty()) {
|
||||
el_stack.PUSH_BACK(element(value, DNT_VALUE));
|
||||
}
|
||||
strip(key_comps, element_if(DT_WHITE));
|
||||
strip(key_comps, element_is_space{});
|
||||
if (!key_comps.empty()) {
|
||||
if (key_is_values) {
|
||||
el_stack.PUSH_BACK(element(key_comps, DNT_VALUE));
|
||||
@ -531,8 +531,8 @@ data_parser::end_of_value(data_parser::element_list_t& el_stack,
|
||||
key_comps.remove_if(element_if(DT_COMMA));
|
||||
value.remove_if(element_if(in_list.el_format.df_terminator));
|
||||
value.remove_if(element_if(DT_COMMA));
|
||||
strip(key_comps, element_if(DT_WHITE));
|
||||
strip(value, element_if(DT_WHITE));
|
||||
strip(key_comps, element_is_space{});
|
||||
strip(value, element_is_space{});
|
||||
if ((el_stack.empty() || el_stack.back().e_token != DNT_KEY)
|
||||
&& value.empty() && key_comps.size() > 1
|
||||
&& (key_comps.front().e_token == DT_WORD
|
||||
@ -550,7 +550,9 @@ data_parser::end_of_value(data_parser::element_list_t& el_stack,
|
||||
if (found_value) {
|
||||
key_end = key_comps.begin();
|
||||
}
|
||||
} else if (key_iter->e_token == DT_WHITE) {
|
||||
} else if (key_iter->e_token == DT_WHITE
|
||||
|| key_iter->e_token == DT_CSI)
|
||||
{
|
||||
} else {
|
||||
if (!found_value) {
|
||||
key_end = key_iter;
|
||||
@ -562,7 +564,7 @@ data_parser::end_of_value(data_parser::element_list_t& el_stack,
|
||||
key_end = key_comps.begin();
|
||||
}
|
||||
value.SPLICE(value.end(), key_comps, key_end, key_comps.end());
|
||||
strip(key_comps, element_if(DT_WHITE));
|
||||
strip(key_comps, element_is_space{});
|
||||
if (!key_comps.empty()) {
|
||||
el_stack.PUSH_BACK(element(key_comps, DNT_KEY, false));
|
||||
}
|
||||
@ -571,9 +573,9 @@ data_parser::end_of_value(data_parser::element_list_t& el_stack,
|
||||
value.SPLICE(
|
||||
value.end(), key_comps, key_comps.begin(), key_comps.end());
|
||||
}
|
||||
strip(value, element_if(DT_WHITE));
|
||||
strip(value, element_is_space{});
|
||||
strip(value, element_if(DT_COLON));
|
||||
strip(value, element_if(DT_WHITE));
|
||||
strip(value, element_is_space{});
|
||||
if (!value.empty()) {
|
||||
if (value.size() == 2 && value.back().e_token == DNT_GROUP) {
|
||||
element_list_t ELEMENT_LIST_T(group_pair);
|
||||
@ -681,6 +683,7 @@ dfs_prefix_next(data_format_state_t state, data_token_t next_token)
|
||||
case DT_HEX_NUMBER:
|
||||
case DT_NUMBER:
|
||||
case DT_WHITE:
|
||||
case DT_CSI:
|
||||
case DT_LSQUARE:
|
||||
case DT_RSQUARE:
|
||||
case DT_LANGLE:
|
||||
|
@ -77,7 +77,9 @@ struct data_format {
|
||||
data_token_t terminator = DT_INVALID) noexcept
|
||||
: df_name(name), df_appender(appender), df_terminator(terminator),
|
||||
df_qualifier(DT_INVALID), df_separator(DT_COLON),
|
||||
df_prefix_terminator(DT_INVALID){};
|
||||
df_prefix_terminator(DT_INVALID)
|
||||
{
|
||||
}
|
||||
|
||||
const char* df_name;
|
||||
data_token_t df_appender;
|
||||
@ -234,7 +236,7 @@ public:
|
||||
int group_depth = -1;
|
||||
|
||||
LIST_INIT_TRACE;
|
||||
};
|
||||
}
|
||||
|
||||
element_list_t(const element_list_t& other) : std::list<element>(other)
|
||||
{
|
||||
@ -247,7 +249,7 @@ public:
|
||||
int line = __LINE__;
|
||||
|
||||
LIST_DEINIT_TRACE;
|
||||
};
|
||||
}
|
||||
|
||||
void push_front(const element& elem, const char* fn, int line)
|
||||
{
|
||||
@ -255,7 +257,7 @@ public:
|
||||
|
||||
require(elem.e_capture.c_end >= -1);
|
||||
this->std::list<element>::push_front(elem);
|
||||
};
|
||||
}
|
||||
|
||||
void push_back(const element& elem, const char* fn, int line)
|
||||
{
|
||||
@ -263,28 +265,28 @@ public:
|
||||
|
||||
require(elem.e_capture.c_end >= -1);
|
||||
this->std::list<element>::push_back(elem);
|
||||
};
|
||||
}
|
||||
|
||||
void pop_front(const char* fn, int line)
|
||||
{
|
||||
LIST_TRACE;
|
||||
|
||||
this->std::list<element>::pop_front();
|
||||
};
|
||||
}
|
||||
|
||||
void pop_back(const char* fn, int line)
|
||||
{
|
||||
LIST_TRACE;
|
||||
|
||||
this->std::list<element>::pop_back();
|
||||
};
|
||||
}
|
||||
|
||||
void clear2(const char* fn, int line)
|
||||
{
|
||||
LIST_TRACE;
|
||||
|
||||
this->std::list<element>::clear();
|
||||
};
|
||||
}
|
||||
|
||||
void swap(element_list_t& other, const char* fn, int line)
|
||||
{
|
||||
@ -345,26 +347,33 @@ public:
|
||||
bool operator()(data_token_t token, const element& elem) const
|
||||
{
|
||||
return token == elem.e_token || token == DT_ANY;
|
||||
};
|
||||
}
|
||||
|
||||
bool operator()(const element& elem, data_token_t token) const
|
||||
{
|
||||
return (*this)(token, elem);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
struct element_if {
|
||||
element_if(data_token_t token) : ei_token(token){};
|
||||
element_if(data_token_t token) : ei_token(token) {}
|
||||
|
||||
bool operator()(const element& a) const
|
||||
{
|
||||
return a.e_token == this->ei_token;
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
data_token_t ei_token;
|
||||
};
|
||||
|
||||
/**
 * Unary predicate over parser elements that matches "space-like" tokens.
 * This commit passes it to strip() where element_if(DT_WHITE) was used
 * before, so DT_CSI elements are stripped along with whitespace.
 */
struct element_is_space {
|
||||
/* @param el The element to test.
 * @return true when the element's token is DT_WHITE or DT_CSI. */
bool operator()(const element& el) const
|
||||
{
|
||||
return el.e_token == DT_WHITE || el.e_token == DT_CSI;
|
||||
}
|
||||
};
|
||||
|
||||
struct discover_format_state {
|
||||
discover_format_state();
|
||||
|
||||
@ -418,4 +427,5 @@ public:
|
||||
private:
|
||||
data_scanner* dp_scanner;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -185,6 +185,9 @@ static struct {
|
||||
{
|
||||
"escc",
|
||||
},
|
||||
{
|
||||
"csi",
|
||||
},
|
||||
|
||||
{
|
||||
"gbg",
|
||||
|
@ -93,6 +93,7 @@ enum data_token_t {
|
||||
DT_WHITE,
|
||||
DT_DOT,
|
||||
DT_ESCAPED_CHAR,
|
||||
DT_CSI,
|
||||
|
||||
DT_GARBAGE,
|
||||
|
||||
|
42054
src/data_scanner_re.cc
42054
src/data_scanner_re.cc
File diff suppressed because it is too large
Load Diff
@ -114,6 +114,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
|
||||
SPACE = [ \t\r];
|
||||
ALPHA = [a-zA-Z];
|
||||
ESC = "\x1b";
|
||||
NUM = [0-9];
|
||||
ALPHANUM = [a-zA-Z0-9_];
|
||||
EOF = "\x00";
|
||||
@ -137,7 +138,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
|
||||
EOF { return nonstd::nullopt; }
|
||||
|
||||
("u"|"r")?'"'('\\'.|[^\x00"\\]|'""')*'"' {
|
||||
("u"|"r")?'"'('\\'.|[^\x00\x1b"\\]|'""')*'"' {
|
||||
CAPTURE(DT_QUOTED_STRING);
|
||||
switch (this->ds_input[cap_inner.c_begin]) {
|
||||
case 'u':
|
||||
@ -152,7 +153,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
[a-qstv-zA-QSTV-Z]"'" {
|
||||
CAPTURE(DT_WORD);
|
||||
}
|
||||
("u"|"r")?"'"('\\'.|"''"|[^\x00'\\])*"'"/[^sS] {
|
||||
("u"|"r")?"'"('\\'.|"''"|[^\x00\x1b'\\])*"'"/[^sS] {
|
||||
CAPTURE(DT_QUOTED_STRING);
|
||||
switch (this->ds_input[cap_inner.c_begin]) {
|
||||
case 'u':
|
||||
@ -164,7 +165,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
cap_inner.c_end -= 1;
|
||||
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
|
||||
}
|
||||
[a-zA-Z0-9]+":/""/"?[^\x00\r\n\t '"[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); }
|
||||
[a-zA-Z0-9]+":/""/"?[^\x00\x1b\r\n\t '"[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); }
|
||||
("/"|"./"|"../"|[A-Z]":\\"|"\\\\")("Program Files"(" (x86)")?)?[a-zA-Z0-9_\.\-\~/\\!@#$%^&*()]* { RET(DT_PATH); }
|
||||
(SPACE|NUM)NUM":"NUM{2}/[^:] { RET(DT_TIME); }
|
||||
(SPACE|NUM)NUM?":"NUM{2}":"NUM{2}("."NUM{3,6})?/[^:] { RET(DT_TIME); }
|
||||
@ -200,6 +201,10 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
RET(DT_H1);
|
||||
}
|
||||
|
||||
ESC"["[0-9=;?]*[a-zA-Z] {
|
||||
RET(DT_CSI);
|
||||
}
|
||||
|
||||
":" { RET(DT_COLON); }
|
||||
"=" { RET(DT_EQUALS); }
|
||||
"," { RET(DT_COMMA); }
|
||||
@ -237,7 +242,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2()
|
||||
|
||||
("re-")?[a-zA-Z][a-z']+/([\r\n\t \(\)!\*:;'\"\?,]|[\.\!,\?]SPACE|EOF) { RET(DT_WORD); }
|
||||
|
||||
[^\x00"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\.\\][^\x00"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]*("::"[^\x00"; \r\n\t:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]+)* {
|
||||
[^\x00\x1b"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\.\\][^\x00\x1b"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]*("::"[^\x00\x1b"; \r\n\t:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]+)* {
|
||||
RET(DT_SYMBOL);
|
||||
}
|
||||
|
||||
|
@ -651,21 +651,22 @@ line_buffer::load_next_buffer()
|
||||
|
||||
auto before = line_start - this->lb_alt_buffer->begin();
|
||||
auto remaining = this->lb_alt_buffer.value().size() - before;
|
||||
auto utf8_end = is_utf8((unsigned char*) line_start,
|
||||
auto utf_scan_res = is_utf8((unsigned char*) line_start,
|
||||
remaining,
|
||||
&msg,
|
||||
&faulty_bytes,
|
||||
'\n');
|
||||
if (msg != nullptr) {
|
||||
lf = (char*) memchr(line_start, '\n', remaining);
|
||||
utf8_end = lf - line_start;
|
||||
utf_scan_res.usr_end = lf - line_start;
|
||||
valid_utf = false;
|
||||
}
|
||||
if (utf8_end >= 0) {
|
||||
lf = line_start + utf8_end;
|
||||
if (utf_scan_res.usr_end >= 0) {
|
||||
lf = line_start + utf_scan_res.usr_end;
|
||||
}
|
||||
this->lb_alt_line_starts.emplace_back(before);
|
||||
this->lb_alt_line_is_utf.emplace_back(valid_utf);
|
||||
this->lb_alt_line_has_ansi.emplace_back(utf_scan_res.usr_has_ansi);
|
||||
|
||||
if (lf != nullptr) {
|
||||
line_start = lf + 1;
|
||||
@ -727,6 +728,8 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length)
|
||||
this->lb_alt_line_starts.clear();
|
||||
this->lb_line_is_utf = std::move(this->lb_alt_line_is_utf);
|
||||
this->lb_alt_line_is_utf.clear();
|
||||
this->lb_line_has_ansi = std::move(this->lb_alt_line_has_ansi);
|
||||
this->lb_alt_line_has_ansi.clear();
|
||||
this->lb_stats.s_used_preloads += 1;
|
||||
}
|
||||
if (this->in_range(start) && this->in_range(start + max_length - 1)) {
|
||||
@ -1045,7 +1048,7 @@ line_buffer::load_next_line(file_range prev_line)
|
||||
const char* msg;
|
||||
int faulty_bytes;
|
||||
|
||||
utf8_end = is_utf8((unsigned char*) line_start,
|
||||
auto scan_res = is_utf8((unsigned char*) line_start,
|
||||
retval.li_file_range.fr_size,
|
||||
&msg,
|
||||
&faulty_bytes,
|
||||
@ -1055,7 +1058,10 @@ line_buffer::load_next_line(file_range prev_line)
|
||||
line_start, '\n', retval.li_file_range.fr_size);
|
||||
utf8_end = lf - line_start;
|
||||
retval.li_valid_utf = false;
|
||||
} else {
|
||||
utf8_end = scan_res.usr_end;
|
||||
}
|
||||
retval.li_has_ansi = scan_res.usr_has_ansi;
|
||||
}
|
||||
|
||||
if (utf8_end >= 0) {
|
||||
|
@ -54,6 +54,7 @@ struct line_info {
|
||||
file_range li_file_range;
|
||||
bool li_partial{false};
|
||||
bool li_valid_utf{true};
|
||||
bool li_has_ansi{false};
|
||||
};
|
||||
|
||||
/**
|
||||
@ -320,6 +321,7 @@ private:
|
||||
nonstd::optional<auto_buffer> lb_alt_buffer;
|
||||
std::vector<uint32_t> lb_alt_line_starts;
|
||||
std::vector<bool> lb_alt_line_is_utf;
|
||||
std::vector<bool> lb_alt_line_has_ansi;
|
||||
std::future<bool> lb_loader_future;
|
||||
nonstd::optional<file_off_t> lb_loader_file_offset;
|
||||
|
||||
@ -342,6 +344,7 @@ private:
|
||||
|
||||
std::vector<uint32_t> lb_line_starts;
|
||||
std::vector<bool> lb_line_is_utf;
|
||||
std::vector<bool> lb_line_has_ansi;
|
||||
stats lb_stats;
|
||||
|
||||
nonstd::optional<auto_fd> lb_cached_fd;
|
||||
|
@ -129,7 +129,7 @@ log_data_helper::parse_line(content_line_t line, bool allow_middle)
|
||||
pugi::xpath_query query("//*");
|
||||
auto node_set = doc.select_nodes(query);
|
||||
|
||||
for (auto& xpath_node : node_set) {
|
||||
for (const auto& xpath_node : node_set) {
|
||||
auto node_path = lnav::pugixml::get_actual_path(
|
||||
xpath_node.node());
|
||||
for (auto& attr : xpath_node.node().attributes()) {
|
||||
@ -175,10 +175,8 @@ log_data_helper::get_line_bounds(size_t& line_index_out,
|
||||
|
||||
line_end_index_out = 0;
|
||||
do {
|
||||
const char* line_end;
|
||||
|
||||
line_index_out = line_end_index_out;
|
||||
line_end = (const char*) memchr(
|
||||
const auto* line_end = (const char*) memchr(
|
||||
this->ldh_line_values.lvv_sbr.get_data() + line_index_out + 1,
|
||||
'\n',
|
||||
this->ldh_line_values.lvv_sbr.length() - line_index_out - 1);
|
||||
|
@ -194,6 +194,10 @@ public:
|
||||
|
||||
bool is_valid_utf() const { return this->ll_valid_utf; }
|
||||
|
||||
void set_has_ansi(bool v) { this->ll_has_ansi = v; }
|
||||
|
||||
bool has_ansi() const { return this->ll_has_ansi; }
|
||||
|
||||
/** @param l The logging level. */
|
||||
void set_level(log_level_t l) { this->ll_level = l; };
|
||||
|
||||
@ -293,7 +297,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
file_off_t ll_offset;
|
||||
file_off_t ll_offset : 63;
|
||||
uint8_t ll_has_ansi : 1;
|
||||
time_t ll_time;
|
||||
unsigned int ll_millis : 10;
|
||||
unsigned int ll_opid : 6;
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "base/ansi_scrubber.hh"
|
||||
#include "base/fs_util.hh"
|
||||
#include "base/injector.hh"
|
||||
#include "base/string_util.hh"
|
||||
@ -309,6 +310,7 @@ logfile::process_prefix(shared_buffer_ref& sbr,
|
||||
case log_format::SCAN_MATCH: {
|
||||
if (!this->lf_index.empty()) {
|
||||
this->lf_index.back().set_valid_utf(li.li_valid_utf);
|
||||
this->lf_index.back().set_has_ansi(li.li_has_ansi);
|
||||
}
|
||||
if (prescan_size > 0 && this->lf_index.size() >= prescan_size
|
||||
&& prescan_time != this->lf_index[prescan_size - 1].get_time())
|
||||
@ -369,6 +371,7 @@ logfile::process_prefix(shared_buffer_ref& sbr,
|
||||
last_mod,
|
||||
last_opid);
|
||||
this->lf_index.back().set_valid_utf(li.li_valid_utf);
|
||||
this->lf_index.back().set_has_ansi(li.li_has_ansi);
|
||||
break;
|
||||
}
|
||||
case log_format::SCAN_INCOMPLETE:
|
||||
@ -582,6 +585,17 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
|
||||
|
||||
auto sbr = read_result.unwrap();
|
||||
sbr.rtrim(is_line_ending);
|
||||
|
||||
if (li.li_has_ansi) {
|
||||
auto tmp_line = sbr.to_string_fragment().to_string();
|
||||
|
||||
scrub_ansi_string(tmp_line, nullptr);
|
||||
memcpy(sbr.get_writable_data(),
|
||||
tmp_line.c_str(),
|
||||
tmp_line.length());
|
||||
sbr.narrow(0, tmp_line.length());
|
||||
}
|
||||
|
||||
this->lf_longest_line
|
||||
= std::max(this->lf_longest_line, sbr.length());
|
||||
this->lf_partial_line = li.li_partial;
|
||||
|
@ -197,6 +197,9 @@ logfile_sub_source::text_value_for_line(textview_curses& tc,
|
||||
= this->lss_token_file->read_line(this->lss_token_line)
|
||||
.map([](auto sbr) { return to_string(sbr); })
|
||||
.unwrapOr({});
|
||||
if (this->lss_token_line->has_ansi()) {
|
||||
scrub_ansi_string(this->lss_token_value, &this->lss_token_attrs);
|
||||
}
|
||||
}
|
||||
this->lss_token_shift_start = 0;
|
||||
this->lss_token_shift_size = 0;
|
||||
|
@ -267,14 +267,14 @@ parse(const string_fragment& sf, event_handler& eh)
|
||||
const char* utf8_errmsg = nullptr;
|
||||
int utf8_faulty_bytes = 0;
|
||||
|
||||
auto utf8_erroff = is_utf8((unsigned char*) sf.data(),
|
||||
auto scan_res = is_utf8((unsigned char*) sf.data(),
|
||||
sf.length(),
|
||||
&utf8_errmsg,
|
||||
&utf8_faulty_bytes);
|
||||
if (utf8_errmsg != nullptr) {
|
||||
return Err(
|
||||
fmt::format(FMT_STRING("file has invalid UTF-8 at offset {}: {}"),
|
||||
utf8_erroff,
|
||||
scan_res.usr_end,
|
||||
utf8_errmsg));
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
|
||||
#include "byte_array.hh"
|
||||
#include "data_scanner.hh"
|
||||
#include "doctest/doctest.h"
|
||||
#include "lnav_config.hh"
|
||||
#include "lnav_util.hh"
|
||||
@ -148,10 +149,7 @@ class my_path_source : public unique_path_source {
|
||||
public:
|
||||
explicit my_path_source(ghc::filesystem::path p) : mps_path(std::move(p)) {}
|
||||
|
||||
ghc::filesystem::path get_path() const override
|
||||
{
|
||||
return this->mps_path;
|
||||
}
|
||||
ghc::filesystem::path get_path() const override { return this->mps_path; }
|
||||
|
||||
ghc::filesystem::path mps_path;
|
||||
};
|
||||
@ -214,3 +212,20 @@ TEST_CASE("user_message to json")
|
||||
|
||||
CHECK(json == json2);
|
||||
}
|
||||
|
||||
/*
 * Checks that data_scanner splits text wrapped in ANSI CSI sequences into
 * three tokens: DT_CSI, DT_SYMBOL, DT_CSI.
 *
 * NOTE(review): tokenize2() returns a nonstd::optional, and each result is
 * dereferenced below without first checking that it holds a value.  If the
 * scanner ever returned nullopt here the test would dereference an empty
 * optional rather than fail cleanly -- consider a REQUIRE on the result
 * before each pair of CHECKs; confirm against the doctest macros in use.
 */
TEST_CASE("data_scanner CSI")
|
||||
{
|
||||
// "Hello" wrapped in ESC[32m ... ESC[0m.
static const char INPUT[] = "\x1b[32mHello\x1b[0m";
|
||||
|
||||
data_scanner ds(string_fragment::from_const(INPUT));
|
||||
|
||||
// First token: the leading CSI sequence, captured in full.
auto tok_res = ds.tokenize2();
|
||||
CHECK(tok_res->tr_token == DT_CSI);
|
||||
CHECK(tok_res->to_string() == "\x1b[32m");
|
||||
// Second token: the text, with no escape bytes attached to it.
tok_res = ds.tokenize2();
|
||||
CHECK(tok_res->tr_token == DT_SYMBOL);
|
||||
CHECK(tok_res->to_string() == "Hello");
|
||||
// Third token: the trailing CSI sequence.
tok_res = ds.tokenize2();
|
||||
CHECK(tok_res->tr_token == DT_CSI);
|
||||
CHECK(tok_res->to_string() == "\x1b[0m");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user