[text_filter] scrub ansi when testing a filter

Related to #1163
This commit is contained in:
Tim Stack 2023-06-09 08:43:14 -07:00
parent f98f1e52c9
commit dd21bdfd82
28 changed files with 304 additions and 286 deletions

View File

@ -196,7 +196,7 @@ struct string_fragment {
return Ok(retval);
}
char operator[](int index) const
const char& operator[](int index) const
{
return this->sf_string[sf_begin + index];
}
@ -283,7 +283,8 @@ struct string_fragment {
this->sf_string, this->sf_begin + begin, this->sf_begin + end};
}
size_t count(char ch) const {
size_t count(char ch) const
{
size_t retval = 0;
for (int lpc = this->sf_begin; lpc < this->sf_end; lpc++) {

View File

@ -60,245 +60,248 @@
error.
*/
utf8_scan_result
is_utf8(const unsigned char* str,
size_t len,
const char** message,
int* faulty_bytes,
nonstd::optional<unsigned char> terminator)
is_utf8(string_fragment str, nonstd::optional<unsigned char> terminator)
{
bool has_ansi = false;
const auto* ustr = str.udata();
utf8_scan_result retval;
ssize_t i = 0;
*message = nullptr;
*faulty_bytes = 0;
while (i < len) {
if (str[i] == '\x1b') {
has_ansi = true;
while (i < str.length()) {
if (ustr[i] == '\x1b') {
retval.usr_has_ansi = true;
}
if (terminator && str[i] == terminator.value()) {
*message = nullptr;
return {i, has_ansi};
if (terminator && ustr[i] == terminator.value()) {
retval.usr_term = i;
break;
}
if (str[i] <= 0x7F) /* 00..7F */ {
if (retval.usr_message != nullptr) {
i += 1;
} else if (str[i] >= 0xC2 && str[i] <= 0xDF) /* C2..DF 80..BF */ {
if (i + 1 < len) /* Expect a 2nd byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) {
*message
continue;
}
retval.usr_valid_end = i;
if (ustr[i] <= 0x7F) /* 00..7F */ {
i += 1;
} else if (ustr[i] >= 0xC2 && ustr[i] <= 0xDF) /* C2..DF 80..BF */ {
if (i + 1 < str.length()) /* Expect a 2nd byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte between C2 and DF, expecting a "
"2nd byte between 80 and BF";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte between C2 and DF, expecting a 2nd "
"byte.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 2;
} else if (str[i] == 0xE0) /* E0 A0..BF 80..BF */ {
if (i + 2 < len) /* Expect a 2nd and 3rd byte */ {
if (str[i + 1] < 0xA0 || str[i + 1] > 0xBF) {
*message
} else if (ustr[i] == 0xE0) /* E0 A0..BF 80..BF */ {
if (i + 2 < str.length()) /* Expect a 2nd and 3rd byte */ {
if (ustr[i + 1] < 0xA0 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte of E0, expecting a 2nd byte "
"between A0 and BF.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte of E0, expecting a 3nd byte "
"between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte of E0, expecting two following "
"bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 3;
} else if (str[i] >= 0xE1 && str[i] <= 0xEC) /* E1..EC 80..BF 80..BF */
} else if (ustr[i] >= 0xE1
&& ustr[i] <= 0xEC) /* E1..EC 80..BF 80..BF */
{
if (i + 2 < len) /* Expect a 2nd and 3rd byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) {
*message
if (i + 2 < str.length()) /* Expect a 2nd and 3rd byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte between E1 and EC, expecting the "
"2nd byte between 80 and BF.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte between E1 and EC, expecting the "
"3rd byte between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte between E1 and EC, expecting two "
"following bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 3;
} else if (str[i] == 0xED) /* ED 80..9F 80..BF */ {
if (i + 2 < len) /* Expect a 2nd and 3rd byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0x9F) {
*message
} else if (ustr[i] == 0xED) /* ED 80..9F 80..BF */ {
if (i + 2 < str.length()) /* Expect a 2nd and 3rd byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0x9F) {
retval.usr_message
= "After a first byte of ED, expecting 2nd byte "
"between 80 and 9F.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte of ED, expecting 3rd byte "
"between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte of ED, expecting two following "
"bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 3;
} else if (str[i] >= 0xEE && str[i] <= 0xEF) /* EE..EF 80..BF 80..BF */
} else if (ustr[i] >= 0xEE
&& ustr[i] <= 0xEF) /* EE..EF 80..BF 80..BF */
{
if (i + 2 < len) /* Expect a 2nd and 3rd byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) {
*message
if (i + 2 < str.length()) /* Expect a 2nd and 3rd byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte between EE and EF, expecting 2nd "
"byte between 80 and BF.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte between EE and EF, expecting 3rd "
"byte between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte between EE and EF, two following "
"bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 3;
} else if (str[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */ {
if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ {
if (str[i + 1] < 0x90 || str[i + 1] > 0xBF) {
*message
} else if (ustr[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */ {
if (i + 3 < str.length()) /* Expect a 2nd, 3rd 3th byte */ {
if (ustr[i + 1] < 0x90 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte of F0, expecting 2nd byte "
"between 90 and BF.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte of F0, expecting 3rd byte "
"between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
*message
if (ustr[i + 3] < 0x80 || ustr[i + 3] > 0xBF) {
retval.usr_message
= "After a first byte of F0, expecting 4th byte "
"between 80 and BF.";
*faulty_bytes = 4;
return {i, has_ansi};
retval.usr_faulty_bytes = 4;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte of F0, expecting three following "
"bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 4;
} else if (str[i] >= 0xF1
&& str[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */
} else if (ustr[i] >= 0xF1
&& ustr[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */
{
if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) {
*message
if (i + 3 < str.length()) /* Expect a 2nd, 3rd 3th byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0xBF) {
retval.usr_message
= "After a first byte of F1, F2, or F3, expecting a "
"2nd byte between 80 and BF.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte of F1, F2, or F3, expecting a "
"3rd byte between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
*message
if (ustr[i + 3] < 0x80 || ustr[i + 3] > 0xBF) {
retval.usr_message
= "After a first byte of F1, F2, or F3, expecting a "
"4th byte between 80 and BF.";
*faulty_bytes = 4;
return {i, has_ansi};
retval.usr_faulty_bytes = 4;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte of F1, F2, or F3, expecting three "
"following bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 4;
} else if (str[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */ {
if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ {
if (str[i + 1] < 0x80 || str[i + 1] > 0x8F) {
*message
} else if (ustr[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */ {
if (i + 3 < str.length()) /* Expect a 2nd, 3rd 3th byte */ {
if (ustr[i + 1] < 0x80 || ustr[i + 1] > 0x8F) {
retval.usr_message
= "After a first byte of F4, expecting 2nd byte "
"between 80 and 8F.";
*faulty_bytes = 2;
return {i, has_ansi};
retval.usr_faulty_bytes = 2;
continue;
}
if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) {
*message
if (ustr[i + 2] < 0x80 || ustr[i + 2] > 0xBF) {
retval.usr_message
= "After a first byte of F4, expecting 3rd byte "
"between 80 and BF.";
*faulty_bytes = 3;
return {i, has_ansi};
retval.usr_faulty_bytes = 3;
continue;
}
if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) {
*message
if (ustr[i + 3] < 0x80 || ustr[i + 3] > 0xBF) {
retval.usr_message
= "After a first byte of F4, expecting 4th byte "
"between 80 and BF.";
*faulty_bytes = 4;
return {i, has_ansi};
retval.usr_faulty_bytes = 4;
continue;
}
} else {
*message
retval.usr_message
= "After a first byte of F4, expecting three following "
"bytes.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
i += 4;
} else {
*message
retval.usr_message
= "Expecting bytes in the following ranges: 00..7F C2..F4.";
*faulty_bytes = 1;
return {i, has_ansi};
retval.usr_faulty_bytes = 1;
continue;
}
}
return {-1, has_ansi};
return retval;
}

View File

@ -31,17 +31,28 @@
#include <stdlib.h>
#include <sys/types.h>
#include "intern_string.hh"
#include "optional.hpp"
/**
 * Result of scanning a fragment with is_utf8().
 *
 * Offsets stored here index into the fragment that was scanned (term_ptr()
 * below uses usr_term as an index into the fragment it is given).
 */
struct utf8_scan_result {
// NOTE(review): this page shows the field next to the members that appear to
// replace it; the callers visible in this change read usr_valid_end and
// usr_term instead -- confirm whether usr_end still exists after the commit.
ssize_t usr_end{0};
// Description of the invalid byte sequence that was found, or nullptr when
// no problem was recorded (see is_valid()).
const char* usr_message{nullptr};
// Byte count recorded together with usr_message; scrub_to_utf8() overwrites
// this many bytes, starting at usr_valid_end, with '?'.
size_t usr_faulty_bytes{0};
// Offset of the most recent sequence the scanner started examining before
// any error was recorded; on failure this is where the bad sequence begins.
ssize_t usr_valid_end{0};
// Offset of the terminator byte, present only when a terminator was
// requested and encountered during the scan.
nonstd::optional<ssize_t> usr_term;
// True when an escape byte (0x1b) was seen while scanning.
bool usr_has_ansi{false};
// Returns the address of the terminator byte inside `frag`, or nullptr when
// no terminator offset was recorded.  Callers in this change pass the same
// fragment that was handed to is_utf8().
const char* term_ptr(const string_fragment& frag) const
{
if (this->usr_term) {
return &frag[this->usr_term.value()];
} else {
return nullptr;
}
}
// A scan is valid exactly when no error message was recorded.
bool is_valid() const { return this->usr_message == nullptr; }
};
utf8_scan_result is_utf8(const unsigned char* str,
size_t len,
const char** message,
int* faulty_bytes,
utf8_scan_result is_utf8(string_fragment frag,
nonstd::optional<unsigned char> terminator
= nonstd::nullopt);

View File

@ -41,18 +41,15 @@
void
scrub_to_utf8(char* buffer, size_t length)
{
const char* msg;
int faulty_bytes;
while (true) {
auto scan_res
= is_utf8((unsigned char*) buffer, length, &msg, &faulty_bytes);
auto frag = string_fragment::from_bytes(buffer, length);
auto scan_res = is_utf8(frag);
if (msg == nullptr) {
if (scan_res.is_valid()) {
break;
}
for (int lpc = 0; lpc < faulty_bytes; lpc++) {
buffer[scan_res.usr_end + lpc] = '?';
for (int lpc = 0; lpc < scan_res.usr_faulty_bytes; lpc++) {
buffer[scan_res.usr_valid_end + lpc] = '?';
}
}
}

View File

@ -36,7 +36,7 @@ void
line_filter_observer::logline_new_lines(const logfile& lf,
logfile::const_iterator ll_begin,
logfile::const_iterator ll_end,
shared_buffer_ref& sbr)
const shared_buffer_ref& sbr)
{
size_t offset = std::distance(lf.begin(), ll_begin);
@ -48,9 +48,11 @@ line_filter_observer::logline_new_lines(const logfile& lf,
}
for (; ll_begin != ll_end; ++ll_begin) {
auto sbr_copy = sbr;
if (lf.get_format() != nullptr) {
lf.get_format()->get_subline(*ll_begin, sbr);
lf.get_format()->get_subline(*ll_begin, sbr_copy);
}
sbr_copy.erase_ansi();
for (auto& filter : this->lfo_filter_stack) {
if (filter->lf_deleted) {
continue;
@ -58,7 +60,7 @@ line_filter_observer::logline_new_lines(const logfile& lf,
if (offset
>= this->lfo_filter_state.tfs_filter_count[filter->get_index()])
{
filter->add_line(this->lfo_filter_state, ll_begin, sbr);
filter->add_line(this->lfo_filter_state, ll_begin, sbr_copy);
}
}
}

View File

@ -52,7 +52,7 @@ public:
void logline_new_lines(const logfile& lf,
logfile::const_iterator ll_begin,
logfile::const_iterator ll_end,
shared_buffer_ref& sbr) override;
const shared_buffer_ref& sbr) override;
void logline_eof(const logfile& lf) override;

View File

@ -691,31 +691,16 @@ line_buffer::load_next_buffer()
// log_debug("END preload read");
if (start > this->lb_last_line_offset) {
auto* line_start = this->lb_alt_buffer.value().begin();
const auto* line_start = this->lb_alt_buffer.value().begin();
do {
const char* msg = nullptr;
int faulty_bytes = 0;
bool valid_utf = true;
char* lf = nullptr;
auto before = line_start - this->lb_alt_buffer->begin();
auto remaining = this->lb_alt_buffer.value().size() - before;
auto utf_scan_res = is_utf8((unsigned char*) line_start,
remaining,
&msg,
&faulty_bytes,
'\n');
if (msg != nullptr) {
lf = (char*) memchr(line_start, '\n', remaining);
utf_scan_res.usr_end = lf - line_start;
valid_utf = false;
}
if (utf_scan_res.usr_end >= 0) {
lf = line_start + utf_scan_res.usr_end;
}
auto frag = string_fragment::from_bytes(line_start, remaining);
auto utf_scan_res = is_utf8(frag, '\n');
auto lf = utf_scan_res.term_ptr(frag);
this->lb_alt_line_starts.emplace_back(before);
this->lb_alt_line_is_utf.emplace_back(valid_utf);
this->lb_alt_line_is_utf.emplace_back(utf_scan_res.is_valid());
this->lb_alt_line_has_ansi.emplace_back(utf_scan_res.usr_has_ansi);
if (lf != nullptr) {
@ -1095,22 +1080,11 @@ line_buffer::load_next_line(file_range prev_line)
}
if (!found_in_cache) {
const char* msg;
int faulty_bytes;
auto scan_res = is_utf8((unsigned char*) line_start,
retval.li_file_range.fr_size,
&msg,
&faulty_bytes,
'\n');
if (msg != nullptr) {
lf = (char*) memchr(
line_start, '\n', retval.li_file_range.fr_size);
utf8_end = lf - line_start;
retval.li_valid_utf = false;
} else {
utf8_end = scan_res.usr_end;
}
auto frag = string_fragment::from_bytes(
line_start, retval.li_file_range.fr_size);
auto scan_res = is_utf8(frag, '\n');
utf8_end = scan_res.usr_term.value_or(-1);
retval.li_valid_utf = scan_res.is_valid();
retval.li_has_ansi = scan_res.usr_has_ansi;
}
@ -1205,6 +1179,9 @@ line_buffer::load_next_line(file_range prev_line)
retval.li_file_range.fr_offset,
(int) retval.li_partial);
#endif
retval.li_file_range.fr_metadata.m_has_ansi = retval.li_has_ansi;
retval.li_file_range.fr_metadata.m_valid_utf = retval.li_valid_utf;
return Ok(retval);
}

View File

@ -33,6 +33,7 @@
#include <stdio.h>
#include <string.h>
#include "base/is_utf8.hh"
#include "base/snippet_highlighters.hh"
#include "base/string_util.hh"
#include "command_executor.hh"
@ -418,10 +419,27 @@ struct json_log_userdata {
{
}
/**
 * Fold the line-count result for one field into this parse state.
 *
 * Forwards all arguments to jlu_format->value_line_count() and then
 * accumulates the three parts of the result: the line count is added to
 * jlu_sub_line_count, the ANSI flag is OR-ed into jlu_has_ansi, and
 * jlu_valid_utf is cleared if the value was reported as invalid UTF-8.
 *
 * @param ist The field name.
 * @param top_level Passed through to value_line_count().
 * @param val Optional numeric value, passed through.
 * @param str Optional raw bytes of the value, passed through.
 * @param len Length of `str`, passed through.
 */
void add_sub_lines_for(const intern_string_t ist,
                       bool top_level,
                       nonstd::optional<double> val = nonstd::nullopt,
                       const unsigned char* str = nullptr,
                       ssize_t len = -1)
{
    const auto counted = this->jlu_format->value_line_count(
        ist, top_level, val, str, len);

    this->jlu_sub_line_count += counted.vlcr_count;
    // Validity is sticky: once a value is invalid the flag stays false.
    this->jlu_valid_utf = this->jlu_valid_utf && counted.vlcr_valid_utf;
    // The ANSI flag is sticky the other way: once set it stays set.
    this->jlu_has_ansi = this->jlu_has_ansi || counted.vlcr_has_ansi;
}
external_log_format* jlu_format{nullptr};
const logline* jlu_line{nullptr};
logline* jlu_base_line{nullptr};
int jlu_sub_line_count{1};
bool jlu_has_ansi{false};
bool jlu_valid_utf{true};
yajl_handle jlu_handle{nullptr};
const char* jlu_line_value{nullptr};
size_t jlu_line_size{0};
@ -440,8 +458,7 @@ read_json_null(yajlpp_parse_context* ypc)
json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata;
const intern_string_t field_name = ypc->get_path();
jlu->jlu_sub_line_count
+= jlu->jlu_format->value_line_count(field_name, ypc->is_level(1));
jlu->add_sub_lines_for(field_name, ypc->is_level(1));
return 1;
}
@ -452,8 +469,7 @@ read_json_bool(yajlpp_parse_context* ypc, int val)
json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata;
const intern_string_t field_name = ypc->get_path();
jlu->jlu_sub_line_count
+= jlu->jlu_format->value_line_count(field_name, ypc->is_level(1));
jlu->add_sub_lines_for(field_name, ypc->is_level(1));
return 1;
}
@ -524,42 +540,11 @@ read_json_number(yajlpp_parse_context* ypc,
}
}
jlu->jlu_sub_line_count
+= jlu->jlu_format->value_line_count(field_name,
ypc->is_level(1),
val,
(const unsigned char*) numberVal,
numberLen);
return 1;
}
static int
read_json_double(yajlpp_parse_context* ypc, double val)
{
json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata;
const intern_string_t field_name = ypc->get_path();
if (jlu->jlu_format->lf_timestamp_field == field_name) {
double divisor = jlu->jlu_format->elf_timestamp_divisor;
struct timeval tv;
tv.tv_sec = val / divisor;
tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor);
if (jlu->jlu_format->lf_date_time.dts_local_time) {
struct tm ltm;
localtime_r(&tv.tv_sec, &ltm);
#ifdef HAVE_STRUCT_TM_TM_ZONE
ltm.tm_zone = nullptr;
#endif
ltm.tm_isdst = 0;
tv.tv_sec = tm2sec(&ltm);
}
jlu->jlu_base_line->set_time(tv);
}
jlu->jlu_sub_line_count
+= jlu->jlu_format->value_line_count(field_name, ypc->is_level(1), val);
jlu->add_sub_lines_for(field_name,
ypc->is_level(1),
val,
(const unsigned char*) numberVal,
numberLen);
return 1;
}
@ -573,8 +558,7 @@ json_array_start(void* ctx)
if (ypc->ypc_path_index_stack.size() == 2) {
const intern_string_t field_name = ypc->get_path_fragment_i(0);
jlu->jlu_sub_line_count
+= jlu->jlu_format->value_line_count(field_name, true);
jlu->add_sub_lines_for(field_name, true);
jlu->jlu_sub_start = yajl_get_bytes_consumed(jlu->jlu_handle) - 1;
}
@ -798,6 +782,8 @@ external_log_format::scan(logfile& lf,
ll.set_level((log_level_t) (ll.get_level_and_flags()
| LEVEL_CONTINUED));
}
ll.set_has_ansi(jlu.jlu_has_ansi);
ll.set_valid_utf(jlu.jlu_valid_utf);
dst.emplace_back(ll);
}
} else {
@ -1366,7 +1352,7 @@ read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len)
jlu->jlu_base_line->set_opid(opid);
}
jlu->jlu_sub_line_count += jlu->jlu_format->value_line_count(
jlu->add_sub_lines_for(
field_name, ypc->is_level(1), nonstd::nullopt, str, len);
return 1;
@ -1758,7 +1744,11 @@ external_log_format::get_subline(const logline& ll,
lv.lv_origin.lr_start = this->jlf_cached_line.size() + 2
+ lv.lv_meta.lvm_name.size() + 2;
do {
nl_pos = str.find('\n', curr_pos);
auto frag = string_fragment::from_str_range(
str, curr_pos, str.size());
auto utf_scan_res = is_utf8(frag, '\n');
nl_pos = utf_scan_res.usr_term.value_or(std::string::npos);
if (nl_pos != std::string::npos) {
line_len = nl_pos - curr_pos;
} else {
@ -1817,6 +1807,8 @@ external_log_format::get_subline(const logline& ll,
this->jlf_cached_line.data() + this_off,
next_off - this_off);
}
sbr.get_metadata().m_valid_utf = ll.is_valid_utf();
sbr.get_metadata().m_has_ansi = ll.has_ansi();
this->jlf_cached_sub_range.lr_start = this_off;
this->jlf_cached_sub_range.lr_end = next_off;
this->jlf_line_values.lvv_sbr = sbr;
@ -2944,19 +2936,38 @@ external_log_format::match_mime_type(const file_format_t ff) const
return this->elf_mime_types.count(ff) == 1;
}
long
auto
external_log_format::value_line_count(const intern_string_t ist,
bool top_level,
nonstd::optional<double> val,
const unsigned char* str,
ssize_t len)
ssize_t len) -> value_line_count_result
{
const auto iter = this->elf_value_defs.find(ist);
long line_count
= (str != nullptr) ? std::count(&str[0], &str[len], '\n') + 1 : 1;
value_line_count_result retval;
if (str != nullptr) {
while (len > 0) {
auto frag = string_fragment::from_bytes(str, len);
auto utf_res = is_utf8(frag, '\n');
if (!utf_res.is_valid()) {
retval.vlcr_valid_utf = false;
}
retval.vlcr_has_ansi |= utf_res.usr_has_ansi;
if (!utf_res.usr_term) {
break;
}
retval.vlcr_count += 1;
str += utf_res.usr_term.value() + 1;
len -= utf_res.usr_term.value() + 1;
}
}
if (iter == this->elf_value_defs.end()) {
return (this->jlf_hide_extra || !top_level) ? 0 : line_count;
if (this->jlf_hide_extra || !top_level) {
retval.vlcr_count = 0;
}
return retval;
}
if (iter->second->vd_meta.lvm_values_index) {
@ -2970,7 +2981,8 @@ external_log_format::value_line_count(const intern_string_t ist,
}
}
if (iter->second->vd_meta.is_hidden()) {
return 0;
retval.vlcr_count = 0;
return retval;
}
if (std::find_if(this->jlf_line_format.begin(),
@ -2978,10 +2990,10 @@ external_log_format::value_line_count(const intern_string_t ist,
json_field_cmp(json_log_field::VARIABLE, ist))
!= this->jlf_line_format.end())
{
return line_count - 1;
retval.vlcr_count -= 1;
}
return line_count;
return retval;
}
log_level_t

View File

@ -282,11 +282,18 @@ public:
bool hd_blink{false};
};
long value_line_count(const intern_string_t ist,
bool top_level,
nonstd::optional<double> val = nonstd::nullopt,
const unsigned char* str = nullptr,
ssize_t len = -1);
/**
 * Bundle returned by value_line_count(): how many lines a field value
 * occupies plus what was noticed about its bytes.  The defaults describe a
 * single line with no escape byte seen and no invalid UTF-8 found.
 */
struct value_line_count_result {
    // Number of lines attributed to the value; starts at one.
    size_t vlcr_count = 1;
    // Set when the scanned bytes contained an ANSI escape.
    bool vlcr_has_ansi = false;
    // Cleared when the scanned bytes were not valid UTF-8.
    bool vlcr_valid_utf = true;
};
value_line_count_result value_line_count(const intern_string_t ist,
bool top_level,
nonstd::optional<double> val
= nonstd::nullopt,
const unsigned char* str = nullptr,
ssize_t len = -1);
bool has_value_def(const intern_string_t ist) const
{

View File

@ -326,8 +326,11 @@ logfile::process_prefix(shared_buffer_ref& sbr,
switch (found) {
case log_format::SCAN_MATCH: {
if (!this->lf_index.empty()) {
this->lf_index.back().set_valid_utf(li.li_valid_utf);
this->lf_index.back().set_has_ansi(li.li_has_ansi);
auto& last_line = this->lf_index.back();
last_line.set_valid_utf(last_line.is_valid_utf()
&& li.li_valid_utf);
last_line.set_has_ansi(last_line.has_ansi() || li.li_has_ansi);
}
if (prescan_size > 0 && this->lf_index.size() >= prescan_size
&& prescan_time != this->lf_index[prescan_size - 1].get_time())
@ -607,13 +610,7 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
sbr.rtrim(is_line_ending);
if (li.li_valid_utf && li.li_has_ansi) {
auto tmp_line = sbr.to_string_fragment().to_string();
scrub_ansi_string(tmp_line, nullptr);
memcpy(sbr.get_writable_data(tmp_line.length()),
tmp_line.c_str(),
tmp_line.length());
sbr.narrow(0, tmp_line.length());
sbr.erase_ansi();
}
this->lf_longest_line

View File

@ -453,7 +453,7 @@ public:
virtual void logline_new_lines(const logfile& lf,
logfile::const_iterator ll_begin,
logfile::const_iterator ll_end,
shared_buffer_ref& sbr)
const shared_buffer_ref& sbr)
= 0;
virtual void logline_eof(const logfile& lf) = 0;

View File

@ -43,7 +43,6 @@
#include "command_executor.hh"
#include "config.h"
#include "k_merge_tree.h"
#include "lnav.events.hh"
#include "log_accel.hh"
#include "logfile_sub_source.cfg.hh"
#include "md2attr_line.hh"
@ -221,9 +220,6 @@ logfile_sub_source::text_value_for_line(textview_curses& tc,
(char*) this->lss_token_value.c_str(),
this->lss_token_value.size());
format->annotate(line, this->lss_token_attrs, this->lss_token_values);
if (this->lss_token_line->get_sub_offset() != 0) {
this->lss_token_attrs.clear();
}
if (flags & RF_REWRITE) {
exec_context ec(
&this->lss_token_values, pretty_sql_callback, pretty_pipe_callback);
@ -1892,7 +1888,7 @@ log_location_history::loc_history_forward(vis_line_t current_top)
bool
sql_filter::matches(const logfile& lf,
logfile::const_iterator ll,
shared_buffer_ref& line)
const shared_buffer_ref& line)
{
if (!ll->is_message()) {
return false;

View File

@ -95,7 +95,7 @@ public:
bool matches(const logfile& lf,
logfile::const_iterator ll,
shared_buffer_ref& line) override
const shared_buffer_ref& line) override
{
return this->pf_pcre->find_in(line.to_string_fragment())
.ignore_error()
@ -126,7 +126,7 @@ public:
bool matches(const logfile& lf,
logfile::const_iterator ll,
shared_buffer_ref& line) override;
const shared_buffer_ref& line) override;
std::string to_command() const override;

View File

@ -264,18 +264,12 @@ namespace details {
Result<void, std::string>
parse(const string_fragment& sf, event_handler& eh)
{
const char* utf8_errmsg = nullptr;
int utf8_faulty_bytes = 0;
auto scan_res = is_utf8((unsigned char*) sf.data(),
sf.length(),
&utf8_errmsg,
&utf8_faulty_bytes);
if (utf8_errmsg != nullptr) {
auto scan_res = is_utf8(sf);
if (!scan_res.is_valid()) {
return Err(
fmt::format(FMT_STRING("file has invalid UTF-8 at offset {}: {}"),
scan_res.usr_end,
utf8_errmsg));
scan_res.usr_valid_end,
scan_res.usr_message));
}
MD_PARSER parser = {0};

View File

@ -109,6 +109,8 @@ shared_buffer_ref::shared_buffer_ref(shared_buffer_ref&& other) noexcept
other.sb_data = nullptr;
other.sb_length = 0;
}
this->sb_metadata = other.sb_metadata;
other.sb_metadata = {};
}
bool
@ -148,6 +150,7 @@ shared_buffer_ref::disown()
this->sb_owner = nullptr;
this->sb_data = nullptr;
this->sb_length = 0;
this->sb_metadata = {};
}
void

View File

@ -79,7 +79,7 @@ text_filter::revert_to_last(logfile_filter_state& lfs, size_t rollback_size)
void
text_filter::add_line(logfile_filter_state& lfs,
logfile::const_iterator ll,
shared_buffer_ref& line)
const shared_buffer_ref& line)
{
bool match_state = this->matches(*lfs.tfs_logfile, ll, line);
@ -442,7 +442,6 @@ textview_curses::textview_value_for_row(vis_line_t row, attr_line_t& value_out)
this->tc_sub_source->text_attrs_for_line(*this, row, sa);
scrub_ansi_string(str, &sa);
struct line_range body, orig_line;
body = find_string_attr_range(sa, &SA_BODY);
@ -862,7 +861,7 @@ template class bookmark_vector<vis_line_t>;
bool
empty_filter::matches(const logfile& lf,
logfile::const_iterator ll,
shared_buffer_ref& line)
const shared_buffer_ref& line)
{
return false;
}

View File

@ -119,13 +119,13 @@ public:
void add_line(logfile_filter_state& lfs,
logfile_const_iterator ll,
shared_buffer_ref& line);
const shared_buffer_ref& line);
void end_of_message(logfile_filter_state& lfs);
virtual bool matches(const logfile& lf,
logfile_const_iterator ll,
shared_buffer_ref& line)
const shared_buffer_ref& line)
= 0;
virtual std::string to_command() const = 0;
@ -151,7 +151,7 @@ public:
bool matches(const logfile& lf,
logfile_const_iterator ll,
shared_buffer_ref& line) override;
const shared_buffer_ref& line) override;
std::string to_command() const override;
};

View File

@ -292,8 +292,12 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_json_format.sh_d7362cffc8335c2fe6b6527315de59bd6f5dcc7f.out \
$(srcdir)/%reldir%/test_json_format.sh_dfff27a651650a04d93de9a06ab5480e94ce3a79.err \
$(srcdir)/%reldir%/test_json_format.sh_dfff27a651650a04d93de9a06ab5480e94ce3a79.out \
$(srcdir)/%reldir%/test_json_format.sh_f740026626ab554dacb249762d8be7d6539b8c6e.err \
$(srcdir)/%reldir%/test_json_format.sh_f740026626ab554dacb249762d8be7d6539b8c6e.out \
$(srcdir)/%reldir%/test_json_format.sh_fe19b7ebd349cd689b3f5c22618eab5ce995e68e.err \
$(srcdir)/%reldir%/test_json_format.sh_fe19b7ebd349cd689b3f5c22618eab5ce995e68e.out \
$(srcdir)/%reldir%/test_logfile.sh_05d1505168bf34b89fc0d1a39f1409cfe798119e.err \
$(srcdir)/%reldir%/test_logfile.sh_05d1505168bf34b89fc0d1a39f1409cfe798119e.out \
$(srcdir)/%reldir%/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.err \
$(srcdir)/%reldir%/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out \
$(srcdir)/%reldir%/test_logfile.sh_09bd16e044302f6b121092534708594bdad11b5a.err \
@ -992,8 +996,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \
$(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \
$(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out \
$(srcdir)/%reldir%/test_text_file.sh_7b00f32a3fff7fc2d78a87045ae842e58be88480.out \
$(srcdir)/%reldir%/test_text_file.sh_7b00f32a3fff7fc2d78a87045ae842e58be88480.err \
$(srcdir)/%reldir%/test_text_file.sh_7b00f32a3fff7fc2d78a87045ae842e58be88480.out \
$(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.err \
$(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.out \
$(srcdir)/%reldir%/test_text_file.sh_8b2cd055e6a1db2ed9b2af2a917f8556395fa653.err \

View File

@ -1,5 +1,5 @@
{"ts": "2013-09-06T20:00:48.124817Z", "lvl": "TRACE", "msg": "trace test"}
{"ts": "2013-09-06T20:00:49.124817Z", "lvl": "INFO", "msg": "Starting up service"}
{"ts": "2013-09-06T20:00:49.124817Z", "lvl": "INFO", "msg": "Starting up \u001B[0;32mservice\u001B[0m"}
{"ts": "2013-09-06T22:00:49.124817Z", "lvl": "INFO", "msg": "Shutting down service", "user": "steve@example.com"}
{"ts": "2013-09-06T22:00:59.124817Z", "lvl": "DEBUG5", "msg": "Details..."}
{"ts": "2013-09-06T22:00:59.124817Z", "lvl": "DEBUG4", "msg": "Details..."}

View File

@ -1,7 +1,7 @@
[2013-09-06T20:00:48.124] TRACE trace test
[2013-09-06T20:00:49.124] INFO Starting up service
[2013-09-06T20:00:49.124] INFO Starting up service
[2013-09-06T22:00:49.124] INFO Shutting down service
user: steve@example.com
@ -18,12 +18,12 @@
[2013-09-06T22:01:49.124] STATS 1 beat per second
[2013-09-06T22:01:49.124] WARNING not looking good
[2013-09-06T22:01:49.124] WARNING not looking good
[2013-09-06T22:01:49.124] ERROR looking bad
[2013-09-06T22:01:49.124] ERROR looking bad
[2013-09-06T22:01:49.124] CRITICAL sooo bad
[2013-09-06T22:01:49.124] CRITICAL sooo bad
[2013-09-06T22:01:49.124] FATAL shoot
[2013-09-06T22:01:49.124] FATAL shoot
 obj: { "field1" : "hi", "field2": 2 }
 arr: ["hi", {"sub1": true}]

View File

@ -1,7 +1,7 @@
[2013-09-06T20:00:48.124] ⋮ trace testbork bork bork
[2013-09-06T20:00:49.124] ⋮ Starting up servicebork bork bork
[2013-09-06T20:00:49.124] ⋮ Starting up servicebork bork bork
[2013-09-06T22:00:49.124] ⋮ Shutting down servicebork bork bork
user: mailto:steve@example.com

View File

@ -0,0 +1,2 @@
[2013-09-06T20:00:49.124] INFO Starting up service

View File

@ -0,0 +1,4 @@
Sep 19 09:24:20 Tims-MacBook-Air MobileDeviceUpdater[17530]: Entered:_AMMuxedDeviceDisconnected, mux-device:1003
Sep 19 09:24:20 Tims-MacBook-Air MobileDeviceUpdater[17530]: Entered:__thr_AMMuxedDeviceDisconnected, mux-device:1003
Sep 19 09:24:20 Tims-MacBook-Air MobileDeviceUpdater[17530]: tid:191f - Mux ID not found in mapping dictionary
Sep 19 09:24:20 Tims-MacBook-Air MobileDeviceUpdater[17530]: tid:191f - Can't handle disconnect with invalid ecid

View File

@ -1,5 +1,5 @@
{"ts": "2013-09-06T20:00:48.124817Z", "lvl": "TRACE", "msg": "trace test"}
{"ts": "2013-09-06T20:00:49.124817Z", "lvl": "INFO", "msg": "Starting up service"}
{"ts": "2013-09-06T20:00:49.124817Z", "lvl": "INFO", "msg": "Starting up \u001B[0;32mservice\u001B[0m"}
{"ts": "2013-09-06T22:00:49.124817Z", "lvl": "INFO", "msg": "Shutting down service", "user": "steve@example.com"}
{"ts": "2013-09-06T22:00:59.124817Z", "lvl": "DEBUG5", "msg": "Details..."}
{"ts": "2013-09-06T22:00:59.124817Z", "lvl": "DEBUG4", "msg": "Details..."}

View File

@ -12,6 +12,11 @@ run_cap_test ${lnav_test} -n \
-I ${test_dir} \
${test_dir}/logfile_json.json
run_cap_test ${lnav_test} -n \
-I ${test_dir} \
-c ':filter-in up service' \
${test_dir}/logfile_json.json
# json log format is not working"
run_cap_test ${lnav_test} -n -I ${test_dir} \
-c ':switch-to-view pretty' \

View File

@ -701,3 +701,7 @@ run_cap_test ${lnav_test} -n \
run_cap_test ${lnav_test} -n \
-c ';SELECT basename(filepath),descriptor,mimetype,content FROM lnav_file_metadata' \
logfile_syslog.1.gz
run_cap_test ${lnav_test} -n \
-c ':filter-in Air Mob' \
${test_dir}/logfile_ansi.1