[logfile] report utf error location

Related to #1156
pull/1170/head
Tim Stack 11 months ago
parent 5d915796fa
commit e0ff4434a9

@ -37,7 +37,7 @@
struct utf8_scan_result {
const char* usr_message{nullptr};
size_t usr_faulty_bytes{0};
string_fragment usr_valid_frag;
string_fragment usr_valid_frag{string_fragment::invalid()};
nonstd::optional<string_fragment> usr_remaining;
bool usr_has_ansi{false};

@ -1084,8 +1084,7 @@ line_buffer::load_next_line(file_range prev_line)
if (lf != nullptr) {
lf -= 1;
}
retval.li_valid_utf = scan_res.is_valid();
retval.li_has_ansi = scan_res.usr_has_ansi;
retval.li_utf8_scan_result = scan_res;
}
auto got_new_data = old_retval_size != retval.li_file_range.fr_size;
@ -1174,8 +1173,10 @@ line_buffer::load_next_line(file_range prev_line)
(int) retval.li_partial);
#endif
retval.li_file_range.fr_metadata.m_has_ansi = retval.li_has_ansi;
retval.li_file_range.fr_metadata.m_valid_utf = retval.li_valid_utf;
retval.li_file_range.fr_metadata.m_has_ansi
= retval.li_utf8_scan_result.usr_has_ansi;
retval.li_file_range.fr_metadata.m_valid_utf
= retval.li_utf8_scan_result.is_valid();
return Ok(retval);
}

@ -45,6 +45,7 @@
#include "base/auto_fd.hh"
#include "base/auto_mem.hh"
#include "base/file_range.hh"
#include "base/is_utf8.hh"
#include "base/lnav_log.hh"
#include "base/result.h"
#include "safe/safe.h"
@ -53,8 +54,7 @@
struct line_info {
file_range li_file_range;
bool li_partial{false};
bool li_valid_utf{true};
bool li_has_ansi{false};
utf8_scan_result li_utf8_scan_result{};
};
/**

@ -3205,6 +3205,22 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%'
return EXIT_FAILURE;
}
for (const auto& lf : lnav_data.ld_active_files.fc_files) {
for (const auto& note : lf->get_notes()) {
switch (note.first) {
case logfile::note_type::not_utf: {
auto um = lnav::console::user_message::error(
note.second);
lnav::console::print(stderr, um);
break;
}
default:
break;
}
}
}
for (auto& pair : cmd_results) {
if (pair.first.isErr()) {
lnav::console::print(stderr, pair.first.unwrapErr());

@ -380,8 +380,9 @@ logfile::process_prefix(shared_buffer_ref& sbr,
auto& last_line = this->lf_index.back();
last_line.set_valid_utf(last_line.is_valid_utf()
&& li.li_valid_utf);
last_line.set_has_ansi(last_line.has_ansi() || li.li_has_ansi);
&& li.li_utf8_scan_result.is_valid());
last_line.set_has_ansi(last_line.has_ansi()
|| li.li_utf8_scan_result.usr_has_ansi);
}
if (prescan_size > 0 && this->lf_index.size() >= prescan_size
&& prescan_time != this->lf_index[prescan_size - 1].get_time())
@ -437,8 +438,8 @@ logfile::process_prefix(shared_buffer_ref& sbr,
last_level,
last_mod,
last_opid);
this->lf_index.back().set_valid_utf(li.li_valid_utf);
this->lf_index.back().set_has_ansi(li.li_has_ansi);
this->lf_index.back().set_valid_utf(li.li_utf8_scan_result.is_valid());
this->lf_index.back().set_has_ansi(li.li_utf8_scan_result.usr_has_ansi);
}
return retval;
@ -608,19 +609,37 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
}
prev_range = li.li_file_range;
if (!this->lf_options.loo_non_utf_is_visible && !li.li_valid_utf) {
if (this->lf_format == nullptr
&& !this->lf_options.loo_non_utf_is_visible
&& !li.li_utf8_scan_result.is_valid())
{
log_info("file is not utf, hiding: %s",
this->lf_filename.c_str());
this->lf_indexing = false;
this->lf_options.loo_is_visible = false;
auto note_text = fmt::format(
FMT_STRING("not indexing non-UTF-8 file -- line: "
"{}; column: {}; error: {}"),
this->lf_index.size() + 1,
li.li_utf8_scan_result.usr_valid_frag.sf_end,
li.li_utf8_scan_result.usr_message);
this->lf_notes.writeAccess()->emplace(note_type::not_utf,
"hiding non-UTF-8 file");
note_text);
if (this->lf_logfile_observer != nullptr) {
this->lf_logfile_observer->logfile_indexing(
this->shared_from_this(), 0, 0);
}
break;
}
// A format is set for this file (lf_format is non-null), so an invalid
// UTF-8 line is only reported in the log as "<file>: ... at <line>:<column>".
if (this->lf_format != nullptr
    && !li.li_utf8_scan_result.is_valid())
{
    // The line number is derived from lf_index.size(), an unsigned size
    // type.  Passing it to a "%d" conversion is undefined behavior in a
    // printf-style call, so convert it explicitly and use "%zu".
    log_warning("%s: invalid UTF-8 detected at %zu:%d -- %s",
                this->lf_filename.c_str(),
                static_cast<size_t>(this->lf_index.size() + 1),
                li.li_utf8_scan_result.usr_valid_frag.sf_end,
                li.li_utf8_scan_result.usr_message);
}
size_t old_size = this->lf_index.size();
@ -641,7 +660,7 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
.unwrapOr(text_format_t::TF_UNKNOWN);
log_debug("setting text format to %d", this->lf_text_format);
}
if (!li.li_valid_utf
if (!li.li_utf8_scan_result.is_valid()
&& this->lf_text_format != text_format_t::TF_MARKDOWN
&& this->lf_text_format != text_format_t::TF_LOG)
{
@ -661,7 +680,9 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
auto sbr = read_result.unwrap();
sbr.rtrim(is_line_ending);
if (li.li_valid_utf && li.li_has_ansi) {
if (li.li_utf8_scan_result.is_valid()
&& li.li_utf8_scan_result.usr_has_ansi)
{
sbr.erase_ansi();
}

@ -157,9 +157,9 @@ main(int argc, char* argv[])
auto& root_formats = log_format::get_root_formats();
std::vector<std::shared_ptr<log_format>>::iterator iter;
std::vector<logline> index;
if (is_log) {
std::vector<logline> index;
logfile_open_options loo;
auto open_res = logfile::open(argv[lpc], loo);
auto lf = open_res.unwrap();

@ -167,14 +167,15 @@ main(int argc, char* argv[])
auto sbr = read_result.unwrap();
if (!li.li_valid_utf) {
if (!li.li_utf8_scan_result.is_valid()) {
scrub_to_utf8(sbr.get_writable_data(), sbr.length());
}
printf("%.*s", (int) sbr.length(), sbr.get_data());
if ((off_t) (li.li_file_range.fr_offset
+ li.li_file_range.fr_size)
< offset) {
< offset)
{
printf("\n");
}
last_range = li.li_file_range;

Loading…
Cancel
Save