[logfile] report utf error location

Related to #1156
pull/1170/head
Tim Stack 12 months ago
parent 5d915796fa
commit e0ff4434a9

@ -37,7 +37,7 @@
struct utf8_scan_result { struct utf8_scan_result {
const char* usr_message{nullptr}; const char* usr_message{nullptr};
size_t usr_faulty_bytes{0}; size_t usr_faulty_bytes{0};
string_fragment usr_valid_frag; string_fragment usr_valid_frag{string_fragment::invalid()};
nonstd::optional<string_fragment> usr_remaining; nonstd::optional<string_fragment> usr_remaining;
bool usr_has_ansi{false}; bool usr_has_ansi{false};

@ -1084,8 +1084,7 @@ line_buffer::load_next_line(file_range prev_line)
if (lf != nullptr) { if (lf != nullptr) {
lf -= 1; lf -= 1;
} }
retval.li_valid_utf = scan_res.is_valid(); retval.li_utf8_scan_result = scan_res;
retval.li_has_ansi = scan_res.usr_has_ansi;
} }
auto got_new_data = old_retval_size != retval.li_file_range.fr_size; auto got_new_data = old_retval_size != retval.li_file_range.fr_size;
@ -1174,8 +1173,10 @@ line_buffer::load_next_line(file_range prev_line)
(int) retval.li_partial); (int) retval.li_partial);
#endif #endif
retval.li_file_range.fr_metadata.m_has_ansi = retval.li_has_ansi; retval.li_file_range.fr_metadata.m_has_ansi
retval.li_file_range.fr_metadata.m_valid_utf = retval.li_valid_utf; = retval.li_utf8_scan_result.usr_has_ansi;
retval.li_file_range.fr_metadata.m_valid_utf
= retval.li_utf8_scan_result.is_valid();
return Ok(retval); return Ok(retval);
} }

@ -45,6 +45,7 @@
#include "base/auto_fd.hh" #include "base/auto_fd.hh"
#include "base/auto_mem.hh" #include "base/auto_mem.hh"
#include "base/file_range.hh" #include "base/file_range.hh"
#include "base/is_utf8.hh"
#include "base/lnav_log.hh" #include "base/lnav_log.hh"
#include "base/result.h" #include "base/result.h"
#include "safe/safe.h" #include "safe/safe.h"
@ -53,8 +54,7 @@
struct line_info { struct line_info {
file_range li_file_range; file_range li_file_range;
bool li_partial{false}; bool li_partial{false};
bool li_valid_utf{true}; utf8_scan_result li_utf8_scan_result{};
bool li_has_ansi{false};
}; };
/** /**

@ -3205,6 +3205,22 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%'
return EXIT_FAILURE; return EXIT_FAILURE;
} }
for (const auto& lf : lnav_data.ld_active_files.fc_files) {
for (const auto& note : lf->get_notes()) {
switch (note.first) {
case logfile::note_type::not_utf: {
auto um = lnav::console::user_message::error(
note.second);
lnav::console::print(stderr, um);
break;
}
default:
break;
}
}
}
for (auto& pair : cmd_results) { for (auto& pair : cmd_results) {
if (pair.first.isErr()) { if (pair.first.isErr()) {
lnav::console::print(stderr, pair.first.unwrapErr()); lnav::console::print(stderr, pair.first.unwrapErr());

@ -380,8 +380,9 @@ logfile::process_prefix(shared_buffer_ref& sbr,
auto& last_line = this->lf_index.back(); auto& last_line = this->lf_index.back();
last_line.set_valid_utf(last_line.is_valid_utf() last_line.set_valid_utf(last_line.is_valid_utf()
&& li.li_valid_utf); && li.li_utf8_scan_result.is_valid());
last_line.set_has_ansi(last_line.has_ansi() || li.li_has_ansi); last_line.set_has_ansi(last_line.has_ansi()
|| li.li_utf8_scan_result.usr_has_ansi);
} }
if (prescan_size > 0 && this->lf_index.size() >= prescan_size if (prescan_size > 0 && this->lf_index.size() >= prescan_size
&& prescan_time != this->lf_index[prescan_size - 1].get_time()) && prescan_time != this->lf_index[prescan_size - 1].get_time())
@ -437,8 +438,8 @@ logfile::process_prefix(shared_buffer_ref& sbr,
last_level, last_level,
last_mod, last_mod,
last_opid); last_opid);
this->lf_index.back().set_valid_utf(li.li_valid_utf); this->lf_index.back().set_valid_utf(li.li_utf8_scan_result.is_valid());
this->lf_index.back().set_has_ansi(li.li_has_ansi); this->lf_index.back().set_has_ansi(li.li_utf8_scan_result.usr_has_ansi);
} }
return retval; return retval;
@ -608,19 +609,37 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
} }
prev_range = li.li_file_range; prev_range = li.li_file_range;
if (!this->lf_options.loo_non_utf_is_visible && !li.li_valid_utf) { if (this->lf_format == nullptr
&& !this->lf_options.loo_non_utf_is_visible
&& !li.li_utf8_scan_result.is_valid())
{
log_info("file is not utf, hiding: %s", log_info("file is not utf, hiding: %s",
this->lf_filename.c_str()); this->lf_filename.c_str());
this->lf_indexing = false; this->lf_indexing = false;
this->lf_options.loo_is_visible = false; this->lf_options.loo_is_visible = false;
auto note_text = fmt::format(
FMT_STRING("not indexing non-UTF-8 file -- line: "
"{}; column: {}; error: {}"),
this->lf_index.size() + 1,
li.li_utf8_scan_result.usr_valid_frag.sf_end,
li.li_utf8_scan_result.usr_message);
this->lf_notes.writeAccess()->emplace(note_type::not_utf, this->lf_notes.writeAccess()->emplace(note_type::not_utf,
"hiding non-UTF-8 file"); note_text);
if (this->lf_logfile_observer != nullptr) { if (this->lf_logfile_observer != nullptr) {
this->lf_logfile_observer->logfile_indexing( this->lf_logfile_observer->logfile_indexing(
this->shared_from_this(), 0, 0); this->shared_from_this(), 0, 0);
} }
break; break;
} }
if (this->lf_format != nullptr
&& !li.li_utf8_scan_result.is_valid())
{
log_warning("%s: invalid UTF-8 detected at %d:%d -- %s",
this->lf_filename.c_str(),
this->lf_index.size() + 1,
li.li_utf8_scan_result.usr_valid_frag.sf_end,
li.li_utf8_scan_result.usr_message);
}
size_t old_size = this->lf_index.size(); size_t old_size = this->lf_index.size();
@ -641,7 +660,7 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
.unwrapOr(text_format_t::TF_UNKNOWN); .unwrapOr(text_format_t::TF_UNKNOWN);
log_debug("setting text format to %d", this->lf_text_format); log_debug("setting text format to %d", this->lf_text_format);
} }
if (!li.li_valid_utf if (!li.li_utf8_scan_result.is_valid()
&& this->lf_text_format != text_format_t::TF_MARKDOWN && this->lf_text_format != text_format_t::TF_MARKDOWN
&& this->lf_text_format != text_format_t::TF_LOG) && this->lf_text_format != text_format_t::TF_LOG)
{ {
@ -661,7 +680,9 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline)
auto sbr = read_result.unwrap(); auto sbr = read_result.unwrap();
sbr.rtrim(is_line_ending); sbr.rtrim(is_line_ending);
if (li.li_valid_utf && li.li_has_ansi) { if (li.li_utf8_scan_result.is_valid()
&& li.li_utf8_scan_result.usr_has_ansi)
{
sbr.erase_ansi(); sbr.erase_ansi();
} }

@ -157,9 +157,9 @@ main(int argc, char* argv[])
auto& root_formats = log_format::get_root_formats(); auto& root_formats = log_format::get_root_formats();
std::vector<std::shared_ptr<log_format>>::iterator iter; std::vector<std::shared_ptr<log_format>>::iterator iter;
std::vector<logline> index;
if (is_log) { if (is_log) {
std::vector<logline> index;
logfile_open_options loo; logfile_open_options loo;
auto open_res = logfile::open(argv[lpc], loo); auto open_res = logfile::open(argv[lpc], loo);
auto lf = open_res.unwrap(); auto lf = open_res.unwrap();

@ -167,14 +167,15 @@ main(int argc, char* argv[])
auto sbr = read_result.unwrap(); auto sbr = read_result.unwrap();
if (!li.li_valid_utf) { if (!li.li_utf8_scan_result.is_valid()) {
scrub_to_utf8(sbr.get_writable_data(), sbr.length()); scrub_to_utf8(sbr.get_writable_data(), sbr.length());
} }
printf("%.*s", (int) sbr.length(), sbr.get_data()); printf("%.*s", (int) sbr.length(), sbr.get_data());
if ((off_t) (li.li_file_range.fr_offset if ((off_t) (li.li_file_range.fr_offset
+ li.li_file_range.fr_size) + li.li_file_range.fr_size)
< offset) { < offset)
{
printf("\n"); printf("\n");
} }
last_range = li.li_file_range; last_range = li.li_file_range;

Loading…
Cancel
Save