diff --git a/TESTS_ENVIRONMENT.in b/TESTS_ENVIRONMENT.in index 48aa143f..4b0e10fd 100644 --- a/TESTS_ENVIRONMENT.in +++ b/TESTS_ENVIRONMENT.in @@ -37,9 +37,6 @@ export TSHARK_CMD LIBARCHIVE_LIBS="@LIBARCHIVE_LIBS@" export LIBARCHIVE_LIBS -HOME="${top_builddir}/test" -export HOME - # The full path of the test case test_file=$1 # The base name of the test case @@ -47,6 +44,12 @@ test_file_base=`basename $1` # The current test number for shell based tests. test_num=0 +HOME="${top_builddir}/test/cfg/${test_file_base}" +export HOME + +rm -rf "${top_builddir}/test/cfg/${test_file_base}" +mkdir -p ${HOME} + test_hash="" lnav_test="${top_builddir}/src/lnav-test" diff --git a/src/formats/java_log.json b/src/formats/java_log.json index b5e89e2c..4a2338ce 100644 --- a/src/formats/java_log.json +++ b/src/formats/java_log.json @@ -40,6 +40,9 @@ }, "level-thread-class": { "pattern": "(?\\d{4}-\\d{2}-\\d{2}[ T]\\d{2}:\\d{2}:\\d{2}[,\\.]\\d{3}) (?\\w+)\\s+\\[(?[^\\]]+)\\] (?[^:]+): (?.*)" + }, + "level-class-src": { + "pattern": "(?\\d{4}-\\d{2}-\\d{2}[ T]\\d{2}:\\d{2}:\\d{2}[,\\.]\\d{3}) (?\\w+)\\s+(?[\\.\\w]+) \\((?[^:]+):(?[^(]+)\\((?\\d+)\\)\\) - (?.*)" } }, "level-field": "level", @@ -166,6 +169,9 @@ }, { "line": "2024-04-23 09:17:27,871 INFO [main] server.Server: jetty-9.4.50.v20221201; built: 2022-12-01T22:07:03.915Z; git: da9a0b30691a45daf90a9f17b5defa2f1434f882; jvm 11.0.21+9-LTS" + }, + { + "line": "2024-04-23 09:17:33,457 INFO namenode.NameNode (LogAdapter.java:info(51)) - registered UNIX signal handlers for [TERM, HUP, INT]" } ] } diff --git a/src/log_format.hh b/src/log_format.hh index 227369f7..dff2a2a5 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -135,8 +135,7 @@ struct logline_value_meta { logline_value_meta(intern_string_t name, value_kind_t kind, column_t col = external_column{}, - const std::optional& format - = std::nullopt) + const std::optional& format = std::nullopt) : lvm_name(name), lvm_kind(kind), lvm_column(col), lvm_format(format) { } @@ -329,9 +328,9 @@ struct logline_value_stats { }; struct logline_value_cmp { - explicit logline_value_cmp( - const intern_string_t* name = nullptr, - std::optional col = std::nullopt) + explicit logline_value_cmp(const intern_string_t* name = nullptr, + std::optional col + = std::nullopt) : lvc_name(name), lvc_column(col) { } @@ -567,6 +566,7 @@ public: }; std::string lf_description; + log_format* lf_root_format{this}; uint8_t lf_mod_index{0}; bool lf_multiline{true}; bool lf_structured{false}; diff --git a/src/logfile.cc b/src/logfile.cc index 3bc1a166..11784601 100644 --- a/src/logfile.cc +++ b/src/logfile.cc @@ -134,7 +134,6 @@ logfile::open(ghc::filesystem::path filename, lf->lf_valid_filename = false; } - lf->lf_content_id = hasher().update(lf->lf_filename).to_string(); lf->lf_line_buffer.set_fd(lf_fd); lf->lf_index.reserve(INDEX_RESERVE_INCREMENT); @@ -186,6 +185,7 @@ logfile::open(ghc::filesystem::path filename, } lf->file_options_have_changed(); + lf->lf_content_id = hasher().update(lf->lf_filename).to_string(); ensure(lf->invariant()); @@ -319,18 +319,20 @@ logfile::process_prefix(shared_buffer_ref& sbr, time_t prescan_time = 0; bool retval = false; - if (this->lf_format.get() != nullptr) { - if (!this->lf_index.empty()) { - prescan_time = this->lf_index[prescan_size - 1].get_time(); - } - /* We've locked onto a format, just use that scanner. */ - found = this->lf_format->scan(*this, this->lf_index, li, sbr, sbc); - } else if (this->lf_options.loo_detect_format) { + if (this->lf_options.loo_detect_format + && (this->lf_format == nullptr || this->lf_index.size() < 250)) + { const auto& root_formats = log_format::get_root_formats(); std::optional> best_match; size_t scan_count = 0; + if (this->lf_format != nullptr) { + best_match = std::make_pair( + this->lf_format.get(), + log_format::scan_match{this->lf_format_quality}); + } + /* * Try each scanner until we get a match. Fortunately, the formats * tend to be sufficiently different that there are few ambiguities... @@ -379,22 +381,41 @@ logfile::process_prefix(shared_buffer_ref& sbr, scan_count += 1; curr->clear(); this->set_format_base_time(curr.get()); - auto scan_res = curr->scan(*this, this->lf_index, li, sbr, sbc); + log_format::scan_result_t scan_res{mapbox::util::no_init{}}; + if (this->lf_format != nullptr + && this->lf_format->lf_root_format == curr.get()) + { + scan_res = this->lf_format->scan( + *this, this->lf_index, li, sbr, sbc); + } else { + scan_res = curr->scan(*this, this->lf_index, li, sbr, sbc); + } scan_res.match( - [this, &curr, &best_match, &prev_index_size]( - const log_format::scan_match& sm) { - if (!best_match - || sm.sm_quality > best_match->second.sm_quality) + [this, + &found, + &curr, + &best_match, + &prev_index_size, + starting_index_size](const log_format::scan_match& sm) { + if (best_match && this->lf_format != nullptr + && this->lf_format->lf_root_format == curr.get()) + { + prev_index_size = this->lf_index.size(); + found = best_match->second; + } else if (!best_match + || sm.sm_quality > best_match->second.sm_quality) { log_info( " scan with format (%s) matched with quality (%d)", curr->get_name().c_str(), sm.sm_quality); if (best_match) { - auto last = this->lf_index.begin(); - std::advance(last, prev_index_size); - this->lf_index.erase(this->lf_index.begin(), last); + auto starting_iter = std::next( + this->lf_index.begin(), starting_index_size); + auto last_iter = std::next(this->lf_index.begin(), + prev_index_size); + this->lf_index.erase(starting_iter, last_iter); } best_match = std::make_pair(curr.get(), sm); prev_index_size = this->lf_index.size(); @@ -415,10 +436,13 @@ logfile::process_prefix(shared_buffer_ref& sbr, "more data required", curr->get_name().c_str()); }, - [curr](const log_format::scan_no_match& snm) { - log_trace(" scan with format (%s) does not match -- %s", - curr->get_name().c_str(), - snm.snm_reason); + [this, curr](const log_format::scan_no_match& snm) { + if (this->lf_format == nullptr) { + log_trace( + " scan with format (%s) does not match -- %s", + curr->get_name().c_str(), + snm.snm_reason); + } }); } @@ -428,7 +452,12 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_options.loo_detect_format = false; } - if (best_match) { + if (best_match + && (this->lf_format == nullptr + || ((this->lf_format->lf_root_format != best_match->first) + && best_match->second.sm_quality + > this->lf_format_quality))) + { auto winner = best_match.value(); auto* curr = winner.first; log_info("%s:%d:log format found -- %s", @@ -438,10 +467,14 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_text_format = text_format_t::TF_LOG; this->lf_format = curr->specialized(); + this->lf_format_quality = winner.second.sm_quality; this->set_format_base_time(this->lf_format.get()); - this->lf_content_id - = hasher().update(sbr.get_data(), sbr.length()).to_string(); + if (this->lf_format->lf_date_time.dts_fmt_lock != -1) { + this->lf_content_id + = hasher().update(sbr.get_data(), sbr.length()).to_string(); + } + this->lf_applicable_taggers.clear(); for (auto& td_pair : this->lf_format->lf_tag_defs) { bool matches = td_pair.second->ftd_paths.empty(); for (const auto& pr : td_pair.second->ftd_paths) { @@ -461,6 +494,7 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_applicable_taggers.emplace_back(td_pair.second); } + this->lf_applicable_partitioners.clear(); for (auto& pd_pair : this->lf_format->lf_partition_defs) { bool matches = pd_pair.second->fpd_paths.empty(); for (const auto& pr : pd_pair.second->fpd_paths) { @@ -502,10 +536,17 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_index[lpc].set_millis(last_line.get_millis()); this->lf_index[lpc].set_level(LEVEL_INVALID); } + retval = true; } found = best_match->second; } + } else if (this->lf_format.get() != nullptr) { + if (!this->lf_index.empty()) { + prescan_time = this->lf_index[prescan_size - 1].get_time(); + } + /* We've locked onto a format, just use that scanner. */ + found = this->lf_format->scan(*this, this->lf_index, li, sbr, sbc); } if (found.is()) { diff --git a/src/logfile.hh b/src/logfile.hh index 8bc1e018..1231e027 100644 --- a/src/logfile.hh +++ b/src/logfile.hh @@ -327,8 +327,8 @@ public: * indexing. * @return True if any new lines were indexed. */ - rebuild_result_t rebuild_index( - std::optional deadline = std::nullopt); + rebuild_result_t rebuild_index(std::optional deadline + = std::nullopt); void reobserve_from(iterator iter); @@ -410,8 +410,8 @@ public: return this->lf_embedded_metadata; } - std::optional> - get_file_options() const + std::optional> get_file_options() + const { return this->lf_file_options; } @@ -445,6 +445,7 @@ private: std::string lf_content_id; struct stat lf_stat {}; std::shared_ptr lf_format; + uint32_t lf_format_quality{0}; std::vector lf_index; time_t lf_index_time{0}; file_off_t lf_index_size{0}; @@ -479,8 +480,7 @@ private: lf_applicable_partitioners; std::map lf_embedded_metadata; size_t lf_file_options_generation{0}; - std::optional> - lf_file_options; + std::optional> lf_file_options; }; class logline_observer { diff --git a/test/Makefile.am b/test/Makefile.am index d53b66d8..f88e19ab 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -583,6 +583,7 @@ test_remote.sh.log: remote/ssh_host_dsa_key remote/ssh_host_rsa_key remote/id_rs distclean-local: $(RM_V)rm -rf remote remote-tmp not:a:remote:dir $(RM_V)rm -rf sessions + $(RM_V)rm -rf cfg $(RM_V)rm -rf tmp $(RM_V)rm -rf piper-tmp $(RM_V)rm -rf rotmp diff --git a/test/expected/test_demux.sh_090b34f84229c02aad7ab2f1cea20cfcf94446d5.out b/test/expected/test_demux.sh_090b34f84229c02aad7ab2f1cea20cfcf94446d5.out index 5ed19c17..8bdc935b 100644 --- a/test/expected/test_demux.sh_090b34f84229c02aad7ab2f1cea20cfcf94446d5.out +++ b/test/expected/test_demux.sh_090b34f84229c02aad7ab2f1cea20cfcf94446d5.out @@ -1,22 +1,22 @@ -2013-06-06T19:13:20.123+0000 * Serving Flask app 'app.py' -2013-06-06T19:13:20.123+0000 web-1 | WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. -2013-06-06T19:13:20.123+0000 * Debug mode: on -2013-06-06T19:13:20.123+0000 * Running on all addresses (0.0.0.0) -2013-06-06T19:13:20.123+0000 * Running on http://127.0.0.1:5000 -2013-06-06T19:13:20.123+0000 web-1 | Press CTRL+C to quit -2013-06-06T19:13:20.123+0000 * Running on http://172.18.0.3:5000 -2013-06-06T19:13:20.123+0000 redis-1 | 1:C 25 Apr 2024 05:32:35.018 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo -2013-06-06T19:13:20.123+0000 * Restarting with stat -2013-06-06T19:13:20.123+0000 * Debugger is active! -2013-06-06T19:13:20.123+0000 * Debugger PIN: 593-762-075 -2013-06-06T19:13:20.123+0000 redis-1 | 1:C 25 Apr 2024 05:32:35.018 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started -2013-06-06T19:13:20.123+0000 redis-1 | 1:C 25 Apr 2024 05:32:35.018 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:32:35.019 * monotonic clock: POSIX clock_gettime -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:32:35.019 * Running mode=standalone, port=6379. -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:32:35.020 * Server initialized -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:32:35.022 * Ready to accept connections tcp -2013-06-06T19:13:20.123+0000 redis-1 | 1:signal-handler (1714024483) Received SIGTERM scheduling shutdown... -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:54:43.946 * User requested shutdown... -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:54:43.946 * Saving the final RDB snapshot before exiting. -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:54:43.948 * DB saved on disk -2013-06-06T19:13:20.123+0000 redis-1 | 1:M 25 Apr 2024 05:54:43.948 # Redis is now ready to exit, bye bye... +2013-06-06T19:13:20.123+0000 * Serving Flask app 'app.py' +2013-06-06T19:13:20.123+0000 * Debug mode: on +2013-06-06T19:13:20.123+0000 WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. +2013-06-06T19:13:20.123+0000 * Running on all addresses (0.0.0.0) +2013-06-06T19:13:20.123+0000 * Running on http://127.0.0.1:5000 +2013-06-06T19:13:20.123+0000 * Running on http://172.18.0.3:5000 +2013-06-06T19:13:20.123+0000 Press CTRL+C to quit +2013-06-06T19:13:20.123+0000 * Restarting with stat +2013-06-06T19:13:20.123+0000 * Debugger is active! +2013-06-06T19:13:20.123+0000 * Debugger PIN: 593-762-075 +1:C 25 Apr 2024 05:32:35.018 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo +1:C 25 Apr 2024 05:32:35.018 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started +1:C 25 Apr 2024 05:32:35.018 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf +1:M 25 Apr 2024 05:32:35.019 * monotonic clock: POSIX clock_gettime +1:M 25 Apr 2024 05:32:35.019 * Running mode=standalone, port=6379. +1:M 25 Apr 2024 05:32:35.020 * Server initialized +1:M 25 Apr 2024 05:32:35.022 * Ready to accept connections tcp +1:signal-handler (25 Apr 2024 05:54:43.000) Received SIGTERM scheduling shutdown... +1:M 25 Apr 2024 05:54:43.946 * User requested shutdown... +1:M 25 Apr 2024 05:54:43.946 * Saving the final RDB snapshot before exiting. +1:M 25 Apr 2024 05:54:43.948 * DB saved on disk +1:M 25 Apr 2024 05:54:43.948 # Redis is now ready to exit, bye bye... diff --git a/test/expected/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out b/test/expected/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out index 1d2ec4e8..d6ecc1ff 100644 --- a/test/expected/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out +++ b/test/expected/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out @@ -1,3 +1,3 @@ -2600-01-03 09:23:00 0: +2600-12-03 09:23:00 0: 00:2 0 00:00:00 0: -2600-01-03 09:23:00 0: +2600-12-03 09:23:00 0: diff --git a/test/expected/test_shlexer.sh_14dd967cb2af90899c9e5e45d00b676b5a3163aa.out b/test/expected/test_shlexer.sh_14dd967cb2af90899c9e5e45d00b676b5a3163aa.out index 0dd40801..3c214f5a 100644 --- a/test/expected/test_shlexer.sh_14dd967cb2af90899c9e5e45d00b676b5a3163aa.out +++ b/test/expected/test_shlexer.sh_14dd967cb2af90899c9e5e45d00b676b5a3163aa.out @@ -2,7 +2,7 @@ til ^ wsp ^ eof ^ -eval -- ../test foo +eval -- ../test/cfg/test_shlexer.sh foo split: - 0 ^ -- ../test + 0 ^ -- ../test/cfg/test_shlexer.sh 1 ^-^ -- foo diff --git a/test/expected/test_sql.sh_57edc93426e6767aa44ab2356c55327553dcdc8d.err b/test/expected/test_sql.sh_57edc93426e6767aa44ab2356c55327553dcdc8d.err index 979055d2..a2a41960 100644 --- a/test/expected/test_sql.sh_57edc93426e6767aa44ab2356c55327553dcdc8d.err +++ b/test/expected/test_sql.sh_57edc93426e6767aa44ab2356c55327553dcdc8d.err @@ -1,6 +1,6 @@ ✘ error: no data was redirected to lnav's standard-input  --> command-option:1  | |rename-stdin foo  - --> ../test/.lnav/formats/default/rename-stdin.lnav:7 + --> ../test/cfg/test_sql.sh/.lnav/formats/default/rename-stdin.lnav:7  | ;SELECT raise_error('no data was redirected to lnav''s standard-input')   |  WHERE (SELECT count(1) FROM lnav_file WHERE filepath='stdin') = 0 diff --git a/test/expected/test_sql.sh_7f664c9cda0ae1c48333e21051b5e0eeafd5b4bc.err b/test/expected/test_sql.sh_7f664c9cda0ae1c48333e21051b5e0eeafd5b4bc.err index d0680566..54f307c8 100644 --- a/test/expected/test_sql.sh_7f664c9cda0ae1c48333e21051b5e0eeafd5b4bc.err +++ b/test/expected/test_sql.sh_7f664c9cda0ae1c48333e21051b5e0eeafd5b4bc.err @@ -1,5 +1,5 @@ ✘ error: expecting the new name for stdin as the first argument  --> command-option:1  | |rename-stdin  - --> ../test/.lnav/formats/default/rename-stdin.lnav:6 + --> ../test/cfg/test_sql.sh/.lnav/formats/default/rename-stdin.lnav:6  | ;SELECT raise_error('expecting the new name for stdin as the first argument') WHERE $1 IS NULL