You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lnav/src/file_collection.cc

426 lines
15 KiB
C++

/**
* Copyright (c) 2020, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file file_collection.cc
*/
#include "config.h"
#include <glob.h>
#include <unordered_map>
#include "base/opt_util.hh"
#include "logfile.hh"
#include "file_collection.hh"
// Guards the accesses to REALPATH_CACHE made in this file.
static std::mutex REALPATH_CACHE_MUTEX;
// Maps a path string, as produced by glob() in expand_filename(), to the
// absolute path that realpath() returned for it.
static std::unordered_map<std::string, std::string> REALPATH_CACHE;
/**
 * Remove a log file from this collection.
 *
 * The file's name is dropped from the realpath cache when the logfile reports
 * a valid file name, or from the watched file names otherwise.  The file is
 * then removed from the list of open files (bumping the generation counter if
 * it was present) and the unique display paths are regenerated.
 *
 * @param lf The log file to close.
 */
void file_collection::close_file(const std::shared_ptr<logfile> &lf)
{
    if (!lf->is_valid_filename()) {
        this->fc_file_names.erase(lf->get_filename());
    } else {
        std::lock_guard<std::mutex> cache_guard(REALPATH_CACHE_MUTEX);

        REALPATH_CACHE.erase(lf->get_filename());
    }

    // NB: 'lf' is not touched again once the entry has been erased.
    auto open_pos = std::find(this->fc_files.begin(),
                              this->fc_files.end(),
                              lf);
    const bool was_open = (open_pos != this->fc_files.end());

    if (was_open) {
        this->fc_files.erase(open_pos);
        this->fc_files_generation += 1;
    }

    this->regenerate_unique_file_names();
}
void file_collection::regenerate_unique_file_names()
{
unique_path_generator upg;
for (const auto &lf : this->fc_files) {
upg.add_source(lf);
}
upg.generate();
this->fc_largest_path_length = 0;
for (const auto &lf : this->fc_files) {
const auto &path = lf->get_unique_path();
if (path.length() > this->fc_largest_path_length) {
this->fc_largest_path_length = path.length();
}
}
for (const auto &pair : this->fc_other_files) {
auto bn = ghc::filesystem::path(pair.first).filename().string();
if (bn.length() > this->fc_largest_path_length) {
this->fc_largest_path_length = bn.length();
}
}
}
/**
 * Fold the contents of another collection into this one.
 *
 * The recursive/rotated flags are overwritten with the values from 'other'.
 * Errors, file names, closed files and other files are inserted into the
 * matching containers here, and open files are appended (bumping the
 * generation counter when there are any).  Renames recorded in 'other' are
 * applied directly to the affected logfile objects rather than stored.
 *
 * @param other The collection to merge from.
 */
void file_collection::merge(const file_collection &other)
{
    this->fc_recursive = other.fc_recursive;
    this->fc_rotated = other.fc_rotated;

    this->fc_name_to_errors.insert(other.fc_name_to_errors.cbegin(),
                                   other.fc_name_to_errors.cend());
    this->fc_file_names.insert(other.fc_file_names.cbegin(),
                               other.fc_file_names.cend());

    const bool has_new_files = !other.fc_files.empty();
    if (has_new_files) {
        this->fc_files.insert(this->fc_files.end(),
                              other.fc_files.cbegin(),
                              other.fc_files.cend());
        this->fc_files_generation += 1;
    }

    for (const auto &renamed : other.fc_renamed_files) {
        // first: the already-open logfile, second: its newly found name.
        renamed.first->set_filename(renamed.second);
    }

    this->fc_closed_files.insert(other.fc_closed_files.cbegin(),
                                 other.fc_closed_files.cend());
    this->fc_other_files.insert(other.fc_other_files.cbegin(),
                                other.fc_other_files.cend());
}
/**
 * Predicate that matches log files by their device and inode numbers.
 *
 * The 'stat' is held by reference, so it must outlive the functor.
 */
struct same_file {
    explicit same_file(const struct stat &stat) : sf_stat(stat) {}

    /**
     * Check a log file against the 'stat' passed to the constructor.
     *
     * @param lf The log file to compare.
     * @return True if both the device and the inode of the log file's stat
     * equal those of the stat given to the constructor.
     */
    bool operator()(const std::shared_ptr<logfile> &lf) const
    {
        const auto &candidate = lf->get_stat();

        if (this->sf_stat.st_dev != candidate.st_dev) {
            return false;
        }
        return this->sf_stat.st_ino == candidate.st_ino;
    }

    const struct stat &sf_stat;
};
/**
* Try to load the given file as a log file. If no open file has the same
* device/inode, the file's format is detected and the file is opened by a
* task started with std::async. If the file is already open under a
* different name, the new name is recorded in the result's
* 'fc_renamed_files'.
*
* @param filename The file name to check.
* @param loo The options to open the file with. If 'loo_fd' is not -1, that
* descriptor is passed to fstat() instead of calling stat() on 'filename'.
* @param required Specifies whether or not the file must exist and be valid.
* When true, a failed stat or a non-regular file is reported in the result's
* 'fc_name_to_errors'; when false, such files are skipped silently.
* @return A future for a file_collection that holds only what was found for
* this file (new files, names, errors or renames).
*/
std::future<file_collection>
file_collection::watch_logfile(const std::string &filename,
logfile_open_options &loo, bool required)
{
file_collection retval;
struct stat st;
int rc;
// Names listed in fc_closed_files are skipped entirely.
if (this->fc_closed_files.count(filename)) {
return make_ready_future(retval);
}
// Use the descriptor in the options, if there is one, to stat the file.
if (loo.loo_fd != -1) {
rc = fstat(loo.loo_fd, &st);
} else {
rc = stat(filename.c_str(), &st);
}
if (rc == 0) {
// A directory is not opened; in recursive mode a "<dir>/*" pattern is
// returned instead, unless that pattern is already being watched.
if (S_ISDIR(st.st_mode) && this->fc_recursive) {
std::string wilddir = filename + "/*";
if (this->fc_file_names.find(wilddir) ==
this->fc_file_names.end()) {
retval.fc_file_names.emplace(wilddir, logfile_open_options());
}
return make_ready_future(retval);
}
// Anything else that is not a regular file is an error when required
// (handled by the 'rc == -1' check below) and is ignored otherwise.
if (!S_ISREG(st.st_mode)) {
if (required) {
rc = -1;
errno = EINVAL;
} else {
return make_ready_future(retval);
}
}
}
if (rc == -1) {
if (required) {
retval.fc_name_to_errors[filename] = strerror(errno);
}
return make_ready_future(retval);
}
auto stat_iter = find_if(this->fc_new_stats.begin(),
this->fc_new_stats.end(),
[&st](auto& elem) {
return st.st_ino == elem.st_ino &&
st.st_dev == elem.st_dev;
});
if (stat_iter != this->fc_new_stats.end()) {
// this file is probably a link that we have already scanned in this
// pass.
return make_ready_future(retval);
}
// Remember this dev/inode so that later calls for the same file return
// early, until fc_new_stats is cleared.
this->fc_new_stats.emplace_back(st);
// Look for an already-open file with the same dev/inode.
auto file_iter = find_if(this->fc_files.begin(),
this->fc_files.end(),
same_file(st));
if (file_iter == this->fc_files.end()) {
// Nothing to do if the name is already recorded in fc_other_files.
if (this->fc_other_files.find(filename) != this->fc_other_files.end()) {
return make_ready_future(retval);
}
// Everything is captured by value since the lambda is run through
// std::async below.
auto func = [filename, loo, prog = this->fc_progress, errs = this->fc_name_to_errors]() mutable {
file_collection retval;
if (errs.find(filename) != errs.end()) {
// The file is broken, no reason to try and reopen
return retval;
}
auto ff = detect_file_format(filename);
switch (ff) {
case file_format_t::FF_SQLITE_DB:
// Recorded in fc_other_files rather than opened as a logfile.
retval.fc_other_files[filename] = ff;
break;
case file_format_t::FF_ARCHIVE: {
// Iterator to the progress entry most recently added by the first
// callback below, if any.
nonstd::optional<std::list<archive_manager::extract_progress>::iterator>
prog_iter_opt;
if (loo.loo_source == logfile_name_source::ARCHIVE) {
// Don't try to open nested archives
return retval;
}
auto res = archive_manager::walk_archive_files(
filename,
// First callback: removes the progress entry added by the previous call
// (if any), adds a new one at the front for 'path'/'total' and returns
// a pointer to it.
[prog, &prog_iter_opt](
const auto &path,
const auto total) {
safe::WriteAccess<safe_scan_progress> sp(*prog);
prog_iter_opt | [&sp](auto prog_iter) {
sp->sp_extractions.erase(prog_iter);
};
auto prog_iter = sp->sp_extractions.emplace(
sp->sp_extractions.begin(),
path, total);
prog_iter_opt = prog_iter;
return &(*prog_iter);
},
// Second callback: adds entry.path() to the returned file names, with a
// custom name made of the archive name followed by the entry's path
// relative to 'tmp_path'.
// NOTE(review): presumably invoked once per extracted entry -- confirm
// against archive_manager.
[&filename, &retval](
const auto &tmp_path,
const auto &entry) {
auto arc_path = ghc::filesystem::relative(
entry.path(), tmp_path);
auto custom_name = filename / arc_path;
bool is_visible = true;
if (entry.file_size() == 0) {
log_info("hiding empty archive file: %s",
entry.path().c_str());
is_visible = false;
}
log_info("adding file from archive: %s/%s",
filename.c_str(),
entry.path().c_str());
retval.fc_file_names[entry.path().string()]
.with_filename(custom_name.string())
.with_source(logfile_name_source::ARCHIVE)
.with_visibility(is_visible)
.with_non_utf_visibility(false)
.with_visible_size_limit(128 * 1024);
});
if (res.isErr()) {
// On failure, discard whatever was collected and report only the error.
log_error("archive extraction failed: %s",
res.unwrapErr().c_str());
retval.clear();
retval.fc_name_to_errors[filename] = res.unwrapErr();
} else {
retval.fc_other_files[filename] = ff;
}
{
// Remove the last progress entry that is still registered, if any.
prog_iter_opt |
[&prog](auto prog_iter) {
prog->writeAccess()->sp_extractions.erase(
prog_iter);
};
}
break;
}
default:
log_info("loading new file: filename=%s", filename.c_str());
/* It's a new file, load it in. */
try {
auto lf = std::make_shared<logfile>(filename, loo);
retval.fc_files.push_back(lf);
} catch (logfile::error &e) {
// A file that fails to open is reported regardless of 'required'.
retval.fc_name_to_errors[filename] = e.what();
}
break;
}
return retval;
};
return std::async(std::launch::async, func);
} else {
auto lf = *file_iter;
if (lf->is_valid_filename() && lf->get_filename() != filename) {
/* The file is already loaded, but has been found under a different
* name. We just need to update the stored file name.
*/
retval.fc_renamed_files.emplace_back(lf, filename);
}
}
return make_ready_future(retval);
}
/**
 * Expand a glob pattern and queue a watch_logfile() call for the file names
 * that match the pattern.
 *
 * Nothing is done when 'path' is already a key in the realpath cache, when it
 * is a URL, or when glob() fails.
 *
 * @param fq The queue that receives the futures returned by watch_logfile().
 * @param path The glob pattern to expand.
 * @param loo The open options passed to watch_logfile().
 * @param required Passed to watch_logfile(), unless it is cleared first
 * because the path does not exist yet or because it expanded to something
 * other than itself.
 */
void file_collection::expand_filename(future_queue<file_collection> &fq,
                                      const std::string &path,
                                      logfile_open_options &loo,
                                      bool required)
{
    static_root_mem<glob_t, globfree> gl;

    {
        std::lock_guard<std::mutex> lookup_guard(REALPATH_CACHE_MUTEX);

        if (REALPATH_CACHE.count(path) != 0) {
            return;
        }
    }

    if (is_url(path.c_str())) {
        return;
    }
    if (glob(path.c_str(), GLOB_NOCHECK, nullptr, gl.inout()) != 0) {
        return;
    }

    if (gl->gl_pathc == 1 /*&& gl.gl_matchc == 0*/) {
        /* A single result may just be the pattern echoed back because it
         * does not match any file yet.  Let it through, it will be picked
         * up dynamically once it shows up.
         */
        if (access(path.c_str(), F_OK) == -1) {
            required = false;
        }
    }
    const bool expanded =
        gl->gl_pathc > 1 || strcmp(path.c_str(), gl->gl_pathv[0]) != 0;
    if (expanded) {
        required = false;
    }

    std::lock_guard<std::mutex> cache_guard(REALPATH_CACHE_MUTEX);
    const int match_count = static_cast<int>(gl->gl_pathc);

    for (int index = 0; index < match_count; index++) {
        const char *match = gl->gl_pathv[index];
        const std::string match_str(match);
        auto cached = REALPATH_CACHE.find(match_str);

        if (cached == REALPATH_CACHE.end()) {
            auto_mem<char> resolved;

            resolved = realpath(match, nullptr);
            if (resolved.in() == nullptr) {
                if (required) {
                    fprintf(stderr, "Cannot find file: %s -- %s",
                            match, strerror(errno));
                }
                continue;
            }
            cached = REALPATH_CACHE.emplace(match_str, resolved.in()).first;
        }

        // Files that are not required are only watched when readable.
        const auto &real_path = cached->second;
        if (required || access(real_path.c_str(), R_OK) == 0) {
            fq.push_back(watch_logfile(real_path, loo, required));
        }
    }
}
/**
 * Scan all of the watched file names and collect what was found.
 *
 * Names without a file descriptor are expanded as glob patterns (plus a
 * "<name>.*" pattern when rotated files are enabled), while names that come
 * with a descriptor are passed straight to watch_logfile().  The loop stops
 * early once the result holds 100 or more files.
 *
 * @param required Passed on to expand_filename()/watch_logfile() for the
 * watched names themselves; the rotated patterns are never required.
 * @return A collection with the merged results of the scan.
 */
file_collection file_collection::rescan_files(bool required)
{
    file_collection retval;
    // Results are merged into 'retval' as the queue hands them over.
    future_queue<file_collection> fq(
        [&retval](auto &fc) { retval.merge(fc); });

    for (auto &entry : this->fc_file_names) {
        const auto &name = entry.first;
        auto &options = entry.second;

        if (options.loo_fd != -1) {
            fq.push_back(watch_logfile(name, options, required));
        } else {
            this->expand_filename(fq, name, options, required);
            if (this->fc_rotated) {
                const std::string rotated_pattern = name + ".*";

                this->expand_filename(fq, rotated_pattern, options, false);
            }
        }

        if (retval.fc_files.size() >= 100) {
            log_debug("too many new files, breaking...");
            break;
        }
    }

    fq.pop_to();
    this->fc_new_stats.clear();

    return retval;
}