lnav/src/archive_manager.cc

450 lines
14 KiB
C++
Raw Normal View History

/**
* Copyright (c) 2020, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2022-03-16 22:38:08 +00:00
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file archive_manager.cc
*/
#include <unistd.h>
2022-03-16 22:38:08 +00:00
#include "config.h"
#if HAVE_ARCHIVE_H
2022-03-16 22:38:08 +00:00
# include "archive.h"
# include "archive_entry.h"
#endif
2022-03-16 22:38:08 +00:00
#include "archive_manager.cfg.hh"
#include "archive_manager.hh"
#include "auto_mem.hh"
2022-03-31 15:59:19 +00:00
#include "base/auto_fd.hh"
2022-03-13 22:49:41 +00:00
#include "base/fs_util.hh"
2022-03-16 22:38:08 +00:00
#include "base/humanize.hh"
#include "base/injector.hh"
#include "base/lnav_log.hh"
#include "base/paths.hh"
2022-03-16 22:38:08 +00:00
#include "fmt/format.h"
#include "lnav_util.hh"
namespace fs = ghc::filesystem;
namespace archive_manager {
2020-10-29 04:20:57 +00:00
class archive_lock {
public:
class guard {
public:
2022-03-16 22:38:08 +00:00
explicit guard(archive_lock& arc_lock) : g_lock(arc_lock)
{
2020-10-29 04:20:57 +00:00
this->g_lock.lock();
};
2022-03-16 22:38:08 +00:00
~guard()
{
2020-10-29 04:20:57 +00:00
this->g_lock.unlock();
};
private:
2022-03-16 22:38:08 +00:00
archive_lock& g_lock;
2020-10-29 04:20:57 +00:00
};
2022-03-16 22:38:08 +00:00
void lock() const
{
2020-10-29 04:20:57 +00:00
lockf(this->lh_fd, F_LOCK, 0);
};
2022-03-16 22:38:08 +00:00
void unlock() const
{
2020-10-29 04:20:57 +00:00
lockf(this->lh_fd, F_ULOCK, 0);
};
2022-03-16 22:38:08 +00:00
explicit archive_lock(const fs::path& archive_path)
{
2020-10-29 04:20:57 +00:00
auto lock_path = archive_path;
lock_path += ".lck";
2022-03-31 15:59:19 +00:00
auto open_res
= lnav::filesystem::open_file(lock_path, O_CREAT | O_RDWR, 0600);
if (open_res.isErr()) {
throw std::runtime_error(open_res.unwrapErr());
}
this->lh_fd = open_res.unwrap();
this->lh_fd.close_on_exec();
2020-10-29 04:20:57 +00:00
};
auto_fd lh_fd;
};
#if HAVE_ARCHIVE_H
/**
 * Enables a subset of the supported archive formats to speed up detection,
 * since some formats, like xar, are unlikely to be used.
 */
2022-03-16 22:38:08 +00:00
static void
enable_desired_archive_formats(archive* arc)
{
    /** @feature f0:archive.formats */
    using enable_format_t = int (*)(struct archive*);

    // The formats are enabled in the order listed here.
    static const enable_format_t FORMAT_ENABLERS[] = {
        archive_read_support_format_7zip,
        archive_read_support_format_cpio,
        archive_read_support_format_lha,
        archive_read_support_format_rar,
        archive_read_support_format_tar,
        archive_read_support_format_zip,
    };

    for (const auto enable_format : FORMAT_ENABLERS) {
        enable_format(arc);
    }
}
#endif
2022-03-16 22:38:08 +00:00
/**
 * Check whether the given file looks like an archive that should be
 * unpacked.
 *
 * The file is opened with libarchive using all filters, the desired archive
 * formats, and the "raw" format.  A file that is only recognized as "raw"
 * is not treated as an archive when it has a single filter, or when it has
 * two filters and the first one is gzip.
 *
 * @param filename The path of the file to probe.
 * @return true if the first header could be read and the file was not
 *   excluded by the "raw" checks; false otherwise, including when lnav is
 *   built without libarchive.
 */
bool
is_archive(const fs::path& filename)
{
#if HAVE_ARCHIVE_H
    // libarchive returns NULL for the format name until a header has been
    // read and for the error string when no error has been recorded.
    // Passing NULL for a "%s" argument is undefined, so substitute a
    // placeholder before logging.
    const auto printable = [](const char* str) -> const char* {
        return str != nullptr ? str : "(unknown)";
    };

    auto_mem<archive> arc(archive_read_free);

    arc = archive_read_new();

    archive_read_support_filter_all(arc);
    enable_desired_archive_formats(arc);
    archive_read_support_format_raw(arc);
    log_debug("read open %s", filename.c_str());
    auto r = archive_read_open_filename(arc, filename.c_str(), 128 * 1024);
    if (r == ARCHIVE_OK) {
        struct archive_entry* entry = nullptr;

        const auto* format_name = printable(archive_format_name(arc));

        log_debug("read next header %s %s", format_name, filename.c_str());
        if (archive_read_next_header(arc, &entry) == ARCHIVE_OK) {
            log_debug("read next done %s", filename.c_str());

            static const auto RAW_FORMAT_NAME = string_fragment("raw");
            static const auto GZ_FILTER_NAME = string_fragment("gzip");

            // The format is only known once a header has been read.
            format_name = printable(archive_format_name(arc));
            if (RAW_FORMAT_NAME == format_name) {
                auto filter_count = archive_filter_count(arc);

                if (filter_count == 1) {
                    return false;
                }

                const auto* first_filter_name = archive_filter_name(arc, 0);
                if (filter_count == 2 && first_filter_name != nullptr
                    && GZ_FILTER_NAME == first_filter_name)
                {
                    return false;
                }
            }
            log_info(
                "detected archive: %s -- %s", filename.c_str(), format_name);
            return true;
        }

        log_info("archive read header failed: %s -- %s",
                 filename.c_str(),
                 printable(archive_error_string(arc)));
    } else {
        log_info("archive open failed: %s -- %s",
                 filename.c_str(),
                 printable(archive_error_string(arc)));
    }
#endif

    return false;
}
2022-03-16 22:38:08 +00:00
/**
 * @return The directory that holds extracted archives: the "archives"
 *   directory under the lnav work directory.
 */
static fs::path
archive_cache_path()
{
    static const char* const CACHE_DIR_NAME = "archives";

    return lnav::paths::workdir() / CACHE_DIR_NAME;
}
/**
 * Compute the cache location for an archive.
 *
 * The result is "arc-<hash>-<basename>" inside the archive cache directory,
 * where the hash covers the file's base name and, if the file can be opened
 * and read, up to the first 1024 bytes of its content.
 *
 * @param filename The path to the archive file.
 * @return The path under the archive cache where the archive is unpacked.
 */
fs::path
filename_to_tmp_path(const std::string& filename)
{
    auto leaf_name = fs::path(filename).filename().string();
    hasher digest;

    digest.update(leaf_name);

    // Mix in the head of the file so that different archives that share a
    // name get different cache directories.
    auto_fd src_fd(lnav::filesystem::openp(filename, O_RDONLY));
    if (src_fd != -1) {
        char head[1024];
        int head_len = read(src_fd, head, sizeof(head));

        if (head_len >= 0) {
            digest.update(head, head_len);
        }
    }

    return archive_cache_path()
        / fmt::format(FMT_STRING("arc-{}-{}"), digest.to_string(), leaf_name);
}
#if HAVE_ARCHIVE_H
/**
 * Copy the data of the current archive entry from the reader to the disk
 * writer, block by block.
 *
 * Before the first block, and again each time the running total passes the
 * next 1MiB check point, the space available at the entry's path is
 * compared against the configured minimum and the copy is aborted if it is
 * too low.
 *
 * @param filename The archive file, used in error messages.
 * @param ar The archive being read.
 * @param entry The entry being read, used in error messages.
 * @param aw The disk writer the blocks are written to.
 * @param entry_path The destination path of the entry.
 * @param ep The progress tracker whose output size is advanced.
 * @return Ok() when the end of the entry is reached, otherwise an error
 *   message.
 */
static walk_result_t
copy_data(const std::string& filename,
          struct archive* ar,
          struct archive_entry* entry,
          struct archive* aw,
          const fs::path& entry_path,
          struct extract_progress* ep)
{
    static constexpr size_t SPACE_CHECK_INTERVAL = 1024 * 1024;

    size_t bytes_copied = 0;
    size_t space_check_at = 0;

    while (true) {
        if (bytes_copied >= space_check_at) {
            const auto& cfg = injector::get<const config&>();
            auto disk_space = fs::space(entry_path);

            if (disk_space.available < cfg.amc_min_free_space) {
                return Err(fmt::format(
                    FMT_STRING("available space on disk ({}) is below the "
                               "minimum-free threshold ({}).  Unable to unpack "
                               "'{}' to '{}'"),
                    humanize::file_size(disk_space.available),
                    humanize::file_size(cfg.amc_min_free_space),
                    entry_path.filename().string(),
                    entry_path.parent_path().string()));
            }
            space_check_at += SPACE_CHECK_INTERVAL;
        }

        const void* block;
        size_t block_size;
        la_int64_t block_offset;

        const int read_rc
            = archive_read_data_block(ar, &block, &block_size, &block_offset);
        if (read_rc == ARCHIVE_EOF) {
            return Ok();
        }
        if (read_rc != ARCHIVE_OK) {
            return Err(
                fmt::format(FMT_STRING("failed to read file: {} >> {} -- {}"),
                            filename,
                            archive_entry_pathname_utf8(entry),
                            archive_error_string(ar)));
        }

        const int write_rc
            = archive_write_data_block(aw, block, block_size, block_offset);
        if (write_rc != ARCHIVE_OK) {
            return Err(fmt::format(FMT_STRING("failed to write file: {} -- {}"),
                                   entry_path.string(),
                                   archive_error_string(aw)));
        }

        bytes_copied += block_size;
        ep->ep_out_size.fetch_add(block_size);
    }
}
2022-03-16 22:38:08 +00:00
/**
 * Unpack an archive into its directory in the archive cache.
 *
 * The work is done while holding the archive's lock file.  If a ".done"
 * marker already exists and the cache directory has at least one entry, the
 * marker's modification time is refreshed and nothing is extracted.
 * Otherwise, every entry is written below the cache directory with
 * owner-read permissions (plus write/execute for directories) and the
 * ".done" marker is created at the end.
 *
 * Entry names come from the archive and are untrusted: absolute names are
 * made relative and entries with a ".." component are refused so that
 * nothing is written outside of the cache directory.
 *
 * @param filename The path of the archive to unpack.
 * @param cb Called for each entry with its destination path and size (or -1
 *   if the size is not set); returns the progress tracker for the entry.
 * @return Ok() on success, otherwise an error message.
 * @throws std::runtime_error if the lock file cannot be opened.
 */
static walk_result_t
extract(const std::string& filename, const extract_cb& cb)
{
    static const int FLAGS = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM
        | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS;

    std::error_code ec;
    auto tmp_path = filename_to_tmp_path(filename);

    fs::create_directories(tmp_path.parent_path(), ec);
    if (ec) {
        return Err(fmt::format("Unable to create directory: {} -- {}",
                               tmp_path.parent_path().string(),
                               ec.message()));
    }

    auto arc_lock = archive_lock(tmp_path);
    auto lock_guard = archive_lock::guard(arc_lock);
    auto done_path = tmp_path;

    done_path += ".done";

    if (fs::exists(done_path)) {
        size_t file_count = 0;
        if (fs::is_directory(tmp_path)) {
            for (const auto& entry : fs::directory_iterator(tmp_path)) {
                (void) entry;
                file_count += 1;
            }
        }
        if (file_count > 0) {
            // Refresh the marker so the cache entry is not expired while it
            // is still in use.
            fs::last_write_time(done_path, std::chrono::system_clock::now());
            log_info("%s: archive has already been extracted!",
                     done_path.c_str());
            return Ok();
        }
        log_warning("%s: archive cache has been damaged, re-extracting",
                    done_path.c_str());

        fs::remove(done_path);
    }

    auto_mem<archive> arc(archive_free);
    auto_mem<archive> ext(archive_free);

    arc = archive_read_new();
    enable_desired_archive_formats(arc);
    archive_read_support_format_raw(arc);
    archive_read_support_filter_all(arc);
    ext = archive_write_disk_new();
    archive_write_disk_set_options(ext, FLAGS);
    archive_write_disk_set_standard_lookup(ext);
    if (archive_read_open_filename(arc, filename.c_str(), 10240) != ARCHIVE_OK)
    {
        return Err(fmt::format(FMT_STRING("unable to open archive: {} -- {}"),
                               filename,
                               archive_error_string(arc)));
    }

    log_info("extracting %s to %s", filename.c_str(), tmp_path.c_str());
    while (true) {
        struct archive_entry* entry = nullptr;
        auto r = archive_read_next_header(arc, &entry);
        if (r == ARCHIVE_EOF) {
            log_info("all done");
            break;
        }
        if (r != ARCHIVE_OK) {
            return Err(
                fmt::format(FMT_STRING("unable to read entry header: {} -- {}"),
                            filename,
                            archive_error_string(arc)));
        }

        const auto* format_name = archive_format_name(arc);
        auto filter_count = archive_filter_count(arc);

        // libarchive returns NULL when the name cannot be represented, which
        // must not be handed to fs::path.
        const auto* entry_name = archive_entry_pathname(entry);
        if (entry_name == nullptr) {
            return Err(
                fmt::format(FMT_STRING("unable to read entry pathname: {}"),
                            filename));
        }

        auto_mem<archive_entry> wentry(archive_entry_free);
        wentry = archive_entry_clone(entry);
        // Appending an absolute path with operator/ would replace tmp_path
        // entirely, so strip any root from the untrusted entry name.
        auto desired_pathname = fs::path(entry_name).relative_path();
        if (strcmp(format_name, "raw") == 0 && filter_count >= 2) {
            desired_pathname = fs::path(filename).filename();
        }
        for (const auto& component : desired_pathname) {
            if (component.string() == "..") {
                return Err(fmt::format(
                    FMT_STRING("refusing to extract entry that escapes the "
                               "archive cache: {} >> {}"),
                    filename,
                    desired_pathname.string()));
            }
        }
        // NOTE(review): symlink/hardlink entries are still written as-is;
        // confirm whether links that point outside of tmp_path need to be
        // rejected as well.
        auto entry_path = tmp_path / desired_pathname;
        auto* prog = cb(
            entry_path,
            archive_entry_size_is_set(entry) ? archive_entry_size(entry) : -1);
        archive_entry_copy_pathname(wentry, entry_path.c_str());
        auto entry_mode = archive_entry_mode(wentry);

        // Cached files are read-only; directories additionally need
        // write/execute so their contents can be created and traversed.
        archive_entry_set_perm(
            wentry, S_IRUSR | (S_ISDIR(entry_mode) ? S_IXUSR | S_IWUSR : 0));
        r = archive_write_header(ext, wentry);
        if (r < ARCHIVE_OK) {
            return Err(
                fmt::format(FMT_STRING("unable to write entry: {} -- {}"),
                            entry_path.string(),
                            archive_error_string(ext)));
        }

        if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0)
        {
            TRY(copy_data(filename, arc, entry, ext, entry_path, prog));
        }
        r = archive_write_finish_entry(ext);
        if (r != ARCHIVE_OK) {
            return Err(
                fmt::format(FMT_STRING("unable to finish entry: {} -- {}"),
                            entry_path.string(),
                            archive_error_string(ext)));
        }
    }
    archive_read_close(arc);
    archive_write_close(ext);

    // The marker records that the extraction ran to completion.  Failing to
    // create it is not fatal, the archive is simply extracted again later.
    auto done_res
        = lnav::filesystem::open_file(done_path, O_CREAT | O_WRONLY, 0600);
    if (done_res.isErr()) {
        log_warning("%s: unable to create done marker", done_path.c_str());
    }

    return Ok();
}
2020-10-29 04:24:23 +00:00
#endif
2022-03-16 22:38:08 +00:00
/**
 * Extract the given archive into the cache and visit every regular file
 * that was unpacked.
 *
 * @param filename The path of the archive to unpack.
 * @param cb Passed through to the extraction step, which calls it for each
 *   archive entry.
 * @param callback Called for each regular file found under the extraction
 *   directory with that directory and the file's directory entry.
 * @return Ok() on success; the extraction error if unpacking failed, in
 *   which case the extraction directory is removed; or an error if lnav was
 *   built without libarchive.
 */
walk_result_t
walk_archive_files(
    const std::string& filename,
    const extract_cb& cb,
    const std::function<void(const fs::path&, const fs::directory_entry&)>&
        callback)
{
#if HAVE_ARCHIVE_H
    auto tmp_path = filename_to_tmp_path(filename);

    auto result = extract(filename, cb);
    if (result.isErr()) {
        // Best-effort cleanup: use the non-throwing overload so a cleanup
        // failure cannot replace the extraction error being reported.
        std::error_code remove_ec;

        fs::remove_all(tmp_path, remove_ec);
        return result;
    }

    // An archive without entries leaves no directory behind, and iterating
    // over a missing directory would throw.
    std::error_code dir_ec;
    if (!fs::is_directory(tmp_path, dir_ec)) {
        return Ok();
    }

    for (const auto& entry : fs::recursive_directory_iterator(tmp_path)) {
        if (!entry.is_regular_file()) {
            continue;
        }

        callback(tmp_path, entry);
    }

    return Ok();
#else
    return Err(std::string("not compiled with libarchive"));
#endif
}
2022-03-16 22:38:08 +00:00
void
cleanup_cache()
{
(void) std::async(std::launch::async, []() {
auto now = std::chrono::system_clock::now();
auto cache_path = archive_cache_path();
2022-03-31 15:59:19 +00:00
const auto& cfg = injector::get<const config&>();
std::vector<fs::path> to_remove;
log_debug("cache-ttl %d", cfg.amc_cache_ttl.count());
for (const auto& entry : fs::directory_iterator(cache_path)) {
if (entry.path().extension() != ".done") {
continue;
}
auto mtime = fs::last_write_time(entry.path());
auto exp_time = mtime + cfg.amc_cache_ttl;
if (now < exp_time) {
continue;
}
to_remove.emplace_back(entry.path());
}
for (auto& entry : to_remove) {
log_debug("removing cached archive: %s", entry.c_str());
fs::remove(entry);
entry.replace_extension(".lck");
fs::remove(entry);
entry.replace_extension();
fs::remove_all(entry);
}
});
}
2022-03-16 22:38:08 +00:00
} // namespace archive_manager