[files] initial impl for opening archives

pull/796/head
Timothy Stack 4 years ago
parent 9f524136bd
commit 579e4274f9

@ -10,6 +10,9 @@ export srcdir
top_builddir=`dirname $0`
export top_builddir
builddir=`pwd`
export builddir
test_dir="@abssrcdir@/test"
export test_dir
@ -20,6 +23,9 @@ export BZIP2_SUPPORT
BZIP2_CMD="@BZIP2_CMD@"
export BZIP2_CMD
LIBARCHIVE_LIBS="@LIBARCHIVE_LIBS@"
export LIBARCHIVE_LIBS
HOME="${top_builddir}/test"
export HOME

@ -10,6 +10,7 @@ clean-outbox: outbox
rm -f outbox/*
PACKAGE_URLS = \
https://libarchive.org/downloads/libarchive-3.4.3.tar.gz \
https://ftp.gnu.org/gnu/make/make-4.2.1.tar.gz \
ftp://ftp.gnu.org/gnu/ncurses/ncurses-5.9.tar.gz \
https://ftp.pcre.org/pub/pcre/pcre-8.42.tar.gz \

@ -45,23 +45,24 @@ if test x"${OS}" != x"FreeBSD"; then
if test x"$(lsb_release | awk '{print $3}')" == x"Alpine"; then
TARGET_FILE='/vagrant/lnav-musl.zip'
../lnav/configure \
CFLAGS='-static -no-pie -s' \
CXXFLAGS='-static -U__unused -no-pie -s' \
CFLAGS='-static -no-pie -s -O2' \
CXXFLAGS='-static -U__unused -no-pie -s -O2' \
LDFLAGS="-L${FAKE_ROOT}/lib" \
CPPFLAGS="-I${FAKE_ROOT}/include" \
--enable-static
PATH="${FAKE_ROOT}/bin:${PATH}"
else
../lnav/configure \
--with-libarchive=${FAKE_ROOT} \
LDFLAGS="-L${FAKE_ROOT}/lib" \
CPPFLAGS="-I${FAKE_ROOT}/include" \
CPPFLAGS="-I${FAKE_ROOT}/include -O2" \
PATH="${FAKE_ROOT}/bin:${PATH}"
fi
else
../lnav/configure \
LDFLAGS="-L${FAKE_ROOT}/lib -static" \
LIBS="-lm -lelf" \
CPPFLAGS="-I${FAKE_ROOT}/include" \
CPPFLAGS="-I${FAKE_ROOT}/include -O2" \
PATH="${FAKE_ROOT}/bin:${PATH}"
fi

@ -136,3 +136,8 @@ else
make &&
make install)
fi
# Configure, build, and install libarchive with ${FAKE_ROOT} as the install
# prefix.  The subshell keeps the directory change local, and the && chain
# stops at the first step that fails.
(cd libarchive-* &&
./configure --prefix=${FAKE_ROOT} &&
make &&
make install)

@ -229,6 +229,7 @@ add_library(diag STATIC
config.h
ansi_scrubber.cc
archive_manager.cc
bin2c.h
bookmarks.cc
bottom_status_source.cc
@ -332,6 +333,7 @@ add_library(diag STATIC
spookyhash/SpookyV2.cpp
all_logs_vtab.hh
archive_manager.hh
attr_line.hh
auto_fd.hh
auto_mem.hh

@ -204,6 +204,7 @@ LNAV_BUILT_FILES = \
AM_LDFLAGS = \
$(STATIC_LDFLAGS) \
$(LIBARCHIVE_LDFLAGS) \
$(READLINE_LDFLAGS) \
$(SQLITE3_LDFLAGS) \
$(PCRE_LDFLAGS)
@ -212,6 +213,7 @@ AM_CPPFLAGS = \
-DSYSCONFDIR='"$(sysconfdir)"' \
-I$(srcdir)/fmtlib \
-Wall \
$(LIBARCHIVE_CFLAGS) \
$(READLINE_CFLAGS) \
$(SQLITE3_CFLAGS) \
$(LIBCURL_CPPFLAGS)
@ -226,6 +228,7 @@ LDADD = \
$(READLINE_LIBS) \
$(CURSES_LIB) \
$(SQLITE3_LIBS) \
$(LIBARCHIVE_LIBS) \
$(LIBCURL) \
-lpcrecpp
@ -243,6 +246,7 @@ dist_noinst_DATA = \
noinst_HEADERS = \
all_logs_vtab.hh \
ansi_scrubber.hh \
archive_manager.hh \
attr_line.hh \
auto_fd.hh \
auto_mem.hh \
@ -373,6 +377,7 @@ nodist_libdiag_a_SOURCES = \
libdiag_a_SOURCES = \
ansi_scrubber.cc \
archive_manager.cc \
bookmarks.cc \
bottom_status_source.cc \
collation-functions.cc \

@ -0,0 +1,218 @@
/**
* Copyright (c) 2020, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file archive_manager.cc
*/
#include "config.h"
#include <glob.h>
#include <unistd.h>
#if HAVE_ARCHIVE_H
#include "archive.h"
#include "archive_entry.h"
#endif
#include "auto_mem.hh"
#include "fmt/format.h"
#include "base/lnav_log.hh"
#include "archive_manager.hh"
namespace fs = ghc::filesystem;
namespace archive_manager {
/**
 * Probe a file to see whether libarchive recognizes it as an archive.
 *
 * The file is opened with every filter and format libarchive supports
 * enabled, and the first entry header is read.  The outcome of the probe is
 * written to the log.
 *
 * @param filename Path of the file to probe.
 * @return true if the file opened and its first header was read with
 *   ARCHIVE_OK; false otherwise, or always when built without libarchive.
 */
bool is_archive(const std::string &filename)
{
#if HAVE_ARCHIVE_H
    static const size_t READ_BLOCK_SIZE = 16384;

    auto_mem<archive> reader(archive_read_free);

    reader = archive_read_new();
    archive_read_support_filter_all(reader);
    archive_read_support_format_all(reader);

    if (archive_read_open_filename(reader,
                                   filename.c_str(),
                                   READ_BLOCK_SIZE) != ARCHIVE_OK) {
        log_info("archive open failed: %s -- %s",
                 filename.c_str(),
                 archive_error_string(reader));
        return false;
    }

    struct archive_entry *first_entry;

    if (archive_read_next_header(reader, &first_entry) != ARCHIVE_OK) {
        log_info("archive read header failed: %s -- %s",
                 filename.c_str(),
                 archive_error_string(reader));
        return false;
    }

    log_info("detected archive: %s -- %s",
             filename.c_str(),
             archive_format_name(reader));
    return true;
#else
    return false;
#endif
}
/**
 * Compute the directory an archive is unpacked into.
 *
 * The result is "<system temp dir>/lnav-<uid>-archives/<archive basename>",
 * so only the last component of `filename` contributes to the location.
 *
 * @param filename Path of the archive file.
 * @return The extraction directory for that archive.
 */
fs::path
filename_to_tmp_path(const std::string &filename)
{
    // TODO include a content-hash in the path name
    const auto archive_name = fs::path(filename).filename();
    const auto per_user_dir =
        fs::path(fmt::format("lnav-{}-archives", getuid()));

    return fs::temp_directory_path() / per_user_dir / archive_name;
}
/**
 * Invoke a callback for every regular file unpacked from an archive.
 *
 * The archive is extracted first if its extraction directory does not exist
 * yet.  The directory is then walked recursively and `callback` is called
 * for each regular file; other entry types are skipped.
 *
 * Filesystem errors are logged rather than thrown.  In particular, if
 * extraction produced nothing (the archive could not be opened, or the
 * build has no libarchive support) the walk ends quietly instead of raising
 * a filesystem_error out of the directory iterator.
 *
 * @param filename Path of the archive file.
 * @param callback Function called with the directory entry of each regular
 *   file found under the extraction directory.
 */
void walk_archive_files(const std::string &filename,
                        const std::function<void(
                            const fs::directory_entry &)>& callback)
{
    const auto tmp_path = filename_to_tmp_path(filename);
    std::error_code ec;

    // TODO take care of locking
    if (!fs::exists(tmp_path, ec)) {
        extract(filename);
    }

    fs::recursive_directory_iterator iter(tmp_path, ec);

    if (ec) {
        log_error("unable to walk extracted archive: %s -- %s",
                  tmp_path.c_str(),
                  ec.message().c_str());
        return;
    }

    const fs::recursive_directory_iterator end_iter;

    while (iter != end_iter) {
        std::error_code type_ec;

        // An entry whose type cannot be determined is treated like any
        // other non-regular entry and skipped.
        if (iter->is_regular_file(type_ec)) {
            callback(*iter);
        }

        // Use the non-throwing increment so a directory that cannot be
        // read stops the walk instead of propagating an exception.
        iter.increment(ec);
        if (ec) {
            log_error("unable to walk extracted archive: %s -- %s",
                      tmp_path.c_str(),
                      ec.message().c_str());
            break;
        }
    }
}
#if HAVE_ARCHIVE_H
static int
copy_data(struct archive *ar, struct archive *aw)
{
int r;
const void *buff;
size_t size;
la_int64_t offset;
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF) {
return (ARCHIVE_OK);
}
if (r < ARCHIVE_OK) {
return (r);
}
r = archive_write_data_block(aw, buff, size, offset);
if (r < ARCHIVE_OK) {
log_error("%s", archive_error_string(aw));
return (r);
}
}
}
void extract(const std::string &filename)
{
static int FLAGS = ARCHIVE_EXTRACT_TIME
| ARCHIVE_EXTRACT_PERM
| ARCHIVE_EXTRACT_ACL
| ARCHIVE_EXTRACT_FFLAGS;
auto_mem<archive> arc(archive_free);
auto_mem<archive> ext(archive_free);
arc = archive_read_new();
archive_read_support_format_all(arc);
archive_read_support_filter_all(arc);
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, FLAGS);
archive_write_disk_set_standard_lookup(ext);
if (archive_read_open_filename(arc, filename.c_str(), 10240) != ARCHIVE_OK) {
return;
}
auto tmp_path = filename_to_tmp_path(filename);
log_info("extracting %s to %s", filename.c_str(), tmp_path.c_str());
while (true) {
struct archive_entry *entry;
auto r = archive_read_next_header(arc, &entry);
if (r == ARCHIVE_EOF) {
break;
}
if (r < ARCHIVE_OK) {
log_error("%s", archive_error_string(arc));
}
if (r < ARCHIVE_WARN) {
return;
}
auto_mem<archive_entry> wentry(archive_entry_free);
wentry = archive_entry_clone(entry);
auto entry_path = tmp_path / fs::path(archive_entry_pathname(entry));
archive_entry_copy_pathname(wentry, entry_path.c_str());
auto entry_mode = archive_entry_mode(wentry);
archive_entry_set_perm(
wentry, S_IRUSR | (S_ISDIR(entry_mode) ? S_IXUSR|S_IWUSR : 0));
r = archive_write_header(ext, wentry);
if (r < ARCHIVE_OK) {
log_error("%s", archive_error_string(ext));
}
else if (archive_entry_size(entry) > 0) {
r = copy_data(arc, ext);
if (r < ARCHIVE_OK) {
log_error("%s", archive_error_string(ext));
}
if (r < ARCHIVE_WARN) {
return;
}
}
r = archive_write_finish_entry(ext);
if (r < ARCHIVE_OK) {
log_error("%s", archive_error_string(ext));
}
if (r < ARCHIVE_WARN) {
return;
}
}
archive_read_close(arc);
archive_write_close(ext);
// TODO return errors
}
#else
/**
 * Stand-in used when HAVE_ARCHIVE_H is not set: nothing is unpacked.
 *
 * @param filename Path of the archive file (ignored).
 */
void extract(const std::string &filename)
{
    (void) filename;
}
#endif
}

@ -0,0 +1,51 @@
/**
* Copyright (c) 2020, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @file archive_manager.hh
*/
#ifndef lnav_archive_manager_hh
#define lnav_archive_manager_hh
#include <string>
#include <functional>
#include "ghc/filesystem.hpp"
/**
 * Helpers for detecting archive files and unpacking them into a temporary
 * directory so that their contents can be opened like ordinary files.
 */
namespace archive_manager {

/**
 * @param filename Path of the file to probe.
 * @return true if the file was recognized as an archive.
 */
bool is_archive(const std::string &filename);

/**
 * @param filename Path of an archive file.
 * @return The directory that the archive's contents are unpacked into.
 */
ghc::filesystem::path filename_to_tmp_path(const std::string &filename);

/**
 * Unpack the archive if its extraction directory does not exist yet, then
 * call `callback` for each regular file found beneath that directory.
 *
 * @param filename Path of an archive file.
 * @param callback Function invoked with each regular file's directory entry.
 */
void walk_archive_files(
    const std::string &filename,
    const std::function<void(const ghc::filesystem::directory_entry &)> &callback);

/**
 * Unpack the archive into the directory given by filename_to_tmp_path().
 *
 * @param filename Path of an archive file.
 */
void extract(const std::string &filename);

}
#endif

@ -40,6 +40,8 @@
#include "strnatcmp.h"
struct string_fragment {
using iterator = const char *;
explicit string_fragment(const char *str, int begin = 0, int end = -1)
: sf_string(str), sf_begin(begin), sf_end(end == -1 ? strlen(str) : end) {
};
@ -67,6 +69,14 @@ struct string_fragment {
return &this->sf_string[this->sf_begin];
}
/** @return Pointer to the first character of the fragment. */
iterator begin() const {
    return this->sf_string + this->sf_begin;
}
/** @return Pointer one past the last character of the fragment. */
iterator end() const {
    return this->sf_string + this->sf_end;
}
bool empty() const {
return length() == 0;
};
@ -109,7 +119,18 @@ struct string_fragment {
strncmp(this->data(), str, this->length()) == 0;
};
const char *to_string(char *buf) {
/**
 * Check whether this fragment begins with the given prefix.
 *
 * @param prefix NUL-terminated string to compare against the start of the
 *   fragment.
 * @return true if every character of `prefix` matches the corresponding
 *   character of the fragment; an empty prefix always matches.  A prefix
 *   longer than the fragment never matches.
 */
bool startswith(const char *prefix) const {
    auto iter = this->begin();
    const auto frag_end = this->end();

    // The bounds check must come before the dereference: the fragment is
    // not necessarily NUL-terminated, so reading *iter at end() would look
    // at memory past the fragment.
    while (*prefix != '\0' && iter < frag_end && *prefix == *iter) {
        prefix += 1;
        iter += 1;
    }

    return *prefix == '\0';
}
const char *to_string(char *buf) const {
memcpy(buf, this->data(), this->length());
buf[this->length()] = '\0';
@ -120,10 +141,6 @@ struct string_fragment {
return std::string(this->data(), this->length());
}
std::string to_string() {
return std::string(&this->sf_string[this->sf_begin], this->length());
};
void clear() {
this->sf_begin = 0;
this->sf_end = 0;

@ -132,6 +132,10 @@
#include <curl/curl.h>
#endif
#if HAVE_ARCHIVE_H
#include <archive.h>
#endif
#include "yajlpp/yajlpp.hh"
#include "readline_callbacks.hh"
#include "command_executor.hh"
@ -143,6 +147,7 @@
#include "log_search_table.hh"
#include "shlex.hh"
#include "log_actions.hh"
#include "archive_manager.hh"
#ifndef SYSCONFDIR
#define SYSCONFDIR "/usr/etc"
@ -967,26 +972,41 @@ static bool watch_logfile(string filename, logfile_open_options &loo, bool requi
file_format_t ff = detect_file_format(filename);
switch (ff) {
case FF_SQLITE_DB:
lnav_data.ld_other_files.push_back(filename);
attach_sqlite_db(lnav_data.ld_db.in(), filename);
retval = true;
break;
case file_format_t::FF_SQLITE_DB:
lnav_data.ld_other_files.push_back(filename);
attach_sqlite_db(lnav_data.ld_db.in(), filename);
retval = true;
break;
default:
/* It's a new file, load it in. */
shared_ptr<logfile> lf = make_shared<logfile>(filename, loo);
case file_format_t::FF_ARCHIVE: {
archive_manager::walk_archive_files(filename,
[&filename](const auto& entry) {
logfile_open_options loo;
log_info("adding file from archive: %s/%s",
filename.c_str(),
entry.path().c_str());
// TODO add some heuristics for hiding files
lnav_data.ld_file_names[entry.path().string()] = loo;
});
lnav_data.ld_other_files.emplace_back(filename);
break;
}
log_info("loading new file: filename=%s",
filename.c_str());
lf->set_logfile_observer(&obs);
lnav_data.ld_files.push_back(lf);
lnav_data.ld_text_source.push_back(lf);
default:
/* It's a new file, load it in. */
shared_ptr<logfile> lf = make_shared<logfile>(filename, loo);
regenerate_unique_file_names();
log_info("loading new file: filename=%s",
filename.c_str());
lf->set_logfile_observer(&obs);
lnav_data.ld_files.push_back(lf);
lnav_data.ld_text_source.push_back(lf);
retval = true;
break;
regenerate_unique_file_names();
retval = true;
break;
}
}
}
@ -2536,6 +2556,9 @@ int main(int argc, char *argv[])
#endif
#ifdef HAVE_LIBCURL
log_info(" curl=%s (%s)", LIBCURL_VERSION, LIBCURL_TIMESTAMP);
#endif
#ifdef HAVE_ARCHIVE_H
log_info(" libarchive=%d", ARCHIVE_VERSION_NUMBER);
#endif
log_info(" ncurses=%s", NCURSES_VERSION);
log_info(" pcre=%s", pcre_version());

@ -48,6 +48,7 @@
#include "base/result.h"
#include "ansi_scrubber.hh"
#include "view_curses.hh"
#include "archive_manager.hh"
using namespace std;
@ -304,17 +305,23 @@ std::pair<std::string, std::string> split_path(const char *path, ssize_t len)
file_format_t detect_file_format(const std::string &filename)
{
file_format_t retval = FF_UNKNOWN;
if (archive_manager::is_archive(filename)) {
return file_format_t::FF_ARCHIVE;
}
file_format_t retval = file_format_t::FF_UNKNOWN;
auto_fd fd;
if ((fd = open(filename.c_str(), O_RDONLY)) != -1) {
char buffer[32];
int rc;
ssize_t rc;
if ((rc = read(fd, buffer, sizeof(buffer))) > 0) {
if (rc > 16 &&
strncmp(buffer, "SQLite format 3", 16) == 0) {
retval = FF_SQLITE_DB;
static auto SQLITE3_HEADER = "SQLite format 3";
auto header_frag = string_fragment(buffer, 0, rc);
if (header_frag.startswith(SQLITE3_HEADER)) {
retval = file_format_t::FF_SQLITE_DB;
}
}
}

@ -219,9 +219,10 @@ std::pair<std::string, std::string> split_path(const std::string &path) {
return split_path(path.c_str(), path.size());
};
enum file_format_t {
enum class file_format_t {
FF_UNKNOWN,
FF_SQLITE_DB,
FF_ARCHIVE,
};
file_format_t detect_file_format(const std::string &filename);

@ -71,7 +71,7 @@ public:
};
void add_source(std::shared_ptr<unique_path_source> path_source) {
void add_source(const std::shared_ptr<unique_path_source>& path_source) {
ghc::filesystem::path path = path_source->get_path();
path_source->set_unique_path(path.filename());
@ -107,6 +107,9 @@ public:
if (common.empty()) {
common = path.filename();
if (common.empty()) {
all_common = false;
}
} else if (common != path.filename()) {
all_common = false;
}
@ -115,8 +118,9 @@ public:
if (all_common) {
for (auto &src : pair.second) {
auto &path = src->get_path_prefix();
auto par = path.parent_path();
if (path.empty()) {
if (path.empty() || path == par) {
all_common = false;
} else {
src->set_path_prefix(path.parent_path());
@ -147,11 +151,11 @@ public:
src->set_path_prefix(parent);
if (!parent.empty()) {
if (parent.empty() || parent == prefix) {
src->set_unique_path("[" + src->get_unique_path());
} else {
this->upg_unique_paths[src->get_unique_path()].push_back(
src);
} else {
src->set_unique_path("[" + src->get_unique_path());
}
}

@ -4,9 +4,14 @@ LOG_COMPILER = $(SHELL) $(top_builddir)/TESTS_ENVIRONMENT
AM_CPPFLAGS = \
-Wall \
$(LIBARCHIVE_CFLAGS) \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/fmtlib
AM_LDFLAGS = \
$(LIBARCHIVE_LDFLAGS) \
$(STATIC_LDFLAGS)
noinst_LIBRARIES = libyajlpp.a
noinst_HEADERS = \
@ -35,6 +40,7 @@ test_json_ptr_SOURCES = test_json_ptr.cc
test_yajlpp_SOURCES = test_yajlpp.cc
LDADD = \
$(LIBARCHIVE_LIBS) \
libyajlpp.a \
$(top_builddir)/src/base/libbase.a \
$(top_builddir)/src/fmtlib/libcppfmt.a \

@ -10,6 +10,7 @@ AM_CPPFLAGS = \
-Wall \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/fmtlib \
$(LIBARCHIVE_CFLAGS) \
$(READLINE_CFLAGS) \
$(SQLITE3_CFLAGS)
@ -48,6 +49,7 @@ check_PROGRAMS = \
test_top_status
AM_LDFLAGS = \
$(LIBARCHIVE_LDFLAGS) \
$(STATIC_LDFLAGS) \
$(SQLITE3_LDFLAGS) \
$(READLINE_LDFLAGS) \
@ -76,6 +78,7 @@ LDADD = \
$(top_builddir)/src/yajlpp/libyajlpp.a \
$(top_builddir)/src/base/libbase.a \
$(CURSES_LIB) \
$(LIBARCHIVE_LIBS) \
$(SQLITE3_LIBS) \
$(PCRE_LIBS) \
$(READLINE_LIBS) \
@ -361,13 +364,16 @@ DISTCLEANFILES = \
logfile_syslog_test.0 \
logfile_syslog_fr_test.0 \
logfile_syslog_with_mixed_times_test.0 \
test-logs.tgz \
test_pretty_in.* \
tmp \
unreadable.log \
empty \
scripts-empty
distclean-local:
$(RM_V)rm -rf sessions
$(RM_V)rm -rf tmp
$(RM_V)rm -rf meta-sessions
$(RM_V)rm -rf test-config
$(RM_V)rm -rf .lnav

@ -155,7 +155,7 @@ TEST_CASE("ptime_roundtrip") {
class my_path_source : public unique_path_source {
public:
explicit my_path_source(const ghc::filesystem::path &p) : mps_path(p) {
explicit my_path_source(ghc::filesystem::path p) : mps_path(std::move(p)) {
}

@ -14,6 +14,30 @@ log_time
EOF
fi
# Exercise archive handling, but only when LIBARCHIVE_LIBS is non-empty.
if test x"${LIBARCHIVE_LIBS}" != x""; then
    # Bundle some of the sample logs into a gzipped tarball in the build dir.
    (cd "${srcdir}" && tar cfz "${builddir}/test-logs.tgz" logfile_access_log.* logfile_empty.0)

    # Point TMPDIR at a local directory so the extracted files can be
    # inspected below.
    mkdir -p tmp
    run_test env TMPDIR=tmp ${lnav_test} -n test-logs.tgz

    check_output "archive not unpacked" <<EOF
192.168.202.254 - - [20/Jul/2009:22:59:26 +0000] "GET /vmw/cgi/tramp HTTP/1.0" 200 134 "-" "gPXE/0.9.7"
192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkboot.gz HTTP/1.0" 404 46210 "-" "gPXE/0.9.7"
192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkernel.gz HTTP/1.0" 200 78929 "-" "gPXE/0.9.7"
10.112.81.15 - - [15/Feb/2013:06:00:31 +0000] "-" 400 0 "-" "-"
EOF

    # Use -f here: `test` with a single operand only checks that the string
    # is non-empty, which is true even when the glob matches nothing.
    if ! test -f tmp/*/test-logs.tgz/logfile_access_log.0; then
        echo "archived file not unpacked"
        exit 1
    fi

    # Extracted files are expected to be read-only.
    if test -w tmp/*/test-logs.tgz/logfile_access_log.0; then
        echo "archived file is writable"
        exit 1
    fi
fi
touch unreadable.log
chmod ugo-r unreadable.log

Loading…
Cancel
Save