mirror of https://github.com/tstack/lnav
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1497 lines
49 KiB
C++
1497 lines
49 KiB
C++
/**
|
|
* Copyright (c) 2007-2012, Timothy Stack
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* @file line_buffer.cc
|
|
*/
|
|
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include "config.h"
|
|
|
|
#ifdef HAVE_BZLIB_H
|
|
# include <bzlib.h>
|
|
#endif
|
|
|
|
#include <algorithm>
|
|
#include <set>
|
|
|
|
#ifdef HAVE_X86INTRIN_H
|
|
# include "simdutf8check.h"
|
|
#endif
|
|
|
|
#include "base/auto_pid.hh"
|
|
#include "base/fs_util.hh"
|
|
#include "base/injector.bind.hh"
|
|
#include "base/injector.hh"
|
|
#include "base/is_utf8.hh"
|
|
#include "base/isc.hh"
|
|
#include "base/math_util.hh"
|
|
#include "base/paths.hh"
|
|
#include "fmtlib/fmt/format.h"
|
|
#include "hasher.hh"
|
|
#include "line_buffer.hh"
|
|
#include "piper.looper.hh"
|
|
#include "scn/scn.h"
|
|
|
|
using namespace std::chrono_literals;
|
|
|
|
static const ssize_t INITIAL_REQUEST_SIZE = 16 * 1024;
|
|
static const ssize_t DEFAULT_INCREMENT = 128 * 1024;
|
|
static const ssize_t INITIAL_COMPRESSED_BUFFER_SIZE = 5 * 1024 * 1024;
|
|
static const ssize_t MAX_COMPRESSED_BUFFER_SIZE = 32 * 1024 * 1024;
|
|
|
|
const ssize_t line_buffer::DEFAULT_LINE_BUFFER_SIZE = 256 * 1024;
|
|
const ssize_t line_buffer::MAX_LINE_BUFFER_SIZE
|
|
= 4 * 4 * line_buffer::DEFAULT_LINE_BUFFER_SIZE;
|
|
|
|
class io_looper : public isc::service<io_looper> {};
|
|
|
|
struct io_looper_tag {};
|
|
|
|
static auto bound_io = injector::bind_multiple<isc::service_base>()
|
|
.add_singleton<io_looper, io_looper_tag>();
|
|
|
|
namespace injector {
|
|
template<>
|
|
void
|
|
force_linking(io_looper_tag anno)
|
|
{
|
|
}
|
|
} // namespace injector
|
|
|
|
/*
|
|
* XXX REMOVE ME
|
|
*
|
|
* The stock bzipped file code does not use pread, so we need to use a lock to
|
|
* get exclusive access to the file. In the future, we should just rewrite
|
|
* the bzipped file code to use pread.
|
|
*/
|
|
class lock_hack {
|
|
public:
|
|
class guard {
|
|
public:
|
|
guard() : g_lock(lock_hack::singleton()) { this->g_lock.lock(); }
|
|
|
|
~guard() { this->g_lock.unlock(); }
|
|
|
|
private:
|
|
lock_hack& g_lock;
|
|
};
|
|
|
|
static lock_hack& singleton()
|
|
{
|
|
static lock_hack retval;
|
|
|
|
return retval;
|
|
}
|
|
|
|
void lock() { lockf(this->lh_fd, F_LOCK, 0); }
|
|
|
|
void unlock() { lockf(this->lh_fd, F_ULOCK, 0); }
|
|
|
|
private:
|
|
lock_hack()
|
|
{
|
|
char lockname[64];
|
|
|
|
snprintf(lockname, sizeof(lockname), "/tmp/lnav.%d.lck", getpid());
|
|
this->lh_fd = open(lockname, O_CREAT | O_RDWR, 0600);
|
|
log_perror(fcntl(this->lh_fd, F_SETFD, FD_CLOEXEC));
|
|
unlink(lockname);
|
|
}
|
|
|
|
auto_fd lh_fd;
|
|
};
|
|
/* XXX END */
|
|
|
|
#define Z_BUFSIZE 65536U
|
|
#define SYNCPOINT_SIZE (1024 * 1024)
|
|
line_buffer::gz_indexed::gz_indexed()
|
|
{
|
|
if ((this->inbuf = auto_mem<Bytef>::malloc(Z_BUFSIZE)) == NULL) {
|
|
throw std::bad_alloc();
|
|
}
|
|
}
|
|
|
|
void
|
|
line_buffer::gz_indexed::close()
|
|
{
|
|
// Release old stream, if we were open
|
|
if (*this) {
|
|
inflateEnd(&this->strm);
|
|
::close(this->gz_fd);
|
|
this->syncpoints.clear();
|
|
this->gz_fd = -1;
|
|
}
|
|
}
|
|
|
|
void
|
|
line_buffer::gz_indexed::init_stream()
|
|
{
|
|
if (*this) {
|
|
inflateEnd(&this->strm);
|
|
}
|
|
|
|
// initialize inflate struct
|
|
this->strm.zalloc = Z_NULL;
|
|
this->strm.zfree = Z_NULL;
|
|
this->strm.opaque = Z_NULL;
|
|
this->strm.avail_in = 0;
|
|
this->strm.next_in = Z_NULL;
|
|
this->strm.avail_out = 0;
|
|
int rc = inflateInit2(&strm, GZ_HEADER_MODE);
|
|
if (rc != Z_OK) {
|
|
throw(rc); // FIXME: exception wrapper
|
|
}
|
|
}
|
|
|
|
void
|
|
line_buffer::gz_indexed::continue_stream()
|
|
{
|
|
// Save our position and output buffer
|
|
auto total_in = this->strm.total_in;
|
|
auto total_out = this->strm.total_out;
|
|
auto avail_out = this->strm.avail_out;
|
|
auto next_out = this->strm.next_out;
|
|
|
|
init_stream();
|
|
|
|
// Restore position and output buffer
|
|
this->strm.total_in = total_in;
|
|
this->strm.total_out = total_out;
|
|
this->strm.avail_out = avail_out;
|
|
this->strm.next_out = next_out;
|
|
}
|
|
|
|
void
|
|
line_buffer::gz_indexed::open(int fd, lnav::gzip::header& hd)
|
|
{
|
|
this->close();
|
|
this->init_stream();
|
|
this->gz_fd = fd;
|
|
|
|
unsigned char name[1024];
|
|
unsigned char comment[4096];
|
|
|
|
name[0] = '\0';
|
|
comment[0] = '\0';
|
|
|
|
gz_header gz_hd;
|
|
memset(&gz_hd, 0, sizeof(gz_hd));
|
|
gz_hd.name = name;
|
|
gz_hd.name_max = sizeof(name);
|
|
gz_hd.comment = comment;
|
|
gz_hd.comm_max = sizeof(comment);
|
|
|
|
Bytef inbuf[8192];
|
|
Bytef outbuf[8192];
|
|
this->strm.next_out = outbuf;
|
|
this->strm.total_out = 0;
|
|
this->strm.avail_out = sizeof(outbuf);
|
|
this->strm.next_in = inbuf;
|
|
this->strm.total_in = 0;
|
|
|
|
if (inflateGetHeader(&this->strm, &gz_hd) == Z_OK) {
|
|
auto rc = pread(fd, inbuf, sizeof(inbuf), 0);
|
|
if (rc >= 0) {
|
|
this->strm.avail_in = rc;
|
|
|
|
inflate(&this->strm, Z_BLOCK);
|
|
inflateEnd(&this->strm);
|
|
|
|
this->strm.next_out = Z_NULL;
|
|
this->strm.next_in = Z_NULL;
|
|
this->strm.next_in = Z_NULL;
|
|
this->strm.total_in = 0;
|
|
this->strm.avail_in = 0;
|
|
this->init_stream();
|
|
|
|
switch (gz_hd.done) {
|
|
case 0:
|
|
log_debug("%d: no gzip header data", fd);
|
|
break;
|
|
case 1:
|
|
hd.h_mtime.tv_sec = gz_hd.time;
|
|
hd.h_name = std::string((char*) name);
|
|
hd.h_comment = std::string((char*) comment);
|
|
break;
|
|
default:
|
|
log_error("%d: failed to read gzip header data", fd);
|
|
break;
|
|
}
|
|
} else {
|
|
log_error("%d: failed to read gzip header from file: %s",
|
|
fd,
|
|
strerror(errno));
|
|
}
|
|
} else {
|
|
log_error("%d: unable to get gzip header", fd);
|
|
}
|
|
}
|
|
|
|
int
|
|
line_buffer::gz_indexed::stream_data(void* buf, size_t size)
|
|
{
|
|
this->strm.avail_out = size;
|
|
this->strm.next_out = (unsigned char*) buf;
|
|
|
|
size_t last = this->syncpoints.empty() ? 0 : this->syncpoints.back().in;
|
|
while (this->strm.avail_out) {
|
|
if (!this->strm.avail_in) {
|
|
int rc = ::pread(
|
|
this->gz_fd, &this->inbuf[0], Z_BUFSIZE, this->strm.total_in);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
this->strm.next_in = this->inbuf;
|
|
this->strm.avail_in = rc;
|
|
}
|
|
if (this->strm.avail_in) {
|
|
int flush = last > this->strm.total_in ? Z_SYNC_FLUSH : Z_BLOCK;
|
|
auto err = inflate(&this->strm, flush);
|
|
if (err == Z_STREAM_END) {
|
|
// Reached end of stream; re-init for a possible subsequent
|
|
// stream
|
|
continue_stream();
|
|
} else if (err != Z_OK) {
|
|
log_error(" inflate-error: %d %s",
|
|
(int) err,
|
|
this->strm.msg ? this->strm.msg : "");
|
|
break;
|
|
}
|
|
|
|
if (this->strm.total_in >= last + SYNCPOINT_SIZE
|
|
&& size > this->strm.avail_out + GZ_WINSIZE
|
|
&& (this->strm.data_type & GZ_END_OF_BLOCK_MASK)
|
|
&& !(this->strm.data_type & GZ_END_OF_FILE_MASK))
|
|
{
|
|
this->syncpoints.emplace_back(this->strm, size);
|
|
last = this->strm.total_out;
|
|
}
|
|
} else if (this->strm.avail_out) {
|
|
// Processed all the gz file data but didn't fill
|
|
// the output buffer. We're done, even though we
|
|
// produced fewer bytes than requested.
|
|
break;
|
|
}
|
|
}
|
|
return size - this->strm.avail_out;
|
|
}
|
|
|
|
void
|
|
line_buffer::gz_indexed::seek(off_t offset)
|
|
{
|
|
if ((size_t) offset == this->strm.total_out) {
|
|
return;
|
|
}
|
|
|
|
indexDict* dict = nullptr;
|
|
// Find highest syncpoint not past offset
|
|
// FIXME: Make this a binary-tree search
|
|
for (auto& d : this->syncpoints) {
|
|
if (d.out <= offset) {
|
|
dict = &d;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Choose highest available syncpoint, or keep current offset if it's ok
|
|
if ((size_t) offset < this->strm.total_out
|
|
|| (dict && this->strm.total_out < (size_t) dict->out))
|
|
{
|
|
// Release the old z_stream
|
|
inflateEnd(&this->strm);
|
|
if (dict) {
|
|
dict->apply(&this->strm);
|
|
} else {
|
|
init_stream();
|
|
}
|
|
}
|
|
|
|
// Stream from compressed file until we reach our offset
|
|
unsigned char dummy[Z_BUFSIZE];
|
|
while ((size_t) offset > this->strm.total_out) {
|
|
size_t to_copy
|
|
= std::min(static_cast<size_t>(Z_BUFSIZE),
|
|
static_cast<size_t>(offset - this->strm.total_out));
|
|
auto bytes = stream_data(dummy, to_copy);
|
|
if (bytes <= 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
line_buffer::gz_indexed::read(void* buf, size_t offset, size_t size)
|
|
{
|
|
if (offset != this->strm.total_out) {
|
|
// log_debug("doing seek! %d %d", offset, this->strm.total_out);
|
|
this->seek(offset);
|
|
}
|
|
|
|
int bytes = stream_data(buf, size);
|
|
|
|
return bytes;
|
|
}
|
|
|
|
/**
 * Construct an empty line buffer; all state comes from the in-class member
 * initializers, so the only work here is checking the class invariant.
 */
line_buffer::line_buffer()
{
    ensure(this->invariant());
}
|
|
|
|
/**
 * Detach outstanding shared references and release the file.
 *
 * NOTE(review): fill_range() posts a job that captures `this`; nothing here
 * waits on lb_loader_future, so confirm a queued preload cannot outlive the
 * object.
 */
line_buffer::~line_buffer()
{
    auto_fd no_fd;

    // Make sure any shared refs take ownership of the data.
    this->lb_share_manager.invalidate_refs();
    // Installing an empty descriptor closes whatever we had open.
    this->set_fd(no_fd);
}
|
|
|
|
void
|
|
line_buffer::set_fd(auto_fd& fd)
|
|
{
|
|
file_off_t newoff = 0;
|
|
|
|
{
|
|
safe::WriteAccess<safe_gz_indexed> gi(this->lb_gz_file);
|
|
|
|
if (*gi) {
|
|
gi->close();
|
|
}
|
|
}
|
|
|
|
if (this->lb_bz_file) {
|
|
this->lb_bz_file = false;
|
|
}
|
|
|
|
if (fd != -1) {
|
|
/* Sync the fd's offset with the object. */
|
|
newoff = lseek(fd, 0, SEEK_CUR);
|
|
if (newoff == -1) {
|
|
if (errno != ESPIPE) {
|
|
throw error(errno);
|
|
}
|
|
|
|
/* It's a pipe, start with a zero offset. */
|
|
newoff = 0;
|
|
this->lb_seekable = false;
|
|
} else {
|
|
char gz_id[2 + 1 + 1 + 4];
|
|
|
|
if (pread(fd, gz_id, sizeof(gz_id), 0) == sizeof(gz_id)) {
|
|
auto piper_hdr_opt = lnav::piper::read_header(fd, gz_id);
|
|
|
|
if (piper_hdr_opt) {
|
|
static intern_string_t SRC = intern_string::lookup("piper");
|
|
|
|
auto meta_buf = std::move(piper_hdr_opt.value());
|
|
|
|
auto meta_sf = string_fragment::from_bytes(meta_buf.in(),
|
|
meta_buf.size());
|
|
auto meta_parse_res
|
|
= lnav::piper::header_handlers.parser_for(SRC).of(
|
|
meta_sf);
|
|
if (meta_parse_res.isErr()) {
|
|
log_error("failed to parse piper header: %s",
|
|
meta_parse_res.unwrapErr()[0]
|
|
.to_attr_line()
|
|
.get_string()
|
|
.c_str());
|
|
throw error(EINVAL);
|
|
}
|
|
|
|
this->lb_line_metadata = true;
|
|
this->lb_file_offset
|
|
= lnav::piper::HEADER_SIZE + meta_buf.size();
|
|
this->lb_piper_header_size = this->lb_file_offset;
|
|
this->lb_header = meta_parse_res.unwrap();
|
|
} else if (gz_id[0] == '\037' && gz_id[1] == '\213') {
|
|
int gzfd = dup(fd);
|
|
|
|
log_perror(fcntl(gzfd, F_SETFD, FD_CLOEXEC));
|
|
if (lseek(fd, 0, SEEK_SET) < 0) {
|
|
close(gzfd);
|
|
throw error(errno);
|
|
}
|
|
lnav::gzip::header hdr;
|
|
|
|
this->lb_gz_file.writeAccess()->open(gzfd, hdr);
|
|
this->lb_compressed = true;
|
|
this->lb_file_time = hdr.h_mtime.tv_sec;
|
|
if (this->lb_file_time < 0) {
|
|
this->lb_file_time = 0;
|
|
}
|
|
this->lb_compressed_offset
|
|
= lseek(this->lb_fd, 0, SEEK_CUR);
|
|
if (!hdr.empty()) {
|
|
this->lb_header = std::move(hdr);
|
|
}
|
|
this->resize_buffer(INITIAL_COMPRESSED_BUFFER_SIZE);
|
|
}
|
|
#ifdef HAVE_BZLIB_H
|
|
else if (gz_id[0] == 'B' && gz_id[1] == 'Z')
|
|
{
|
|
if (lseek(fd, 0, SEEK_SET) < 0) {
|
|
throw error(errno);
|
|
}
|
|
this->lb_bz_file = true;
|
|
this->lb_compressed = true;
|
|
|
|
/*
|
|
* Loading data from a bzip2 file is pretty slow, so we try
|
|
* to keep as much in memory as possible.
|
|
*/
|
|
this->resize_buffer(INITIAL_COMPRESSED_BUFFER_SIZE);
|
|
|
|
this->lb_compressed_offset = 0;
|
|
}
|
|
#endif
|
|
}
|
|
this->lb_seekable = true;
|
|
}
|
|
}
|
|
this->lb_file_offset = newoff;
|
|
this->lb_buffer.clear();
|
|
this->lb_fd = std::move(fd);
|
|
|
|
ensure(this->invariant());
|
|
}
|
|
|
|
void
|
|
line_buffer::resize_buffer(size_t new_max)
|
|
{
|
|
if (new_max <= MAX_LINE_BUFFER_SIZE
|
|
&& new_max > (size_t) this->lb_buffer.capacity())
|
|
{
|
|
/* Still need more space, try a realloc. */
|
|
this->lb_share_manager.invalidate_refs();
|
|
this->lb_buffer.expand_to(new_max);
|
|
}
|
|
}
|
|
|
|
void
|
|
line_buffer::ensure_available(file_off_t start, ssize_t max_length)
|
|
{
|
|
ssize_t prefill, available;
|
|
|
|
require(this->lb_compressed || max_length <= MAX_LINE_BUFFER_SIZE);
|
|
|
|
// log_debug("ensure avail %d %d", start, max_length);
|
|
|
|
if (this->lb_file_size != -1) {
|
|
if (start + (file_off_t) max_length > this->lb_file_size) {
|
|
max_length = (this->lb_file_size - start);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check to see if the start is inside the cached range or immediately
|
|
* after.
|
|
*/
|
|
if (start < this->lb_file_offset
|
|
|| start > (file_off_t) (this->lb_file_offset + this->lb_buffer.size()))
|
|
{
|
|
/*
|
|
* The request is outside the cached range, need to reload the
|
|
* whole thing.
|
|
*/
|
|
this->lb_share_manager.invalidate_refs();
|
|
prefill = 0;
|
|
this->lb_buffer.clear();
|
|
if ((this->lb_file_size != (ssize_t) -1)
|
|
&& (start + this->lb_buffer.capacity() > this->lb_file_size))
|
|
{
|
|
require(start <= this->lb_file_size);
|
|
/*
|
|
* If the start is near the end of the file, move the offset back a
|
|
* bit so we can get more of the file in the cache.
|
|
*/
|
|
this->lb_file_offset = this->lb_file_size
|
|
- std::min(this->lb_file_size,
|
|
(file_ssize_t) this->lb_buffer.capacity());
|
|
} else {
|
|
this->lb_file_offset = start;
|
|
}
|
|
} else {
|
|
/* The request is in the cached range. Record how much extra data is in
|
|
* the buffer before the requested range.
|
|
*/
|
|
prefill = start - this->lb_file_offset;
|
|
}
|
|
require(this->lb_file_offset <= start);
|
|
require(prefill <= this->lb_buffer.size());
|
|
|
|
available = this->lb_buffer.capacity() - (start - this->lb_file_offset);
|
|
require(available <= this->lb_buffer.capacity());
|
|
|
|
if (max_length > available) {
|
|
// log_debug("need more space!");
|
|
/*
|
|
* Need more space, move any existing data to the front of the
|
|
* buffer.
|
|
*/
|
|
this->lb_share_manager.invalidate_refs();
|
|
|
|
this->lb_buffer.resize_by(-prefill);
|
|
this->lb_file_offset += prefill;
|
|
// log_debug("adjust file offset for prefill %d", this->lb_file_offset);
|
|
memmove(this->lb_buffer.at(0),
|
|
this->lb_buffer.at(prefill),
|
|
this->lb_buffer.size());
|
|
|
|
available = this->lb_buffer.capacity() - (start - this->lb_file_offset);
|
|
if (max_length > available) {
|
|
this->resize_buffer(roundup_size(max_length, DEFAULT_INCREMENT));
|
|
}
|
|
}
|
|
this->lb_line_starts.clear();
|
|
this->lb_line_is_utf.clear();
|
|
}
|
|
|
|
bool
|
|
line_buffer::load_next_buffer()
|
|
{
|
|
// log_debug("loader here!");
|
|
auto retval = false;
|
|
auto start = this->lb_loader_file_offset.value();
|
|
ssize_t rc = 0;
|
|
safe::WriteAccess<safe_gz_indexed> gi(this->lb_gz_file);
|
|
|
|
// log_debug("BEGIN preload read");
|
|
/* ... read in the new data. */
|
|
if (!this->lb_cached_fd && *gi) {
|
|
if (this->lb_file_size != (ssize_t) -1 && this->in_range(start)
|
|
&& this->in_range(this->lb_file_size - 1))
|
|
{
|
|
rc = 0;
|
|
} else {
|
|
// log_debug("async decomp start");
|
|
rc = gi->read(this->lb_alt_buffer.value().end(),
|
|
start + this->lb_alt_buffer.value().size(),
|
|
this->lb_alt_buffer.value().available());
|
|
this->lb_compressed_offset = gi->get_source_offset();
|
|
if (rc != -1 && (rc < this->lb_alt_buffer.value().available())
|
|
&& (start + this->lb_alt_buffer.value().size() + rc
|
|
> this->lb_file_size))
|
|
{
|
|
this->lb_file_size
|
|
= (start + this->lb_alt_buffer.value().size() + rc);
|
|
}
|
|
#if 0
|
|
log_debug("async decomp end %d+%d:%d",
|
|
this->lb_alt_buffer->size(),
|
|
rc,
|
|
this->lb_alt_buffer->capacity());
|
|
#endif
|
|
}
|
|
}
|
|
#ifdef HAVE_BZLIB_H
|
|
else if (!this->lb_cached_fd && this->lb_bz_file)
|
|
{
|
|
if (this->lb_file_size != (ssize_t) -1
|
|
&& (((ssize_t) start >= this->lb_file_size)
|
|
|| (this->in_range(start)
|
|
&& this->in_range(this->lb_file_size - 1))))
|
|
{
|
|
rc = 0;
|
|
} else {
|
|
lock_hack::guard guard;
|
|
char scratch[32 * 1024];
|
|
BZFILE* bz_file;
|
|
file_off_t seek_to;
|
|
int bzfd;
|
|
|
|
/*
|
|
* Unfortunately, there is no bzseek, so we need to reopen the
|
|
* file every time we want to do a read.
|
|
*/
|
|
bzfd = dup(this->lb_fd);
|
|
if (lseek(this->lb_fd, 0, SEEK_SET) < 0) {
|
|
close(bzfd);
|
|
throw error(errno);
|
|
}
|
|
if ((bz_file = BZ2_bzdopen(bzfd, "r")) == nullptr) {
|
|
close(bzfd);
|
|
if (errno == 0) {
|
|
throw std::bad_alloc();
|
|
} else {
|
|
throw error(errno);
|
|
}
|
|
}
|
|
|
|
seek_to = start + this->lb_alt_buffer.value().size();
|
|
while (seek_to > 0) {
|
|
int count;
|
|
|
|
count = BZ2_bzread(bz_file,
|
|
scratch,
|
|
std::min((size_t) seek_to, sizeof(scratch)));
|
|
seek_to -= count;
|
|
}
|
|
rc = BZ2_bzread(bz_file,
|
|
this->lb_alt_buffer->end(),
|
|
this->lb_alt_buffer->available());
|
|
this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET);
|
|
BZ2_bzclose(bz_file);
|
|
|
|
if (rc != -1 && (rc < (this->lb_alt_buffer.value().available()))
|
|
&& (start + this->lb_alt_buffer.value().size() + rc
|
|
> this->lb_file_size))
|
|
{
|
|
this->lb_file_size
|
|
= (start + this->lb_alt_buffer.value().size() + rc);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
rc = pread(this->lb_cached_fd ? this->lb_cached_fd.value().get()
|
|
: this->lb_fd.get(),
|
|
this->lb_alt_buffer.value().end(),
|
|
this->lb_alt_buffer.value().available(),
|
|
start + this->lb_alt_buffer.value().size());
|
|
}
|
|
// XXX For some reason, cygwin is giving us a bogus return value when
|
|
// up to the end of the file.
|
|
if (rc > (this->lb_alt_buffer.value().available())) {
|
|
rc = -1;
|
|
#ifdef ENODATA
|
|
errno = ENODATA;
|
|
#else
|
|
errno = EAGAIN;
|
|
#endif
|
|
}
|
|
switch (rc) {
|
|
case 0:
|
|
if (start < (file_off_t) this->lb_file_size) {
|
|
retval = true;
|
|
}
|
|
break;
|
|
|
|
case (ssize_t) -1:
|
|
switch (errno) {
|
|
#ifdef ENODATA
|
|
/* Cygwin seems to return this when pread reaches the end of
|
|
* the file. */
|
|
case ENODATA:
|
|
#endif
|
|
case EINTR:
|
|
case EAGAIN:
|
|
break;
|
|
|
|
default:
|
|
throw error(errno);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
this->lb_alt_buffer.value().resize_by(rc);
|
|
retval = true;
|
|
break;
|
|
}
|
|
// log_debug("END preload read");
|
|
|
|
if (start > this->lb_last_line_offset) {
|
|
const auto* line_start = this->lb_alt_buffer.value().begin();
|
|
|
|
do {
|
|
auto before = line_start - this->lb_alt_buffer->begin();
|
|
auto remaining = this->lb_alt_buffer.value().size() - before;
|
|
auto frag = string_fragment::from_bytes(line_start, remaining);
|
|
auto utf_scan_res = is_utf8(frag, '\n');
|
|
auto lf = utf_scan_res.remaining_ptr(frag);
|
|
this->lb_alt_line_starts.emplace_back(before);
|
|
this->lb_alt_line_is_utf.emplace_back(utf_scan_res.is_valid());
|
|
this->lb_alt_line_has_ansi.emplace_back(utf_scan_res.usr_has_ansi);
|
|
|
|
line_start = lf;
|
|
} while (line_start != nullptr
|
|
&& line_start < this->lb_alt_buffer->end());
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
bool
|
|
line_buffer::fill_range(file_off_t start, ssize_t max_length)
|
|
{
|
|
bool retval = false;
|
|
|
|
require(start >= 0);
|
|
|
|
// log_debug("fill range %d %d", start, max_length);
|
|
#if 0
|
|
log_debug("(%p) fill range %d %d (%d) %d",
|
|
this,
|
|
start,
|
|
max_length,
|
|
this->lb_file_offset,
|
|
this->lb_buffer.size());
|
|
#endif
|
|
if (!lnav::pid::in_child && this->lb_loader_future.valid()
|
|
&& start >= this->lb_loader_file_offset.value())
|
|
{
|
|
#if 0
|
|
log_debug("getting preload! %d %d",
|
|
start,
|
|
this->lb_loader_file_offset.value());
|
|
#endif
|
|
nonstd::optional<std::chrono::system_clock::time_point> wait_start;
|
|
|
|
if (this->lb_loader_future.wait_for(std::chrono::seconds(0))
|
|
!= std::future_status::ready)
|
|
{
|
|
wait_start
|
|
= nonstd::make_optional(std::chrono::system_clock::now());
|
|
}
|
|
retval = this->lb_loader_future.get();
|
|
if (false && wait_start) {
|
|
auto diff = std::chrono::system_clock::now() - wait_start.value();
|
|
log_debug("wait done! %d", diff.count());
|
|
}
|
|
// log_debug("got preload");
|
|
this->lb_loader_future = {};
|
|
this->lb_share_manager.invalidate_refs();
|
|
this->lb_file_offset = this->lb_loader_file_offset.value();
|
|
this->lb_loader_file_offset = nonstd::nullopt;
|
|
this->lb_buffer.swap(this->lb_alt_buffer.value());
|
|
this->lb_alt_buffer.value().clear();
|
|
this->lb_line_starts = std::move(this->lb_alt_line_starts);
|
|
this->lb_alt_line_starts.clear();
|
|
this->lb_line_is_utf = std::move(this->lb_alt_line_is_utf);
|
|
this->lb_alt_line_is_utf.clear();
|
|
this->lb_line_has_ansi = std::move(this->lb_alt_line_has_ansi);
|
|
this->lb_alt_line_has_ansi.clear();
|
|
this->lb_stats.s_used_preloads += 1;
|
|
}
|
|
if (this->in_range(start)
|
|
&& (max_length == 0 || this->in_range(start + max_length - 1)))
|
|
{
|
|
/* Cache already has the data, nothing to do. */
|
|
retval = true;
|
|
if (!lnav::pid::in_child && this->lb_seekable && this->lb_buffer.full()
|
|
&& !this->lb_loader_file_offset)
|
|
{
|
|
// log_debug("loader available start=%d", start);
|
|
auto last_lf_iter = std::find(
|
|
this->lb_buffer.rbegin(), this->lb_buffer.rend(), '\n');
|
|
if (last_lf_iter != this->lb_buffer.rend()) {
|
|
auto usable_size
|
|
= std::distance(last_lf_iter, this->lb_buffer.rend());
|
|
// log_debug("found linefeed %d", usable_size);
|
|
if (!this->lb_alt_buffer) {
|
|
// log_debug("allocating new buffer!");
|
|
this->lb_alt_buffer
|
|
= auto_buffer::alloc(this->lb_buffer.capacity());
|
|
}
|
|
this->lb_alt_buffer->resize(this->lb_buffer.size()
|
|
- usable_size);
|
|
memcpy(this->lb_alt_buffer.value().begin(),
|
|
this->lb_buffer.at(usable_size),
|
|
this->lb_alt_buffer->size());
|
|
this->lb_loader_file_offset
|
|
= this->lb_file_offset + usable_size;
|
|
#if 0
|
|
log_debug("load offset %d",
|
|
this->lb_loader_file_offset.value());
|
|
log_debug("launch loader");
|
|
#endif
|
|
auto prom = std::make_shared<std::promise<bool>>();
|
|
this->lb_loader_future = prom->get_future();
|
|
this->lb_stats.s_requested_preloads += 1;
|
|
isc::to<io_looper&, io_looper_tag>().send(
|
|
[this, prom](auto& ioloop) mutable {
|
|
prom->set_value(this->load_next_buffer());
|
|
});
|
|
}
|
|
}
|
|
} else if (this->lb_fd != -1) {
|
|
ssize_t rc;
|
|
|
|
/* Make sure there is enough space, then */
|
|
this->ensure_available(start, max_length);
|
|
|
|
safe::WriteAccess<safe_gz_indexed> gi(this->lb_gz_file);
|
|
|
|
/* ... read in the new data. */
|
|
if (!this->lb_cached_fd && *gi) {
|
|
// log_debug("old decomp start");
|
|
if (this->lb_file_size != (ssize_t) -1 && this->in_range(start)
|
|
&& this->in_range(this->lb_file_size - 1))
|
|
{
|
|
rc = 0;
|
|
} else {
|
|
this->lb_stats.s_decompressions += 1;
|
|
if (false && this->lb_last_line_offset > 0) {
|
|
this->lb_stats.s_hist[(this->lb_file_offset * 10)
|
|
/ this->lb_last_line_offset]
|
|
+= 1;
|
|
}
|
|
rc = gi->read(this->lb_buffer.end(),
|
|
this->lb_file_offset + this->lb_buffer.size(),
|
|
this->lb_buffer.available());
|
|
this->lb_compressed_offset = gi->get_source_offset();
|
|
if (rc != -1 && (rc < this->lb_buffer.available())) {
|
|
this->lb_file_size
|
|
= (this->lb_file_offset + this->lb_buffer.size() + rc);
|
|
}
|
|
}
|
|
#if 0
|
|
log_debug("old decomp end -- %d+%d:%d",
|
|
this->lb_buffer.size(),
|
|
rc,
|
|
this->lb_buffer.capacity());
|
|
#endif
|
|
}
|
|
#ifdef HAVE_BZLIB_H
|
|
else if (!this->lb_cached_fd && this->lb_bz_file)
|
|
{
|
|
if (this->lb_file_size != (ssize_t) -1
|
|
&& (((ssize_t) start >= this->lb_file_size)
|
|
|| (this->in_range(start)
|
|
&& this->in_range(this->lb_file_size - 1))))
|
|
{
|
|
rc = 0;
|
|
} else {
|
|
lock_hack::guard guard;
|
|
char scratch[32 * 1024];
|
|
BZFILE* bz_file;
|
|
file_off_t seek_to;
|
|
int bzfd;
|
|
|
|
/*
|
|
* Unfortunately, there is no bzseek, so we need to reopen the
|
|
* file every time we want to do a read.
|
|
*/
|
|
bzfd = dup(this->lb_fd);
|
|
if (lseek(this->lb_fd, 0, SEEK_SET) < 0) {
|
|
close(bzfd);
|
|
throw error(errno);
|
|
}
|
|
if ((bz_file = BZ2_bzdopen(bzfd, "r")) == NULL) {
|
|
close(bzfd);
|
|
if (errno == 0) {
|
|
throw std::bad_alloc();
|
|
} else {
|
|
throw error(errno);
|
|
}
|
|
}
|
|
|
|
seek_to = this->lb_file_offset + this->lb_buffer.size();
|
|
while (seek_to > 0) {
|
|
int count;
|
|
|
|
count = BZ2_bzread(
|
|
bz_file,
|
|
scratch,
|
|
std::min((size_t) seek_to, sizeof(scratch)));
|
|
seek_to -= count;
|
|
}
|
|
rc = BZ2_bzread(bz_file,
|
|
this->lb_buffer.end(),
|
|
this->lb_buffer.available());
|
|
this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET);
|
|
BZ2_bzclose(bz_file);
|
|
|
|
if (rc != -1 && (rc < (this->lb_buffer.available()))) {
|
|
this->lb_file_size
|
|
= (this->lb_file_offset + this->lb_buffer.size() + rc);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
else if (this->lb_seekable)
|
|
{
|
|
this->lb_stats.s_preads += 1;
|
|
if (false && this->lb_last_line_offset > 0) {
|
|
this->lb_stats.s_hist[(this->lb_file_offset * 10)
|
|
/ this->lb_last_line_offset]
|
|
+= 1;
|
|
}
|
|
#if 0
|
|
log_debug("%d: pread %lld",
|
|
this->lb_fd.get(),
|
|
this->lb_file_offset + this->lb_buffer.size());
|
|
#endif
|
|
rc = pread(this->lb_cached_fd ? this->lb_cached_fd.value().get()
|
|
: this->lb_fd.get(),
|
|
this->lb_buffer.end(),
|
|
this->lb_buffer.available(),
|
|
this->lb_file_offset + this->lb_buffer.size());
|
|
// log_debug("pread rc %d", rc);
|
|
} else {
|
|
rc = read(this->lb_fd,
|
|
this->lb_buffer.end(),
|
|
this->lb_buffer.available());
|
|
}
|
|
// XXX For some reason, cygwin is giving us a bogus return value when
|
|
// up to the end of the file.
|
|
if (rc > (this->lb_buffer.available())) {
|
|
rc = -1;
|
|
#ifdef ENODATA
|
|
errno = ENODATA;
|
|
#else
|
|
errno = EAGAIN;
|
|
#endif
|
|
}
|
|
switch (rc) {
|
|
case 0:
|
|
if (!this->lb_seekable) {
|
|
this->lb_file_size
|
|
= this->lb_file_offset + this->lb_buffer.size();
|
|
}
|
|
if (start < (file_off_t) this->lb_file_size) {
|
|
retval = true;
|
|
}
|
|
|
|
if (this->lb_compressed) {
|
|
/*
|
|
* For compressed files, increase the buffer size so we
|
|
* don't have to spend as much time uncompressing the data.
|
|
*/
|
|
this->resize_buffer(MAX_COMPRESSED_BUFFER_SIZE);
|
|
}
|
|
break;
|
|
|
|
case (ssize_t) -1:
|
|
switch (errno) {
|
|
#ifdef ENODATA
|
|
/* Cygwin seems to return this when pread reaches the end of
|
|
* the */
|
|
/* file. */
|
|
case ENODATA:
|
|
#endif
|
|
case EINTR:
|
|
case EAGAIN:
|
|
break;
|
|
|
|
default:
|
|
throw error(errno);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
this->lb_buffer.resize_by(rc);
|
|
retval = true;
|
|
break;
|
|
}
|
|
|
|
if (!lnav::pid::in_child && this->lb_seekable && this->lb_buffer.full()
|
|
&& !this->lb_loader_file_offset)
|
|
{
|
|
// log_debug("loader available2 start=%d", start);
|
|
auto last_lf_iter = std::find(
|
|
this->lb_buffer.rbegin(), this->lb_buffer.rend(), '\n');
|
|
if (last_lf_iter != this->lb_buffer.rend()) {
|
|
auto usable_size
|
|
= std::distance(last_lf_iter, this->lb_buffer.rend());
|
|
// log_debug("found linefeed %d", usable_size);
|
|
if (!this->lb_alt_buffer) {
|
|
// log_debug("allocating new buffer!");
|
|
this->lb_alt_buffer
|
|
= auto_buffer::alloc(this->lb_buffer.capacity());
|
|
} else if (this->lb_alt_buffer->capacity()
|
|
< this->lb_buffer.capacity())
|
|
{
|
|
this->lb_alt_buffer->expand_to(this->lb_buffer.capacity());
|
|
}
|
|
this->lb_alt_buffer->resize(this->lb_buffer.size()
|
|
- usable_size);
|
|
memcpy(this->lb_alt_buffer->begin(),
|
|
this->lb_buffer.at(usable_size),
|
|
this->lb_alt_buffer->size());
|
|
this->lb_loader_file_offset
|
|
= this->lb_file_offset + usable_size;
|
|
#if 0
|
|
log_debug("load offset %d",
|
|
this->lb_loader_file_offset.value());
|
|
log_debug("launch loader");
|
|
#endif
|
|
auto prom = std::make_shared<std::promise<bool>>();
|
|
this->lb_loader_future = prom->get_future();
|
|
this->lb_stats.s_requested_preloads += 1;
|
|
isc::to<io_looper&, io_looper_tag>().send(
|
|
[this, prom](auto& ioloop) mutable {
|
|
prom->set_value(this->load_next_buffer());
|
|
});
|
|
}
|
|
}
|
|
ensure(this->lb_buffer.size() <= this->lb_buffer.capacity());
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
/**
 * Find the line that follows `prev_line` in the file and describe it.
 *
 * Reading starts at `prev_line.next_offset()`.  The buffer is filled as
 * needed and then searched for the end of the line, first through the
 * `lb_line_starts` index and, failing that, with a UTF-8 aware scan for
 * '\n'.  The search loop stops when a delimiter is found, when the line or
 * the read request reaches MAX_LINE_BUFFER_SIZE, or when a refill produced
 * no new data.
 *
 * @param prev_line The range of the previously returned line.  When
 *   `lb_line_metadata` is set and `fr_offset` is zero, `fr_offset` is first
 *   replaced with `lb_piper_header_size`.
 * @return Every return in this function is an `Ok` holding a line_info:
 *   - `li_file_range` starts at the computed offset; when a delimiter was
 *     found its size includes that delimiter, and an oversized line is
 *     capped at MAX_LINE_BUFFER_SIZE - 1 bytes.
 *   - `li_partial` is true when no delimiter was seen and the line is under
 *     the size cap.
 *   - `li_timestamp` / `li_level` are filled from the "sec.usec:level;"
 *     prefix when `lb_line_metadata` is set and the prefix parses.
 */
Result<line_info, std::string>
line_buffer::load_next_line(file_range prev_line)
{
    const char* line_start = nullptr;
    bool done = false;
    line_info retval;

    require(this->lb_fd != -1);

    if (this->lb_line_metadata && prev_line.fr_offset == 0) {
        prev_line.fr_offset = this->lb_piper_header_size;
    }

    auto offset = prev_line.next_offset();
    ssize_t request_size = INITIAL_REQUEST_SIZE;
    retval.li_file_range.fr_offset = offset;
    if (this->lb_buffer.empty() || !this->in_range(offset)) {
        /* Nothing usable is buffered for this offset, load a full buffer. */
        this->fill_range(offset, this->lb_buffer.capacity());
    } else if (offset == this->lb_file_offset + this->lb_buffer.size()) {
        /* The offset is exactly at the end of the buffered data. */
        if (!this->fill_range(offset, INITIAL_REQUEST_SIZE)) {
            /*
             * No more data could be loaded; report an empty line.  For a
             * pipe, it is only partial while the pipe is still open.
             */
            retval.li_file_range.fr_offset = offset;
            retval.li_file_range.fr_size = 0;
            if (this->is_pipe()) {
                retval.li_partial = !this->is_pipe_closed();
            } else {
                retval.li_partial = true;
            }
            return Ok(retval);
        }
    }
    if (prev_line.next_offset() == 0) {
        /*
         * Reading from offset zero: validate everything currently buffered
         * as UTF-8 and remember the verdict in lb_is_utf8.
         */
        auto is_utf_res = is_utf8(string_fragment::from_bytes(
            this->lb_buffer.begin(), this->lb_buffer.size()));
        this->lb_is_utf8 = is_utf_res.is_valid();
        if (!this->lb_is_utf8) {
            log_warning("input is not utf8 -- %s", is_utf_res.usr_message);
        }
    }
    while (!done) {
        auto old_retval_size = retval.li_file_range.fr_size;
        const char* lf = nullptr;

        /* Find the data in the cache and */
        line_start = this->get_range(offset, retval.li_file_range.fr_size);
        /* ... look for the end-of-line or end-of-file. */
        ssize_t utf8_end = -1;

        bool found_in_cache = false;
        if (!this->lb_line_starts.empty()) {
            /*
             * lb_line_starts is searched with a buffer-relative offset.  If
             * an entry at or after this offset has a successor, the byte
             * before that successor is taken as the end of this line.
             */
            auto buffer_offset = offset - this->lb_file_offset;

            auto start_iter = std::lower_bound(this->lb_line_starts.begin(),
                                               this->lb_line_starts.end(),
                                               buffer_offset);
            if (start_iter != this->lb_line_starts.end()) {
                auto next_line_iter = start_iter + 1;

                // log_debug("found offset %d %d", buffer_offset, *start_iter);
                if (next_line_iter != this->lb_line_starts.end()) {
                    utf8_end = *next_line_iter - 1 - *start_iter;
                    found_in_cache = true;
                    lf = line_start + utf8_end;
                } else {
                    // log_debug("no next iter");
                }
            } else {
                // log_debug("no buffer_offset found");
            }
        }

        if (!found_in_cache) {
            /*
             * Scan the available bytes, stopping at '\n'.  The pointer
             * returned by remaining_ptr() is stepped back by one so that
             * `lf` refers to the delimiter itself (presumably the returned
             * pointer is just past it -- confirm against is_utf8()).
             */
            auto frag = string_fragment::from_bytes(
                line_start, retval.li_file_range.fr_size);
            auto scan_res = is_utf8(frag, '\n');
            lf = scan_res.remaining_ptr(frag);
            if (lf != nullptr) {
                lf -= 1;
            }
            retval.li_utf8_scan_result = scan_res;
        }

        auto got_new_data = old_retval_size != retval.li_file_range.fr_size;
#if 0
        log_debug("load next loop %p reqsize %d lsize %d",
                  lf,
                  request_size,
                  retval.li_file_range.fr_size);
#endif
        if (lf != nullptr
            || (retval.li_file_range.fr_size >= MAX_LINE_BUFFER_SIZE)
            || (request_size >= MAX_LINE_BUFFER_SIZE)
            || (!got_new_data
                && (!this->is_pipe() || request_size > DEFAULT_INCREMENT)))
        {
            /* A delimiter that is too far out is treated as not found. */
            if ((lf != nullptr)
                && ((size_t) (lf - line_start) >= MAX_LINE_BUFFER_SIZE - 1))
            {
                lf = nullptr;
            }
            if (lf != nullptr) {
                retval.li_partial = false;
                retval.li_file_range.fr_size = lf - line_start;
                // delim
                retval.li_file_range.fr_size += 1;
                if (offset >= this->lb_last_line_offset) {
                    this->lb_last_line_offset
                        = offset + retval.li_file_range.fr_size;
                }
            } else {
                if (retval.li_file_range.fr_size >= MAX_LINE_BUFFER_SIZE) {
                    /*
                     * The offset is a file offset, so it is cast to
                     * long long and printed with %lld instead of %d.
                     */
                    log_warning("Line exceeded max size: offset=%lld",
                                static_cast<long long>(offset));
                    retval.li_file_range.fr_size = MAX_LINE_BUFFER_SIZE - 1;
                    retval.li_partial = false;
                } else {
                    retval.li_partial = true;
                }
                this->ensure_available(offset, retval.li_file_range.fr_size);

                if (retval.li_file_range.fr_size >= MAX_LINE_BUFFER_SIZE) {
                    retval.li_file_range.fr_size = MAX_LINE_BUFFER_SIZE - 1;
                }
                if (retval.li_partial) {
                    /*
                     * Since no delimiter was seen, we need to remember the
                     * offset of the last line in the file so we don't
                     * mistakenly return two partial lines to the caller.
                     *
                     *   1. read_line() - returns partial line
                     *   2. file is written
                     *   3. read_line() - returns the middle of partial line.
                     */
                    this->lb_last_line_offset = offset;
                } else if (offset >= this->lb_last_line_offset) {
                    this->lb_last_line_offset
                        = offset + retval.li_file_range.fr_size;
                }
            }

            offset += retval.li_file_range.fr_size;

            done = true;
        } else {
            /* No delimiter yet and more data may come: grow the request. */
            if (!this->is_pipe() || !this->is_pipe_closed()) {
                retval.li_partial = true;
            }
            request_size
                = std::min<ssize_t>(this->lb_buffer.size() + DEFAULT_INCREMENT,
                                    MAX_LINE_BUFFER_SIZE);
        }

        /* Stop searching if the refill fails. */
        if (!done
            && !this->fill_range(
                offset,
                std::max(request_size, (ssize_t) this->lb_buffer.available())))
        {
            break;
        }
    }

    ensure(retval.li_file_range.fr_size <= this->lb_buffer.size());
    ensure(this->invariant());
#if 0
    log_debug("got line part %d %d",
              retval.li_file_range.fr_offset,
              (int) retval.li_partial);
#endif

    /* Copy the results of the last scan into the range's metadata. */
    retval.li_file_range.fr_metadata.m_has_ansi
        = retval.li_utf8_scan_result.usr_has_ansi;
    retval.li_file_range.fr_metadata.m_valid_utf
        = retval.li_utf8_scan_result.is_valid();

    if (this->lb_line_metadata) {
        /* Parse the "<sec>.<usec>:<level>;" prefix at the start of the line. */
        auto sv = scn::string_view{
            line_start,
            (size_t) retval.li_file_range.fr_size,
        };

        char level;
        auto scan_res = scn::scan(sv,
                                  "{}.{}:{};",
                                  retval.li_timestamp.tv_sec,
                                  retval.li_timestamp.tv_usec,
                                  level);
        if (scan_res) {
            /* Run the parsed seconds through lnav::to_local_time(). */
            retval.li_timestamp.tv_sec
                = lnav::to_local_time(date::sys_seconds{std::chrono::seconds{
                                          retval.li_timestamp.tv_sec}})
                      .time_since_epoch()
                      .count();
            retval.li_level = abbrev2level(&level, 1);
        }
    }

    return Ok(retval);
}
|
|
|
|
/**
 * Produce a shared reference to the bytes described by `fr`.
 *
 * If either end of the range is outside of the buffered data, the buffer is
 * refilled first.  When `lb_line_metadata` is set, everything up to and
 * including the first ';' in the range is dropped from the result.
 *
 * @param fr The file range to read.
 * @return Ok with a shared_buffer_ref over the requested bytes that carries
 *   `fr.fr_metadata`, or Err with a message when the refill fails or fewer
 *   bytes than requested are available.
 */
Result<shared_buffer_ref, std::string>
line_buffer::read_range(file_range fr)
{
#if 0
    if (this->lb_last_line_offset != -1
        && fr.fr_offset > this->lb_last_line_offset)
    {
        /*
         * Don't return anything past the last known line.  The caller needs
         * to try reading at the offset of the last line again.
         */
        return Err(
            fmt::format(FMT_STRING("attempt to read past the known end of the "
                                   "file: read-offset={}; last_line_offset={}"),
                        fr.fr_offset,
                        this->lb_last_line_offset));
    }
#endif

    /* Both the first and the last byte need to be buffered already. */
    const auto last_byte_offset = fr.fr_offset + fr.fr_size - 1;
    if (!this->in_range(fr.fr_offset) || !this->in_range(last_byte_offset)) {
        if (!this->fill_range(fr.fr_offset, fr.fr_size)) {
            return Err(std::string("unable to read file"));
        }
    }

    file_ssize_t avail;
    const char* data = this->get_range(fr.fr_offset, avail);
    if (avail < fr.fr_size) {
        return Err(fmt::format(
            FMT_STRING("short-read (need: {}; avail: {})"), fr.fr_size, avail));
    }

    if (this->lb_line_metadata) {
        /* Skip past the prefix that ends at the first ';', if there is one. */
        const auto* semi
            = static_cast<const char*>(memchr(data, ';', fr.fr_size));
        if (semi != nullptr) {
            const auto skip = semi - data + 1;

            data += skip;
            fr.fr_size -= skip;
        }
    }

    shared_buffer_ref retval;
    retval.share(this->lb_share_manager, data, fr.fr_size);
    retval.get_metadata() = fr.fr_metadata;

    return Ok(std::move(retval));
}
|
|
|
|
/**
 * @return A file_range that starts at `lb_file_offset` and whose size is the
 *   number of bytes currently held in `lb_buffer`.
 */
file_range
line_buffer::get_available()
{
    const auto buffered_size
        = static_cast<file_ssize_t>(this->lb_buffer.size());

    return {this->lb_file_offset, buffered_size};
}
|
|
|
|
/**
 * Snapshot the state of an inflate stream so that apply() can later set up
 * another stream with the same values.
 *
 * The stream's `data_type` must have the GZ_END_OF_BLOCK_MASK bit set and
 * the GZ_END_OF_FILE_MASK bit clear (both are asserted).
 *
 * @param s The zlib stream to capture.
 * @param size Asserted to be at least `s.avail_out + GZ_WINSIZE`.
 */
line_buffer::gz_indexed::indexDict::indexDict(const z_stream& s,
                                              const file_size_t size)
{
    assert((s.data_type & GZ_END_OF_BLOCK_MASK));
    assert(!(s.data_type & GZ_END_OF_FILE_MASK));
    assert(size >= s.avail_out + GZ_WINSIZE);

    // Running totals of the stream at this point.
    this->in = s.total_in;
    this->out = s.total_out;

    // Keep the top `bits` bits of the most recently consumed input byte.
    this->bits = s.data_type & GZ_BORROW_BITS_MASK;
    const auto prev_input_byte = *(s.next_in - 1);
    this->in_bits = prev_input_byte >> (8 - this->bits);

    // Save the last GZ_WINSIZE bytes of uncompressed output (the sliding
    // window) in the index.
    const auto* window_start = s.next_out - GZ_WINSIZE;
    memcpy(this->index, window_start, GZ_WINSIZE);
}
|
|
|
|
int
|
|
line_buffer::gz_indexed::indexDict::apply(z_streamp s)
|
|
{
|
|
s->zalloc = Z_NULL;
|
|
s->zfree = Z_NULL;
|
|
s->opaque = Z_NULL;
|
|
s->avail_in = 0;
|
|
s->next_in = Z_NULL;
|
|
auto ret = inflateInit2(s, GZ_RAW_MODE);
|
|
if (ret != Z_OK) {
|
|
return ret;
|
|
}
|
|
if (this->bits) {
|
|
inflatePrime(s, this->bits, this->in_bits);
|
|
}
|
|
s->total_in = this->in;
|
|
s->total_out = this->out;
|
|
inflateSetDictionary(s, this->index, GZ_WINSIZE);
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
line_buffer::is_likely_to_flush(file_range prev_line)
|
|
{
|
|
auto avail = this->get_available();
|
|
|
|
if (prev_line.fr_offset < avail.fr_offset) {
|
|
return true;
|
|
}
|
|
auto prev_line_end = prev_line.fr_offset + prev_line.fr_size;
|
|
auto avail_end = avail.fr_offset + avail.fr_size;
|
|
if (avail_end < prev_line_end) {
|
|
return true;
|
|
}
|
|
auto remaining = avail_end - prev_line_end;
|
|
return remaining < INITIAL_REQUEST_SIZE;
|
|
}
|
|
|
|
void
|
|
line_buffer::quiesce()
|
|
{
|
|
if (this->lb_loader_future.valid()) {
|
|
this->lb_loader_future.wait();
|
|
}
|
|
}
|
|
|
|
/**
 * @return The "buffer-cache" path underneath lnav::paths::workdir().
 */
static ghc::filesystem::path
line_buffer_cache_path()
{
    auto retval = lnav::paths::workdir();

    retval /= "buffer-cache";
    return retval;
}
|
|
|
|
void
|
|
line_buffer::enable_cache()
|
|
{
|
|
if (!this->lb_compressed || this->lb_cached_fd) {
|
|
log_info("%d: skipping cache request (compressed=%d already-cached=%d)",
|
|
this->lb_fd.get(),
|
|
this->lb_compressed,
|
|
(bool) this->lb_cached_fd);
|
|
return;
|
|
}
|
|
|
|
struct stat st;
|
|
|
|
if (fstat(this->lb_fd, &st) == -1) {
|
|
log_error("failed to fstat(%d) - %d", this->lb_fd.get(), errno);
|
|
return;
|
|
}
|
|
|
|
auto cached_base_name = hasher()
|
|
.update(st.st_dev)
|
|
.update(st.st_ino)
|
|
.update(st.st_size)
|
|
.to_string();
|
|
auto cache_dir = line_buffer_cache_path() / cached_base_name.substr(0, 2);
|
|
|
|
ghc::filesystem::create_directories(cache_dir);
|
|
|
|
auto cached_file_name = fmt::format(FMT_STRING("{}.bin"), cached_base_name);
|
|
auto cached_file_path = cache_dir / cached_file_name;
|
|
auto cached_done_path
|
|
= cache_dir / fmt::format(FMT_STRING("{}.done"), cached_base_name);
|
|
|
|
log_info(
|
|
"%d:cache file path: %s", this->lb_fd.get(), cached_file_path.c_str());
|
|
|
|
auto fl = lnav::filesystem::file_lock(cached_file_path);
|
|
auto guard = lnav::filesystem::file_lock::guard(&fl);
|
|
|
|
if (ghc::filesystem::exists(cached_done_path)) {
|
|
log_info("%d:using existing cache file");
|
|
auto open_res = lnav::filesystem::open_file(cached_file_path, O_RDWR);
|
|
if (open_res.isOk()) {
|
|
this->lb_cached_fd = open_res.unwrap();
|
|
return;
|
|
}
|
|
ghc::filesystem::remove(cached_done_path);
|
|
}
|
|
|
|
auto create_res = lnav::filesystem::create_file(
|
|
cached_file_path, O_RDWR | O_TRUNC, 0600);
|
|
if (create_res.isErr()) {
|
|
log_error("failed to create cache file: %s -- %s",
|
|
cached_file_path.c_str(),
|
|
create_res.unwrapErr().c_str());
|
|
return;
|
|
}
|
|
|
|
auto write_fd = create_res.unwrap();
|
|
auto done = false;
|
|
|
|
static const ssize_t FILL_LENGTH = 1024 * 1024;
|
|
auto off = file_off_t{0};
|
|
while (!done) {
|
|
log_debug("%d: caching file content at %d", this->lb_fd.get(), off);
|
|
if (!this->fill_range(off, FILL_LENGTH)) {
|
|
log_debug("%d: caching finished", this->lb_fd.get());
|
|
done = true;
|
|
} else {
|
|
file_ssize_t avail;
|
|
|
|
const auto* data = this->get_range(off, avail);
|
|
auto rc = write(write_fd, data, avail);
|
|
if (rc != avail) {
|
|
log_error("%d: short write!", this->lb_fd.get());
|
|
return;
|
|
}
|
|
|
|
off += avail;
|
|
}
|
|
}
|
|
|
|
lnav::filesystem::create_file(cached_done_path, O_WRONLY, 0600);
|
|
|
|
this->lb_cached_fd = std::move(write_fd);
|
|
}
|
|
|
|
void
|
|
line_buffer::cleanup_cache()
|
|
{
|
|
(void) std::async(std::launch::async, []() {
|
|
auto now = std::chrono::system_clock::now();
|
|
auto cache_path = line_buffer_cache_path();
|
|
std::vector<ghc::filesystem::path> to_remove;
|
|
std::error_code ec;
|
|
|
|
for (const auto& cache_subdir :
|
|
ghc::filesystem::directory_iterator(cache_path, ec))
|
|
{
|
|
for (const auto& entry :
|
|
ghc::filesystem::directory_iterator(cache_subdir, ec))
|
|
{
|
|
auto mtime = ghc::filesystem::last_write_time(entry.path());
|
|
auto exp_time = mtime + 1h;
|
|
if (now < exp_time) {
|
|
continue;
|
|
}
|
|
|
|
to_remove.emplace_back(entry.path());
|
|
}
|
|
}
|
|
|
|
for (auto& entry : to_remove) {
|
|
log_debug("removing compressed file cache: %s", entry.c_str());
|
|
ghc::filesystem::remove_all(entry, ec);
|
|
}
|
|
});
|
|
}
|