mirror of https://github.com/tstack/lnav
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
455 lines
11 KiB
C++
455 lines
11 KiB
C++
/**
|
|
* Copyright (c) 2019, Timothy Stack
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <algorithm>
|
|
#include <iterator>
|
|
#include <regex>
|
|
#include <sstream>
|
|
|
|
#include "string_util.hh"
|
|
|
|
#include "config.h"
|
|
#include "is_utf8.hh"
|
|
#include "lnav_log.hh"
|
|
|
|
void
|
|
scrub_to_utf8(char* buffer, size_t length)
|
|
{
|
|
while (true) {
|
|
auto frag = string_fragment::from_bytes(buffer, length);
|
|
auto scan_res = is_utf8(frag);
|
|
|
|
if (scan_res.is_valid()) {
|
|
break;
|
|
}
|
|
for (size_t lpc = 0; lpc < scan_res.usr_faulty_bytes; lpc++) {
|
|
buffer[scan_res.usr_valid_frag.sf_end + lpc] = '?';
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char)
|
|
{
|
|
for (char ch : sf) {
|
|
if (ch == quote_char) {
|
|
buf.push_back('\\').push_back(ch);
|
|
continue;
|
|
}
|
|
switch (ch) {
|
|
case '\\':
|
|
buf.push_back('\\').push_back('\\');
|
|
break;
|
|
case '\n':
|
|
buf.push_back('\\').push_back('n');
|
|
break;
|
|
case '\t':
|
|
buf.push_back('\\').push_back('t');
|
|
break;
|
|
case '\r':
|
|
buf.push_back('\\').push_back('r');
|
|
break;
|
|
case '\a':
|
|
buf.push_back('\\').push_back('a');
|
|
break;
|
|
case '\b':
|
|
buf.push_back('\\').push_back('b');
|
|
break;
|
|
default:
|
|
buf.push_back(ch);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Decode the inside of a quoted string into `dst`.
 *
 * A `quote_char` in the input is copied and the character following it is
 * skipped (so a doubled quote collapses to one).  A backslash followed by
 * another character is replaced by the escaped value: `n`, `r` and `t` map
 * to newline, carriage return and tab; any other character stands for
 * itself.  A backslash that is the very last input character is copied
 * as-is.
 *
 * @param dst Output buffer; receives the decoded text plus a trailing NUL.
 * @param str The quoted content, without the surrounding quotes.
 * @param len The number of bytes in `str`.
 * @param quote_char The quote character used by the string.
 * @return The number of bytes written to `dst`, not counting the NUL.
 */
size_t
unquote_content(char* dst, const char* str, size_t len, char quote_char)
{
    size_t out_pos = 0;
    size_t in_pos = 0;

    while (in_pos < len) {
        const char current = str[in_pos];
        char decoded = current;

        if (current == quote_char) {
            // Keep the quote and drop whatever follows it.
            in_pos += 2;
        } else if (current == '\\' && in_pos + 1 < len) {
            const char code = str[in_pos + 1];

            if (code == 'n') {
                decoded = '\n';
            } else if (code == 'r') {
                decoded = '\r';
            } else if (code == 't') {
                decoded = '\t';
            } else {
                decoded = code;
            }
            in_pos += 2;
        } else {
            in_pos += 1;
        }

        dst[out_pos++] = decoded;
    }
    dst[out_pos] = '\0';

    return out_pos;
}
|
|
|
|
size_t
|
|
unquote(char* dst, const char* str, size_t len)
|
|
{
|
|
if (str[0] == 'r' || str[0] == 'u') {
|
|
str += 1;
|
|
len -= 1;
|
|
}
|
|
char quote_char = str[0];
|
|
|
|
require(str[0] == '\'' || str[0] == '"');
|
|
|
|
return unquote_content(dst, &str[1], len - 2, quote_char);
|
|
}
|
|
|
|
size_t
|
|
unquote_w3c(char* dst, const char* str, size_t len)
|
|
{
|
|
size_t index = 0;
|
|
|
|
require(str[0] == '\'' || str[0] == '"');
|
|
|
|
for (size_t lpc = 1; lpc < (len - 1); lpc++, index++) {
|
|
dst[index] = str[lpc];
|
|
if (str[lpc] == '"') {
|
|
lpc += 1;
|
|
}
|
|
}
|
|
dst[index] = '\0';
|
|
|
|
return index;
|
|
}
|
|
|
|
void
|
|
truncate_to(std::string& str, size_t max_char_len)
|
|
{
|
|
static const std::string ELLIPSIS = "\u22ef";
|
|
|
|
if (str.length() < max_char_len) {
|
|
return;
|
|
}
|
|
|
|
auto str_char_len_res = utf8_string_length(str);
|
|
|
|
if (str_char_len_res.isErr()) {
|
|
// XXX
|
|
return;
|
|
}
|
|
|
|
auto str_char_len = str_char_len_res.unwrap();
|
|
if (str_char_len <= max_char_len) {
|
|
return;
|
|
}
|
|
|
|
if (max_char_len < 3) {
|
|
str = ELLIPSIS;
|
|
return;
|
|
}
|
|
|
|
auto chars_to_remove = (str_char_len - max_char_len) + 1;
|
|
auto midpoint = str_char_len / 2;
|
|
auto chars_to_keep_at_front = midpoint - (chars_to_remove / 2);
|
|
auto bytes_to_keep_at_front
|
|
= utf8_char_to_byte_index(str, chars_to_keep_at_front);
|
|
auto remove_up_to_bytes = utf8_char_to_byte_index(
|
|
str, chars_to_keep_at_front + chars_to_remove);
|
|
auto bytes_to_remove = remove_up_to_bytes - bytes_to_keep_at_front;
|
|
str.erase(bytes_to_keep_at_front, bytes_to_remove);
|
|
str.insert(bytes_to_keep_at_front, ELLIPSIS);
|
|
}
|
|
|
|
/**
 * Check whether a file name looks like a URL with one of the recognized
 * schemes: file, http, https, ftp, ftps, scp or sftp.
 *
 * @param fn The name to test.
 * @return True if the entire string matches the scheme pattern.
 */
bool
is_url(const std::string& fn)
{
    // Compiled once on first use and shared by later calls.
    static const std::regex SCHEME_PATTERN{
        "^(file|https?|ftps?|scp|sftp):.*"};

    return std::regex_match(fn.begin(), fn.end(), SCHEME_PATTERN);
}
|
|
|
|
/**
 * Reduce a string to the part that follows its last separator ('.', '-',
 * '/' or ':'), moving that part to the front of the buffer.
 *
 * Nothing is done if the string is shorter than `max_len` or contains no
 * separator.
 *
 * @param str The buffer to modify in place.
 * @param len The number of bytes in `str`.
 * @param max_len Strings shorter than this are left untouched.
 * @return The new length of the string.
 */
size_t
last_word_str(char* str, size_t len, size_t max_len)
{
    if (len < max_len) {
        return len;
    }

    // Scan backwards for the character just past the final separator.
    size_t word_start = len;
    while (word_start > 0) {
        const char prev = str[word_start - 1];

        if (prev == '.' || prev == '-' || prev == '/' || prev == ':') {
            break;
        }
        word_start -= 1;
    }

    if (word_start == 0) {
        // No separator was found.
        return len;
    }

    const size_t word_len = len - word_start;

    memmove(str, str + word_start, word_len);
    return word_len;
}
|
|
|
|
/**
 * Abbreviate a separated name in place by cutting each component down to
 * its first character, working from the front, until the string is shorter
 * than `max_len` or there are no more separators.  For example,
 * "com.example.foo" becomes "c.e.foo".
 *
 * The separators are '.', '-', '/' and ':'.  The text after the last
 * separator is never shortened.  Separators at the very start of the string
 * are kept as they are.
 *
 * @param str The buffer to modify in place.
 * @param len The number of bytes in `str`.
 * @param max_len Stop as soon as the string is shorter than this.
 * @return The new length of the string.
 */
size_t
abbreviate_str(char* str, size_t len, size_t max_len)
{
    // Where the next separator should be moved to, i.e. one past the first
    // character of the component currently being scanned.
    size_t last_start = 1;

    if (len < max_len) {
        return len;
    }

    for (size_t index = 0; index < len; index++) {
        switch (str[index]) {
            case '.':
            case '-':
            case '/':
            case ':':
                if (index < last_start) {
                    // A separator with no component in front of it (the
                    // string starts with one).  There is nothing to squeeze
                    // and `index - last_start` would wrap around, growing
                    // the string and writing past its end.  Just start the
                    // next component after this separator.
                    last_start = index + 2;
                    break;
                }

                // Pull the separator and everything after it up against the
                // first character of the component.
                memmove(&str[last_start], &str[index], len - index);
                len -= (index - last_start);
                // Resume at the first character of the next component; the
                // loop increment then moves past it.
                index = last_start + 1;
                last_start = index + 1;

                if (len < max_len) {
                    return len;
                }
                break;
        }
    }

    return len;
}
|
|
|
|
/**
 * Split a string on runs of whitespace and append the resulting tokens to
 * `toks_out`.  Existing elements of `toks_out` are kept.
 *
 * @param str The string to split.
 * @param toks_out The vector that receives the tokens.
 */
void
split_ws(const std::string& str, std::vector<std::string>& toks_out)
{
    std::stringstream tokenizer(str);

    // Stream extraction of std::string skips whitespace between tokens.
    std::copy(std::istream_iterator<std::string>(tokenizer),
              std::istream_iterator<std::string>(),
              std::back_inserter(toks_out));
}
|
|
|
|
/**
 * Build a string made of `num` back-to-back copies of `input`.
 *
 * @param input The text to repeat.
 * @param num The number of copies.
 * @return The concatenated copies; empty when `num` is zero.
 */
std::string
repeat(const std::string& input, size_t num)
{
    std::string retval;

    for (size_t remaining = num; remaining > 0; --remaining) {
        retval.append(input);
    }

    return retval;
}
|
|
|
|
std::string
|
|
center_str(const std::string& subject, size_t width)
|
|
{
|
|
std::string retval = subject;
|
|
|
|
truncate_to(retval, width);
|
|
|
|
auto retval_length = utf8_string_length(retval).unwrapOr(retval.length());
|
|
auto total_fill = width - retval_length;
|
|
auto before = total_fill / 2;
|
|
auto after = total_fill - before;
|
|
|
|
retval.insert(0, before, ' ');
|
|
retval.append(after, ' ');
|
|
|
|
return retval;
|
|
}
|
|
|
|
/**
 * Check whether a string consists only of whitespace characters.
 *
 * @param str The string to test.
 * @return True if every character is whitespace according to isspace(),
 * which includes the case of an empty string.
 */
bool
is_blank(const std::string& str)
{
    return std::all_of(str.begin(), str.end(), [](const char ch) {
        // isspace() requires a value representable as unsigned char (or
        // EOF); passing a negative char, as happens with non-ASCII bytes,
        // is undefined behavior.
        return isspace(static_cast<unsigned char>(ch)) != 0;
    });
}
|
|
|
|
/**
 * Make whitespace control characters visible by replacing tabs, line feeds
 * and carriage returns with printable symbols (U+21E5, U+240A and U+240D,
 * respectively).  All other bytes are copied unchanged.
 *
 * @param in The text to scrub.
 * @param len The number of bytes to read from `in`, or -1 to read up to the
 * first NUL byte.  Any other negative value produces an empty result.
 * @return The scrubbed copy of the input.
 */
std::string
scrub_ws(const char* in, ssize_t len)
{
    static const std::string TAB_SYMBOL = "\u21e5";
    static const std::string LF_SYMBOL = "\u240a";
    static const std::string CR_SYMBOL = "\u240d";

    const bool nul_terminated = (len == -1);
    // Only meaningful for the explicit-length mode.
    const size_t limit = len >= 0 ? static_cast<size_t>(len) : 0;

    std::string retval;

    for (size_t pos = 0;
         nul_terminated ? (in[pos] != '\0') : (pos < limit);
         ++pos)
    {
        const char ch = in[pos];

        if (ch == '\t') {
            retval += TAB_SYMBOL;
        } else if (ch == '\n') {
            retval += LF_SYMBOL;
        } else if (ch == '\r') {
            retval += CR_SYMBOL;
        } else {
            retval.push_back(ch);
        }
    }

    return retval;
}
|
|
|
|
/** UTF-8 superscript forms of the digits, indexed by digit value. */
static constexpr const char* const SUPERSCRIPT_NUMS[] = {
    "⁰",
    "¹",
    "²",
    "³",
    "⁴",
    "⁵",
    "⁶",
    "⁷",
    "⁸",
    "⁹",
};

/**
 * Replace every ASCII digit in a string with its superscript equivalent.
 * All other bytes are copied unchanged.
 *
 * @param in The text to convert.
 * @return A copy of `in` with the digits replaced.
 */
std::string
to_superscript(const std::string& in)
{
    std::string retval;

    for (const auto ch : in) {
        // A plain range check is used instead of isdigit() since the input
        // can contain non-ASCII bytes, which are negative when char is
        // signed and must not be passed to the <ctype.h> functions.
        if (ch >= '0' && ch <= '9') {
            retval.append(SUPERSCRIPT_NUMS[ch - '0']);
        } else {
            retval.push_back(ch);
        }
    }

    return retval;
}
|
|
|
|
namespace fmt {
|
|
auto
|
|
formatter<lnav::tainted_string>::format(const lnav::tainted_string& ts,
|
|
format_context& ctx)
|
|
-> decltype(ctx.out()) const
|
|
{
|
|
auto esc_res = fmt::v10::detail::find_escape(&(*ts.ts_str.begin()),
|
|
&(*ts.ts_str.end()));
|
|
if (esc_res.end == nullptr) {
|
|
return formatter<string_view>::format(ts.ts_str, ctx);
|
|
}
|
|
|
|
return format_to(ctx.out(), FMT_STRING("{:?}"), ts.ts_str);
|
|
}
|
|
} // namespace fmt
|
|
|
|
namespace lnav {
|
|
namespace pcre2pp {
|
|
|
|
/**
 * Check whether a character has a special meaning in a regular expression
 * and therefore needs a backslash in front of it to be matched literally.
 *
 * @param ch The character to test.
 * @return True if the character is a pattern metacharacter.
 */
static bool
is_meta(char ch)
{
    switch (ch) {
        case '\\':
        case '^':
        case '$':
        case '.':
        case '[':
        case ']':
        case '(':
        case ')':
        case '*':
        case '+':
        case '?':
        case '{':
        case '}':
        // Alternation: left unescaped, "a|b" would match "a" or "b" instead
        // of the literal text.
        case '|':
            return true;
        default:
            return false;
    }
}
|
|
|
|
/**
 * Look up the backslash escape sequence used to write a control character
 * in a pattern.  Only tab and newline have one.
 *
 * @param ch The character to look up.
 * @return The escape sequence text, or std::nullopt if the character has
 * no dedicated sequence.
 */
static std::optional<const char*>
char_escape_seq(char ch)
{
    if (ch == '\t') {
        return "\\t";
    }
    if (ch == '\n') {
        return "\\n";
    }

    return std::nullopt;
}
|
|
|
|
std::string
|
|
quote(string_fragment str)
|
|
{
|
|
std::string retval;
|
|
|
|
while (true) {
|
|
auto cp_pair_opt = str.consume_codepoint();
|
|
if (!cp_pair_opt) {
|
|
break;
|
|
}
|
|
|
|
auto cp_pair = cp_pair_opt.value();
|
|
if ((cp_pair.first & ~0xff) == 0) {
|
|
if (is_meta(cp_pair.first)) {
|
|
retval.push_back('\\');
|
|
} else {
|
|
auto esc_seq = char_escape_seq(cp_pair.first);
|
|
if (esc_seq) {
|
|
retval.append(esc_seq.value());
|
|
str = cp_pair_opt->second;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
ww898::utf::utf8::write(cp_pair.first,
|
|
[&retval](char ch) { retval.push_back(ch); });
|
|
str = cp_pair_opt->second;
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
} // namespace pcre2pp
|
|
} // namespace lnav
|