You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lnav/src/data_scanner.cc

406 lines
8.3 KiB
C++

/**
* Copyright (c) 2007-2012, Timothy Stack
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include "data_scanner.hh"
#include "config.h"
/**
 * Advance the start of this capture past any leading whitespace.
 *
 * c_begin is incremented until it either reaches c_end or refers to a
 * non-whitespace character in the given buffer.
 *
 * @param str The buffer that c_begin and c_end index into.
 */
void
data_scanner::capture_t::ltrim(const char* str)
{
    // isspace() has undefined behavior when given a negative value other
    // than EOF.  Where plain char is signed, any byte >= 0x80 would be
    // negative, so convert to unsigned char before classifying it.
    while (this->c_begin < this->c_end
           && isspace(static_cast<unsigned char>(str[this->c_begin])))
    {
        this->c_begin += 1;
    }
}
/**
 * Short display names for the terminal tokens.
 *
 * token2name() indexes this table directly with the token value, so the
 * position of every entry is significant; do not reorder or remove entries.
 */
static struct {
    const char* name;
} MATCHERS[DT_TERMINAL_MAX] = {
    {"quot"},
    {"comm"},
    {"url"},
    {"path"},
    {"mac"},
    {"date"},
    {"time"},
    {"dt"},
    /* { "qual", pcrepp("\\A([^\\s:=]+:[^\\s:=,]+(?!,)(?::[^\\s:=,]+)*)"), }, */
    {"ipv6"},
    {"hexd"},
    {"xmld"},
    {"xmlt"},
    {"xmlo"},
    {"xmlc"},

    {"h1"},
    {"h2"},
    {"h3"},

    {"coln"},
    {"eq"},
    {"comm"},
    {"semi"},
    {"emda"},
    {"empt"},

    {"lcur"},
    {"rcur"},
    {"lsqu"},
    {"rsqu"},
    {"lpar"},
    {"rpar"},
    {"lang"},
    {"rang"},

    {"ipv4"},
    {"uuid"},
    {"cc"},
    {"vers"},
    {"oct"},
    {"pcnt"},
    {"num"},
    {"hex"},
    {"mail"},
    {"cnst"},
    {"word"},
    {"id"},
    {"sym"},
    {"unit"},
    {"line"},
    {"wspc"},
    {"dot"},
    {"escc"},
    {"csi"},
    {"gbg"},
    {"zwsp"},
    {"dffi"},
    {"dfch"},
};
/**
 * Short display names for the tokens that start at DNT_KEY.
 *
 * token2name() selects an entry with (token - DNT_KEY), so the entries must
 * stay in this order.
 */
const char* DNT_NAMES[DNT_MAX - DNT_KEY] = {
    "key",  "pair", "val",
    "row",  "unit", "meas",
    "var",  "rang", "grp",
};
/**
 * Map a token to its short display name.
 *
 * @param token The token to look up.
 * @return "inv" for negative tokens and for values that do not fall in any
 *   known range, the MATCHERS entry for tokens below DT_TERMINAL_MAX, "any"
 *   for DT_ANY, and the DNT_NAMES entry for tokens in [DNT_KEY, DNT_MAX).
 */
const char*
data_scanner::token2name(data_token_t token)
{
    if (token < 0) {
        return "inv";
    }

    if (token < DT_TERMINAL_MAX) {
        return MATCHERS[token].name;
    }

    if (token == DT_ANY) {
        return "any";
    }

    // DNT_NAMES only has entries for [DNT_KEY, DNT_MAX).  Any other value
    // would index outside of the array, so report it as invalid instead.
    if (token < DNT_KEY || token >= DNT_MAX) {
        return "inv";
    }

    return DNT_NAMES[token - DNT_KEY];
}
/**
 * Check whether a candidate string passes the Luhn checksum.
 *
 * Space characters are ignored.  The remaining characters must all be
 * decimal digits and there must be at least one of them; otherwise the
 * candidate is rejected.
 *
 * @param cc The candidate text.
 * @return true if the digits form a sequence whose Luhn sum is a multiple
 *   of ten, false otherwise.
 */
bool
data_scanner::is_credit_card(string_fragment cc) const
{
    const auto text = cc.to_string();
    int luhn_sum = 0;
    int digit_count = 0;
    // The rightmost digit is taken as-is, then every second digit moving
    // left is doubled.
    bool double_next = false;

    for (auto iter = text.rbegin(); iter != text.rend(); ++iter) {
        const char ch = *iter;

        if (ch == ' ') {
            continue;
        }
        if (ch < '0' || ch > '9') {
            // Anything other than a digit would corrupt the checksum, so
            // it can never be a valid number.
            return false;
        }

        int value = ch - '0';
        if (double_next) {
            value *= 2;
            if (value > 9) {
                // Summing the two digits of 10..18 is the same as
                // subtracting nine.
                value -= 9;
            }
        }
        luhn_sum += value;
        digit_count += 1;
        double_next = !double_next;
    }

    // An input without any digits has a sum of zero, which must not be
    // treated as a valid number.
    return digit_count > 0 && luhn_sum % 10 == 0;
}
/**
 * Strip trailing periods, spaces, carriage returns, and line feeds from the
 * end of ds_input.
 */
void
data_scanner::cleanup_end()
{
    const auto is_strippable = [](auto ch) {
        return ch == '.' || ch == ' ' || ch == '\r' || ch == '\n';
    };

    while (!this->ds_input.empty()
           && is_strippable(this->ds_input.back()))
    {
        this->ds_input.pop_back();
    }
}
/**
 * Produce the next token via tokenize_int() while maintaining the stack of
 * currently open brackets.
 *
 * If the previous call flagged a closing bracket as matching the top of
 * ds_matching_brackets, that opener is popped here before the new token is
 * examined.  Opening brackets are pushed onto the stack; a closing bracket
 * whose opener is on top of the stack sets ds_last_bracket_matched.
 *
 * @param tf The text format, forwarded to tokenize_int().
 * @return Whatever tokenize_int() returned.
 */
std::optional<data_scanner::tokenize_result>
data_scanner::tokenize2(text_format_t tf)
{
    auto result = this->tokenize_int(tf);

    // Retire the opener that was matched by the previously returned token.
    if (this->ds_last_bracket_matched) {
        this->ds_matching_brackets.pop_back();
        this->ds_last_bracket_matched = false;
    }

    if (!result) {
        return result;
    }

    const auto token = result->tr_token;
    const bool is_opener
        = token == DT_LSQUARE || token == DT_LCURLY || token == DT_LPAREN;
    const bool is_closer
        = token == DT_RSQUARE || token == DT_RCURLY || token == DT_RPAREN;

    if (is_opener) {
        this->ds_matching_brackets.emplace_back(*result);
    } else if (is_closer) {
        // Only the innermost open bracket is considered for a match.
        if (!this->ds_matching_brackets.empty()
            && this->ds_matching_brackets.back().tr_token == to_opener(token))
        {
            this->ds_last_bracket_matched = true;
        }
    }

    return result;
}
/**
 * Find the bracket that pairs with the given bracket token.
 *
 * For an opening bracket, tokens are consumed with tokenize2() until a
 * closing bracket is flagged as matched while the bracket stack is at the
 * size it had on entry.  For a closing bracket, the bracket stack is
 * searched from the most recent entry backwards for the corresponding
 * opener; no further input is consumed.
 *
 * @param tf The text format, forwarded to tokenize2().
 * @param tr The bracket token to find the partner of.
 * @return A DNT_GROUP result that spans from the opener's c_begin to the
 *   closer's c_end and carries tr's tr_data, or std::nullopt if tr is not
 *   a bracket or no partner was found.
 */
std::optional<data_scanner::tokenize_result>
data_scanner::find_matching_bracket(text_format_t tf, tokenize_result tr)
{
    // Both captures of the resulting group cover the same span.
    const auto make_group = [&](auto group_begin, auto group_end) {
        return tokenize_result{
            DNT_GROUP,
            {group_begin, group_end},
            {group_begin, group_end},
            tr.tr_data,
        };
    };

    const auto token = tr.tr_token;

    if (token == DT_LSQUARE || token == DT_LCURLY || token == DT_LPAREN) {
        const auto entry_depth = this->ds_matching_brackets.size();

        while (auto next = this->tokenize2(tf)) {
            const bool closed_at_entry_depth = this->ds_last_bracket_matched
                && this->ds_matching_brackets.size() == entry_depth;

            if (closed_at_entry_depth) {
                return make_group(tr.tr_capture.c_begin,
                                  next->tr_capture.c_end);
            }
        }
        return std::nullopt;
    }

    if (token == DT_RSQUARE || token == DT_RCURLY || token == DT_RPAREN) {
        auto& open_stack = this->ds_matching_brackets;
        const auto opener = std::find_if(
            open_stack.rbegin(),
            open_stack.rend(),
            [&](const auto& candidate) {
                return candidate.tr_token == to_opener(tr.tr_token);
            });

        if (opener != open_stack.rend()) {
            return make_group(opener->tr_capture.c_begin,
                              tr.tr_capture.c_end);
        }
    }

    return std::nullopt;
}