lnav/src/data_scanner.cc

/**
 * Copyright (c) 2007-2012, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <algorithm>

#include "data_scanner.hh"

#include "config.h"

void
data_scanner::capture_t::ltrim(const char* str)
{
    while (this->c_begin < this->c_end && isspace(str[this->c_begin])) {
        this->c_begin += 1;
    }
}

static struct {
    const char* name;
} MATCHERS[DT_TERMINAL_MAX] = {
    {
        "quot",
    },
    {
        "comm",
    },
    {
        "url",
    },
    {
        "path",
    },
    {
        "mac",
    },
    {
        "date",
    },
    {
        "time",
    },
    {
        "dt",
    },
    /* { "qual", pcrepp("\\A([^\\s:=]+:[^\\s:=,]+(?!,)(?::[^\\s:=,]+)*)"), }, */
    {
        "ipv6",
    },
    {
        "hexd",
    },

    {
        "xmld",
    },
    {
        "xmlt",
    },
    {
        "xmlo",
    },

    {
        "xmlc",
    },

    {
        "h1",
    },
    {
        "h2",
    },
    {
        "h3",
    },

    {
        "coln",
    },
    {
        "eq",
    },
    {
        "comm",
    },
    {
        "semi",
    },
    {
        "emda",
    },

    {
        "empt",
    },

    {
        "lcur",
    },
    {
        "rcur",
    },

    {
        "lsqu",
    },
    {
        "rsqu",
    },

    {
        "lpar",
    },
    {
        "rpar",
    },

    {
        "lang",
    },
    {
        "rang",
    },

    {
        "ipv4",
    },

    {
        "uuid",
    },

    {
        "cc",
    },
    {
        "vers",
    },
    {
        "oct",
    },
    {
        "pcnt",
    },
    {
        "num",
    },
    {
        "hex",
    },

    {
        "mail",
    },
    {
        "cnst",
    },
    {
        "word",
    },
    {
        "id",
    },
    {
        "sym",
    },
    {
        "unit",
    },
    {
        "line",
    },
    {
        "wspc",
    },
    {
        "dot",
    },
    {
        "escc",
    },
    {
        "csi",
    },

    {
        "gbg",
    },
    {
        "zwsp",
    },
    {
        "dffi",
    },
    {
        "dfch",
    },
};

const char* DNT_NAMES[DNT_MAX - DNT_KEY] = {
    "key",
    "pair",
    "val",
    "row",
    "unit",
    "meas",
    "var",
    "rang",
    "grp",
};

const char*
data_scanner::token2name(data_token_t token)
{
    if (token < 0) {
        return "inv";
    }
    if (token < DT_TERMINAL_MAX) {
        return MATCHERS[token].name;
    }
    if (token == DT_ANY) {
        return "any";
    }
    return DNT_NAMES[token - DNT_KEY];
}

bool
data_scanner::is_credit_card(string_fragment cc) const
{
    auto cc_no_spaces = cc.to_string();
    auto new_end = std::remove_if(cc_no_spaces.begin(),
                                  cc_no_spaces.end(),
                                  [](auto ch) { return ch == ' '; });
    cc_no_spaces.erase(new_end, cc_no_spaces.end());
    int len = cc_no_spaces.size();
    int double_even_sum = 0;

    // Step 1: double every second digit, starting from right.
    // if results in 2 digit number, add the digits to obtain single digit
    // number. sum all answers to obtain 'double_even_sum'

    for (int lpc = len - 2; lpc >= 0; lpc = lpc - 2) {
        int dbl = ((cc_no_spaces[lpc] - '0') * 2);
        if (dbl > 9) {
            dbl = (dbl / 10) + (dbl % 10);
        }
        double_even_sum += dbl;
    }

    // Step 2: add every odd placed digit from right to double_even_sum's value

    for (int lpc = len - 1; lpc >= 0; lpc = lpc - 2) {
        double_even_sum += (cc_no_spaces[lpc] - 48);
    }

    // Step 3: check if final 'double_even_sum' is multiple of 10
    // if yes, it is valid.

    return double_even_sum % 10 == 0;
}

void
data_scanner::cleanup_end()
{
    auto done = false;

    while (!this->ds_input.empty() && !done) {
        switch (this->ds_input.back()) {
            case '.':
            case ' ':
            case '\r':
            case '\n':
                this->ds_input.pop_back();
                break;
            default:
                done = true;
                break;
        }
    }
}

std::optional<data_scanner::tokenize_result>
data_scanner::tokenize2(text_format_t tf)
{
    auto retval = this->tokenize_int(tf);

    if (this->ds_last_bracket_matched) {
        this->ds_matching_brackets.pop_back();
        this->ds_last_bracket_matched = false;
    }
    if (retval) {
        auto dt = retval.value().tr_token;
        switch (dt) {
            case DT_LSQUARE:
            case DT_LCURLY:
            case DT_LPAREN:
                this->ds_matching_brackets.emplace_back(retval.value());
                break;
            case DT_RSQUARE:
            case DT_RCURLY:
            case DT_RPAREN:
                if (!this->ds_matching_brackets.empty()
                    && this->ds_matching_brackets.back().tr_token
                        == to_opener(dt))
                {
                    this->ds_last_bracket_matched = true;
                }
                break;
            default:
                break;
        }
    }

    return retval;
}

std::optional<data_scanner::tokenize_result>
data_scanner::find_matching_bracket(text_format_t tf, tokenize_result tr)
{
    switch (tr.tr_token) {
        case DT_LSQUARE:
        case DT_LCURLY:
        case DT_LPAREN: {
            auto curr_size = this->ds_matching_brackets.size();
            while (true) {
                auto tok_res = this->tokenize2(tf);
                if (!tok_res) {
                    break;
                }

                if (this->ds_matching_brackets.size() == curr_size
                    && this->ds_last_bracket_matched)
                {
                    return tokenize_result{
                        DNT_GROUP,
                        {
                            tr.tr_capture.c_begin,
                            tok_res->tr_capture.c_end,
                        },
                        {
                            tr.tr_capture.c_begin,
                            tok_res->tr_capture.c_end,
                        },
                        tr.tr_data,
                    };
                }
            }
            break;
        }
        case DT_RSQUARE:
        case DT_RCURLY:
        case DT_RPAREN: {
            for (auto riter = this->ds_matching_brackets.rbegin();
                 riter != this->ds_matching_brackets.rend();
                 ++riter)
            {
                if (riter->tr_token == to_opener(tr.tr_token)) {
                    return data_scanner::tokenize_result{
                        DNT_GROUP,
                        {
                            riter->tr_capture.c_begin,
                            tr.tr_capture.c_end,
                        },
                        {
                            riter->tr_capture.c_begin,
                            tr.tr_capture.c_end,
                        },
                        tr.tr_data,
                    };
                }
            }
            break;
        }
        default:
            break;
    }

    return std::nullopt;
}