/** * Copyright (c) 2015, Timothy Stack * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Timothy Stack nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "base/date_time_scanner.hh" #include "config.h" #include "data_scanner.hh" /*!conditions:re2c*/ std::optional data_scanner::tokenize_int(text_format_t tf) { data_token_t token_out = DT_INVALID; capture_t cap_all; capture_t cap_inner; # define YYCTYPE unsigned char # define CAPTURE(tok) { \ if (YYCURSOR.val == EMPTY) { \ this->ds_next_offset = this->ds_input.length(); \ } else { \ this->ds_next_offset = YYCURSOR.val - this->ds_input.udata(); \ } \ cap_all.c_end = this->ds_next_offset; \ cap_inner.c_end = this->ds_next_offset; \ token_out = tok; \ } # define RET(tok) { \ CAPTURE(tok); \ return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; \ } static const unsigned char *EMPTY = (const unsigned char *) ""; struct _YYCURSOR { YYCTYPE operator*() const { if (this->val < this->lim) { return *val; } return '\0'; } operator const YYCTYPE *() const { if (this->val < this->lim) { return this->val; } return EMPTY; } const YYCTYPE *operator=(const YYCTYPE *rhs) { this->val = rhs; return rhs; } const YYCTYPE *operator+(int rhs) { return this->val + rhs; } const _YYCURSOR *operator-=(int rhs) { this->val -= rhs; return this; } _YYCURSOR& operator++() { this->val += 1; return *this; } const YYCTYPE *val{nullptr}; const YYCTYPE *lim{nullptr}; } YYCURSOR; YYCURSOR = (const unsigned char *) this->ds_input.udata() + this->ds_next_offset; _YYCURSOR yyt1; _YYCURSOR yyt2; _YYCURSOR yyt3; _YYCURSOR yyt4; _YYCURSOR hunk_heading; const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end(); const YYCTYPE *YYMARKER = YYCURSOR; class _yycond { public: int operator()() const { return this->val; } void operator=(int v) { this->val = v; } int val{yycinit}; } c; if (this->ds_bol) { c = yycbol; } this->ds_bol = false; if (this->ds_units) { c = yycunits; } this->ds_units = false; YYCURSOR.lim = YYLIMIT; cap_all.c_begin = this->ds_next_offset; cap_all.c_end = this->ds_next_offset; cap_inner.c_begin = this->ds_next_offset; cap_inner.c_end = this->ds_next_offset; /*!re2c re2c:yyfill:enable = 0; re2c:sentinel = 0; re2c:define:YYCTYPE = uint8_t; re2c:define:YYGETCONDITION = "c"; re2c:define:YYSETCONDITION = "c = @@;"; re2c:tags = 1; SPACE = [ \t\r]; ALPHA = [a-zA-Z]; ESC = "\x1b"; NUM = [0-9]; ALPHANUM = [a-zA-Z0-9_]; EOF = ""; SYN = "\x16"; IPV4SEG = ("25"[0-5]|("2"[0-4]|"1"{0,1}[0-9]){0,1}[0-9]); IPV4ADDR = (IPV4SEG"."){3,3}IPV4SEG; IPV6SEG = [0-9a-fA-F]{1,4}; IPV6ADDR = ( (IPV6SEG":"){7,7}IPV6SEG| (IPV6SEG":"){1,7}":"| (IPV6SEG":"){1,6}":"IPV6SEG| (IPV6SEG":"){1,5}(":"IPV6SEG){1,2}| (IPV6SEG":"){1,4}(":"IPV6SEG){1,3}| (IPV6SEG":"){1,3}(":"IPV6SEG){1,4}| (IPV6SEG":"){1,2}(":"IPV6SEG){1,5}| IPV6SEG":"((":"IPV6SEG){1,6})| ":"((":"IPV6SEG){1,7}|":")| [a-fA-F0-9]{4}":"(":"IPV6SEG){0,4}"%"[0-9a-zA-Z]{1,}| "::"('ffff'(":0"{1,4}){0,1}":"){0,1}IPV4ADDR| (IPV6SEG":"){1,4}":"IPV4ADDR ); UNITS = (([mup]?("s"|"S"))|(([kKmMgG]"i"?)?[bB])|("m"|"min")); EOF { return std::nullopt; } [\x00] { return std::nullopt; } <*> * { return std::nullopt; } SYN+ { RET(DT_ZERO_WIDTH_SPACE); } ("f"|"u"|"r")?'"'('\\'[^\x00]|[^\x00\x16\x1b\n"\\]|'""')*'"' { CAPTURE(DT_QUOTED_STRING); switch (this->ds_input[cap_inner.c_begin]) { case 'f': case 'u': case 'r': cap_inner.c_begin += 1; break; } cap_inner.c_begin += 1; cap_inner.c_end -= 1; return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } ("f"|"u"|"r")?'"""' { CAPTURE(DT_QUOTED_STRING); switch (this->ds_input[cap_inner.c_begin]) { case 'f': case 'u': case 'r': cap_inner.c_begin += 1; break; } cap_inner.c_begin += 3; goto yyc_dbldocstring; } ([\x00]|'"""') { CAPTURE(DT_QUOTED_STRING); cap_inner.c_end -= 3; return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } * { goto yyc_dbldocstring; } ("f"|"u"|"r")?"'''" { CAPTURE(DT_QUOTED_STRING); switch (this->ds_input[cap_inner.c_begin]) { case 'f': case 'u': case 'r': cap_inner.c_begin += 1; break; } cap_inner.c_begin += 3; goto yyc_sdocstring; } ([\x00]|"'''") { CAPTURE(DT_QUOTED_STRING); cap_inner.c_end -= 3; return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } * { goto yyc_sdocstring; } "/*" ([^\x00*]|"*"+[^\x00/])* "*"+ "/" { CAPTURE(DT_COMMENT); if (tf == text_format_t::TF_DIFF) { auto sf = this->to_string_fragment(cap_all); auto split_res = sf.split_when(string_fragment::tag1{'\n'}); cap_all.c_end = split_res.first.sf_end; cap_inner.c_end = split_res.first.sf_end; this->ds_next_offset = cap_all.c_end; } return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()}; } "