2013-05-03 06:02:03 +00:00
|
|
|
/**
|
|
|
|
* Copyright (c) 2007-2012, Timothy Stack
|
|
|
|
*
|
|
|
|
* All rights reserved.
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
|
|
* list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
2011-06-13 14:46:03 +00:00
|
|
|
|
|
|
|
#ifndef __data_scanner_hh
|
|
|
|
#define __data_scanner_hh
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
#include "pcrepp.hh"
|
2014-02-01 14:41:11 +00:00
|
|
|
#include "shared_buffer.hh"
|
2011-06-13 14:46:03 +00:00
|
|
|
|
|
|
|
/**
 * Token identifiers used by data_scanner.
 *
 * Enumerators without an explicit value continue counting from the previous
 * one, so the ORDER of the entries below determines their numeric values.
 * Insert new entries with care.
 */
enum data_token_t {
    DT_INVALID = -1,

    /* DT_* tokens, numbered consecutively starting at zero. */
    DT_QUOTED_STRING = 0,
    DT_URL,
    DT_PATH,
    DT_MAC_ADDRESS,
    DT_DATE,
    DT_TIME,
    DT_IPV6_ADDRESS,
    DT_HEX_DUMP,
    DT_XML_EMPTY_TAG,
    DT_XML_OPEN_TAG,
    DT_XML_CLOSE_TAG,
    /* DT_QUALIFIED_NAME, */

    /* Separators. */
    DT_COLON,
    DT_EQUALS,
    DT_COMMA,
    DT_SEMI,

    /* Paired delimiters. */
    DT_LCURLY,
    DT_RCURLY,

    DT_LSQUARE,
    DT_RSQUARE,

    DT_LPAREN,
    DT_RPAREN,

    DT_LANGLE,
    DT_RANGLE,

    DT_IPV4_ADDRESS,
    DT_UUID,

    /* Numeric forms. */
    DT_VERSION_NUMBER,
    DT_OCTAL_NUMBER,
    DT_PERCENTAGE,
    DT_NUMBER,
    DT_HEX_NUMBER,

    DT_EMAIL,
    DT_CONSTANT,
    DT_WORD,
    DT_SYMBOL,
    DT_LINE,
    DT_WHITE,
    DT_DOT,

    DT_GARBAGE,

    /* One past the last DT_* token listed above. */
    DT_TERMINAL_MAX = DT_GARBAGE + 1,

    /*
     * DNT_* identifiers start at a fixed value of 50.
     * NOTE(review): presumably these are the non-terminal node types built
     * by a parser on top of the DT_* tokens -- confirm against the users of
     * this enum.
     */
    DNT_KEY = 50,
    DNT_PAIR,
    DNT_VALUE,
    DNT_ROW,
    DNT_UNITS,
    DNT_MEASUREMENT,
    DNT_VARIABLE_KEY,
    DNT_ROWRANGE,
    DNT_DATE_TIME,
    DNT_GROUP,

    /* One past the last DNT_* identifier listed above. */
    DNT_MAX,

    /* NOTE(review): looks like a wildcard value -- confirm with callers. */
    DT_ANY = 100,
};
|
|
|
|
|
|
|
|
class data_scanner {
|
|
|
|
public:
|
2011-06-18 20:42:07 +00:00
|
|
|
static const char *token2name(data_token_t token);
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-09-29 05:36:07 +00:00
|
|
|
data_scanner(const std::string &line, size_t off = 0, size_t len = (size_t) -1)
|
2013-05-28 04:35:00 +00:00
|
|
|
: ds_line(line),
|
2013-06-03 14:45:19 +00:00
|
|
|
ds_pcre_input(ds_line.c_str(), off, len)
|
2013-05-28 04:35:00 +00:00
|
|
|
{
|
|
|
|
if (!line.empty() && line[line.length() - 1] == '.') {
|
|
|
|
this->ds_pcre_input.pi_length -= 1;
|
|
|
|
}
|
2011-06-13 14:46:03 +00:00
|
|
|
};
|
|
|
|
|
2014-09-29 05:36:07 +00:00
|
|
|
data_scanner(shared_buffer_ref &line, size_t off = 0, size_t len = (size_t) -1)
|
2015-04-02 03:42:14 +00:00
|
|
|
: ds_sbr(line), ds_pcre_input(line.get_data(), off, len == (size_t) -1 ? line.length() : len)
|
2014-02-01 14:41:11 +00:00
|
|
|
{
|
2015-04-02 03:42:14 +00:00
|
|
|
require(len == (size_t) -1 || len <= line.length());
|
2014-02-01 14:41:11 +00:00
|
|
|
if (line.length() > 0 && line.get_data()[line.length() - 1] == '.') {
|
|
|
|
this->ds_pcre_input.pi_length -= 1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-06-13 14:46:03 +00:00
|
|
|
bool tokenize(pcre_context &pc, data_token_t &token_out);
|
2015-06-08 13:27:45 +00:00
|
|
|
bool tokenize2(pcre_context &pc, data_token_t &token_out);
|
2011-06-13 14:46:03 +00:00
|
|
|
|
2011-06-14 14:21:53 +00:00
|
|
|
pcre_input &get_input() { return this->ds_pcre_input; };
|
|
|
|
|
2011-06-13 14:46:03 +00:00
|
|
|
private:
|
|
|
|
std::string ds_line;
|
2014-02-01 14:41:11 +00:00
|
|
|
shared_buffer_ref ds_sbr;
|
2015-03-17 06:10:34 +00:00
|
|
|
pcre_input ds_pcre_input;
|
2011-06-13 14:46:03 +00:00
|
|
|
};
|
2015-03-17 06:10:34 +00:00
|
|
|
|
2011-06-13 14:46:03 +00:00
|
|
|
#endif
|