[data parser] hex dump values and add the uwsgi log format

pull/69/head
Timothy Stack 11 years ago
parent e8672e825a
commit e75f42dd07

@ -48,7 +48,8 @@ data_format_state_t dfs_prefix_next(data_format_state_t state,
case DFS_INIT:
switch (next_token) {
case DT_PATH:
case DT_SEPARATOR:
case DT_COLON:
case DT_EQUALS:
case DT_CONSTANT:
case DT_EMAIL:
case DT_WORD:
@ -100,7 +101,10 @@ data_format_state_t dfs_semi_next(data_format_state_t state,
case DFS_KEY:
switch (next_token) {
case DT_SEPARATOR: retval = DFS_VALUE; break;
case DT_COLON:
case DT_EQUALS:
retval = DFS_VALUE;
break;
case DT_SEMI: retval = DFS_ERROR; break;
@ -146,7 +150,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state,
case DFS_KEY:
switch (next_token) {
case DT_SEPARATOR:
case DT_COLON:
case DT_EQUALS:
retval = DFS_VALUE;
break;
@ -168,7 +173,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state,
case DFS_EXPECTING_SEP:
switch (next_token) {
case DT_SEPARATOR:
case DT_COLON:
case DT_EQUALS:
retval = DFS_VALUE;
break;
@ -187,7 +193,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state,
retval = DFS_INIT;
break;
case DT_SEPARATOR:
case DT_COLON:
case DT_EQUALS:
retval = DFS_ERROR;
break;

@ -111,6 +111,7 @@
*/
/*
 * Expands to a declarator for a variable named `var` that is constructed
 * with its own stringified name plus the file and line of the declaration,
 * e.g. `element_list_t ELEMENT_LIST_T(pair_subs);`.
 */
#define ELEMENT_LIST_T(var) var("" #var, __FILE__, __LINE__)
/*
 * Call-site wrappers for the list-mutating methods: each forwards to the
 * lower-case method of the same name and appends the caller's file and
 * line as trailing arguments.
 */
#define PUSH_FRONT(elem) push_front(elem, __FILE__, __LINE__)
#define PUSH_BACK(elem) push_back(elem, __FILE__, __LINE__)
/*
 * The `elem` parameter of the two POP_ macros is not used in the
 * expansion; call sites invoke them with an empty argument, e.g.
 * `el_stack.POP_FRONT()`.
 */
#define POP_FRONT(elem) pop_front(__FILE__, __LINE__)
#define POP_BACK(elem) pop_back(__FILE__, __LINE__)
@ -140,7 +141,9 @@ struct data_format {
data_format(const char *name,
data_token_t appender = DT_INVALID,
data_token_t terminator = DT_INVALID)
: df_name(name), df_appender(appender), df_terminator(terminator)
: df_name(name),
df_appender(appender),
df_terminator(terminator)
{};
const char * df_name;
@ -265,6 +268,13 @@ public:
LIST_DEINIT_TRACE;
};
/**
 * Insert a copy of `elem` at the front of this list.
 *
 * Normally reached through the PUSH_FRONT() macro, which supplies
 * __FILE__ and __LINE__ for the last two arguments.
 *
 * @param elem The element to insert.
 * @param fn   Source file name of the call site.
 * @param line Source line number of the call site.
 */
void push_front(const element &elem, const char *fn, int line)
{
/* NOTE(review): ELEMENT_TRACE is defined outside this view; it presumably
 * consumes `elem`, `fn` and `line` by name -- confirm before renaming. */
ELEMENT_TRACE;
/* Explicitly qualified call to the std::list<element> implementation. */
this->std::list<element>::push_front(elem);
};
void push_back(const element &elem, const char *fn, int line)
{
ELEMENT_TRACE;
@ -462,6 +472,8 @@ private:
: dp_errors("dp_errors", __FILE__, __LINE__),
dp_pairs("dp_pairs", __FILE__, __LINE__),
dp_format(NULL),
dp_qualifier(DT_INVALID),
dp_separator(DT_INVALID),
dp_scanner(ds)
{
if (TRACE_FILE != NULL) {
@ -508,7 +520,17 @@ private:
key_comps.PUSH_BACK(*iter);
}
else if (iter->e_token == DT_SEPARATOR) {
else if (iter->e_token == this->dp_qualifier) {
value.SPLICE(value.end(),
key_comps,
key_comps.begin(),
key_comps.end());
strip(value, element_if(DT_WHITE));
if (!value.empty()) {
el_stack.PUSH_BACK(element(value, DNT_VALUE));
}
}
else if (iter->e_token == this->dp_separator) {
element_list_t::iterator key_iter = key_comps.end();
bool found = false;
@ -633,15 +655,11 @@ private:
continue;
}
if (kv_iter->e_token != DNT_VALUE) {
el_stack.POP_FRONT();
continue;
}
std::string key_val =
this->get_element_string(el_stack.front());
element_list_t ELEMENT_LIST_T(pair_subs);
if (schema != NULL) {
SHA_Update(&context, key_val.c_str(), key_val.length());
}
@ -660,11 +678,28 @@ private:
free_row.POP_FRONT();
}
++kv_iter;
/*
 * Work out whether the key that kv_iter just stepped over is followed by
 * a value element.  kv_iter walks el_stack and may now be at
 * el_stack.end() (the key was the last element left), so it has to be
 * checked before it is dereferenced.  A key without a value is given a
 * blank value further down.
 */
bool has_value = false;
if (kv_iter != el_stack.end() && kv_iter->e_token == DNT_VALUE) {
    /* Include the value in the range that is spliced into the pair. */
    ++kv_iter;
    has_value = true;
}
pair_subs.SPLICE(pair_subs.begin(),
el_stack,
el_stack.begin(),
kv_iter);
if (!has_value) {
element_list_t ELEMENT_LIST_T(blank_value);
struct element blank;
blank.e_token = DT_QUOTED_STRING;
blank.e_capture.c_begin = blank.e_capture.c_end = pair_subs.front().e_capture.c_end;
blank_value.PUSH_BACK(blank);
pair_subs.PUSH_BACK(element(blank_value, DNT_VALUE));
}
pairs_out.PUSH_BACK(element(pair_subs, DNT_PAIR));
}
@ -709,6 +744,7 @@ private:
case DT_IPV4_ADDRESS:
case DT_IPV6_ADDRESS:
case DT_MAC_ADDRESS:
case DT_HEX_DUMP:
case DT_UUID:
case DT_URL:
case DT_PATH:
@ -750,7 +786,7 @@ private:
blank.e_token = DNT_KEY;
pair_subs.PUSH_BACK(blank);
pair_subs.PUSH_BACK(prefix.front());
pairs_out.push_front(element(pair_subs, DNT_PAIR));
pairs_out.PUSH_FRONT(element(pair_subs, DNT_PAIR));
}
if (schema != NULL) {
@ -766,6 +802,8 @@ private:
this->dp_group_token.push_back(DT_INVALID);
this->dp_group_stack.resize(1);
this->dp_qualifier = DT_INVALID;
this->dp_separator = DT_COLON;
data_format_state_t prefix_state = DFS_INIT;
data_format_state_t semi_state = DFS_INIT;
@ -846,6 +884,11 @@ private:
this->dp_group_stack.pop_back();
}
if (hist[DT_EQUALS]) {
this->dp_qualifier = DT_COLON;
this->dp_separator = DT_EQUALS;
}
if (semi_state != DFS_ERROR && hist[DT_SEMI]) {
this->dp_format = &FORMAT_SEMI;
}
@ -902,6 +945,8 @@ private:
element_list_t dp_pairs;
schema_id_t dp_schema_id;
data_format * dp_format;
data_token_t dp_qualifier;
data_token_t dp_separator;
private:
data_scanner *dp_scanner;

@ -49,7 +49,9 @@ static struct {
{ "path", pcrepp("\\A((?:/|\\./|\\.\\./)[\\w\\.\\-_\\~/]*)"),
},
{ "mac", pcrepp(
"\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})"), },
"\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})(?!:)"), },
{ "hex", pcrepp(
"\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F])+)"), },
{ "date",
pcrepp("\\A(\\d{4}/\\d{1,2}/\\d{1,2}|\\d{4}-\\d{1,2}-\\d{1,2})"), },
{ "time", pcrepp(
@ -58,7 +60,9 @@ static struct {
/*
 * IPv6 address: either "::" or a run of hex digits, colons and dots that
 * ends in a hex digit.  The upper-case hex range is A-F; a range of A-f
 * would also accept G-Z and the punctuation that sits between 'Z' and
 * 'a' in ASCII.
 */
{ "ipv6", pcrepp("\\A(::|[:\\da-fA-F\\.]+[a-fA-F\\d])"),
},
{ "sep", pcrepp("\\A(:|=)"),
{ "coln", pcrepp("\\A(:)"),
},
{ "eq", pcrepp("\\A(=)"),
},
{ "comm", pcrepp("\\A(,)"),
},
@ -245,12 +249,22 @@ bool data_scanner::tokenize(pcre_context &pc, data_token_t &token_out)
}
break;
case DT_SEPARATOR: {
case DT_COLON: {
pi.pi_offset = pi.pi_next_offset;
if (str[pi.pi_offset] == ':') {
token_out = data_token_t(DT_COLON);
single_char_capture(pc, pi);
return true;
}
}
break;
case DT_EQUALS: {
pi.pi_offset = pi.pi_next_offset;
if (str[pi.pi_offset] == ':' ||
str[pi.pi_offset] == '=') {
token_out = data_token_t(DT_SEPARATOR);
if (str[pi.pi_offset] == '=') {
token_out = data_token_t(DT_EQUALS);
single_char_capture(pc, pi);
return true;
}

@ -41,12 +41,14 @@ enum data_token_t {
DT_URL,
DT_PATH,
DT_MAC_ADDRESS,
DT_HEX_DUMP,
DT_DATE,
DT_TIME,
DT_IPV6_ADDRESS,
/* DT_QUALIFIED_NAME, */
DT_SEPARATOR,
DT_COLON,
DT_EQUALS,
DT_COMMA,
DT_SEMI,

@ -306,5 +306,88 @@
"identifier" : true
}
}
},
"uwsgi_log" : {
"regex" : [
"^\\[pid: (?<s_pid>\\d+)\\|app: (?<s_app>[\\-\\d]+)\\|req: (?<s_req>[\\-\\d]+)/(?<s_worker_reqs>\\d+)\\] (?<c_ip>[^ ]+) \\((?<cs_username>[^\\)]*)\\) \\{(?<cs_vars>\\d+) vars in (?<cs_bytes>\\d+) bytes\\} \\[(?<timestamp>[^\\]]+)\\] (?<cs_method>[A-Z]+) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? => generated (?<sc_bytes>\\d+) bytes in (?<s_runtime>\\d+ \\w+) \\((?<cs_version>[^ ]+) (?<sc_status>\\d+)\\) (?<sc_headers>\\d+) headers in (?<sc_header_bytes>\\d+) bytes \\((?<s_switches>\\d+) switches on core (?<s_core>\\d+)\\)"
],
"level-field": "sc_status",
"level" : {
"error" : "^[^123]"
},
"value" : {
"s_pid" : {
"kind" : "string",
"identifier" : true
},
"s_app" : {
"kind" : "string",
"identifier" : true
},
"s_req" : {
"kind" : "integer"
},
"s_worker_reqs" : {
"kind" : "integer"
},
"c_ip" : {
"kind" : "string",
"collate" : "ipaddress",
"identifier" : true
},
"cs_username" : {
"kind" : "string",
"identifier" : true
},
"cs_vars" : {
"kind" : "integer"
},
"cs_bytes" : {
"kind" : "integer"
},
"cs_method" : {
"kind" : "string",
"identifier" : true
},
"cs_uri_stem" : {
"kind" : "string",
"identifier" : true
},
"cs_uri_query" : {
"kind" : "string"
},
"sc_bytes" : {
"kind" : "integer"
},
"s_runtime" : {
"kind" : "string"
},
"cs_version" : {
"kind" : "string",
"identifier" : true
},
"sc_status" : {
"kind" : "integer",
"foreign-key" : true
},
"sc_headers" : {
"kind" : "integer"
},
"sc_header_bytes" : {
"kind" : "integer"
},
"s_switches" : {
"kind" : "integer"
},
"s_core" : {
"kind" : "string",
"identifier" : true
}
},
"sample" : [
{
"line" : "[pid: 24386|app: 0|req: 482950/4125645] 86.221.170.65 () {44 vars in 1322 bytes} [Tue Jan 3 05:01:31 2012] GET /contest/log_presence/shhootter/?_=1325592089910 => generated 192 bytes in 21 msecs (HTTP/1.1 200) 4 headers in 188 bytes (1 switches on core 0)"
}
]
}
}

@ -191,7 +191,7 @@ void grep_proc::child_loop(void)
line_value.clear();
done = !this->gp_source.grep_value_for_line(line, line_value);
if (!done) {
pcre_context_static<60> pc;
pcre_context_static<128> pc;
pcre_input pi(line_value);
while (this->gp_pcre.match(pc, pi)) {

@ -3223,7 +3223,7 @@ static void setup_highlights(textview_curses::highlight_map_t &hm)
view_colors::VCR_DIFF_ADD);
hm["$diffm"] = textview_curses::
highlighter(xpcre_compile(
"^(?:--- |-[^-].*)"), false,
"^(?:--- .*|-[^-].*)"), false,
view_colors::VCR_DIFF_DELETE);
hm["$diffs"] = textview_curses::
highlighter(xpcre_compile(

@ -203,7 +203,7 @@ bool external_log_format::scan(std::vector<logline> &dst,
int len)
{
pcre_input pi(prefix, 0, len);
pcre_context_static<30> pc;
pcre_context_static<128> pc;
bool retval = false;
int curr_fmt = -1;
@ -228,7 +228,7 @@ bool external_log_format::scan(std::vector<logline> &dst,
}
if (level_cap != NULL && level_cap->c_begin != -1) {
pcre_context_static<30> pc_level;
pcre_context_static<128> pc_level;
pcre_input pi_level(pi.get_substr_start(level_cap),
0,
level_cap->length());
@ -271,7 +271,7 @@ void external_log_format::annotate(const std::string &line,
string_attrs_t &sa,
std::vector<logline_value> &values) const
{
pcre_context_static<30> pc;
pcre_context_static<128> pc;
pcre_input pi(line);
struct line_range lr;
pcre_context::capture_t *cap;
@ -366,7 +366,7 @@ void external_log_format::build(std::vector<std::string> &errors)
for (std::vector<sample>::iterator iter = this->elf_samples.begin();
iter != this->elf_samples.end();
++iter) {
pcre_context_static<30> pc;
pcre_context_static<128> pc;
pcre_input pi(iter->s_line);
bool found = false;

@ -250,7 +250,7 @@ public:
this->tc_sub_source->text_value_for_line(*this, start, str);
for (off = 0; off < (int)str.size(); ) {
int rc, matches[60];
int rc, matches[128];
rc = pcre_exec(hl.h_code,
hl.h_code_extra,
@ -259,7 +259,7 @@ public:
off,
0,
matches,
60);
128);
if (rc > 0) {
struct line_range lr;

@ -1,4 +1,12 @@
qualified:name: foo=1 bar=2
key 0:0
word 0:9 ^-------^ qualified
val 0:9 ^-------^ qualified
pair 0:9 ^-------^ qualified
key 10:10 ^
word 10:14 ^--^ name
val 10:14 ^--^ name
pair 10:14 ^--^ name
key 16:19 ^-^ foo
num 20:21 ^ 1
val 20:21 ^ 1

@ -1,4 +1,8 @@
timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages
key 0:0
word 0:8 ^------^ timstack
val 0:8 ^------^ timstack
pair 0:8 ^------^ timstack
key 11:14 ^-^ TTY
sym 15:20 ^---^ pts/6
val 15:20 ^---^ pts/6

@ -1,4 +1,8 @@
Jun 3 07:00:23 Tim-Stacks-iMac.local sudo[2326]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/bin/ls
key 53:53 ^
word 53:58 ^---^ stack
val 53:58 ^---^ stack
pair 53:58 ^---^ stack
key 61:64 ^-^ TTY
sym 65:72 ^-----^ ttys002
val 65:72 ^-----^ ttys002

@ -1,4 +1,8 @@
Jun 3 07:02:37 Tim-Stacks-iMac.local sudo[2717]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/usr/bin/env VAR1=foo ls
key 53:53 ^
word 53:58 ^---^ stack
val 53:58 ^---^ stack
pair 53:58 ^---^ stack
key 61:64 ^-^ TTY
sym 65:72 ^-----^ ttys002
val 65:72 ^-----^ ttys002

@ -1,10 +1,15 @@
Jun 2 00:34:32 Tim-Stacks-iMac kernel[0]: vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400.
key 50:50 ^
key 43:48 ^---^ vmnet
quot 48:48 ^
val 48:48 ^
pair 43:48 ^---^ vmnet
key 50:67 ^---------------^ VNetUserIf_Create
pair 50:67 ^---------------^ VNetUserIf_Create
key 77:77 ^
word 69:76 ^-----^ created
wspc 76:77 ^
sym 77:83 ^----^ userIf
pair 77:83 ^----^ userIf
key 87:87 ^
wspc 83:84 ^
word 84:86 ^^ at
wspc 86:87 ^
hex 87:105 ^----------------^ 0xffffff802644f400
pair 87:105 ^----------------^ 0xffffff802644f400
val 69:105 ^----------------------------------^ created userIf at 0xffffff802644f400
pair 50:105 ^-----------------------------------------------------^ VNetUserIf_Create: created userIf at 0xffffff802644f400

@ -0,0 +1,67 @@
Jul 14 14:31:06 linjenkins3 kernel: [31809412.513897] [UFW BLOCK] IN=eth0 OUT= MAC=40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 SRC=69.60.116.202 DST=173.203.237.224 LEN=44 TOS=0x00 PREC=0x00 TTL=29 ID=15852 PROTO=TCP SPT=43998 DPT=3389 WINDOW=3072 RES=0x00 SYN URGP=0
key 37:68 ^-----------------------------^ 31809412.513897] [UFW BLOCK] IN
sym 69:73 ^--^ eth0
val 69:73 ^--^ eth0
pair 37:73 ^----------------------------------^ 31809412.513897] [UFW BLOCK] IN=eth0
key 74:77 ^-^ OUT
quot 77:77 ^
val 77:77 ^
pair 74:77 ^-^ OUT
key 79:82 ^-^ MAC
hex 83:124 ^---------------------------------------^ 40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00
val 83:124 ^---------------------------------------^ 40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00
pair 79:124 ^-------------------------------------------^ MAC=40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00
key 125:128 ^-^ SRC
ipv4 129:142 ^-----------^ 69.60.116.202
val 129:142 ^-----------^ 69.60.116.202
pair 125:142 ^---------------^ SRC=69.60.116.202
key 143:146 ^-^ DST
ipv4 147:162 ^-------------^ 173.203.237.224
val 147:162 ^-------------^ 173.203.237.224
pair 143:162 ^-----------------^ DST=173.203.237.224
key 163:166 ^-^ LEN
num 167:169 ^^ 44
val 167:169 ^^ 44
pair 163:169 ^----^ LEN=44
key 170:173 ^-^ TOS
hex 174:178 ^--^ 0x00
val 174:178 ^--^ 0x00
pair 170:178 ^------^ TOS=0x00
key 179:183 ^--^ PREC
hex 184:188 ^--^ 0x00
val 184:188 ^--^ 0x00
pair 179:188 ^-------^ PREC=0x00
key 189:192 ^-^ TTL
num 193:195 ^^ 29
val 193:195 ^^ 29
pair 189:195 ^----^ TTL=29
key 196:198 ^^ ID
num 199:204 ^---^ 15852
val 199:204 ^---^ 15852
pair 196:204 ^------^ ID=15852
key 205:210 ^---^ PROTO
sym 211:214 ^-^ TCP
val 211:214 ^-^ TCP
pair 205:214 ^-------^ PROTO=TCP
key 215:218 ^-^ SPT
num 219:224 ^---^ 43998
val 219:224 ^---^ 43998
pair 215:224 ^-------^ SPT=43998
key 225:228 ^-^ DPT
num 229:233 ^--^ 3389
val 229:233 ^--^ 3389
pair 225:233 ^------^ DPT=3389
key 234:240 ^----^ WINDOW
num 241:245 ^--^ 3072
val 241:245 ^--^ 3072
pair 234:245 ^---------^ WINDOW=3072
key 246:249 ^-^ RES
hex 250:254 ^--^ 0x00
wspc 254:255 ^
sym 255:258 ^-^ SYN
val 250:258 ^------^ 0x00 SYN
pair 246:258 ^----------^ RES=0x00 SYN
key 259:263 ^--^ URGP
num 264:265 ^ 0
val 264:265 ^ 0
pair 259:265 ^----^ URGP=0
Loading…
Cancel
Save