diff --git a/src/data_parser.cc b/src/data_parser.cc index bbc3a7b6..d9d4acc2 100644 --- a/src/data_parser.cc +++ b/src/data_parser.cc @@ -48,7 +48,8 @@ data_format_state_t dfs_prefix_next(data_format_state_t state, case DFS_INIT: switch (next_token) { case DT_PATH: - case DT_SEPARATOR: + case DT_COLON: + case DT_EQUALS: case DT_CONSTANT: case DT_EMAIL: case DT_WORD: @@ -100,7 +101,10 @@ data_format_state_t dfs_semi_next(data_format_state_t state, case DFS_KEY: switch (next_token) { - case DT_SEPARATOR: retval = DFS_VALUE; break; + case DT_COLON: + case DT_EQUALS: + retval = DFS_VALUE; + break; case DT_SEMI: retval = DFS_ERROR; break; @@ -146,7 +150,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state, case DFS_KEY: switch (next_token) { - case DT_SEPARATOR: + case DT_COLON: + case DT_EQUALS: retval = DFS_VALUE; break; @@ -168,7 +173,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state, case DFS_EXPECTING_SEP: switch (next_token) { - case DT_SEPARATOR: + case DT_COLON: + case DT_EQUALS: retval = DFS_VALUE; break; @@ -187,7 +193,8 @@ data_format_state_t dfs_comma_next(data_format_state_t state, retval = DFS_INIT; break; - case DT_SEPARATOR: + case DT_COLON: + case DT_EQUALS: retval = DFS_ERROR; break; diff --git a/src/data_parser.hh b/src/data_parser.hh index e9885856..865742c8 100644 --- a/src/data_parser.hh +++ b/src/data_parser.hh @@ -111,6 +111,7 @@ */ #define ELEMENT_LIST_T(var) var("" #var, __FILE__, __LINE__) +#define PUSH_FRONT(elem) push_front(elem, __FILE__, __LINE__) #define PUSH_BACK(elem) push_back(elem, __FILE__, __LINE__) #define POP_FRONT(elem) pop_front(__FILE__, __LINE__) #define POP_BACK(elem) pop_back(__FILE__, __LINE__) @@ -140,7 +141,9 @@ struct data_format { data_format(const char *name, data_token_t appender = DT_INVALID, data_token_t terminator = DT_INVALID) - : df_name(name), df_appender(appender), df_terminator(terminator) + : df_name(name), + df_appender(appender), + df_terminator(terminator) {}; const char * df_name; @@ -265,6 +268,13 @@ public: LIST_DEINIT_TRACE; }; + void push_front(const element &elem, const char *fn, int line) + { + ELEMENT_TRACE; + + this->std::list::push_front(elem); + }; + void push_back(const element &elem, const char *fn, int line) { ELEMENT_TRACE; @@ -462,6 +472,8 @@ private: : dp_errors("dp_errors", __FILE__, __LINE__), dp_pairs("dp_pairs", __FILE__, __LINE__), dp_format(NULL), + dp_qualifier(DT_INVALID), + dp_separator(DT_INVALID), dp_scanner(ds) { if (TRACE_FILE != NULL) { @@ -508,7 +520,17 @@ private: key_comps.PUSH_BACK(*iter); } - else if (iter->e_token == DT_SEPARATOR) { + else if (iter->e_token == this->dp_qualifier) { + value.SPLICE(value.end(), + key_comps, + key_comps.begin(), + key_comps.end()); + strip(value, element_if(DT_WHITE)); + if (!value.empty()) { + el_stack.PUSH_BACK(element(value, DNT_VALUE)); + } + } + else if (iter->e_token == this->dp_separator) { element_list_t::iterator key_iter = key_comps.end(); bool found = false; @@ -633,15 +655,11 @@ private: continue; } - if (kv_iter->e_token != DNT_VALUE) { - el_stack.POP_FRONT(); - continue; - } - std::string key_val = this->get_element_string(el_stack.front()); element_list_t ELEMENT_LIST_T(pair_subs); + if (schema != NULL) { SHA_Update(&context, key_val.c_str(), key_val.length()); } @@ -660,11 +678,28 @@ private: free_row.POP_FRONT(); } - ++kv_iter; + bool has_value = false; + + if (kv_iter->e_token == DNT_VALUE) { + ++kv_iter; + has_value = true; + } + pair_subs.SPLICE(pair_subs.begin(), el_stack, el_stack.begin(), kv_iter); + + if (!has_value) { + element_list_t ELEMENT_LIST_T(blank_value); + struct element blank; + + blank.e_token = DT_QUOTED_STRING; + blank.e_capture.c_begin = blank.e_capture.c_end = pair_subs.front().e_capture.c_end; + blank_value.PUSH_BACK(blank); + pair_subs.PUSH_BACK(element(blank_value, DNT_VALUE)); + } + pairs_out.PUSH_BACK(element(pair_subs, DNT_PAIR)); } @@ -709,6 +744,7 @@ private: case DT_IPV4_ADDRESS: case DT_IPV6_ADDRESS: case DT_MAC_ADDRESS: + case DT_HEX_DUMP: case DT_UUID: case DT_URL: case DT_PATH: @@ -750,7 +786,7 @@ private: blank.e_token = DNT_KEY; pair_subs.PUSH_BACK(blank); pair_subs.PUSH_BACK(prefix.front()); - pairs_out.push_front(element(pair_subs, DNT_PAIR)); + pairs_out.PUSH_FRONT(element(pair_subs, DNT_PAIR)); } if (schema != NULL) { @@ -766,6 +802,8 @@ private: this->dp_group_token.push_back(DT_INVALID); this->dp_group_stack.resize(1); + this->dp_qualifier = DT_INVALID; + this->dp_separator = DT_COLON; data_format_state_t prefix_state = DFS_INIT; data_format_state_t semi_state = DFS_INIT; @@ -846,6 +884,11 @@ private: this->dp_group_stack.pop_back(); } + if (hist[DT_EQUALS]) { + this->dp_qualifier = DT_COLON; + this->dp_separator = DT_EQUALS; + } + if (semi_state != DFS_ERROR && hist[DT_SEMI]) { this->dp_format = &FORMAT_SEMI; } @@ -902,6 +945,8 @@ private: element_list_t dp_pairs; schema_id_t dp_schema_id; data_format * dp_format; + data_token_t dp_qualifier; + data_token_t dp_separator; private: data_scanner *dp_scanner; diff --git a/src/data_scanner.cc b/src/data_scanner.cc index f120089f..cebdf817 100644 --- a/src/data_scanner.cc +++ b/src/data_scanner.cc @@ -49,7 +49,9 @@ static struct { { "path", pcrepp("\\A((?:/|\\./|\\.\\./)[\\w\\.\\-_\\~/]*)"), }, { "mac", pcrepp( - "\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})"), }, + "\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F]){5})(?!:)"), }, + { "hex", pcrepp( + "\\A([0-9a-fA-F][0-9a-fA-F](?::[0-9a-fA-F][0-9a-fA-F])+)"), }, { "date", pcrepp("\\A(\\d{4}/\\d{1,2}/\\d{1,2}|\\d{4}-\\d{1,2}-\\d{1,2})"), }, { "time", pcrepp( @@ -58,7 +60,9 @@ static struct { { "ipv6", pcrepp("\\A(::|[:\\da-fA-f\\.]+[a-fA-f\\d])"), }, - { "sep", pcrepp("\\A(:|=)"), + { "coln", pcrepp("\\A(:)"), + }, + { "eq", pcrepp("\\A(=)"), }, { "comm", pcrepp("\\A(,)"), }, @@ -245,12 +249,22 @@ bool data_scanner::tokenize(pcre_context &pc, data_token_t &token_out) } break; - case DT_SEPARATOR: { + case DT_COLON: { + pi.pi_offset = pi.pi_next_offset; + + if (str[pi.pi_offset] == ':') { + token_out = data_token_t(DT_COLON); + single_char_capture(pc, pi); + return true; + } + } + break; + + case DT_EQUALS: { pi.pi_offset = pi.pi_next_offset; - if (str[pi.pi_offset] == ':' || - str[pi.pi_offset] == '=') { - token_out = data_token_t(DT_SEPARATOR); + if (str[pi.pi_offset] == '=') { + token_out = data_token_t(DT_EQUALS); single_char_capture(pc, pi); return true; } diff --git a/src/data_scanner.hh b/src/data_scanner.hh index 9c617409..5df2120a 100644 --- a/src/data_scanner.hh +++ b/src/data_scanner.hh @@ -41,12 +41,14 @@ enum data_token_t { DT_URL, DT_PATH, DT_MAC_ADDRESS, + DT_HEX_DUMP, DT_DATE, DT_TIME, DT_IPV6_ADDRESS, /* DT_QUALIFIED_NAME, */ - DT_SEPARATOR, + DT_COLON, + DT_EQUALS, DT_COMMA, DT_SEMI, diff --git a/src/default-log-formats.json b/src/default-log-formats.json index 598e1d7f..aada5947 100644 --- a/src/default-log-formats.json +++ b/src/default-log-formats.json @@ -306,5 +306,88 @@ "identifier" : true } } + }, + "uwsgi_log" : { + "regex" : [ + "^\\[pid: (?\\d+)\\|app: (?[\\-\\d]+)\\|req: (?[\\-\\d]+)/(?\\d+)\\] (?[^ ]+) \\((?[^\\)]*)\\) \\{(?\\d+) vars in (?\\d+) bytes\\} \\[(?[^\\]]+)\\] (?[A-Z]+) (?[^ \\?]+)(?:\\?(?[^ ]*))? => generated (?\\d+) bytes in (?\\d+ \\w+) \\((?[^ ]+) (?\\d+)\\) (?\\d+) headers in (?\\d+) bytes \\((?\\d+) switches on core (?\\d+)\\)" + ], + "level-field": "sc_status", + "level" : { + "error" : "^[^123]" + }, + "value" : { + "s_pid" : { + "kind" : "string", + "identifier" : true + }, + "s_app" : { + "kind" : "string", + "identifier" : true + }, + "s_req" : { + "kind" : "integer" + }, + "s_worker_reqs" : { + "kind" : "integer" + }, + "c_ip" : { + "kind" : "string", + "collate" : "ipaddress", + "identifier" : true + }, + "cs_username" : { + "kind" : "string", + "identifier" : true + }, + "cs_vars" : { + "kind" : "integer" + }, + "cs_bytes" : { + "kind" : "integer" + }, + "cs_method" : { + "kind" : "string", + "identifier" : true + }, + "cs_uri_stem" : { + "kind" : "string", + "identifier" : true + }, + "cs_uri_query" : { + "kind" : "string" + }, + "sc_bytes" : { + "kind" : "integer" + }, + "s_runtime" : { + "kind" : "string" + }, + "cs_version" : { + "kind" : "string", + "identifier" : true + }, + "sc_status" : { + "kind" : "integer", + "foreign-key" : true + }, + "sc_headers" : { + "kind" : "integer" + }, + "sc_header_bytes" : { + "kind" : "integer" + }, + "s_switches" : { + "kind" : "integer" + }, + "s_core" : { + "kind" : "string", + "identifier" : true + } + }, + "sample" : [ + { + "line" : "[pid: 24386|app: 0|req: 482950/4125645] 86.221.170.65 () {44 vars in 1322 bytes} [Tue Jan 3 05:01:31 2012] GET /contest/log_presence/shhootter/?_=1325592089910 => generated 192 bytes in 21 msecs (HTTP/1.1 200) 4 headers in 188 bytes (1 switches on core 0)" + } + ] } } diff --git a/src/grep_proc.cc b/src/grep_proc.cc index 62c72bf0..d3d75ff3 100644 --- a/src/grep_proc.cc +++ b/src/grep_proc.cc @@ -191,7 +191,7 @@ void grep_proc::child_loop(void) line_value.clear(); done = !this->gp_source.grep_value_for_line(line, line_value); if (!done) { - pcre_context_static<60> pc; + pcre_context_static<128> pc; pcre_input pi(line_value); while (this->gp_pcre.match(pc, pi)) { diff --git a/src/lnav.cc b/src/lnav.cc index e45e2b82..3307c762 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -3223,7 +3223,7 @@ static void setup_highlights(textview_curses::highlight_map_t &hm) view_colors::VCR_DIFF_ADD); hm["$diffm"] = textview_curses:: highlighter(xpcre_compile( - "^(?:--- |-[^-].*)"), false, + "^(?:--- .*|-[^-].*)"), false, view_colors::VCR_DIFF_DELETE); hm["$diffs"] = textview_curses:: highlighter(xpcre_compile( diff --git a/src/log_format.cc b/src/log_format.cc index ef22e624..aaa3324b 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -203,7 +203,7 @@ bool external_log_format::scan(std::vector &dst, int len) { pcre_input pi(prefix, 0, len); - pcre_context_static<30> pc; + pcre_context_static<128> pc; bool retval = false; int curr_fmt = -1; @@ -228,7 +228,7 @@ bool external_log_format::scan(std::vector &dst, } if (level_cap != NULL && level_cap->c_begin != -1) { - pcre_context_static<30> pc_level; + pcre_context_static<128> pc_level; pcre_input pi_level(pi.get_substr_start(level_cap), 0, level_cap->length()); @@ -271,7 +271,7 @@ void external_log_format::annotate(const std::string &line, string_attrs_t &sa, std::vector &values) const { - pcre_context_static<30> pc; + pcre_context_static<128> pc; pcre_input pi(line); struct line_range lr; pcre_context::capture_t *cap; @@ -366,7 +366,7 @@ void external_log_format::build(std::vector &errors) for (std::vector::iterator iter = this->elf_samples.begin(); iter != this->elf_samples.end(); ++iter) { - pcre_context_static<30> pc; + pcre_context_static<128> pc; pcre_input pi(iter->s_line); bool found = false; diff --git a/src/textview_curses.hh b/src/textview_curses.hh index 96a9a5f9..6eb67e6b 100644 --- a/src/textview_curses.hh +++ b/src/textview_curses.hh @@ -250,7 +250,7 @@ public: this->tc_sub_source->text_value_for_line(*this, start, str); for (off = 0; off < (int)str.size(); ) { - int rc, matches[60]; + int rc, matches[128]; rc = pcre_exec(hl.h_code, hl.h_code_extra, @@ -259,7 +259,7 @@ public: off, 0, matches, - 60); + 128); if (rc > 0) { struct line_range lr; diff --git a/test/datafile_simple.6 b/test/datafile_simple.6 index df5f1426..72a011df 100644 --- a/test/datafile_simple.6 +++ b/test/datafile_simple.6 @@ -1,4 +1,12 @@ qualified:name: foo=1 bar=2 + key 0:0 +word 0:9 ^-------^ qualified + val 0:9 ^-------^ qualified +pair 0:9 ^-------^ qualified + key 10:10 ^ +word 10:14 ^--^ name + val 10:14 ^--^ name +pair 10:14 ^--^ name key 16:19 ^-^ foo num 20:21 ^ 1 val 20:21 ^ 1 diff --git a/test/datafile_syslog.0 b/test/datafile_syslog.0 index 3f1e5d31..d37c5cc7 100644 --- a/test/datafile_syslog.0 +++ b/test/datafile_syslog.0 @@ -1,4 +1,8 @@ timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages + key 0:0 +word 0:8 ^------^ timstack + val 0:8 ^------^ timstack +pair 0:8 ^------^ timstack key 11:14 ^-^ TTY sym 15:20 ^---^ pts/6 val 15:20 ^---^ pts/6 diff --git a/test/log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt b/test/log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt index e85457a8..ca0d28b9 100644 --- a/test/log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt +++ b/test/log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt @@ -1,4 +1,8 @@ Jun 3 07:00:23 Tim-Stacks-iMac.local sudo[2326]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/bin/ls + key 53:53 ^ +word 53:58 ^---^ stack + val 53:58 ^---^ stack +pair 53:58 ^---^ stack key 61:64 ^-^ TTY sym 65:72 ^-----^ ttys002 val 65:72 ^-----^ ttys002 diff --git a/test/log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt b/test/log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt index a0c7d09b..5877a733 100644 --- a/test/log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt +++ b/test/log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt @@ -1,4 +1,8 @@ Jun 3 07:02:37 Tim-Stacks-iMac.local sudo[2717]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/usr/bin/env VAR1=foo ls + key 53:53 ^ +word 53:58 ^---^ stack + val 53:58 ^---^ stack +pair 53:58 ^---^ stack key 61:64 ^-^ TTY sym 65:72 ^-----^ ttys002 val 65:72 ^-----^ ttys002 diff --git a/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt b/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt index fd557c69..4e42a21c 100644 --- a/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt +++ b/test/log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt @@ -1,10 +1,15 @@ Jun 2 00:34:32 Tim-Stacks-iMac kernel[0]: vmnet: VNetUserIf_Create: created userIf at 0xffffff802644f400. - key 50:50 ^ + key 43:48 ^---^ vmnet +quot 48:48 ^ + val 48:48 ^ +pair 43:48 ^---^ vmnet key 50:67 ^---------------^ VNetUserIf_Create -pair 50:67 ^---------------^ VNetUserIf_Create - key 77:77 ^ +word 69:76 ^-----^ created +wspc 76:77 ^ sym 77:83 ^----^ userIf -pair 77:83 ^----^ userIf - key 87:87 ^ +wspc 83:84 ^ +word 84:86 ^^ at +wspc 86:87 ^ hex 87:105 ^----------------^ 0xffffff802644f400 -pair 87:105 ^----------------^ 0xffffff802644f400 + val 69:105 ^----------------------------------^ created userIf at 0xffffff802644f400 +pair 50:105 ^-----------------------------------------------------^ VNetUserIf_Create: created userIf at 0xffffff802644f400 diff --git a/test/log-samples/sample-c15acd32844669d23d0cbc88ec548129ed2c592e.txt b/test/log-samples/sample-c15acd32844669d23d0cbc88ec548129ed2c592e.txt new file mode 100644 index 00000000..327bdcc5 --- /dev/null +++ b/test/log-samples/sample-c15acd32844669d23d0cbc88ec548129ed2c592e.txt @@ -0,0 +1,67 @@ + Jul 14 14:31:06 linjenkins3 kernel: [31809412.513897] [UFW BLOCK] IN=eth0 OUT= MAC=40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 SRC=69.60.116.202 DST=173.203.237.224 LEN=44 TOS=0x00 PREC=0x00 TTL=29 ID=15852 PROTO=TCP SPT=43998 DPT=3389 WINDOW=3072 RES=0x00 SYN URGP=0 + key 37:68 ^-----------------------------^ 31809412.513897] [UFW BLOCK] IN + sym 69:73 ^--^ eth0 + val 69:73 ^--^ eth0 +pair 37:73 ^----------------------------------^ 31809412.513897] [UFW BLOCK] IN=eth0 + key 74:77 ^-^ OUT +quot 77:77 ^ + val 77:77 ^ +pair 74:77 ^-^ OUT + key 79:82 ^-^ MAC + hex 83:124 ^---------------------------------------^ 40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 + val 83:124 ^---------------------------------------^ 40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 +pair 79:124 ^-------------------------------------------^ MAC=40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 + key 125:128 ^-^ SRC +ipv4 129:142 ^-----------^ 69.60.116.202 + val 129:142 ^-----------^ 69.60.116.202 +pair 125:142 ^---------------^ SRC=69.60.116.202 + key 143:146 ^-^ DST +ipv4 147:162 ^-------------^ 173.203.237.224 + val 147:162 ^-------------^ 173.203.237.224 +pair 143:162 ^-----------------^ DST=173.203.237.224 + key 163:166 ^-^ LEN + num 167:169 ^^ 44 + val 167:169 ^^ 44 +pair 163:169 ^----^ LEN=44 + key 170:173 ^-^ TOS + hex 174:178 ^--^ 0x00 + val 174:178 ^--^ 0x00 +pair 170:178 ^------^ TOS=0x00 + key 179:183 ^--^ PREC + hex 184:188 ^--^ 0x00 + val 184:188 ^--^ 0x00 +pair 179:188 ^-------^ PREC=0x00 + key 189:192 ^-^ TTL + num 193:195 ^^ 29 + val 193:195 ^^ 29 +pair 189:195 ^----^ TTL=29 + key 196:198 ^^ ID + num 199:204 ^---^ 15852 + val 199:204 ^---^ 15852 +pair 196:204 ^------^ ID=15852 + key 205:210 ^---^ PROTO + sym 211:214 ^-^ TCP + val 211:214 ^-^ TCP +pair 205:214 ^-------^ PROTO=TCP + key 215:218 ^-^ SPT + num 219:224 ^---^ 43998 + val 219:224 ^---^ 43998 +pair 215:224 ^-------^ SPT=43998 + key 225:228 ^-^ DPT + num 229:233 ^--^ 3389 + val 229:233 ^--^ 3389 +pair 225:233 ^------^ DPT=3389 + key 234:240 ^----^ WINDOW + num 241:245 ^--^ 3072 + val 241:245 ^--^ 3072 +pair 234:245 ^---------^ WINDOW=3072 + key 246:249 ^-^ RES + hex 250:254 ^--^ 0x00 +wspc 254:255 ^ + sym 255:258 ^-^ SYN + val 250:258 ^------^ 0x00 SYN +pair 246:258 ^----------^ RES=0x00 SYN + key 259:263 ^--^ URGP + num 264:265 ^ 0 + val 264:265 ^ 0 +pair 259:265 ^----^ URGP=0