[data-parser] paths in keys

pull/1235/head
Tim Stack 5 months ago
parent a9eac0520d
commit 8d2aed91c3

@ -94,7 +94,7 @@ data_parser::pairup(data_parser::schema_id_t* schema,
}
} else if (iter->e_token == in_list.el_format.df_separator) {
auto key_iter = key_comps.end();
bool found = false, key_is_values = true;
bool found = false, key_is_values = true, mixed_values = false;
if (!key_comps.empty()) {
do {
@ -131,13 +131,58 @@ data_parser::pairup(data_parser::schema_id_t* schema,
case DT_SYMBOL:
key_is_values = false;
break;
case DT_WHITE:
break;
case DT_QUOTED_STRING:
case DT_URL:
case DT_PATH:
case DT_MAC_ADDRESS:
case DT_DATE:
case DT_TIME:
case DT_DATE_TIME:
case DT_IPV4_ADDRESS:
case DT_IPV6_ADDRESS:
case DT_HEX_DUMP:
case DT_UUID:
case DT_CREDIT_CARD_NUMBER:
case DT_VERSION_NUMBER:
case DT_OCTAL_NUMBER:
case DT_PERCENTAGE:
case DT_NUMBER:
case DT_HEX_NUMBER:
case DT_EMAIL:
case DT_CONSTANT: {
if (in_list.el_format.df_terminator
== DT_INVALID)
{
element_list_t ELEMENT_LIST_T(key_value);
mixed_values = true;
auto value_iter = key_iter;
++key_iter;
key_value.SPLICE(key_value.end(),
key_comps,
value_iter,
key_iter);
if (key_comps.empty()) {
key_iter = key_comps.end();
} else {
key_iter = key_comps.begin();
}
el_stack.PUSH_BACK(
element(key_value, DNT_VALUE));
}
break;
}
default:
break;
}
}
} while (key_iter != key_comps.begin() && !found);
}
if (!found && !el_stack.empty() && !key_comps.empty()) {
if (!found && !mixed_values && !el_stack.empty()
&& !key_comps.empty())
{
element_list_t::iterator value_iter;
if (el_stack.size() > 1
@ -165,6 +210,12 @@ data_parser::pairup(data_parser::schema_id_t* schema,
}
strip(key_comps, element_is_space{});
if (!key_comps.empty()) {
if (mixed_values) {
key_is_values = false;
while (key_comps.size() > 1) {
key_comps.POP_FRONT();
}
}
if (key_is_values) {
el_stack.PUSH_BACK(element(key_comps, DNT_VALUE));
} else {
@ -1049,7 +1100,9 @@ data_parser::discover_format_state::finalize()
if (this->dfs_semi_state != DFS_ERROR && this->dfs_hist[DT_SEMI]) {
this->dfs_format = FORMAT_SEMI;
} else if (this->dfs_comma_state != DFS_ERROR) {
this->dfs_format = FORMAT_COMMA;
if (this->dfs_hist[DT_COMMA] > 0) {
this->dfs_format = FORMAT_COMMA;
}
if (separator == DT_COLON && this->dfs_hist[DT_COMMA] > 0) {
if (!((this->dfs_hist[DT_COLON] == this->dfs_hist[DT_COMMA])
|| ((this->dfs_hist[DT_COLON] - 1)

@ -409,9 +409,33 @@ dist_noinst_DATA = \
formats/sqldir/init.sql \
formats/timestamp/format.json \
formats/xmlmsg/format.json \
log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt \
log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt \
log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt \
log-samples/sample-057d6c669632ef9d07b6adec605f6bdeae19af27.txt \
log-samples/sample-06aaa6f48a801f592558575d886864d6c3ab9ed4.txt \
log-samples/sample-1aeb47c0a97d19bb7418f0172480e05e49c6e53e.txt \
log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt \
log-samples/sample-3856ad0f551a04fde41a020158d6b33ef97c870a.txt \
log-samples/sample-45364b3fd51af92a4ad8a309b5f4fd88.txt \
log-samples/sample-500c9e492e04f5f58862c8086ca301de0dd976ce.txt \
log-samples/sample-55ac97afae4b0650ccb62e2dbc8d89bb.txt \
log-samples/sample-6049d4309f26eefb1a3406d937a9ba8a0df592a7.txt \
log-samples/sample-62315d884afdc4155b35f905415c74bfcfd39fc2.txt \
log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt \
log-samples/sample-9cf7fbb3546c676c686fac0ed096d026f46c875f.txt \
log-samples/sample-a74570613c082c7fe283672031e18e54e8887ffb.txt \
log-samples/sample-aca2878a2e50779c6697c0747ab1f60e4b368dcb.txt \
log-samples/sample-ad31f12d2adabd07e3ddda3ad5b0dbf6b49c4c99.txt \
log-samples/sample-bc6f6cf689fa5455616b4d9fbe121a48d3c9de59.txt \
log-samples/sample-c15acd32844669d23d0cbc88ec548129ed2c592e.txt \
log-samples/sample-c23f22c1b932b904203e018f78dead95fb89b15d.txt \
log-samples/sample-d0d6b3fc6766caac5ac3fac4a3754ceaab785eb8.txt \
log-samples/sample-d4a0aedc8350f64b22403eeef4eca71fbf749d2b.txt \
log-samples/sample-d714b5e8cd354321f376ed1c0a70ec9a2f58076d.txt \
log-samples/sample-dd7d406352ec6a11d966b6f015a9482b060f2b29.txt \
log-samples/sample-e779d1771e34f5203ae73e85802e78002be63db6.txt \
log-samples/sample-eef32793daf841a576d8a5cd27239d5d.txt \
log-samples/sample-f2fba0d0b1e57f9a707ea96a8a4efcdc.txt \
log-samples/sample-f5afbee90a8c054061c4e9ffe673293cce7761de.txt \
log-samples/sample-fc8923633e57bacd641d80dde3ff878212230552.txt \
remote-log-dir/logfile_access_log.0 \
remote-log-dir/logfile_access_log.1 \
tui-captures/tui_echo.0 \

@ -194,7 +194,7 @@ main(int argc, char* argv[])
string_attrs_t sa;
if (format.get() != nullptr) {
format->annotate(0, sa, ll_values);
format->annotate(0, sa, ll_values, false);
body = find_string_attr_range(sa, &SA_BODY);
}

@ -1,4 +1,30 @@
Jun 3 07:00:23 Tim-Stacks-iMac.local sudo[2326]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/bin/ls
msg :
format :
null
key 53:53 ^
word 53:58 ^---^ stack
val 53:58 ^---^ stack
pair 53:58 ^---^ stack
key 61:64 ^-^ TTY
sym 65:72 ^-----^ ttys002
val 65:72 ^-----^ ttys002
pair 61:72 ^---------^ TTY=ttys002
key 75:78 ^-^ PWD
path 79:80 ^ /
val 79:80 ^ /
pair 75:80 ^---^ PWD=/
key 83:87 ^--^ USER
word 88:92 ^--^ root
val 88:92 ^--^ root
pair 83:92 ^-------^ USER=root
key 95:102 ^-----^ COMMAND
path 103:110 ^-----^ /bin/ls
val 103:110 ^-----^ /bin/ls
pair 95:110 ^-------------^ COMMAND=/bin/ls
msg :stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/bin/ls
format :# : TTY=# ; PWD=# ; USER=# ; COMMAND=#
{
"col_0": "stack",
"TTY": "ttys002",
"PWD": "/",
"USER": "root",
"COMMAND": "/bin/ls"
}

@ -1,4 +1,35 @@
Jun 3 07:02:37 Tim-Stacks-iMac.local sudo[2717]: stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/usr/bin/env VAR1=foo ls
msg :
format :
null
key 53:53 ^
word 53:58 ^---^ stack
val 53:58 ^---^ stack
pair 53:58 ^---^ stack
key 61:64 ^-^ TTY
sym 65:72 ^-----^ ttys002
val 65:72 ^-----^ ttys002
pair 61:72 ^---------^ TTY=ttys002
key 75:78 ^-^ PWD
path 79:80 ^ /
val 79:80 ^ /
pair 75:80 ^---^ PWD=/
key 83:87 ^--^ USER
word 88:92 ^--^ root
val 88:92 ^--^ root
pair 83:92 ^-------^ USER=root
key 95:102 ^-----^ COMMAND
path 103:115 ^----------^ /usr/bin/env
wspc 115:116 ^
sym 116:120 ^--^ VAR1
word 121:124 ^-^ foo
wspc 124:125 ^
word 125:127 ^^ ls
val 103:127 ^----------------------^ /usr/bin/env VAR1=foo ls
pair 95:127 ^------------------------------^ COMMAND=/usr/bin/env VAR1=foo ls
msg :stack : TTY=ttys002 ; PWD=/ ; USER=root ; COMMAND=/usr/bin/env VAR1=foo ls
format :# : TTY=# ; PWD=# ; USER=# ; COMMAND=#
{
"col_0": "stack",
"TTY": "ttys002",
"PWD": "/",
"USER": "root",
"COMMAND": "/usr/bin/env VAR1=foo ls"
}

@ -57,16 +57,14 @@ pair 225:233
pair 234:245 ^---------^ WINDOW=3072
key 246:249 ^-^ RES
hex 250:254 ^--^ 0x00
wspc 254:255 ^
sym 255:258 ^-^ SYN
val 250:258 ^------^ 0x00 SYN
pair 246:258 ^----------^ RES=0x00 SYN
val 250:254 ^--^ 0x00
pair 246:254 ^------^ RES=0x00
key 259:263 ^--^ URGP
num 264:265 ^ 0
val 264:265 ^ 0
pair 259:265 ^----^ URGP=0
msg :[31809412.513897] [UFW BLOCK] IN=eth0 OUT= MAC=40:40:2e:9a:ad:92:c4:71:fe:f1:b9:7f:08:00 SRC=69.60.116.202 DST=173.203.237.224 LEN=44 TOS=0x00 PREC=0x00 TTL=29 ID=15852 PROTO=TCP SPT=43998 DPT=3389 WINDOW=3072 RES=0x00 SYN URGP=0
format :[31809412.513897] [UFW BLOCK] IN=# OUT=# MAC=# SRC=# DST=# LEN=# TOS=# PREC=# TTL=# ID=# PROTO=# SPT=# DPT=# WINDOW=# RES=# URGP=#
format :[31809412.513897] [UFW BLOCK] IN=# OUT=# MAC=# SRC=# DST=# LEN=# TOS=# PREC=# TTL=# ID=# PROTO=# SPT=# DPT=# WINDOW=# RES=# SYN URGP=#
{
"31809412.513897] [UFW BLOCK] IN": "eth0",
"OUT": "",
@ -82,6 +80,6 @@ format :[31809412.513897] [UFW BLOCK] IN=# OUT=# MAC=# SRC=# DST=# LEN=# TO
"SPT": 43998,
"DPT": 3389,
"WINDOW": 3072,
"RES": "0x00 SYN",
"RES": "0x00",
"URGP": 0
}

@ -0,0 +1,15 @@
2022-05-17T08:56:54.107Z In(14) settingsd[1001392457]: debug [ConfigStore:66064b9700] File /usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json does not support type:3
key 91:91 ^
path 91:163 ^----------------------------------------------------------------------^ /usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json
val 91:163 ^----------------------------------------------------------------------^ /usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json
pair 91:163 ^----------------------------------------------------------------------^ /usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json
key 181:185 ^--^ type
num 186:187 ^ 3
val 186:187 ^ 3
pair 181:187 ^----^ type:3
msg :File /usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json does not support type:3
format :File # does not support type:#
{
"col_0": "/usr/lib/vmware/configmanager/apply_modules/advanced_options/plugin.json",
"type": 3
}

@ -0,0 +1,32 @@
Nov 3 09:47:02 veridian sudo: timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages
key 31:31 ^
word 31:39 ^------^ timstack
val 31:39 ^------^ timstack
pair 31:39 ^------^ timstack
key 42:45 ^-^ TTY
sym 46:51 ^---^ pts/6
val 46:51 ^---^ pts/6
pair 42:51 ^-------^ TTY=pts/6
key 54:57 ^-^ PWD
path 58:91 ^-------------------------------^ /auto/wstimstack/rpms/lbuild/test
val 58:91 ^-------------------------------^ /auto/wstimstack/rpms/lbuild/test
pair 54:91 ^-----------------------------------^ PWD=/auto/wstimstack/rpms/lbuild/test
key 94:98 ^--^ USER
word 99:103 ^--^ root
val 99:103 ^--^ root
pair 94:103 ^-------^ USER=root
key 106:113 ^-----^ COMMAND
path 114:127 ^-----------^ /usr/bin/tail
wspc 127:128 ^
path 128:145 ^---------------^ /var/log/messages
val 114:145 ^-----------------------------^ /usr/bin/tail /var/log/messages
pair 106:145 ^-------------------------------------^ COMMAND=/usr/bin/tail /var/log/messages
msg :timstack : TTY=pts/6 ; PWD=/auto/wstimstack/rpms/lbuild/test ; USER=root ; COMMAND=/usr/bin/tail /var/log/messages
format :# : TTY=# ; PWD=# ; USER=# ; COMMAND=#
{
"col_0": "timstack",
"TTY": "pts/6",
"PWD": "/auto/wstimstack/rpms/lbuild/test",
"USER": "root",
"COMMAND": "/usr/bin/tail /var/log/messages"
}

@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/env python3
# Copyright (c) 2013, Timothy Stack
#
@ -44,6 +44,7 @@ list_depth = {}
list_format = {}
breakpoints = set()
def completer(text, state):
options = [x for x in itertools.chain(name_to_addr,
element_lists,
@ -54,6 +55,7 @@ def completer(text, state):
except IndexError:
return None
readline.set_completer(completer)
if 'libedit' in readline.__doc__:
@ -67,27 +69,30 @@ for line in open("scanned.dpt"):
if line.startswith("input "):
input_line = line[6:-1]
else:
ops.append(map(string.strip, line.split()))
ops.append([x.strip() for x in line.split()])
def getstr(capture):
start, end = capture.split(':')
return input_line[int(start):int(end)]
def printlist(name_or_addr):
if name_or_addr in name_to_addr:
addr = name_to_addr[name_or_addr]
print "% 3d (%s:%s) %s" % (list_depth.get(addr, -1), name_or_addr, addr, element_lists[addr])
print("% 3d (%s:%s) %s" % (list_depth.get(addr, -1), name_or_addr, addr, element_lists[addr]))
elif name_or_addr in element_lists:
addr = name_or_addr
print "% 3d (%s:%s) %s" % (list_depth.get(name_or_addr, -1),
addr_to_name.get(name_or_addr, name_or_addr),
name_or_addr,
element_lists[name_or_addr])
print("% 3d (%s:%s) %s" % (list_depth.get(name_or_addr, -1),
addr_to_name.get(name_or_addr, name_or_addr),
name_or_addr,
element_lists[name_or_addr]))
else:
print "error: unknown list --", name_or_addr
print("error: unknown list --", name_or_addr)
if addr in list_format:
print " format -- appender(%s) term(%s) qual(%s) sep(%s) prefix_term(%s)" % tuple(list_format[addr])
print(" format -- appender(%s) term(%s) qual(%s) sep(%s) prefix_term(%s)" % tuple(list_format[addr]))
def handleop(fields):
addr = fields[0]
@ -122,7 +127,7 @@ def handleop(fields):
elif method_name == 'splice':
pos = int(method_args[0])
other = element_lists[method_args[1]]
start, from_end = map(int, method_args[2].split(':'))
start, from_end = list(map(int, method_args[2].split(':')))
end = len(other) - from_end
sub_list = other[start:end]
del other[start:end]
@ -134,7 +139,8 @@ def handleop(fields):
elif method_name == 'point':
breakpoints.add(method_args[0])
else:
print "Unhandled method: ", method_name
print("Unhandled method: ", method_name)
def playupto(length):
addr_to_name.clear()
@ -144,6 +150,7 @@ def playupto(length):
for index in range(length):
handleop(ops[index])
def find_prev_point(start, name):
orig_start = start
while start > 0:
@ -155,6 +162,7 @@ def find_prev_point(start, name):
return start + 1
return orig_start + 1
def find_next_point(start, name):
orig_start = start
while start < len(ops):
@ -166,13 +174,15 @@ def find_next_point(start, name):
return start + 1
return orig_start + 1
def printall():
print input_line
print(input_line)
sorted_lists = [(list_depth.get(addr, -1), addr) for addr in element_lists]
sorted_lists.sort()
for _depth, addr in sorted_lists:
printlist(addr)
index = len(ops)
last_cmd = ['']
watch_list = set()
@ -180,20 +190,20 @@ while True:
playupto(index)
if index == 0:
print "init"
print("init")
else:
op = ops[index - 1]
print "#%s %s" % (index -1, op)
print("#%s %s" % (index - 1, op))
if op[2] == 'push_back':
print getstr(op[4])
print(getstr(op[4]))
for list_name in watch_list:
printlist(list_name)
try:
cmd = raw_input("> ").split()
cmd = input("> ").split()
except EOFError:
print
print()
break
if not cmd or cmd[0] == '':
@ -202,16 +212,16 @@ while True:
if not cmd or cmd[0] == '':
pass
elif cmd[0] == 'h':
print 'Help:'
print ' q - quit'
print ' s - Start over'
print ' n - Next step'
print ' r - Previous step'
print ' b - Previous breakpoint'
print ' c - Next breakpoint'
print ' p - Print state'
print ' w <var> - Add a variable to the watch list'
print ' u <var> - Remove a variable from the watch list'
print('Help:')
print(' q - quit')
print(' s - Start over')
print(' n - Next step')
print(' r - Previous step')
print(' b - Previous breakpoint')
print(' c - Next breakpoint')
print(' p - Print state')
print(' w <var> - Add a variable to the watch list')
print(' u <var> - Remove a variable from the watch list')
elif cmd[0] == 'q':
break
elif cmd[0] == 's':
@ -242,7 +252,7 @@ while True:
if watch_list:
watch_list.remove(cmd[1])
else:
print "error: unknown command --", cmd
print("error: unknown command --", cmd)
printall()

Loading…
Cancel
Save