[data_scanner] tweaks to fix slow build times and a couple warnings

commit 23ab1abd73 (parent 7310f9dcec)
Author: Tim Stack
Date:   2022-08-20 21:01:15 -07:00

4 changed files with 28953 additions and 86698 deletions

--- a/aminclude_static.am
+++ b/aminclude_static.am

@@ -1,6 +1,6 @@
 # aminclude_static.am generated automatically by Autoconf
-# from AX_AM_MACROS_STATIC on Thu Jul 28 22:07:38 PDT 2022
+# from AX_AM_MACROS_STATIC on Sat Aug 20 18:43:07 PDT 2022
 # Code coverage

--- a/src/Makefile.am
+++ b/src/Makefile.am

@@ -86,7 +86,7 @@ time_fmts.cc: ptimec$(BUILD_EXEEXT)
 if HAVE_RE2C
 %.cc: %.re
-	$(RE2C_V)$(RE2C_CMD) --tags -8 -o $@ $<
+	$(RE2C_V)$(RE2C_CMD) --bit-vectors -W --tags -8 -o $@ $<
 	$(REC2_V)test $@ -ef $(srcdir)/$*.cc || cp $@ $(srcdir)/$*.cc
 endif
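
The new flags are the heart of the build-time fix: with --bit-vectors, re2c encodes the DFA's transition decisions as bitmap lookup tables instead of the default long chains of nested if/switch comparisons, which the C++ optimizer digests far more quickly on a scanner of this size, and -W turns on all of re2c's warnings (the escape cleanups in the later hunks then silence them). Below is a minimal sketch of an input that can be run through both flag sets to compare the generated code; the file name and rule are hypothetical, not part of this commit:

    /* flags_demo.re -- hypothetical demo input; process it with
     *   re2c --bit-vectors -W -o flags_demo.cc flags_demo.re
     * and again without --bit-vectors to compare the generated DFAs.
     */
    #include <cstddef>

    // Length of the leading [A-Za-z0-9_]+ run in the NUL-terminated `s`.
    static std::size_t word_prefix(const char* s)
    {
        const char* YYCURSOR = s;
        /*!re2c
            re2c:define:YYCTYPE = char;
            re2c:yyfill:enable  = 0;

            [A-Za-z0-9_]+ { return static_cast<std::size_t>(YYCURSOR - s); }
            *             { return 0; }
        */
    }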
@@ -357,9 +357,6 @@ THIRD_PARTY_SRCS = \
 libdatascanner_a_SOURCES = \
 	data_scanner_re.cc
 
-# XXX The data_scanner_re optimized build is taking 30+ minutes to run for
-# some reason, so we need to override the flags
-libdatascanner_a_CXXFLAGS = -O1 -g
 
 libdiag_a_SOURCES = \
 	$(THIRD_PARTY_SRCS) \
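
Dropping the per-target override goes hand in hand with the flag change: the XXX comment being removed recorded that optimizing the generated data_scanner_re.cc took 30+ minutes, and pinning the library to -O1 was the workaround. With the bit-vector output presumably no longer triggering that pathological compile time, the scanner can be built with the project's default optimization flags again.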

(diff of the regenerated data_scanner_re.cc suppressed because it is too large)

--- a/src/data_scanner.re
+++ b/src/data_scanner.re

@@ -47,6 +47,7 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
         cap[1].c_end = pi.pi_next_offset; \
         token_out = tok; \
     }
+
 # define RET(tok) { \
         CAPTURE(tok); \
         return true; \
@@ -136,7 +137,7 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
 EOF { return false; }
-("u"|"r")?'"'('\\'.|[^\x00\"\\]|'""')*'"' {
+("u"|"r")?'"'('\\'.|[^\x00"\\]|'""')*'"' {
     CAPTURE(DT_QUOTED_STRING);
     switch (pi.get_string()[cap[1].c_begin]) {
     case 'u':
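
This and the following hunks are the "couple warnings" from the commit message: with -W enabled, re2c reports escapes that have no effect, and inside a character class a double quote needs no backslash, so [^\x00\"\\] becomes the equivalent [^\x00"\\]. A hypothetical sketch (not lnav's actual rule) showing the cleaned-up spelling; the old spelling matched the same strings but drew a useless-escape warning:

    /* quote_demo.re -- hypothetical; recognizes a whole input that is one
     * double-quoted string with backslash escapes. The class [^\x00"\\]
     * is the post-cleanup spelling of [^\x00\"\\].
     */
    static bool is_quoted_string(const char* s)
    {
        const char* YYCURSOR = s;
        const char* YYMARKER = s; // needed: the DFA may back off to the * rule
        /*!re2c
            re2c:define:YYCTYPE = char;
            re2c:yyfill:enable  = 0;

            '"' ('\\' . | [^\x00"\\])* '"' "\x00" { return true; }
            *                                     { return false; }
        */
    }

The single-quote rule in the next hunk and the URL rule after it get the same treatment (\' becomes ' and \[ becomes [).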
@@ -151,7 +152,7 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
 [a-qstv-zA-QSTV-Z]"'" {
     CAPTURE(DT_WORD);
 }
-("u"|"r")?"'"('\\'.|"''"|[^\x00\'\\])*"'"/[^sS] {
+("u"|"r")?"'"('\\'.|"''"|[^\x00'\\])*"'"/[^sS] {
     CAPTURE(DT_QUOTED_STRING);
     switch (pi.get_string()[cap[1].c_begin]) {
     case 'u':
@@ -163,7 +164,7 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
         cap[1].c_end -= 1;
         return true;
     }
-[a-zA-Z0-9]+":/""/"?[^\x00\r\n\t '"\[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); }
+[a-zA-Z0-9]+":/""/"?[^\x00\r\n\t '"[\](){}]+[/a-zA-Z0-9\-=&?%] { RET(DT_URL); }
 ("/"|"./"|"../")[a-zA-Z0-9_\.\-\~/!@#$%^&*()]* { RET(DT_PATH); }
 (SPACE|NUM)NUM":"NUM{2}/[^:] { RET(DT_TIME); }
 (SPACE|NUM)NUM?":"NUM{2}":"NUM{2}("."NUM{3,6})?/[^:] { RET(DT_TIME); }
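
The unchanged DT_TIME rules in this hunk's context show re2c's trailing-context operator, which this scanner leans on heavily: in a rule R1/R2, the input must be followed by R2 for R1 to match, but the cursor is left at the end of R1. The /[^:] here keeps a bare HH:MM from matching when it is really the prefix of a full HH:MM:SS timestamp, so the longer rule wins. A hypothetical reduction:

    /* time_demo.re -- hypothetical; matches an input that starts with
     * H:MM or HH:MM *not* followed by another ':' (i.e. not the prefix
     * of HH:MM:SS). The lookahead character is inspected, not consumed.
     */
    static bool starts_with_hh_mm(const char* s)
    {
        const char* YYCURSOR = s;
        const char* YYMARKER = s;
        const char* YYCTXMARKER = s; // required by re2c for trailing context
        /*!re2c
            re2c:define:YYCTYPE = char;
            re2c:yyfill:enable  = 0;

            num = [0-9];

            num{1,2} ":" num{2} / [^:] { return true; }
            *                          { return false; }
        */
    }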
@@ -236,7 +237,7 @@ bool data_scanner::tokenize2(pcre_context &pc, data_token_t &token_out)
 ("re-")?[a-zA-Z][a-z']+/([\r\n\t \(\)!\*:;'\"\?,]|[\.\!,\?]SPACE|EOF) { RET(DT_WORD); }
-[^\x00"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]+("::"[^\x00"; \r\n\t:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]+)* {
+[^\x00"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\.\\][^\x00"; \t\r\n:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]*("::"[^\x00"; \r\n\t:=,\(\)\{\}\[\]\+#!%\^&\*'\?<>\~`\|\\]+)* {
     RET(DT_SYMBOL);
 }
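
The DT_SYMBOL change is the one substantive matcher tweak in this file: the old rule was a single negated character class repeated ([^...]+), and the new rule splits it into a first-character class that additionally excludes '.' followed by the old class repeated, so a symbol may still contain dots but can no longer begin with one. A hypothetical reduction with the separator set shrunk down to whitespace:

    /* symbol_demo.re -- hypothetical; "foo.bar" is accepted as a symbol,
     * ".bar" is not, mirroring the first-character restriction above.
     */
    static bool is_symbol(const char* s)
    {
        const char* YYCURSOR = s;
        const char* YYMARKER = s;
        /*!re2c
            re2c:define:YYCTYPE = char;
            re2c:yyfill:enable  = 0;

            sym_rest  = [^\x00 \t\r\n];
            sym_first = [^\x00 \t\r\n.];

            sym_first sym_rest* "\x00" { return true; }
            *                          { return false; }
        */
    }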