Some more sequence matching stuff.

pull/37/merge
Tim Stack 13 years ago
parent e3d2d41aba
commit 44e8abc593

@ -47,6 +47,7 @@ noinst_HEADERS = \
auto_temp_file.hh \
bookmarks.hh \
bottom_status_source.hh \
byte_array.hh \
db_sub_source.hh \
grep_proc.hh \
help.hh \
@ -61,6 +62,8 @@ noinst_HEADERS = \
pcrepp.hh \
piper_proc.hh \
readline_curses.hh \
sequence_matcher.hh \
sequence_sink.hh \
statusview_curses.hh \
strong_int.hh \
termios_guard.hh \
@ -83,6 +86,7 @@ libdiag_a_SOURCES = \
logfile.cc \
logfile_sub_source.cc \
readline_curses.cc \
sequence_matcher.cc \
statusview_curses.cc \
piper_proc.cc \
textview_curses.cc \

@ -55,10 +55,10 @@ am_libdiag_a_OBJECTS = bookmarks.$(OBJEXT) grep_proc.$(OBJEXT) \
hist_source.$(OBJEXT) line_buffer.$(OBJEXT) \
listview_curses.$(OBJEXT) log_format.$(OBJEXT) \
logfile.$(OBJEXT) logfile_sub_source.$(OBJEXT) \
readline_curses.$(OBJEXT) statusview_curses.$(OBJEXT) \
piper_proc.$(OBJEXT) textview_curses.$(OBJEXT) \
view_curses.$(OBJEXT) vt52_curses.$(OBJEXT) \
log_vtab_impl.$(OBJEXT)
readline_curses.$(OBJEXT) sequence_matcher.$(OBJEXT) \
statusview_curses.$(OBJEXT) piper_proc.$(OBJEXT) \
textview_curses.$(OBJEXT) view_curses.$(OBJEXT) \
vt52_curses.$(OBJEXT) log_vtab_impl.$(OBJEXT)
libdiag_a_OBJECTS = $(am_libdiag_a_OBJECTS)
am__installdirs = "$(DESTDIR)$(bindir)"
binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
@ -229,6 +229,7 @@ noinst_HEADERS = \
auto_temp_file.hh \
bookmarks.hh \
bottom_status_source.hh \
byte_array.hh \
db_sub_source.hh \
grep_proc.hh \
help.hh \
@ -243,6 +244,8 @@ noinst_HEADERS = \
pcrepp.hh \
piper_proc.hh \
readline_curses.hh \
sequence_matcher.hh \
sequence_sink.hh \
statusview_curses.hh \
strong_int.hh \
termios_guard.hh \
@ -265,6 +268,7 @@ libdiag_a_SOURCES = \
logfile.cc \
logfile_sub_source.cc \
readline_curses.cc \
sequence_matcher.cc \
statusview_curses.cc \
piper_proc.cc \
textview_curses.cc \
@ -380,6 +384,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logfile_sub_source.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/piper_proc.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readline_curses.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sequence_matcher.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/statusview_curses.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textview_curses.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/view_curses.Po@am__quote@

@ -0,0 +1,23 @@
#ifndef __byte_array_hh
#define __byte_array_hh
#include <string.h>
#include <sys/types.h>
/**
 * Fixed-size block of raw bytes that can be copied and ordered.
 *
 * The ordering provided by operator< is a byte-wise memcmp() of the two
 * arrays, which lets the type serve as a key in ordered containers.
 *
 * @tparam BYTE_COUNT The number of bytes held in the array.
 */
template<size_t BYTE_COUNT>
struct byte_array {
    /**
     * Construct an array with every byte set to zero so that comparing or
     * copying a freshly constructed value never reads indeterminate memory.
     */
    byte_array() {
        memset(this->ba_data, 0, BYTE_COUNT);
    }

    /** Construct a byte-for-byte copy of 'other'. */
    byte_array(const byte_array &other) {
        memcpy(this->ba_data, other.ba_data, BYTE_COUNT);
    }

    /**
     * @param other The array to compare against.
     * @return True if this array sorts before 'other' when compared
     * byte-by-byte with memcmp().
     */
    bool operator<(const byte_array &other) const {
        return memcmp(this->ba_data, other.ba_data, BYTE_COUNT) < 0;
    }

    /** The raw bytes, directly writable by callers. */
    unsigned char ba_data[BYTE_COUNT];
};
#endif

@ -25,6 +25,7 @@ grep_proc::grep_proc(pcre *code,
int &maxfd,
fd_set &readfds)
: gp_pcre(code),
gp_code(code),
gp_source(gps),
gp_pipe_offset(0),
gp_child(-1),
@ -167,25 +168,23 @@ void grep_proc::start(void)
m = pc.all();
fprintf(stdout, "[%d:%d]\n", m->c_begin, m->c_end);
for (pc_iter = pc.begin(); pc_iter != pc.end(); pc_iter++) {
if (pc_iter->c_begin < 0) {
/* If the capture was conditional, pcre will
* return a -1 here.
*/
continue;
}
fprintf(stdout,
"(%d:%d)",
pc_iter->c_begin,
pc_iter->c_end);
fwrite(pi.get_substr_start(pc_iter),
1,
pc_iter->length(),
stdout);
/* If the capture was conditional, pcre will return a -1
* here.
*/
if (pc_iter->c_begin >= 0) {
fwrite(pi.get_substr_start(pc_iter),
1,
pc_iter->length(),
stdout);
}
fputc('\n', stdout);
}
fprintf(stdout, "/\n");
}
}
if (((line + 1) % 10000) == 0) {
@ -259,16 +258,22 @@ void grep_proc::dispatch_line(char *line)
}
}
else if (sscanf(line, "(%d:%d)%n", &start, &end, &capture_start) == 2) {
assert(start >= 0);
assert(start == -1 || start >= 0);
assert(end >= 0);
/* Pass the match offsets to the sink delegate. */
/* Pass the captured strings to the sink delegate. */
if (this->gp_sink != NULL) {
this->gp_sink->grep_capture(*this,
this->gp_last_line,
start,
end,
&line[capture_start]);
start < 0 ?
NULL : &line[capture_start]);
}
}
else if (line[0] == '/') {
if (this->gp_sink != NULL) {
this->gp_sink->grep_match_end(*this, this->gp_last_line);
}
}
else {

@ -102,6 +102,8 @@ public:
int start,
int end,
char *capture) { };
virtual void grep_match_end(grep_proc &gp, grep_line_t line) { };
};
/**
@ -126,10 +128,8 @@ public:
};
/**
* Construct a grep_proc object. This involves compiling the regular
* expression and then forking off the child process. Note that both the
* parent and child return from this call and you must call the start()
* method immediately afterward to get things going.
* Construct a grep_proc object. You must call the start() method
* to fork off the child process and begin processing.
*
* @param code The pcre code to run over the lines of input.
* @param gps The source of the data to match.

@ -195,7 +195,8 @@ public:
this->gr_highlighter->get_role(this->gr_next_field);
}
}
sscanf(capture, "%f", &amount);
if (capture != 0)
sscanf(capture, "%f", &amount);
this->add_value(this->gr_x, this->gr_next_field, amount);
++ this->gr_next_field;

@ -0,0 +1,52 @@
#include "config.h"
#include <openssl/sha.h>
#include "sequence_matcher.hh"
using namespace std;
/**
 * Build a matcher from an example sequence.
 *
 * Every element of 'example' describes one field and holds the value that
 * field had on each line of the example.  A field whose values are not all
 * the same is marked FT_CONSTANT and keeps its per-line values; a field
 * whose values are all equal stays FT_VARIABLE and its stored values are
 * discarded.
 *
 * @param example The per-field values of the example sequence.  An empty
 * list yields a matcher with no fields and a sequence length of zero.
 */
sequence_matcher::sequence_matcher(field_col_t &example)
{
    for (field_col_t::iterator col_iter = example.begin();
         col_iter != example.end();
         ++col_iter) {
        field sf;

        sf.sf_value = *col_iter;
        for (field_row_t::iterator row_iter = col_iter->begin();
             row_iter != col_iter->end();
             ++row_iter) {
            /* One differing value is enough to settle the field type. */
            if (*row_iter != col_iter->front()) {
                sf.sf_type = FT_CONSTANT;
                break;
            }
        }
        if (sf.sf_type == FT_VARIABLE) {
            sf.sf_value.clear();
        }
        this->sm_fields.push_back(sf);
    }

    /* front() on an empty list is undefined, so guard the empty example. */
    this->sm_count = example.empty() ?
        0 : static_cast<int>(example.front().size());
}
void sequence_matcher::identity(const std::vector<string> &values, id_t &id_out)
{
SHA_CTX context;
int lpc = 0;
SHA_Init(&context);
for (std::list<field>::iterator iter = sm_fields.begin();
iter != sm_fields.end();
++iter, lpc++) {
if (iter->sf_type == FT_VARIABLE) {
SHA_Update(&context,
values[lpc].c_str(),
values[lpc].length() + 1);
}
}
SHA_Final(id_out.ba_data, &context);
}

@ -0,0 +1,76 @@
#ifndef __sequence_matcher_hh
#define __sequence_matcher_hh
#include <list>
#include <string>
#include <vector>
#include "byte_array.hh"
/**
 * Recognizes a sequence of lines based on an example sequence.
 *
 * The fields of the example are split into two kinds: FT_CONSTANT fields are
 * compared by match() against the value expected at the current position in
 * the sequence, and FT_VARIABLE fields are skipped by match().
 */
class sequence_matcher {
public:
    /** The values of a single field, one entry per line of the sequence. */
    typedef std::vector<std::string> field_row_t;
    /** The values of every field in the example sequence. */
    typedef std::list<field_row_t> field_col_t;
    /** Identifier type filled in by identity(). */
    typedef byte_array<20> id_t;

    enum field_type_t {
        FT_VARIABLE,
        FT_CONSTANT
    };

    struct field {
        /** Fields start out as FT_VARIABLE. */
        field() : sf_type(FT_VARIABLE) { }

        field_type_t sf_type;
        field_row_t sf_value;
    };

    /**
     * @param example The per-field values of the example sequence.
     */
    sequence_matcher(field_col_t &example);

    /**
     * @param values The field values to identify.
     * @param id_out Receives the identifier computed from 'values'.
     */
    void identity(const std::vector<std::string> &values, id_t &id_out);

    /**
     * Try to advance a partially matched sequence with a new set of values.
     *
     * The FT_CONSTANT fields are compared against the values expected at
     * position state.size().  If they differ and some progress had already
     * been made, the progress is thrown away and the same values are tried
     * again as the first step of the sequence.
     *
     * @param values The field values of the current line, indexed in field
     * order.
     * @param state The indexes accepted so far; 'index' is appended when the
     * values are accepted.
     * @param index The value to record in 'state' for the current line.
     * @return True if 'state' now holds as many entries as the example
     * sequence has lines.
     */
    template<typename T>
    bool match(const std::vector<std::string> &values,
               std::vector<T> &state,
               T index) {
        bool accepted;

        for (;;) {
            int column = 0;

            accepted = true;
            for (std::list<field>::iterator fld = this->sm_fields.begin();
                 fld != this->sm_fields.end();
                 ++fld, column++) {
                if (fld->sf_type == sequence_matcher::FT_CONSTANT &&
                    fld->sf_value[state.size()] != values[column]) {
                    accepted = false;
                    break;
                }
            }

            /* Only retry when there was earlier progress to discard. */
            if (accepted || state.empty()) {
                break;
            }
            state.clear();
        }

        if (accepted) {
            state.push_back(index);
        }

        return (this->sm_count == state.size());
    }

private:
    int sm_count;
    std::list<field> sm_fields;
};
#endif

@ -0,0 +1,63 @@
#ifndef __sequence_sink_hh
#define __sequence_sink_hh
#include <map>
#include "bookmarks.hh"
#include "grep_proc.hh"
#include "sequence_matcher.hh"
/**
 * A grep_proc sink that collects the captures of each matched line, hands
 * them to a sequence_matcher, and bookmarks the lines of every sequence the
 * matcher reports as complete.
 */
class sequence_sink : public grep_proc_sink {
public:
    /**
     * @param sm The matcher used to identify and advance sequences.
     * @param bv The bookmark vector that receives the matched lines.
     */
    sequence_sink(sequence_matcher &sm, bookmark_vector &bv) :
        ss_matcher(sm),
        ss_bookmarks(bv) {
    };

    /** Start of a new matched line: forget the previous line's captures. */
    void grep_match(grep_proc &gp,
                    grep_line_t line,
                    int start,
                    int end) {
        this->ss_line_values.clear();
    };

    /**
     * Record one capture of the current line.
     *
     * A capture with a negative start or a null string is stored as an
     * empty value so that the positions of the remaining captures are
     * preserved and a null pointer never reaches the std::string
     * constructor.
     */
    void grep_capture(grep_proc &gp,
                      grep_line_t line,
                      int start,
                      int end,
                      char *capture) {
        if (start < 0 || capture == 0)
            this->ss_line_values.push_back("");
        else
            this->ss_line_values.push_back(std::string(capture));
    };

    /**
     * End of a matched line: look up the state kept for this line's
     * identifier, feed the captures to the matcher and, when the matcher
     * returns true, bookmark every line recorded in that state and reset it.
     */
    void grep_match_end(grep_proc &gp, grep_line_t line) {
        sequence_matcher::id_t line_id;

        this->ss_matcher.identity(this->ss_line_values, line_id);

        /* operator[] creates an empty state the first time an id is seen. */
        std::vector<grep_line_t> &line_state = this->ss_state[line_id];

        if (this->ss_matcher.match(this->ss_line_values,
                                   line_state,
                                   line)) {
            std::vector<grep_line_t>::iterator iter;

            for (iter = line_state.begin();
                 iter != line_state.end();
                 ++iter) {
                this->ss_bookmarks.insert_once(vis_line_t(*iter));
            }
            line_state.clear();
        }
    };

private:
    sequence_matcher &ss_matcher;
    bookmark_vector &ss_bookmarks;
    /** Captures gathered for the line currently being processed. */
    std::vector<std::string> ss_line_values;
    /** Lines accepted so far, keyed by the identifier of their values. */
    std::map< sequence_matcher::id_t, std::vector<grep_line_t> > ss_state;
};
#endif

@ -90,7 +90,9 @@ drive_sequencer_SOURCES = \
../src/logfile.cc \
../src/log_format.cc \
../src/line_buffer.cc \
../src/sequence_matcher.cc \
drive_sequencer.cc
drive_sequencer_LDADD = -lcrypto
drive_vt52_curses_SOURCES = \
../src/vt52_curses.cc \

@ -83,9 +83,8 @@ drive_readline_curses_DEPENDENCIES = $(am__DEPENDENCIES_1) \
$(am__DEPENDENCIES_1)
am_drive_sequencer_OBJECTS = grep_proc.$(OBJEXT) logfile.$(OBJEXT) \
log_format.$(OBJEXT) line_buffer.$(OBJEXT) \
drive_sequencer.$(OBJEXT)
sequence_matcher.$(OBJEXT) drive_sequencer.$(OBJEXT)
drive_sequencer_OBJECTS = $(am_drive_sequencer_OBJECTS)
drive_sequencer_LDADD = $(LDADD)
drive_sequencer_DEPENDENCIES =
am_drive_vt52_curses_OBJECTS = vt52_curses.$(OBJEXT) \
drive_vt52_curses.$(OBJEXT)
@ -338,8 +337,10 @@ drive_sequencer_SOURCES = \
../src/logfile.cc \
../src/log_format.cc \
../src/line_buffer.cc \
../src/sequence_matcher.cc \
drive_sequencer.cc
drive_sequencer_LDADD = -lcrypto
drive_vt52_curses_SOURCES = \
../src/vt52_curses.cc \
drive_vt52_curses.cc
@ -495,6 +496,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logfile_sub_source.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readline_curses.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scripty.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sequence_matcher.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slicer.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_auto_fd.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_auto_mem.Po@am__quote@
@ -633,6 +635,20 @@ readline_curses.obj: ../src/readline_curses.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o readline_curses.obj `if test -f '../src/readline_curses.cc'; then $(CYGPATH_W) '../src/readline_curses.cc'; else $(CYGPATH_W) '$(srcdir)/../src/readline_curses.cc'; fi`
sequence_matcher.o: ../src/sequence_matcher.cc
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sequence_matcher.o -MD -MP -MF $(DEPDIR)/sequence_matcher.Tpo -c -o sequence_matcher.o `test -f '../src/sequence_matcher.cc' || echo '$(srcdir)/'`../src/sequence_matcher.cc
@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/sequence_matcher.Tpo $(DEPDIR)/sequence_matcher.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='../src/sequence_matcher.cc' object='sequence_matcher.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sequence_matcher.o `test -f '../src/sequence_matcher.cc' || echo '$(srcdir)/'`../src/sequence_matcher.cc
sequence_matcher.obj: ../src/sequence_matcher.cc
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT sequence_matcher.obj -MD -MP -MF $(DEPDIR)/sequence_matcher.Tpo -c -o sequence_matcher.obj `if test -f '../src/sequence_matcher.cc'; then $(CYGPATH_W) '../src/sequence_matcher.cc'; else $(CYGPATH_W) '$(srcdir)/../src/sequence_matcher.cc'; fi`
@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/sequence_matcher.Tpo $(DEPDIR)/sequence_matcher.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='../src/sequence_matcher.cc' object='sequence_matcher.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o sequence_matcher.obj `if test -f '../src/sequence_matcher.cc'; then $(CYGPATH_W) '../src/sequence_matcher.cc'; else $(CYGPATH_W) '$(srcdir)/../src/sequence_matcher.cc'; fi`
bookmarks.o: ../src/bookmarks.cc
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT bookmarks.o -MD -MP -MF $(DEPDIR)/bookmarks.Tpo -c -o bookmarks.o `test -f '../src/bookmarks.cc' || echo '$(srcdir)/'`../src/bookmarks.cc
@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/bookmarks.Tpo $(DEPDIR)/bookmarks.Po

@ -6,6 +6,7 @@
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
@ -18,123 +19,118 @@
#include "pcrepp.hh"
#include "logfile.hh"
#include "sequence_sink.hh"
#include "sequence_matcher.hh"
using namespace std;
string in[] = {
"eth0 up",
"eth0 down",
""
};
class sequence_source {
class my_source : public grep_proc_source {
public:
virtual ~sequence_source() { };
virtual bool sequence_value_for_field(int line,
int col,
std::string &value_out) = 0;
};
my_source(auto_fd &fd) : ms_offset(0) {
this->ms_buffer.set_fd(fd);
};
template<size_t BYTE_COUNT>
class byte_array {
public:
bool grep_value_for_line(int line_number, string &value_out) {
bool retval = false;
bool operator<(const byte_array &other) const {
return memcmp(this->ba_data, other.ba_data, BYTE_COUNT) < 0;
try {
size_t len;
char *line;
if ((line = this->ms_buffer.read_line(this->ms_offset,
len)) != NULL) {
value_out = string(line, len);
retval = true;
}
}
catch (line_buffer::error &e) {
fprintf(stderr,
"error: source buffer error %d %s\n",
this->ms_buffer.get_fd(),
strerror(e.e_err));
}
return retval;
};
unsigned char ba_data[BYTE_COUNT];
};
class sequence_matcher {
public:
typedef std::vector<string> field_row_t;
typedef std::list<field_row_t> field_col_t;
private:
line_buffer ms_buffer;
off_t ms_offset;
enum field_type_t {
FT_VARIABLE,
FT_CONSTANT,
};
class state<T> {
public:
std::vector<T> sms_line;
};
byte_array<20> sms_id;
};
class field {
int main(int argc, char *argv[])
{
int c, retval = EXIT_SUCCESS;
const char *errptr;
auto_fd fd;
pcre *code;
int eoff;
if (argc < 3) {
fprintf(stderr, "error: expecting pattern and file arguments\n");
retval = EXIT_FAILURE;
}
else if ((fd = open(argv[2], O_RDONLY)) == -1) {
perror("open");
retval = EXIT_FAILURE;
}
else if ((code = pcre_compile(argv[1],
PCRE_CASELESS,
&errptr,
&eoff,
NULL)) == NULL) {
fprintf(stderr, "error: invalid pattern -- %s\n", errptr);
}
else {
my_source ms(fd);
fd_set read_fds;
int maxfd;
sequence_matcher::field_col_t fc;
fc.resize(2);
sequence_matcher::field_row_t &frf = fc.front();
frf.resize(2);
frf[0] = "eth0";
frf[1] = "eth0";
public:
field() : sf_type(FT_VARIABLE) { };
sequence_matcher::field_row_t &frb = fc.back();
frb.resize(2);
frb[0] = "up";
frb[1] = "down";
field_type_t sf_type;
field_col_t sf_value;
};
static bookmark_type_t SEQUENCE;
sequence_matcher sm(fc);
bookmarks bm;
sequence_sink ss(sm, bm[&SEQUENCE]);
FD_ZERO(&read_fds);
grep_proc gp(code, ms, maxfd, read_fds);
gp.queue_request();
gp.start();
gp.set_sink(&ss);
sequence_matcher(field_col_t example) {
for (field_col_t::iterator col_iter = example.begin();
col_iter != example.end();
++col_iter) {
std::string first_value;
while (bm[&SEQUENCE].size() == 0) {
fd_set rfds = read_fds;
for (field_row_t::iterator row_iter = (*col_iter).begin();
row_iter != (*col_iter).end();
++row_iter) {
if (row_iter == (*col_iter).begin()) {
first_value = *row_iter;
}
else if (first_value != *row_iter) {
}
}
select(maxfd + 1, &rfds, NULL, NULL, NULL);
gp.check_fd_set(rfds);
}
};
for (bookmark_vector::iterator iter = bm[&SEQUENCE].begin();
iter != bm[&SEQUENCE].end();
++iter) {
printf("%d\n", (const int)*iter);
}
}
};
int main(int argc, char *argv[])
{
int c, retval = EXIT_SUCCESS;
pcre_context_static<20> *captures = new pcre_context_static<20>[2];
long cols = 0;
sequence::field *fields = new sequence::field[2];
pcrepp re("(\\w+) (up|down)");
for (int lpc = 0; in[lpc] != ""; lpc++) {
pcre_input pi(in[lpc]);
bool rc;
rc = re.match(captures[lpc], pi);
cols = max(cols, captures[lpc].end() - captures[lpc].begin());
assert(rc);
}
for (int curr_col = 0; curr_col < cols; curr_col++) {
string first_row;
for (int curr_row = 0; curr_row < 2; curr_row++) {
pcre_input pi(in[curr_row]);
string curr = pi.get_substr(captures[curr_col].begin() + curr_col);
if (curr_row == 0) {
first_row = curr;
}
else if (first_row != curr) {
fields[curr_col].sf_type = sequence::FT_CONSTANT;
}
}
}
for (int lpc = 0; lpc < cols; lpc++) {
printf("field[%d] = %d\n", lpc, fields[lpc].sf_type);
}
return retval;
return retval;
}

@ -0,0 +1,3 @@
Nov 3 09:23:38 veridian foo[7998]: eth0 is up
Nov 3 09:23:38 veridian foo[16442]: eth1 is up
Nov 3 09:23:38 veridian foo[7999]: eth0 is down
Loading…
Cancel
Save