From f1087184d60832539f9e6f1cea561a7e3d6c7719 Mon Sep 17 00:00:00 2001 From: Timothy Stack Date: Tue, 25 Jun 2013 18:45:07 -0700 Subject: [PATCH] [log format] lay some foundation for making log formats configurable --- src/log_format.cc | 119 +++++++++++++++----------- src/log_format.hh | 185 ++++++++++++++++++++++++++++++++++++---- src/log_format_impls.cc | 2 +- src/pcrepp.hh | 4 + 4 files changed, 243 insertions(+), 67 deletions(-) diff --git a/src/log_format.cc b/src/log_format.cc index 57a9c15e..36af0e09 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -176,34 +176,77 @@ static bool next_format(const char *fmt[], int &index, int &locked_index) return retval; } -char *log_format::log_scanf(const char *line, - const char *fmt[], - int expected_matches, - const char *time_fmt[], - char *time_dest, - struct tm *tm_out, - time_t &time_out, - ...) +/* Default strptime() patterns tried by time_scanf() when its time_fmt argument is NULL; the list is NULL-terminated. */ static const char *std_time_fmt[] = { + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%SZ", + "%Y/%m/%d %H:%M:%S", + "%Y/%m/%d %H:%M", + + "%a %b %d %H:%M:%S %Y", + "%a %b %d %H:%M:%S %Z %Y", + + "%d/%b/%Y:%H:%M:%S %z", + + "%b %d %H:%M:%S", + + NULL, +}; + +/* Parse the timestamp at time_dest by trying the patterns in time_fmt (std_time_fmt when NULL) with strptime(), stepping through them with next_format(). On the first match, tm_out and time_out receive the parsed time, lf_time_fmt_lock and lf_time_fmt_len record the matching pattern index and the number of characters consumed, and the strptime() end pointer is returned. NULL is returned when no pattern matches. */ const char *log_format::time_scanf(const char *time_dest, + const char *time_fmt[], + struct tm *tm_out, + time_t &time_out) { - static const char *std_time_fmt[] = { - "%Y-%m-%d %H:%M:%S", - "%Y-%m-%d %H:%M", - "%Y-%m-%dT%H:%M:%S", - "%Y-%m-%dT%H:%M:%SZ", - "%Y/%m/%d %H:%M:%S", - "%Y/%m/%d %H:%M", + int curr_time_fmt = -1; + bool found = false; + const char *retval = NULL; + + if (!time_fmt) { + time_fmt = std_time_fmt; + } + + while (next_format(time_fmt, + curr_time_fmt, + this->lf_time_fmt_lock)) { + memset(tm_out, 0, sizeof(struct tm)); + if ((retval = strptime(time_dest, + time_fmt[curr_time_fmt], + tm_out)) != NULL) { + /* tm_year counts from 1900: a parsed year before 1970 (for example 0, left by the memset() when the pattern has no year field) is replaced with 1980. */ if (tm_out->tm_year < 70) { + /* XXX We should pull the time from the file mtime (?) 
*/ + tm_out->tm_year = 80; + } + time_out = tm2sec(tm_out); - "%a %b %d %H:%M:%S %Y", + // this->lf_fmt_lock = curr_fmt; + this->lf_time_fmt_lock = curr_time_fmt; + this->lf_time_fmt_len = retval - time_dest; - "%d/%b/%Y:%H:%M:%S %z", + found = true; + break; + } + } - "%b %d %H:%M:%S", + if (!found) { + retval = NULL; + } - NULL, - }; + return retval; +} +const char *log_format::log_scanf(const char *line, + const char *fmt[], + int expected_matches, + const char *time_fmt[], + char *time_dest, + struct tm *tm_out, + time_t &time_out, + ...) +{ int curr_fmt = -1; - char * retval = NULL; + const char * retval = NULL; va_list args; while (next_format(fmt, curr_fmt, this->lf_fmt_lock)) { @@ -222,37 +265,11 @@ char *log_format::log_scanf(const char *line, retval = NULL; } else { - int curr_time_fmt = -1; - bool found = false; - - if (!time_fmt) { - time_fmt = std_time_fmt; - } - - while (next_format(time_fmt, - curr_time_fmt, - this->lf_time_fmt_lock)) { - memset(tm_out, 0, sizeof(struct tm)); - if ((retval = strptime(time_dest, - time_fmt[curr_time_fmt], - tm_out)) != NULL) { - if (tm_out->tm_year < 70) { - /* XXX We should pull the time from the file mtime (?) */ - tm_out->tm_year = 80; - } - time_out = tm2sec(tm_out); - - this->lf_fmt_lock = curr_fmt; - this->lf_time_fmt_lock = curr_time_fmt; - this->lf_time_fmt_len = retval - time_dest; - - found = true; - break; - } - } + /* Timestamp parsing is delegated to time_scanf(); the line format is locked in lf_fmt_lock only once the timestamp parses as well. */ retval = this->time_scanf(time_dest, time_fmt, tm_out, time_out); - if (!found) { - retval = NULL; + if (retval) { + this->lf_fmt_lock = curr_fmt; + break; } } diff --git a/src/log_format.hh b/src/log_format.hh index ab21058d..b561c519 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -43,9 +43,18 @@ #include #include +#include "pcrepp.hh" #include "byte_array.hh" #include "view_curses.hh" +/** + * Convert the time stored in a 'tm' struct into epoch time. + * + * @param t The 'tm' structure to convert to epoch time. + * @return The given time in seconds since the epoch. 
+ */ +time_t tm2sec(const struct tm *t); + class logfile_filter { public: typedef enum { @@ -376,25 +385,171 @@ public: protected: static std::vector lf_root_formats; - char *log_scanf(const char *line, - const char *fmt[], - int expected_matches, - const char *time_fmt[], - char *time_dest, - struct tm *tm_out, - time_t &time_out, - ...); + const char *log_scanf(const char *line, + const char *fmt[], + int expected_matches, + const char *time_fmt[], + char *time_dest, + struct tm *tm_out, + time_t &time_out, + ...); + + const char *time_scanf(const char *time_dest, + const char *time_fmt[], + struct tm *tm_out, + time_t &time_out); int lf_fmt_lock; int lf_time_fmt_lock; int lf_time_fmt_len; }; -/** - * Convert the time stored in a 'tm' struct into epoch time. - * - * @param t The 'tm' structure to convert to epoch time. - * @return The given time in seconds since the epoch. - */ -time_t tm2sec(const struct tm *t); +class external_log_format : public log_format { + +public: + struct sample { + std::string s_line; + logline::level_t s_level; + }; + + struct value_def { + std::string vd_name; + logline_value::kind_t vd_kind; + }; + + struct level_pattern { + std::string lp_regex; + pcrepp *lp_pcre; + }; + + external_log_format(const std::string &name) : elf_name(name) { }; + + std::string get_name(void) { + return this->elf_name; + }; + + bool scan(std::vector &dst, + off_t offset, + char *prefix, + int len) { + pcre_input pi(prefix, 0, len); + pcre_context_static<30> pc; + bool retval = false; + + if (this->elf_pcre->match(pc, pi)) { + pcre_context::capture_t *ts = pc["timestamp"]; + pcre_context::capture_t *level_cap = pc[this->elf_level_field]; + const char *ts_str = pi.get_substr_start(ts); + const char *last; + time_t line_time; + struct tm log_time; + uint16_t millis = 0; + logline::level_t level = logline::LEVEL_INFO; + + if ((last = this->time_scanf(ts_str, + NULL, + &log_time, + line_time)) == NULL) { + return false; + } + + /* Try to pull out the 
milliseconds value. */ + if (last[0] == ',' || last[0] == '.') { + int subsec_len = 0; + + sscanf(last + 1, "%hd%n", &millis, &subsec_len); + if (millis >= 1000) { + millis = 0; + } + } + + if (level_cap != NULL && level_cap->c_begin != -1) { + pcre_context_static<30> pc_level; + pcre_input pi_level(pi.get_substr_start(level_cap), + 0, + level_cap->length()); + + for (std::map::iterator iter = this->elf_level_patterns.begin(); + iter != this->elf_level_patterns.end(); + ++iter) { + if (iter->second.lp_pcre->match(pc_level, pi_level)) { + level = iter->first; + break; + } + } + } + + dst.push_back(logline(offset, + line_time, + millis, + level)); + + retval = true; + } + + return retval; + }; + + void annotate(const std::string &line, + string_attrs_t &sa, + std::vector &values) const + { + pcre_context_static<30> pc; + pcre_input pi(line); + struct line_range lr; + pcre_context::capture_t *cap; + + if (!this->elf_pcre->match(pc, pi)) + return; + + cap = pc["timestamp"]; + lr.lr_start = cap->c_begin; + lr.lr_end = cap->c_end; + sa[lr].insert(make_string_attr("timestamp", 0)); + + cap = pc["body"]; + lr.lr_start = cap->c_begin; + lr.lr_end = cap->c_end; + sa[lr].insert(make_string_attr("body", 0)); + + for (std::vector::const_iterator iter = + this->elf_value_defs.begin(); + iter != this->elf_value_defs.end(); + ++iter) { + cap = pc[iter->vd_name]; + + values.push_back(logline_value(iter->vd_name, + iter->vd_kind, + pi.get_substr(cap))); + } + } + + void build(void) { + this->elf_pcre = new pcrepp(this->elf_regex.c_str()); + for (std::map::iterator iter = this->elf_level_patterns.begin(); + iter != this->elf_level_patterns.end(); + ++iter) { + iter->second.lp_pcre = new pcrepp(iter->second.lp_regex.c_str()); + } + }; + + std::auto_ptr specialized() { + std::auto_ptr retval((log_format *) + new external_log_format(*this)); + + return retval; + }; + + std::string elf_regex; + pcrepp *elf_pcre; + std::vector elf_samples; + std::vector elf_value_defs; + std::string 
elf_level_field; + std::map elf_level_patterns; + +private: + const std::string elf_name; + +}; + #endif diff --git a/src/log_format_impls.cc b/src/log_format_impls.cc index e706f720..ca6b0cc9 100644 --- a/src/log_format_impls.cc +++ b/src/log_format_impls.cc @@ -526,7 +526,7 @@ class generic_log_format : public log_format { char timestr[64 + 32]; time_t line_time; char level[64]; - char * last_pos; + const char *last_pos; int prefix_len; if ((last_pos = this->log_scanf(prefix, diff --git a/src/pcrepp.hh b/src/pcrepp.hh index f7f81af5..c7706317 100644 --- a/src/pcrepp.hh +++ b/src/pcrepp.hh @@ -115,6 +115,10 @@ public: capture_t *operator[](const char *name) const; + capture_t *operator[](const std::string &name) const { + return (*this)[name.c_str()]; + }; + protected: pcre_context(capture_t *captures, int max_count) : pc_captures(captures), pc_max_count(max_count), pc_count(0) { };