2012-11-28 16:39:39 +00:00
|
|
|
/**
|
2013-05-03 06:02:03 +00:00
|
|
|
* Copyright (c) 2007-2012, Timothy Stack
|
|
|
|
*
|
|
|
|
* All rights reserved.
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
|
|
* list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2013-05-03 06:02:03 +00:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
2012-11-28 16:39:39 +00:00
|
|
|
* @file log_format_impls.cc
|
|
|
|
*/
|
2009-09-14 01:07:32 +00:00
|
|
|
|
2010-01-29 23:17:08 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
#include "pcrepp.hh"
|
2009-09-14 01:07:32 +00:00
|
|
|
#include "log_format.hh"
|
|
|
|
#include "log_vtab_impl.hh"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2012-10-29 23:38:58 +00:00
|
|
|
/*
 * Pattern used by scrub_rdns() to recognize reverse-DNS style names: the
 * string must begin with "com", "net", "org", "edu", or any two lower-case
 * letters, followed by one or more ".word" components and then at least one
 * more character.
 */
static pcrepp RDNS_PATTERN(
    "^(?:com|net|org|edu|[a-z][a-z])(\\.\\w+)+(.+)");
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Attempt to scrub a reverse-DNS string.
|
2013-05-28 04:35:00 +00:00
|
|
|
*
|
2012-10-29 23:38:58 +00:00
|
|
|
* @param str The string to scrub. If the string looks like a reverse-DNS
|
|
|
|
* string, the leading components of the name will be reduced to a single
|
|
|
|
* letter. For example, "com.example.foo" will be reduced to "c.e.foo".
|
|
|
|
* @return The scrubbed version of the input string or the original string
|
|
|
|
* if it is not a reverse-DNS string.
|
|
|
|
*/
|
|
|
|
static string scrub_rdns(const string &str)
|
|
|
|
{
|
2013-05-28 04:35:00 +00:00
|
|
|
pcre_context_static<30> context;
|
|
|
|
pcre_input input(str);
|
|
|
|
string retval;
|
|
|
|
|
|
|
|
if (RDNS_PATTERN.match(context, input)) {
|
|
|
|
pcre_context::capture_t *cap;
|
|
|
|
|
|
|
|
cap = context.begin();
|
|
|
|
for (int index = 0; index < cap->c_begin; index++) {
|
|
|
|
if (index == 0 || str[index - 1] == '.') {
|
|
|
|
if (index > 0) {
|
|
|
|
retval.append(1, '.');
|
2012-10-29 23:38:58 +00:00
|
|
|
}
|
2013-05-28 04:35:00 +00:00
|
|
|
retval.append(1, str[index]);
|
|
|
|
}
|
2012-10-29 23:38:58 +00:00
|
|
|
}
|
2013-05-28 04:35:00 +00:00
|
|
|
retval += input.get_substr(cap);
|
|
|
|
retval += input.get_substr(cap + 1);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
retval = str;
|
|
|
|
}
|
|
|
|
return retval;
|
2012-10-29 23:38:58 +00:00
|
|
|
}
|
|
|
|
|
2009-09-14 01:07:32 +00:00
|
|
|
class generic_log_format : public log_format {
|
2013-05-28 04:35:00 +00:00
|
|
|
static pcrepp &scrub_pattern(void)
|
|
|
|
{
|
|
|
|
static pcrepp SCRUB_PATTERN(
|
|
|
|
"\\d+-(\\d+-\\d+ \\d+:\\d+:\\d+(?:,\\d+)?:)\\w+:(.*)");
|
2012-09-22 23:15:15 +00:00
|
|
|
|
|
|
|
return SCRUB_PATTERN;
|
|
|
|
}
|
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
static pcre_format *get_pcre_log_formats() {
|
|
|
|
static pcre_format log_fmt[] = {
|
2015-07-11 04:39:03 +00:00
|
|
|
pcre_format("^(?<timestamp>[\\dTZ: +/\\-,\\.-]+)([^:]+)"),
|
|
|
|
pcre_format("^(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
|
|
|
|
pcre_format("^(?<timestamp>[\\w:,/\\.-]+) (.*)"),
|
|
|
|
pcre_format("^(?<timestamp>[\\w: \\.,/-]+)\\[[^\\]]+\\](.*)"),
|
|
|
|
pcre_format("^(?<timestamp>[\\w: \\.,/-]+) (.*)"),
|
|
|
|
|
2015-12-11 04:36:25 +00:00
|
|
|
pcre_format("^\\[(?<timestamp>[\\w: \\.,+/-]+)\\]\\s*(\\w+):?"),
|
|
|
|
pcre_format("^\\[(?<timestamp>[\\w: \\.,+/-]+)\\] (.*)"),
|
|
|
|
pcre_format("^\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\[(\\w+)\\]"),
|
|
|
|
pcre_format("^\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\w+ (.*)"),
|
|
|
|
pcre_format("^\\[(?<timestamp>[\\w: ,+/-]+)\\] \\(\\d+\\) (.*)"),
|
2015-07-11 04:39:03 +00:00
|
|
|
|
|
|
|
pcre_format()
|
2014-02-01 14:41:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
return log_fmt;
|
|
|
|
};
|
|
|
|
|
2015-07-11 04:39:03 +00:00
|
|
|
std::string get_pattern_regex() const {
|
|
|
|
return get_pcre_log_formats()[this->lf_fmt_lock].name;
|
|
|
|
}
|
|
|
|
|
2015-06-03 13:36:58 +00:00
|
|
|
intern_string_t get_name() const {
|
|
|
|
return intern_string::lookup("generic_log");
|
|
|
|
};
|
2009-09-14 01:07:32 +00:00
|
|
|
|
2013-05-28 04:35:00 +00:00
|
|
|
void scrub(string &line)
|
|
|
|
{
|
2012-09-22 23:15:15 +00:00
|
|
|
pcre_context_static<30> context;
|
|
|
|
pcre_input pi(line);
|
2013-05-28 04:35:00 +00:00
|
|
|
string new_line = "";
|
2012-09-22 23:15:15 +00:00
|
|
|
|
|
|
|
if (scrub_pattern().match(context, pi)) {
|
|
|
|
pcre_context::capture_t *cap;
|
|
|
|
|
|
|
|
for (cap = context.begin(); cap != context.end(); cap++) {
|
2012-10-29 23:38:58 +00:00
|
|
|
new_line += scrub_rdns(pi.get_substr(cap));
|
2012-09-22 23:15:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
line = new_line;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-11-27 20:47:42 +00:00
|
|
|
scan_result_t scan(vector<logline> &dst,
|
|
|
|
off_t offset,
|
|
|
|
shared_buffer_ref &sbr)
|
2013-05-28 04:35:00 +00:00
|
|
|
{
|
2014-06-18 04:29:42 +00:00
|
|
|
struct exttm log_time;
|
2013-06-30 23:43:08 +00:00
|
|
|
struct timeval log_tv;
|
2014-10-20 05:16:40 +00:00
|
|
|
pcre_context::capture_t ts, level;
|
2013-06-26 01:45:07 +00:00
|
|
|
const char *last_pos;
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-10-20 05:16:40 +00:00
|
|
|
if ((last_pos = this->log_scanf(
|
|
|
|
sbr.get_data(),
|
|
|
|
sbr.length(),
|
|
|
|
get_pcre_log_formats(),
|
|
|
|
NULL,
|
|
|
|
&log_time,
|
2015-03-21 18:56:12 +00:00
|
|
|
&log_tv,
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-10-20 05:16:40 +00:00
|
|
|
&ts,
|
|
|
|
&level)) != NULL) {
|
|
|
|
const char *level_str = &sbr.get_data()[level.c_begin];
|
|
|
|
logline::level_t level_val = logline::string2level(
|
|
|
|
level_str, level.length());
|
2013-06-23 13:11:37 +00:00
|
|
|
|
2015-09-14 15:56:42 +00:00
|
|
|
this->check_for_new_year(dst, log_time, log_tv);
|
2014-03-16 05:33:56 +00:00
|
|
|
|
2014-10-20 05:16:40 +00:00
|
|
|
dst.push_back(logline(offset, log_tv, level_val));
|
2015-11-27 20:47:42 +00:00
|
|
|
return SCAN_MATCH;
|
2013-05-28 04:35:00 +00:00
|
|
|
}
|
|
|
|
|
2015-11-27 20:47:42 +00:00
|
|
|
return SCAN_NO_MATCH;
|
2009-09-14 01:07:32 +00:00
|
|
|
};
|
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
void annotate(shared_buffer_ref &line,
|
2013-05-24 14:55:56 +00:00
|
|
|
string_attrs_t &sa,
|
2015-07-19 09:40:02 +00:00
|
|
|
std::vector<logline_value> &values,
|
|
|
|
bool annotate_module) const
|
2013-05-28 04:35:00 +00:00
|
|
|
{
|
2014-02-01 14:41:11 +00:00
|
|
|
pcre_format &fmt = get_pcre_log_formats()[this->lf_fmt_lock];
|
2013-05-28 04:35:00 +00:00
|
|
|
struct line_range lr;
|
2013-06-04 13:53:25 +00:00
|
|
|
int prefix_len = 0;
|
2014-02-01 14:41:11 +00:00
|
|
|
pcre_input pi(line.get_data(), 0, line.length());
|
|
|
|
pcre_context_static<30> pc;
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
if (!fmt.pcre.match(pc, pi)) {
|
2013-06-29 13:22:24 +00:00
|
|
|
return;
|
|
|
|
}
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
lr.lr_start = pc[0]->c_begin;
|
|
|
|
lr.lr_end = pc[0]->c_end;
|
2014-01-25 17:29:35 +00:00
|
|
|
sa.push_back(string_attr(lr, &logline::L_TIMESTAMP));
|
2013-05-28 04:35:00 +00:00
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
const char *level = &line.get_data()[pc[1]->c_begin];
|
2013-06-08 13:10:18 +00:00
|
|
|
|
2014-02-01 14:41:11 +00:00
|
|
|
if (logline::string2level(level, pc[1]->length(), true) == logline::LEVEL_UNKNOWN) {
|
2014-02-01 23:47:15 +00:00
|
|
|
prefix_len = pc[0]->c_end;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
prefix_len = pc[1]->c_end;
|
2013-06-04 13:53:25 +00:00
|
|
|
}
|
|
|
|
|
2013-05-28 04:35:00 +00:00
|
|
|
lr.lr_start = 0;
|
|
|
|
lr.lr_end = prefix_len;
|
2014-01-25 17:29:35 +00:00
|
|
|
sa.push_back(string_attr(lr, &logline::L_PREFIX));
|
2013-05-28 04:35:00 +00:00
|
|
|
|
|
|
|
lr.lr_start = prefix_len;
|
|
|
|
lr.lr_end = line.length();
|
2014-01-25 17:29:35 +00:00
|
|
|
sa.push_back(string_attr(lr, &textview_curses::SA_BODY));
|
2013-05-24 14:55:56 +00:00
|
|
|
};
|
|
|
|
|
2015-07-18 03:39:06 +00:00
|
|
|
auto_ptr<log_format> specialized(int fmt_lock)
|
2013-05-28 04:35:00 +00:00
|
|
|
{
|
|
|
|
auto_ptr<log_format> retval((log_format *)
|
|
|
|
new generic_log_format(*this));
|
2009-09-14 01:07:32 +00:00
|
|
|
|
2013-05-28 04:35:00 +00:00
|
|
|
return retval;
|
2009-09-14 01:07:32 +00:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
// File-scope instance of log_format::register_root_format for
// generic_log_format.  NOTE(review): presumably its constructor adds the
// generic format to the set of root formats during static initialization --
// confirm against log_format.hh.
log_format::register_root_format<generic_log_format> generic_log_instance;
|