[format] add TCF file format and some fixes to make it work

Defect Number:
    Reviewed By:
   Testing Done:
pull/111/head
Timothy Stack 10 years ago
parent 2781a63cfb
commit 72e3a4d8df

@ -9,4 +9,6 @@ Contributors
Suresh Sundriyal (sureshsundriyal@gmail.com)
Matt Dordal (matt@dordal.org)
Christopher Meng
Salvatore Bonaccorso
Henrietta Stack

@ -33,7 +33,8 @@ TIME_FORMATS = \
"%d/%b/%Y:%H:%M:%S %z" \
"%b %d %H:%M:%S" \
"%m/%d/%y %H:%M:%S" \
"%m%d %H:%M:%S"
"%m%d %H:%M:%S" \
"%M:%S"
time_fmts.cc: ptimec
./ptimec $(TIME_FORMATS) > $@

@ -365,7 +365,8 @@ TIME_FORMATS = \
"%d/%b/%Y:%H:%M:%S %z" \
"%b %d %H:%M:%S" \
"%m/%d/%y %H:%M:%S" \
"%m%d %H:%M:%S"
"%m%d %H:%M:%S" \
"%M:%S"
AM_LDFLAGS = \
$(STATIC_LDFLAGS) \

@ -378,6 +378,51 @@
}
]
},
"tcf_log" : {
"title" : "TCF Log",
"description" : "Target Communication Framework log",
"url" : [
"http://wiki.eclipse.org/TCF",
"http://git.eclipse.org/c/tcf/org.eclipse.tcf.git/tree/target_explorer/plugins/org.eclipse.tm.te.tcf.log.core/src/org/eclipse/tm/te/tcf/log/core/internal/listener/ChannelTraceListener.java?id=b6e81bb8405f99dda2764b22cff876fa00f734f5#n144"
],
"regex" : {
"std" : {
"pattern" : "^TCF (?<timestamp>\\d{2}:\\d{2}.\\d{3}): (?:Server-Properties: |channel server|\\w+: (?<dir>--->|<---) (?<type>\\w)(?: (?<token>\\w+))?(?: (?<service>\\w+))?(?: (?<name>\\w+))? )(?<body>.*)"
}
},
"value" : {
"dir" : {
"kind" : "string"
},
"type" : {
"kind" : "string",
"identifier" : true
},
"token" : {
"kind" : "string",
"identifier" : true
},
"service" : {
"kind" : "string",
"identifier" : true
},
"name" : {
"kind" : "string",
"identifier" : true
}
},
"sample" : [
{
"line" : "TCF 29:47.191: Server-Properties: {\"Name\":\"TCF Protocol Logger\",\"OSName\":\"Linux 3.2.0-60-generic\",\"UserName\":\"xavier\",\"AgentID\":\"1fde3dd1-d4be-4f79-8090-6f8d212f03bf\",\"TransportName\":\"TCP\",\"Proxy\":\"\",\"ValueAdd\":\"1\",\"Port\":\"1534\"}"
},
{
"line" : "TCF 30:11.475: 0: <--- R 2 [\"P1\"] <eom>"
},
{
"line" : "TCF 30:11.475: 0: ---> C 4 RunControl getChildren \"P1\" <eom>"
}
]
},
"tcsh_history" : {
"title" : "TCSH History",
"description" : "The tcsh history file format.",

@ -185,8 +185,9 @@ throw (error)
}
this->lb_file_time = read_le32(
(const unsigned char *)&gz_id[4]);
if (this->lb_file_time < 0)
if (this->lb_file_time < 0) {
this->lb_file_time = 0;
}
this->lb_gz_offset = lseek(this->lb_fd, 0, SEEK_CUR);
}
#ifdef HAVE_BZLIB_H

@ -264,6 +264,43 @@ const char *log_format::log_scanf(const char *line,
return retval;
}
void log_format::check_for_new_year(std::vector<logline> &dst,
const struct timeval &log_tv)
{
if (dst.empty()) {
return;
}
time_t diff = dst.back().get_time() - log_tv.tv_sec;
if (diff > (5 * 60)) {
int off_year = 0, off_month = 0, off_day = 0, off_hour = 0;
std::vector<logline>::iterator iter;
if (diff > (60 * 24 * 60 * 60)) {
off_year = 1;
} else if (diff > (15 * 24 * 60 * 60)) {
off_month = 1;
} else if (diff > (12 * 60 * 60)) {
off_day = 1;
} else {
off_hour = 1;
}
for (iter = dst.begin(); iter != dst.end(); iter++) {
time_t ot = iter->get_time();
struct tm *otm;
otm = gmtime(&ot);
otm->tm_year -= off_year;
otm->tm_mon -= off_month;
otm->tm_yday -= off_day;
otm->tm_hour -= off_hour;
iter->set_time(tm2sec(otm));
}
}
}
/*
* XXX This needs some cleanup.
*/
@ -1010,6 +1047,16 @@ void external_log_format::build(std::vector<std::string> &errors)
if (!pat.p_pcre)
continue;
if (pat.p_pcre->name_index(this->lf_timestamp_field) < 0) {
errors.push_back("error:" +
this->elf_name +
":timestamp field '" +
this->lf_timestamp_field +
"' not found in pattern -- " +
pat.p_string);
continue;
}
if (pat.p_pcre->match(pc, pi)) {
const char *ts = pi.get_substr_start(
pc[this->lf_timestamp_field]);
@ -1017,9 +1064,15 @@ void external_log_format::build(std::vector<std::string> &errors)
struct timeval tv;
struct tm tm;
if (dts.scan(ts, NULL, &tm, tv) != NULL) {
found = true;
break;
found = true;
if (dts.scan(ts, NULL, &tm, tv) == NULL) {
errors.push_back("error:" +
this->elf_name +
":invalid sample -- " +
iter->s_line);
errors.push_back("error:" +
this->elf_name +
":unrecognized timestamp format");
}
}
}

@ -604,21 +604,7 @@ public:
};
void check_for_new_year(std::vector<logline> &dst,
const struct timeval &log_tv) {
if (!dst.empty() &&
((dst.back().get_time() - log_tv.tv_sec) > (24 * 60 * 60))) {
std::vector<logline>::iterator iter;
for (iter = dst.begin(); iter != dst.end(); iter++) {
time_t ot = iter->get_time();
struct tm *otm;
otm = gmtime(&ot);
otm->tm_year -= 1;
iter->set_time(tm2sec(otm));
}
}
};
const struct timeval &log_tv);
date_time_scanner lf_date_time;
int lf_fmt_lock;

@ -76,6 +76,137 @@ static string scrub_rdns(const string &str)
return retval;
}
class strace_log_format : public log_format {
static pcrepp &value_pattern(void)
{
static pcrepp VALUE_PATTERN(
"([0-9:.]*) ([a-zA-Z_][a-zA-Z_0-9]*)\\("
"(.*)\\)"
"\\s+= ([-xa-fA-F\\d\\?]+)[^<]+(?:<(\\d+\\.\\d+)>)?");
return VALUE_PATTERN;
};
string get_name() const { return "strace_log"; };
bool scan(vector<logline> &dst,
off_t offset,
char *prefix,
int len)
{
static const char *log_fmt[] = {
"%63[0-9:].%d",
NULL
};
static const char *time_fmt[] = {
"%H:%M:%S",
NULL
};
bool retval = false;
struct tm log_time;
char timestr[64];
struct timeval log_tv;
int usecs;
if (this->log_scanf(prefix,
log_fmt,
2,
time_fmt,
timestr,
&log_time,
log_tv,
timestr,
&usecs)) {
logline::level_t level = logline::LEVEL_UNKNOWN;
const char * eq;
if ((eq = strrchr(prefix, '=')) != NULL) {
int rc;
if (sscanf(eq, "= %d", &rc) == 1 && rc < 0) {
level = logline::LEVEL_ERROR;
}
}
if (!dst.empty() && (log_tv.tv_sec < dst.back().get_time())) {
log_tv.tv_sec += (24 * 60 * 60);
}
log_tv.tv_usec = usecs;
dst.push_back(logline(offset, log_tv, level));
retval = true;
}
return retval;
};
auto_ptr<log_format> specialized()
{
auto_ptr<log_format> retval((log_format *)
new strace_log_format(*this));
return retval;
};
void annotate(shared_buffer_ref &line,
string_attrs_t &sa,
std::vector<logline_value> &values) const
{
pcre_context_static<30> pc;
pcre_input pi(line.get_data(), 0, line.length());
if (value_pattern().match(pc, pi)) {
static struct {
const char * name;
logline_value::kind_t kind;
} columns[] = {
{ "", logline_value::VALUE_TEXT },
{ "funcname", logline_value::VALUE_TEXT },
{ "args", logline_value::VALUE_TEXT },
{ "result", logline_value::VALUE_TEXT },
{ "duration", logline_value::VALUE_FLOAT },
{ NULL, logline_value::VALUE_UNKNOWN },
};
pcre_context::iterator iter;
struct line_range lr;
iter = pc.begin();
if (iter->c_begin != -1) {
lr.lr_start = iter->c_begin;
lr.lr_end = iter->c_end;
sa.push_back(string_attr(lr, &logline::L_TIMESTAMP));
}
lr.lr_start = 0;
lr.lr_end = line.length();
sa.push_back(string_attr(lr, &logline::L_PREFIX));
lr.lr_start = line.length();
lr.lr_end = line.length();
sa.push_back(string_attr(lr, &textview_curses::SA_BODY));
for (int lpc = 0; columns[lpc].name; lpc++) {
pcre_context::iterator cap = pc.begin() + lpc;
shared_buffer_ref value_str;
if (columns[lpc].name[0] == '\0') {
continue;
}
value_str.subset(line, cap->c_begin, cap->length());
values.push_back(logline_value(columns[lpc].name,
columns[lpc].kind,
value_str));
}
}
};
};
log_format::register_root_format<strace_log_format> strace_log_instance;
class generic_log_format : public log_format {
static pcrepp &scrub_pattern(void)
{
@ -240,134 +371,3 @@ class generic_log_format : public log_format {
};
log_format::register_root_format<generic_log_format> generic_log_instance;
class strace_log_format : public log_format {
static pcrepp &value_pattern(void)
{
static pcrepp VALUE_PATTERN(
"([0-9:.]*) ([a-zA-Z_][a-zA-Z_0-9]*)\\("
"(.*)\\)"
"\\s+= ([-xa-fA-F\\d\\?]+)[^<]+(?:<(\\d+\\.\\d+)>)?");
return VALUE_PATTERN;
};
string get_name() const { return "strace_log"; };
bool scan(vector<logline> &dst,
off_t offset,
char *prefix,
int len)
{
static const char *log_fmt[] = {
"%63[0-9:].%d",
NULL
};
static const char *time_fmt[] = {
"%H:%M:%S",
NULL
};
bool retval = false;
struct tm log_time;
char timestr[64];
struct timeval log_tv;
int usecs;
if (this->log_scanf(prefix,
log_fmt,
2,
time_fmt,
timestr,
&log_time,
log_tv,
timestr,
&usecs)) {
logline::level_t level = logline::LEVEL_UNKNOWN;
const char * eq;
if ((eq = strrchr(prefix, '=')) != NULL) {
int rc;
if (sscanf(eq, "= %d", &rc) == 1 && rc < 0) {
level = logline::LEVEL_ERROR;
}
}
if (!dst.empty() && (log_tv.tv_sec < dst.back().get_time())) {
log_tv.tv_sec += (24 * 60 * 60);
}
log_tv.tv_usec = usecs;
dst.push_back(logline(offset, log_tv, level));
retval = true;
}
return retval;
};
auto_ptr<log_format> specialized()
{
auto_ptr<log_format> retval((log_format *)
new strace_log_format(*this));
return retval;
};
void annotate(shared_buffer_ref &line,
string_attrs_t &sa,
std::vector<logline_value> &values) const
{
pcre_context_static<30> pc;
pcre_input pi(line.get_data(), 0, line.length());
if (value_pattern().match(pc, pi)) {
static struct {
const char * name;
logline_value::kind_t kind;
} columns[] = {
{ "", logline_value::VALUE_TEXT },
{ "funcname", logline_value::VALUE_TEXT },
{ "args", logline_value::VALUE_TEXT },
{ "result", logline_value::VALUE_TEXT },
{ "duration", logline_value::VALUE_FLOAT },
{ NULL, logline_value::VALUE_UNKNOWN },
};
pcre_context::iterator iter;
struct line_range lr;
iter = pc.begin();
if (iter->c_begin != -1) {
lr.lr_start = iter->c_begin;
lr.lr_end = iter->c_end;
sa.push_back(string_attr(lr, &logline::L_TIMESTAMP));
}
lr.lr_start = 0;
lr.lr_end = line.length();
sa.push_back(string_attr(lr, &logline::L_PREFIX));
lr.lr_start = line.length();
lr.lr_end = line.length();
sa.push_back(string_attr(lr, &textview_curses::SA_BODY));
for (int lpc = 0; columns[lpc].name; lpc++) {
pcre_context::iterator cap = pc.begin() + lpc;
shared_buffer_ref value_str;
if (columns[lpc].name[0] == '\0') {
continue;
}
value_str.subset(line, cap->c_begin, cap->length());
values.push_back(logline_value(columns[lpc].name,
columns[lpc].kind,
value_str));
}
}
};
};
log_format::register_root_format<strace_log_format> strace_log_instance;

@ -364,7 +364,7 @@ static struct json_path_handler format_handlers[] = {
json_path_handler("^/\\w+/(json|convert-to-local-time)$", read_format_bool),
json_path_handler("^/\\w+/timestamp-divisor$", read_format_double)
.add_cb(read_format_int),
json_path_handler("^/\\w+/(file-pattern|level-field|timestamp-field|body-field|url|title|description)$",
json_path_handler("^/\\w+/(file-pattern|level-field|timestamp-field|body-field|url|url#|title|description)$",
read_format_field),
json_path_handler("^/\\w+/level/"
"(trace|debug|info|warning|error|critical|fatal)$",

@ -125,12 +125,23 @@ bool logfile::exists(void) const
this->lf_stat.st_ino == st.st_ino;
}
void logfile::set_format_base_time(log_format *lf)
{
time_t file_time = this->lf_line_buffer.get_file_time();
if (file_time == 0) {
file_time = this->lf_stat.st_mtime;
}
lf->lf_date_time.set_base_time(file_time);
}
void logfile::process_prefix(off_t offset, char *prefix, int len)
{
bool found = false;
if (this->lf_format.get() != NULL) {
/* We've locked onto a format, just use that scanner. */
this->set_format_base_time(this->lf_format.get());
found = this->lf_format->scan(this->lf_index, offset, prefix, len);
}
else if (this->lf_index.size() < MAX_UNRECOGNIZED_LINES) {
@ -145,20 +156,28 @@ void logfile::process_prefix(off_t offset, char *prefix, int len)
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter) {
if (!(*iter)->match_name(this->lf_filename))
if (!(*iter)->match_name(this->lf_filename)) {
continue;
}
(*iter)->clear();
(*iter)->lf_date_time.set_base_time(this->lf_line_buffer.get_file_time());
if ((*iter)->lf_date_time.dts_base_time == 0) {
(*iter)->lf_date_time.set_base_time(this->lf_stat.st_mtime);
this->set_format_base_time(*iter);
if (!(*iter)->scan(this->lf_index, offset, prefix, len)) {
log_debug("%s:%d:log format does not match -- %s",
this->lf_filename.c_str(),
this->lf_index.size(),
(*iter)->get_name().c_str());
}
if ((*iter)->scan(this->lf_index, offset, prefix, len)) {
else {
#if 0
require(this->lf_index.size() == 1 ||
(this->lf_index[this->lf_index.size() - 2] <
this->lf_index[this->lf_index.size() - 1]));
#endif
log_debug("%s:%d:log format found -- %s",
this->lf_filename.c_str(),
this->lf_index.size(),
(*iter)->get_name().c_str());
this->lf_format =
auto_ptr<log_format>((*iter)->specialized());

@ -334,6 +334,8 @@ protected:
*/
void process_prefix(off_t offset, char *prefix, int len);
void set_format_base_time(log_format *lf);
bool lf_valid_filename;
std::string lf_filename;
std::string lf_content_id;

@ -194,6 +194,8 @@ dist_noinst_DATA = \
logfile_strace_log.0 \
logfile_syslog.0 \
logfile_syslog.1 \
logfile_tcf.0 \
logfile_tcf.1 \
logfile_tcsh_history.0 \
logfile_with_a_really_long_name_to_test_a_bug_with_long_names.0 \
mvwattrline_output.0 \

@ -762,6 +762,8 @@ dist_noinst_DATA = \
logfile_strace_log.0 \
logfile_syslog.0 \
logfile_syslog.1 \
logfile_tcf.0 \
logfile_tcf.1 \
logfile_tcsh_history.0 \
logfile_with_a_really_long_name_to_test_a_bug_with_long_names.0 \
mvwattrline_output.0 \

@ -49,7 +49,7 @@ const char *TMP_NAME = "scanned.tmp";
int main(int argc, char *argv[])
{
int c, retval = EXIT_SUCCESS;
bool prompt = false;
bool prompt = false, is_log = false;
{
std::vector<std::string> paths, errors;
@ -57,12 +57,16 @@ int main(int argc, char *argv[])
load_formats(paths, errors);
}
while ((c = getopt(argc, argv, "p")) != -1) {
while ((c = getopt(argc, argv, "pl")) != -1) {
switch (c) {
case 'p':
prompt = true;
break;
case 'l':
is_log = true;
break;
default:
retval = EXIT_FAILURE;
break;
@ -121,13 +125,16 @@ int main(int argc, char *argv[])
vector<log_format *>::iterator iter;
vector<logline> index;
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter) {
(*iter)->clear();
if ((*iter)->scan(index, 13, log_line, strlen(log_line))) {
format = (*iter)->specialized();
found = true;
if (is_log) {
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter) {
(*iter)->clear();
if ((*iter)->scan(
index, 13, log_line, strlen(log_line))) {
format = (*iter)->specialized();
found = true;
}
}
}

@ -0,0 +1,30 @@
TCF 29:47.191: Server-Properties: {"Name":"TCF Protocol Logger","OSName":"Linux 3.2.0-60-generic","UserName":"xavier","AgentID":"1fde3dd1-d4be-4f79-8090-6f8d212f03bf","TransportName":"TCP","Proxy":"","ValueAdd":"1","Port":"1534"}
TCF 30:04.735: channel server connecting
TCF 30:04.735: channel server connected
TCF 30:11.474: 0: ---> C 2 RunControl getChildren null <eom>
TCF 30:11.475: 0: <--- R 2 ["P1"] <eom>
TCF 30:11.475: 0: ---> C 3 RunControl getContext "P1" <eom>
TCF 30:11.475: 0: <--- R 3 {"ID":"P1","ProcessID":"P1","Name":"VxWorks","CanSuspend":true,"CanResume":1,"IsContainer":true,"WordSize":4,"CanTerminate":true,"CanDetach":true,"RCGroup":"P1","SymbolsGroup":"P1","CPUGroup":"P1","DiagnosticTestProcess":true} <eom>
TCF 30:11.475: 0: ---> C 4 RunControl getChildren "P1" <eom>
TCF 30:11.475: 0: <--- R 4 ["P2"] <eom>
TCF 30:11.475: 0: ---> C 5 RunControl getContext "P2" <eom>
TCF 30:11.476: 0: <--- R 5 {"ID":"P2","ParentID":"P1","ProcessID":"P2","Name":"Kernel","CanSuspend":true,"CanResume":1,"IsContainer":true,"WordSize":4,"CanTerminate":true,"CanDetach":true,"RCGroup":"P2","BPGroup":"P2","SymbolsGroup":"P2","CPUGroup":"P2","DiagnosticTestProcess":true} <eom>
TCF 30:11.476: 0: ---> C 6 RunControl getChildren "P2" <eom>
TCF 30:11.476: 0: <--- R 6 [] <eom>
TCF 30:11.523: 0: ---> C RR4 RunControl getChildren null <eom>
TCF 30:11.524: 0: <--- R RR4 ["P1"] <eom>
TCF 30:11.525: 0: ---> C RR6 RunControl getContext "P1" <eom>
TCF 30:11.526: 0: <--- R RR6 {"ID":"P1","ProcessID":"P1","Name":"VxWorks","CanSuspend":true,"CanResume":1,"IsContainer":true,"WordSize":4,"CanTerminate":true,"CanDetach":true,"RCGroup":"P1","SymbolsGroup":"P1","CPUGroup":"P1","DiagnosticTestProcess":true} <eom>
TCF 30:11.530: 0: ---> C RR7 RunControl getChildren "P1" <eom>
TCF 30:11.530: 0: <--- R RR7 ["P2"] <eom>
TCF 30:11.531: 0: ---> C RR8 RunControl getContext "P2" <eom>
TCF 30:11.531: 0: <--- R RR8 {"ID":"P2","ParentID":"P1","ProcessID":"P2","Name":"Kernel","CanSuspend":true,"CanResume":1,"IsContainer":true,"WordSize":4,"CanTerminate":true,"CanDetach":true,"RCGroup":"P2","BPGroup":"P2","SymbolsGroup":"P2","CPUGroup":"P2","DiagnosticTestProcess":true} <eom>
TCF 30:11.533: 0: ---> C RR9 RunControl getChildren "P2" <eom>
TCF 30:11.533: 0: <--- R RR9 [] <eom>
TCF 30:11.536: 0: ---> C RR10 Locator getAgentID <eom>
TCF 30:11.536: 0: <--- R RR10 "00000000-0000-4daa-8665-a704d3dc0000" <eom>
TCF 30:21.573: 0: ---> C RR11 ProcessesV1 getChildren null false <eom>
TCF 30:21.573: 0: <--- R RR11 ["P1"] <eom>
TCF 30:21.577: 0: ---> C RR12 ProcessesV1 getContext "P1" <eom>
TCF 30:21.577: 0: <--- R RR12 {"Name":"VxWorks","CanTerminate":false,"CanAttach":true,"IsProcess":false,"ID":"P1"} <eom>
TCF 30:21.756: 0: <--- E RunControl contextAdded [{"ID":"P2.1623333904","ParentID":"P2","ProcessID":"P2","Name":"ipcom_syslogd","CanSuspend":true,"CanResume":12351,"HasState":true,"WordSize":4,"CanTerminate":true,"CanDetach":true,"RCGroup":"P2.1623333904","BPGroup":"P2","SymbolsGroup":"P2","CPUGroup":"P2","DiagnosticTestProcess":true}] <eom>

@ -0,0 +1,3 @@
TCF 59:47.191: Server-Properties: {"Name":"TCF Protocol Logger","OSName":"Linux 3.2.0-60-generic","UserName":"xavier","AgentID":"1fde3dd1-d4be-4f79-8090-6f8d212f03bf","TransportName":"TCP","Proxy":"","ValueAdd":"1","Port":"1534"}
TCF 30:11.474: 0: ---> C 2 RunControl getChildren null <eom>
TCF 01:11.475: 0: <--- R 2 ["P1"] <eom>

@ -6,6 +6,6 @@ for fn in ${top_srcdir}/test/datafile_simple.*; do
done
for fn in ${top_srcdir}/test/log-samples/*.txt; do
run_test ./drive_data_scanner $fn
run_test ./drive_data_scanner -l $fn
on_error_fail_with "$fn does not match"
done

@ -66,6 +66,15 @@ Jan 03 09:47:02 2007 -- 000
EOF
fi
touch -t 20140406110923 ${srcdir}/logfile_tcf.1
run_test ./drive_logfile -t -f tcf_log ${srcdir}/logfile_tcf.1
check_output "TCF timestamp interpreted incorrectly for hour wrap?" <<EOF
Apr 06 20:59:47 2014 -- 191
Apr 06 21:30:11 2014 -- 474
Apr 06 22:01:11 2014 -- 475
EOF
# The TCSH format converts to local time, so we need to specify a TZ
export TZ="UTC"
run_test ./drive_logfile -t -f tcsh_history ${srcdir}/logfile_tcsh_history.0

Loading…
Cancel
Save