/**
 * Copyright (c) 2007-2017, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @file log_format_impls.cc
 */

#include "config.h"

#include <stdio.h>

#include <utility>
#include <algorithm>

#include "pcrepp/pcrepp.hh"
#include "sql_util.hh"
#include "log_format.hh"
#include "log_vtab_impl.hh"
#include "base/opt_util.hh"
#include "base/injector.bind.hh"
#include "yajlpp/yajlpp.hh"
#include "formats/logfmt/logfmt.parser.hh"

using namespace std;

static pcrepp RDNS_PATTERN("^(?:com|net|org|edu|[a-z][a-z])"
                           "(\\.\\w+)+(.+)");

/**
 * Attempt to scrub a reverse-DNS string.
 *
 * @param str The string to scrub.  If the string looks like a reverse-DNS
 * string, the leading components of the name will be reduced to a single
 * letter.  For example, "com.example.foo" will be reduced to "c.e.foo".
 * @return The scrubbed version of the input string or the original string
 * if it is not a reverse-DNS string.
 */
static string scrub_rdns(const string &str)
{
    pcre_context_static<30> context;
    pcre_input input(str);
    string retval;

    if (RDNS_PATTERN.match(context, input)) {
        pcre_context::capture_t *cap;

        cap = context.begin();
        // Everything in front of the first capture is abbreviated: only the
        // first character of each dot-separated component is kept.
        for (int index = 0; index < cap->c_begin; index++) {
            if (index == 0 || str[index - 1] == '.') {
                if (index > 0) {
                    retval.append(1, '.');
                }
                retval.append(1, str[index]);
            }
        }
        // The two captured pieces are appended unmodified.
        retval += input.get_substr(cap);
        retval += input.get_substr(cap + 1);
    } else {
        retval = str;
    }

    return retval;
}

/**
 * Fallback format that recognizes a line by a leading timestamp, optionally
 * followed by something that looks like a log level.
 */
class generic_log_format : public log_format {
    /** @return The lazily constructed pattern used by scrub(). */
    static pcrepp &scrub_pattern()
    {
        static pcrepp SCRUB_PATTERN(
            "\\d+-(\\d+-\\d+ \\d+:\\d+:\\d+(?:,\\d+)?:)\\w+:(.*)");

        return SCRUB_PATTERN;
    }

    /**
     * @return The table of candidate line patterns, terminated by a
     * default-constructed pcre_format.  Each pattern has a "timestamp" named
     * capture followed by one positional capture.
     */
    static pcre_format *get_pcre_log_formats()
    {
        static pcre_format log_fmt[] = {
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24})(.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\dTZ: +/\\-,\\.-]+)([^:]+)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) - (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) - (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+)\\[[^\\]]+\\](.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) (.*)"),

            pcre_format(R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w: \.,+/-]+)\]\s*(\w+):?)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\[(\\w+)\\]"),
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\w+ (.*)"),
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] \\(\\d+\\) (.*)"),

            pcre_format()
        };

        return log_fmt;
    }

    /** @return The name of the pattern that matched the given line. */
    std::string get_pattern_regex(uint64_t line_number) const
    {
        int pat_index = this->pattern_index_for_line(line_number);

        return get_pcre_log_formats()[pat_index].name;
    }

    const intern_string_t get_name() const
    {
        return intern_string::lookup("generic_log");
    }

    /**
     * Rewrite a line in place when it matches the scrub pattern: every
     * capture is passed through scrub_rdns() and the results concatenated.
     * Lines that do not match are left untouched.
     */
    void scrub(string &line)
    {
        pcre_context_static<30> context;
        pcre_input pi(line);
        string new_line;

        if (scrub_pattern().match(context, pi)) {
            pcre_context::capture_t *cap;

            for (cap = context.begin(); cap != context.end(); cap++) {
                new_line += scrub_rdns(pi.get_substr(cap));
            }

            line = new_line;
        }
    }

    /**
     * Try each generic pattern against the line and, on success, append a
     * logline carrying the parsed time and level to dst.
     *
     * @return SCAN_MATCH if a pattern matched, SCAN_NO_MATCH otherwise.
     */
    scan_result_t scan(logfile &lf,
                       vector<logline> &dst,
                       const line_info &li,
                       shared_buffer_ref &sbr)
    {
        struct exttm log_time;
        struct timeval log_tv;
        pcre_context::capture_t ts, level;
        const char *last_pos;

        if ((last_pos = this->log_scanf(dst.size(),
                                        sbr.get_data(),
                                        sbr.length(),
                                        get_pcre_log_formats(),
                                        nullptr,
                                        &log_time,
                                        &log_tv,

                                        &ts,
                                        &level)) != nullptr) {
            const char *level_str = &sbr.get_data()[level.c_begin];
            log_level_t level_val = string2level(level_str, level.length());

            // A timestamp without a full date needs the year-rollover check.
            if (!((log_time.et_flags & ETF_DAY_SET) &&
                  (log_time.et_flags & ETF_MONTH_SET) &&
                  (log_time.et_flags & ETF_YEAR_SET))) {
                this->check_for_new_year(dst, log_time, log_tv);
            }

            dst.emplace_back(li.li_file_range.fr_offset, log_tv, level_val);
            return SCAN_MATCH;
        }

        return SCAN_NO_MATCH;
    }

    /**
     * Mark the timestamp, prefix and body ranges of a previously matched
     * line.  Nothing is added if the line's pattern no longer matches.
     */
    void annotate(uint64_t line_number,
                  shared_buffer_ref &line,
                  string_attrs_t &sa,
                  std::vector<logline_value> &values,
                  bool annotate_module) const
    {
        int pat_index = this->pattern_index_for_line(line_number);
        pcre_format &fmt = get_pcre_log_formats()[pat_index];
        struct line_range lr;
        int prefix_len = 0;
        pcre_input pi(line.get_data(), 0, line.length());
        pcre_context_static<30> pc;

        if (!fmt.pcre.match(pc, pi)) {
            return;
        }

        lr.lr_start = pc[0]->c_begin;
        lr.lr_end = pc[0]->c_end;
        sa.emplace_back(lr, &logline::L_TIMESTAMP);

        // The second capture only belongs to the prefix when it parses as a
        // log level; otherwise the prefix stops at the end of the timestamp.
        const char *level = &line.get_data()[pc[1]->c_begin];

        if (string2level(level, pc[1]->length(), true) == LEVEL_UNKNOWN) {
            prefix_len = pc[0]->c_end;
        } else {
            prefix_len = pc[1]->c_end;
        }

        lr.lr_start = 0;
        lr.lr_end = prefix_len;
        sa.emplace_back(lr, &logline::L_PREFIX);

        lr.lr_start = prefix_len;
        lr.lr_end = line.length();
        sa.emplace_back(lr, &SA_BODY);
    }

    shared_ptr<log_format> specialized(int fmt_lock)
    {
        return std::make_shared<generic_log_format>(*this);
    }
};

/**
 * Decode "\xHH" escapes in a buffer.
 *
 * @param str The buffer to decode; it does not need to be NUL-terminated
 * at len.
 * @param len The number of bytes of str to process.
 * @return The decoded string.  A backslash that does not start a decodable
 * "\xHH" sequence is dropped and the characters after it are copied as-is.
 */
string from_escaped_string(const char *str, size_t len)
{
    string retval;

    for (size_t lpc = 0; lpc < len; lpc++) {
        switch (str[lpc]) {
            case '\\':
                // Only decode when "xHH" fits inside the buffer.
                if ((lpc + 3) < len && str[lpc + 1] == 'x') {
                    // "%x" stores through an unsigned int pointer.
                    unsigned int ch;

                    if (sscanf(&str[lpc + 2], "%2x", &ch) == 1) {
                        retval.append(1, static_cast<char>(ch & 0xff));
                        lpc += 3;
                    }
                }
                break;
            default:
                retval.append(1, str[lpc]);
                break;
        }
    }

    return retval;
}

/**
 * Length-limited substring search.
 *
 * @param s The buffer to search; the search also stops at a NUL byte.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of s to examine.
 * @return A pointer to the first occurrence of find within the first slen
 * bytes of s, s itself if find is empty, or nullopt if there is no match.
 */
nonstd::optional<const char *>
lnav_strnstr(const char *s, const char *find, size_t slen)
{
    char c, sc;
    size_t len;

    if ((c = *find++) != '\0') {
        len = strlen(find);
        do {
            // Scan forward to the next occurrence of the first character.
            do {
                if (slen < 1 || (sc = *s) == '\0') {
                    return nonstd::nullopt;
                }
                --slen;
                ++s;
            } while (sc != c);
            if (len > slen) {
                return nonstd::nullopt;
            }
        } while (strncmp(s, find, len) != 0);
        s--;
    }
    return s;
}

/**
 * A view over a buffer that is split into fields by a separator string
 * (a comma unless with_separator() is called).
 */
struct separated_string {
    const char *ss_str;
    size_t ss_len;
    const char *ss_separator;
    size_t ss_separator_len;

    separated_string(const char *str, size_t len)
        : ss_str(str), ss_len(len), ss_separator(",") {
        this->ss_separator_len = strlen(this->ss_separator);
    };

    separated_string &with_separator(const char *sep) {
        this->ss_separator = sep;
        this->ss_separator_len = strlen(sep);
        return *this;
    };

    struct iterator {
        const separated_string &i_parent;
        const char *i_pos;
        const char *i_next_pos;
        size_t i_index;

        iterator(const separated_string &ss, const char *pos)
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0) {
            this->update();
        };

        /** Locate the start of the field that follows the current one. */
        void update() {
            const separated_string &ss = this->i_parent;
            auto next_field = lnav_strnstr(
                this->i_pos,
                ss.ss_separator,
                ss.ss_len - (this->i_pos - ss.ss_str));

            if (next_field) {
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
            } else {
                this->i_next_pos = ss.ss_str + ss.ss_len;
            }
        };

        iterator &operator++() {
            this->i_pos = this->i_next_pos;
            this->update();
            this->i_index += 1;
return *this; }; string_fragment operator*() { const separated_string &ss = this->i_parent; int end; if (this->i_next_pos < (ss.ss_str + ss.ss_len)) { end = this->i_next_pos - ss.ss_str - ss.ss_separator_len; } else { end = this->i_next_pos - ss.ss_str; } return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end); }; bool operator==(const iterator &other) const { return (&this->i_parent == &other.i_parent) && (this->i_pos == other.i_pos); }; bool operator!=(const iterator &other) const { return !(*this == other); }; size_t index() const { return this->i_index; }; }; iterator begin() { return {*this, this->ss_str}; }; iterator end() { return {*this, this->ss_str + this->ss_len}; }; }; class bro_log_format : public log_format { public: struct field_def { logline_value_meta fd_meta; std::string fd_collator; int fd_numeric_index; explicit field_def(const intern_string_t name, int col, log_format *format) : fd_meta(name, value_kind_t::VALUE_TEXT, col, format), fd_numeric_index(-1) { }; field_def &with_kind(value_kind_t kind, bool identifier = false, const std::string &collator = "") { this->fd_meta.lvm_kind = kind; this->fd_meta.lvm_identifier = identifier; this->fd_collator = collator; return *this; }; field_def &with_numeric_index(int index) { this->fd_numeric_index = index; return *this; } }; bro_log_format() { this->lf_is_self_describing = true; this->lf_time_ordered = false; }; const intern_string_t get_name() const { static const intern_string_t name(intern_string::lookup("bro")); return this->blf_format_name.empty() ? 
name : this->blf_format_name; }; virtual void clear() { this->log_format::clear(); this->blf_format_name.clear(); this->blf_field_defs.clear(); }; scan_result_t scan_int(std::vector &dst, const line_info &li, shared_buffer_ref &sbr) { static const intern_string_t STATUS_CODE = intern_string::lookup("bro_status_code"); static const intern_string_t TS = intern_string::lookup("bro_ts"); static const intern_string_t UID = intern_string::lookup("bro_uid"); separated_string ss(sbr.get_data(), sbr.length()); struct timeval tv; struct exttm tm; bool found_ts = false; log_level_t level = LEVEL_INFO; uint8_t opid = 0; ss.with_separator(this->blf_separator.get()); for (auto iter = ss.begin(); iter != ss.end(); ++iter) { if (iter.index() == 0 && *iter == "#close") { return SCAN_MATCH; } if (iter.index() >= this->blf_field_defs.size()) { break; } const auto &fd = this->blf_field_defs[iter.index()]; if (TS == fd.fd_meta.lvm_name) { string_fragment sf = *iter; if (this->lf_date_time.scan(sf.data(), sf.length(), nullptr, &tm, tv)) { this->lf_timestamp_flags = tm.et_flags; found_ts = true; } } else if (STATUS_CODE == fd.fd_meta.lvm_name) { string_fragment sf = *iter; if (!sf.empty() && sf[0] >= '4') { level = LEVEL_ERROR; } } else if (UID == fd.fd_meta.lvm_name) { string_fragment sf = *iter; opid = hash_str(sf.data(), sf.length()); } if (fd.fd_numeric_index >= 0) { switch (fd.fd_meta.lvm_kind) { case value_kind_t::VALUE_INTEGER: case value_kind_t::VALUE_FLOAT: { string_fragment sf = *iter; char field_copy[sf.length() + 1]; double val; if (sscanf(sf.to_string(field_copy), "%lf", &val) == 1) { this->lf_value_stats[fd.fd_numeric_index].add_value(val); } break; } default: break; } } } if (found_ts) { dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0, opid); return SCAN_MATCH; } else { return SCAN_NO_MATCH; } } scan_result_t scan(logfile &lf, std::vector &dst, const line_info &li, shared_buffer_ref &sbr) { static pcrepp SEP_RE(R"(^#separator\s+(.+))"); if 
(!this->blf_format_name.empty()) { return this->scan_int(dst, li, sbr); } if (dst.empty() || dst.size() > 20 || sbr.empty() || sbr.get_data()[0] == '#') { return SCAN_NO_MATCH; } pcre_context_static<20> pc; auto line_iter = dst.begin(); auto read_result = lf.read_line(line_iter); if (read_result.isErr()) { return SCAN_NO_MATCH; } auto line = read_result.unwrap(); pcre_input pi(line.get_data(), 0, line.length()); if (!SEP_RE.match(pc, pi)) { return SCAN_NO_MATCH; } this->clear(); string sep = from_escaped_string(pi.get_substr_start(pc[0]), pc[0]->length()); this->blf_separator = intern_string::lookup(sep); for (++line_iter; line_iter != dst.end(); ++line_iter) { auto next_read_result = lf.read_line(line_iter); if (next_read_result.isErr()) { return SCAN_NO_MATCH; } line = next_read_result.unwrap(); separated_string ss(line.get_data(), line.length()); ss.with_separator(this->blf_separator.get()); auto iter = ss.begin(); string_fragment directive = *iter; if (directive.empty() || directive[0] != '#') { continue; } ++iter; if (iter == ss.end()) { continue; } if (directive == "#set_separator") { this->blf_set_separator = intern_string::lookup(*iter); } else if (directive == "#empty_field") { this->blf_empty_field = intern_string::lookup(*iter); } else if (directive == "#unset_field") { this->blf_unset_field = intern_string::lookup(*iter); } else if (directive == "#path") { string path = to_string(*iter); char full_name[128]; snprintf(full_name, sizeof(full_name), "bro_%s_log", path.c_str()); this->blf_format_name = intern_string::lookup(full_name); } else if (directive == "#fields") { do { this->blf_field_defs.emplace_back( intern_string::lookup("bro_" + sql_safe_ident(*iter)), this->blf_field_defs.size(), this); ++iter; } while (iter != ss.end()); } else if (directive == "#types") { static const char *KNOWN_IDS[] = { "bro_conn_uids", "bro_fuid", "bro_host", "bro_info_code", "bro_method", "bro_mime_type", "bro_orig_fuids", "bro_parent_fuid", "bro_proto", "bro_referrer", 
"bro_resp_fuids", "bro_service", "bro_status_code", "bro_uid", "bro_uri", "bro_user_agent", "bro_username", }; int numeric_count = 0; do { string_fragment field_type = *iter; auto &fd = this->blf_field_defs[iter.index() - 1]; if (field_type == "time") { fd.with_kind(value_kind_t::VALUE_TIMESTAMP); } else if (field_type == "string") { bool ident = binary_search(begin(KNOWN_IDS), end(KNOWN_IDS), fd.fd_meta.lvm_name); fd.with_kind(value_kind_t::VALUE_TEXT, ident); } else if (field_type == "count") { bool ident = binary_search(begin(KNOWN_IDS), end(KNOWN_IDS), fd.fd_meta.lvm_name); fd.with_kind(value_kind_t::VALUE_INTEGER, ident) .with_numeric_index(numeric_count); numeric_count += 1; } else if (field_type == "bool") { fd.with_kind(value_kind_t::VALUE_BOOLEAN); } else if (field_type == "addr") { fd.with_kind(value_kind_t::VALUE_TEXT, true, "ipaddress"); } else if (field_type == "port") { fd.with_kind(value_kind_t::VALUE_INTEGER, true); } else if (field_type == "interval") { fd.with_kind(value_kind_t::VALUE_FLOAT) .with_numeric_index(numeric_count); numeric_count += 1; } ++iter; } while (iter != ss.end()); this->lf_value_stats.resize(numeric_count); } } if (!this->blf_format_name.empty() && !this->blf_separator.empty() && !this->blf_field_defs.empty()) { dst.clear(); return this->scan_int(dst, li, sbr); } this->blf_format_name.clear(); this->lf_value_stats.clear(); return SCAN_NO_MATCH; }; void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, std::vector &values, bool annotate_module) const { static const intern_string_t TS = intern_string::lookup("bro_ts"); static const intern_string_t UID = intern_string::lookup("bro_uid"); separated_string ss(sbr.get_data(), sbr.length()); ss.with_separator(this->blf_separator.get()); for (auto iter = ss.begin(); iter != ss.end(); ++iter) { if (iter.index() >= this->blf_field_defs.size()) { return; } const field_def &fd = this->blf_field_defs[iter.index()]; string_fragment sf = *iter; if (sf == 
this->blf_empty_field) { sf.clear(); } else if (sf == this->blf_unset_field) { sf.invalidate(); } auto lr = line_range(sf.sf_begin, sf.sf_end); if (fd.fd_meta.lvm_name == TS) { sa.emplace_back(lr, &logline::L_TIMESTAMP); } else if (fd.fd_meta.lvm_name == UID) { sa.emplace_back(lr, &logline::L_OPID); } if (lr.is_valid()) { values.emplace_back(fd.fd_meta, sbr, lr); } else { values.emplace_back(fd.fd_meta); } } }; const logline_value_stats *stats_for_value(const intern_string_t &name) const { const logline_value_stats *retval = nullptr; for (size_t lpc = 0; lpc < this->blf_field_defs.size(); lpc++) { if (this->blf_field_defs[lpc].fd_meta.lvm_name == name) { if (this->blf_field_defs[lpc].fd_numeric_index < 0) { break; } retval = &this->lf_value_stats[this->blf_field_defs[lpc].fd_numeric_index]; break; } } return retval; }; std::shared_ptr specialized(int fmt_lock = -1) { return make_shared(*this); }; class bro_log_table : public log_format_vtab_impl { public: bro_log_table(const bro_log_format &format) : log_format_vtab_impl(format), blt_format(format) { } void get_columns(vector &cols) const override { for (const auto &fd : this->blt_format.blf_field_defs) { std::pair type_pair = log_vtab_impl::logline_value_to_sqlite_type(fd.fd_meta.lvm_kind); cols.emplace_back(fd.fd_meta.lvm_name.to_string(), type_pair.first, fd.fd_collator, false, "", type_pair.second); } }; void get_foreign_keys(std::vector &keys_inout) const override { this->log_vtab_impl::get_foreign_keys(keys_inout); for (const auto &fd : this->blt_format.blf_field_defs) { if (fd.fd_meta.lvm_identifier) { keys_inout.push_back(fd.fd_meta.lvm_name.to_string()); } } } const bro_log_format &blt_format; }; static map> &get_tables() { static map> retval; return retval; }; std::shared_ptr get_vtab_impl() const { if (this->blf_format_name.empty()) { return nullptr; } std::shared_ptr retval = nullptr; auto &tables = get_tables(); auto iter = tables.find(this->blf_format_name); if (iter == tables.end()) { retval = 
std::make_shared(*this); tables[this->blf_format_name] = retval; } return retval; }; void get_subline(const logline &ll, shared_buffer_ref &sbr, bool full_message) { } intern_string_t blf_format_name; intern_string_t blf_separator; intern_string_t blf_set_separator; intern_string_t blf_empty_field; intern_string_t blf_unset_field; vector blf_field_defs; }; struct ws_separated_string { const char *ss_str; size_t ss_len; explicit ws_separated_string(const char *str = nullptr, size_t len = -1) : ss_str(str), ss_len(len) { }; struct iterator { enum class state_t { NORMAL, QUOTED, }; const ws_separated_string &i_parent; const char *i_pos; const char *i_next_pos; size_t i_index{0}; state_t i_state{state_t::NORMAL}; iterator(const ws_separated_string &ss, const char *pos) : i_parent(ss), i_pos(pos), i_next_pos(pos) { this->update(); }; void update() { const auto &ss = this->i_parent; bool done = false; while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) { switch (this->i_state) { case state_t::NORMAL: if (*this->i_next_pos == '"') { this->i_state = state_t::QUOTED; } else if (isspace(*this->i_next_pos)) { done = true; } break; case state_t::QUOTED: if (*this->i_next_pos == '"') { this->i_state = state_t::NORMAL; } break; } if (!done) { this->i_next_pos += 1; } } }; iterator &operator++() { const auto &ss = this->i_parent; this->i_pos = this->i_next_pos; while (this->i_pos < (ss.ss_str + ss.ss_len) && isspace(*this->i_pos)) { this->i_pos += 1; this->i_next_pos += 1; } this->update(); this->i_index += 1; return *this; }; string_fragment operator*() { const auto &ss = this->i_parent; int end = this->i_next_pos - ss.ss_str; return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end); }; bool operator==(const iterator &other) const { return (&this->i_parent == &other.i_parent) && (this->i_pos == other.i_pos); }; bool operator!=(const iterator &other) const { return !(*this == other); }; size_t index() const { return this->i_index; }; }; iterator begin() { return 
{*this, this->ss_str}; }; iterator end() { return {*this, this->ss_str + this->ss_len}; }; }; class w3c_log_format : public log_format { public: struct field_def { const intern_string_t fd_name; logline_value_meta fd_meta; std::string fd_collator; int fd_numeric_index; explicit field_def(const intern_string_t name) : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(name.to_string_fragment())), value_kind_t::VALUE_TEXT), fd_numeric_index(-1) { }; field_def(const intern_string_t name, logline_value_meta meta) : fd_name(name), fd_meta(meta), fd_numeric_index(-1) { } field_def(int col, const char *name, value_kind_t kind, bool ident = false, std::string coll = "") : fd_name(intern_string::lookup(name)), fd_meta(intern_string::lookup(sql_safe_ident(string_fragment(name))), kind, col), fd_collator(std::move(coll)), fd_numeric_index(-1) { this->fd_meta.lvm_identifier = ident; } field_def &with_kind(value_kind_t kind, bool identifier = false, const std::string &collator = "") { this->fd_meta.lvm_kind = kind; this->fd_meta.lvm_identifier = identifier; this->fd_collator = collator; return *this; }; field_def &with_numeric_index(int index) { this->fd_numeric_index = index; return *this; } }; struct field_to_struct_t { field_to_struct_t(const char *prefix, const char *struct_name) : fs_prefix(prefix), fs_struct_name(intern_string::lookup(struct_name)) { } const char *fs_prefix; intern_string_t fs_struct_name; }; static const std::vector KNOWN_FIELDS; const static std::vector KNOWN_STRUCT_FIELDS; w3c_log_format() { this->lf_is_self_describing = true; this->lf_time_ordered = false; }; const intern_string_t get_name() const override { static const intern_string_t name(intern_string::lookup("w3c")); return this->wlf_format_name.empty() ? 
name : this->wlf_format_name; }; void clear() override { this->log_format::clear(); this->wlf_time_scanner.clear(); this->wlf_format_name.clear(); this->wlf_field_defs.clear(); }; scan_result_t scan_int(std::vector &dst, const line_info &li, shared_buffer_ref &sbr) { static const intern_string_t F_DATE = intern_string::lookup("date"); static const intern_string_t F_DATE_LOCAL = intern_string::lookup("date-local"); static const intern_string_t F_DATE_UTC = intern_string::lookup("date-UTC"); static const intern_string_t F_TIME = intern_string::lookup("time"); static const intern_string_t F_TIME_LOCAL = intern_string::lookup("time-local"); static const intern_string_t F_TIME_UTC = intern_string::lookup("time-UTC"); static const intern_string_t F_STATUS_CODE = intern_string::lookup("sc-status"); ws_separated_string ss(sbr.get_data(), sbr.length()); struct timeval date_tv{0, 0}, time_tv{0, 0}; struct exttm date_tm, time_tm; bool found_date = false, found_time = false; log_level_t level = LEVEL_INFO; for (auto iter = ss.begin(); iter != ss.end(); ++iter) { if (iter.index() >= this->wlf_field_defs.size()) { level = LEVEL_INVALID; break; } const field_def &fd = this->wlf_field_defs[iter.index()]; string_fragment sf = *iter; if (sf.startswith("#")) { if (sf == "#Date:") { date_time_scanner dts; struct exttm tm; struct timeval tv; if (dts.scan(sbr.get_data_at(sf.length() + 1), sbr.length() - sf.length() - 1, nullptr, &tm, tv)) { this->lf_date_time.set_base_time(tv.tv_sec); this->wlf_time_scanner.set_base_time(tv.tv_sec); } } dst.emplace_back(li.li_file_range.fr_offset, 0, 0, LEVEL_IGNORE, 0); return SCAN_MATCH; } sf.trim("\" \t"); if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name || F_DATE_UTC == fd.fd_name) { if (this->lf_date_time.scan(sf.data(), sf.length(), nullptr, &date_tm, date_tv)) { this->lf_timestamp_flags |= date_tm.et_flags; found_date = true; } } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name || F_TIME_UTC == fd.fd_name) { if 
(this->wlf_time_scanner.scan(sf.data(), sf.length(), nullptr, &time_tm, time_tv)) { this->lf_timestamp_flags |= time_tm.et_flags; found_time = true; } } else if (F_STATUS_CODE == fd.fd_name) { if (!sf.empty() && sf[0] >= '4') { level = LEVEL_ERROR; } } if (fd.fd_numeric_index >= 0) { switch (fd.fd_meta.lvm_kind) { case value_kind_t::VALUE_INTEGER: case value_kind_t::VALUE_FLOAT: { char field_copy[sf.length() + 1]; double val; if (sscanf(sf.to_string(field_copy), "%lf", &val) == 1) { this->lf_value_stats[fd.fd_numeric_index].add_value(val); } break; } default: break; } } } if (found_time) { struct exttm tm = time_tm; struct timeval tv; if (found_date) { tm.et_tm.tm_year = date_tm.et_tm.tm_year; tm.et_tm.tm_mday = date_tm.et_tm.tm_mday; tm.et_tm.tm_mon = date_tm.et_tm.tm_mon; tm.et_tm.tm_wday = date_tm.et_tm.tm_wday; tm.et_tm.tm_yday = date_tm.et_tm.tm_yday; } tv.tv_sec = tm2sec(&tm.et_tm); tv.tv_usec = tm.et_nsec / 1000; dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0); return SCAN_MATCH; } else { return SCAN_NO_MATCH; } } scan_result_t scan(logfile &lf, std::vector &dst, const line_info &li, shared_buffer_ref &sbr) override { static auto W3C_LOG_NAME = intern_string::lookup("w3c_log"); static auto X_FIELDS_NAME = intern_string::lookup("x_fields"); static auto X_FIELDS_IDX = 0; if (!this->wlf_format_name.empty()) { return this->scan_int(dst, li, sbr); } if (dst.empty() || dst.size() > 20 || sbr.empty() || sbr.get_data()[0] == '#') { return SCAN_NO_MATCH; } this->clear(); for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter) { auto next_read_result = lf.read_line(line_iter); if (next_read_result.isErr()) { return SCAN_NO_MATCH; } auto line = next_read_result.unwrap(); ws_separated_string ss(line.get_data(), line.length()); auto iter = ss.begin(); string_fragment directive = *iter; if (directive.empty() || directive[0] != '#') { continue; } ++iter; if (iter == ss.end()) { continue; } if (directive == "#Date:") { date_time_scanner dts; 
struct exttm tm; struct timeval tv; if (dts.scan(line.get_data_at(directive.length() + 1), line.length() - directive.length() - 1, nullptr, &tm, tv)) { this->lf_date_time.set_base_time(tv.tv_sec); this->wlf_time_scanner.set_base_time(tv.tv_sec); } } else if (directive == "#Fields:") { int numeric_count = 0; do { string_fragment sf = *iter; sf.trim(")"); auto field_iter = std::find_if(begin(KNOWN_FIELDS), end(KNOWN_FIELDS), [&sf](auto elem) { return sf == elem.fd_name; }); if (field_iter != end(KNOWN_FIELDS)) { this->wlf_field_defs.emplace_back(*field_iter); } else if (sf == "date" || sf == "time") { this->wlf_field_defs.emplace_back( intern_string::lookup(sf)); } else { const auto fs_iter = std::find_if( begin(KNOWN_STRUCT_FIELDS), end(KNOWN_STRUCT_FIELDS), [&sf](auto elem) { return sf.startswith(elem.fs_prefix); }); if (fs_iter != end(KNOWN_STRUCT_FIELDS)) { auto field_name = intern_string::lookup(sf.substr(3)); this->wlf_field_defs.emplace_back( field_name, logline_value_meta( field_name, value_kind_t::VALUE_TEXT, KNOWN_FIELDS.size() + 1 + std::distance(begin(KNOWN_STRUCT_FIELDS), fs_iter), this) .with_struct_name(fs_iter->fs_struct_name)); } else { auto field_name = intern_string::lookup(sf); this->wlf_field_defs.emplace_back( field_name, logline_value_meta(field_name, value_kind_t::VALUE_TEXT, KNOWN_FIELDS.size() + X_FIELDS_IDX, this) .with_struct_name(X_FIELDS_NAME)); } } auto& fd = this->wlf_field_defs.back(); fd.fd_meta.lvm_format = nonstd::make_optional(this); switch (fd.fd_meta.lvm_kind) { case value_kind_t::VALUE_FLOAT: case value_kind_t::VALUE_INTEGER: fd.with_numeric_index(numeric_count); numeric_count += 1; break; default: break; } ++iter; } while (iter != ss.end()); this->wlf_format_name = W3C_LOG_NAME; this->lf_value_stats.resize(numeric_count); } } if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) { dst.clear(); return this->scan_int(dst, li, sbr); } this->wlf_format_name.clear(); this->lf_value_stats.clear(); return 
SCAN_NO_MATCH; }; void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, std::vector &values, bool annotate_module) const override { ws_separated_string ss(sbr.get_data(), sbr.length()); for (auto iter = ss.begin(); iter != ss.end(); ++iter) { string_fragment sf = *iter; if (iter.index() >= this->wlf_field_defs.size()) { sa.emplace_back(line_range{sf.sf_begin, -1}, &SA_INVALID, (void *) "extra fields detected"); return; } const field_def &fd = this->wlf_field_defs[iter.index()]; if (sf == "-") { sf.invalidate(); } auto lr = line_range(sf.sf_begin, sf.sf_end); if (lr.is_valid()) { values.emplace_back(fd.fd_meta, sbr, lr); if (sf.startswith("\"")) { auto& meta = values.back().lv_meta; if (meta.lvm_kind == value_kind_t::VALUE_TEXT) { meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED; } else { meta.lvm_kind = value_kind_t::VALUE_NULL; } } } else { values.emplace_back(fd.fd_meta); } } }; const logline_value_stats *stats_for_value(const intern_string_t &name) const override { const logline_value_stats *retval = nullptr; for (const auto & wlf_field_def : this->wlf_field_defs) { if (wlf_field_def.fd_meta.lvm_name == name) { if (wlf_field_def.fd_numeric_index < 0) { break; } retval = &this->lf_value_stats[wlf_field_def.fd_numeric_index]; break; } } return retval; }; std::shared_ptr specialized(int fmt_lock = -1) override { return make_shared(*this); }; class w3c_log_table : public log_format_vtab_impl { public: explicit w3c_log_table(const w3c_log_format &format) : log_format_vtab_impl(format), wlt_format(format) { } void get_columns(vector &cols) const override { for (const auto &fd : KNOWN_FIELDS) { auto type_pair = log_vtab_impl::logline_value_to_sqlite_type( fd.fd_meta.lvm_kind); cols.emplace_back(fd.fd_meta.lvm_name.to_string(), type_pair.first, fd.fd_collator, false, "", type_pair.second); } cols.emplace_back("x_fields"); cols.back().with_comment( "A JSON-object that contains fields that are not first-class columns"); for (const auto& fs : 
KNOWN_STRUCT_FIELDS) { cols.emplace_back(fs.fs_struct_name.to_string()); } }; void get_foreign_keys(std::vector &keys_inout) const override { this->log_vtab_impl::get_foreign_keys(keys_inout); for (const auto &fd : KNOWN_FIELDS) { if (fd.fd_meta.lvm_identifier) { keys_inout.push_back(fd.fd_meta.lvm_name.to_string()); } } } const w3c_log_format &wlt_format; }; static map> &get_tables() { static map> retval; return retval; }; std::shared_ptr get_vtab_impl() const override { if (this->wlf_format_name.empty()) { return nullptr; } std::shared_ptr retval = nullptr; auto &tables = get_tables(); auto iter = tables.find(this->wlf_format_name); if (iter == tables.end()) { retval = std::make_shared(*this); tables[this->wlf_format_name] = retval; } return retval; }; void get_subline(const logline &ll, shared_buffer_ref &sbr, bool full_message) override { } date_time_scanner wlf_time_scanner; intern_string_t wlf_format_name; vector wlf_field_defs; }; static int KNOWN_FIELD_INDEX = 0; const std::vector w3c_log_format::KNOWN_FIELDS = { { KNOWN_FIELD_INDEX++, "cs-method", value_kind_t::VALUE_TEXT, true, }, { KNOWN_FIELD_INDEX++, "c-ip", value_kind_t::VALUE_TEXT, true, "ipaddress", }, { KNOWN_FIELD_INDEX++, "cs-bytes", value_kind_t::VALUE_INTEGER, false, }, { KNOWN_FIELD_INDEX++, "cs-host", value_kind_t::VALUE_TEXT, true, }, { KNOWN_FIELD_INDEX++, "cs-uri-stem", value_kind_t::VALUE_TEXT, true, "naturalnocase", }, { KNOWN_FIELD_INDEX++, "cs-uri-query", value_kind_t::VALUE_TEXT, false, }, { KNOWN_FIELD_INDEX++, "cs-username", value_kind_t::VALUE_TEXT, false, }, { KNOWN_FIELD_INDEX++, "cs-version", value_kind_t::VALUE_TEXT, true, }, { KNOWN_FIELD_INDEX++, "s-ip", value_kind_t::VALUE_TEXT, true, "ipaddress", }, { KNOWN_FIELD_INDEX++, "s-port", value_kind_t::VALUE_INTEGER, true, }, { KNOWN_FIELD_INDEX++, "s-computername", value_kind_t::VALUE_TEXT, true, }, { KNOWN_FIELD_INDEX++, "s-sitename", value_kind_t::VALUE_TEXT, true, }, { KNOWN_FIELD_INDEX++, "sc-bytes", 
value_kind_t::VALUE_INTEGER, false, }, { KNOWN_FIELD_INDEX++, "sc-status", value_kind_t::VALUE_INTEGER, false, }, { KNOWN_FIELD_INDEX++, "sc-substatus", value_kind_t::VALUE_INTEGER, false, }, { KNOWN_FIELD_INDEX++, "time-taken", value_kind_t::VALUE_FLOAT, false, }, }; const std::vector w3c_log_format::KNOWN_STRUCT_FIELDS = { {"cs(", "cs_headers"}, {"sc(", "sc_headers"}, {"rs(", "rs_headers"}, {"sr(", "sr_headers"}, }; struct logfmt_pair_handler { explicit logfmt_pair_handler(date_time_scanner &dts) : lph_dt_scanner(dts) { } bool process_value(const string_fragment& value_frag) { if (this->lph_key_frag == "time" || this->lph_key_frag == "ts") { if (!this->lph_dt_scanner.scan(value_frag.data(), value_frag.length(), nullptr, &this->lph_time_tm, this->lph_tv)) { return false; } this->lph_found_time = true; } else if (this->lph_key_frag == "level") { this->lph_level = string2level(value_frag.data(), value_frag.length()); } return true; } date_time_scanner &lph_dt_scanner; bool lph_found_time{false}; struct exttm lph_time_tm{}; struct timeval lph_tv{0, 0}; log_level_t lph_level{log_level_t::LEVEL_INFO}; string_fragment lph_key_frag{""}; }; class logfmt_format : public log_format { public: const intern_string_t get_name() const override { const static auto NAME = intern_string::lookup("logfmt_log"); return NAME; } class logfmt_log_table : public log_format_vtab_impl { public: logfmt_log_table(const log_format &format) : log_format_vtab_impl(format) {} void get_columns(vector &cols) const override { static const auto FIELDS = std::string("fields"); cols.emplace_back(FIELDS); }; }; shared_ptr get_vtab_impl() const override { static auto retval = std::make_shared(*this); return retval; } scan_result_t scan(logfile &lf, vector &dst, const line_info &li, shared_buffer_ref &sbr) override { auto p = logfmt::parser(string_fragment{sbr.get_data(), 0, (int) sbr.length()}); scan_result_t retval = scan_result_t::SCAN_NO_MATCH; bool done = false; logfmt_pair_handler 
lph(this->lf_date_time); while (!done) { auto parse_result = p.step(); done = parse_result.match( [](const logfmt::parser::end_of_input &) { return true; }, [&lph](const logfmt::parser::kvpair &kvp) { lph.lph_key_frag = kvp.first; return kvp.second.match( [](const logfmt::parser::bool_value& bv) { return false; }, [&lph](const logfmt::parser::float_value& fv) { return lph.process_value(fv.fv_str_value); }, [&lph](const logfmt::parser::int_value& iv) { return lph.process_value(iv.iv_str_value); }, [&lph](const logfmt::parser::quoted_value &qv) { auto_mem handle(yajl_free); yajl_callbacks cb; handle = yajl_alloc(&cb, nullptr, &lph); memset(&cb, 0, sizeof(cb)); cb.yajl_string = +[](void *ctx, const unsigned char* str, size_t len) -> int { auto& lph = *((logfmt_pair_handler *)ctx); string_fragment value_frag{str, 0, (int) len}; return lph.process_value(value_frag); }; if (yajl_parse(handle, (const unsigned char *) qv.qv_value.data(), qv.qv_value.length()) != yajl_status_ok || yajl_complete_parse(handle) != yajl_status_ok) { log_debug("json parsing failed"); string_fragment unq_frag{ qv.qv_value.sf_string, qv.qv_value.sf_begin + 1, qv.qv_value.sf_end - 1, }; return lph.process_value(unq_frag); } return false; }, [&lph](const logfmt::parser::unquoted_value &uv) { return lph.process_value(uv.uv_value); } ); }, [](const logfmt::parser::error &err) { // log_error("logfmt parse error: %s", err.e_msg.c_str()); return true; } ); } if (lph.lph_found_time) { dst.emplace_back(li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level); retval = scan_result_t::SCAN_MATCH; } return retval; } void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, vector &values, bool annotate_module) const override { static const auto FIELDS_NAME = intern_string::lookup("fields"); auto p = logfmt::parser( string_fragment{sbr.get_data(), 0, (int) sbr.length()}); bool done = false; while (!done) { auto parse_result = p.step(); done = parse_result.match( [](const 
logfmt::parser::end_of_input &) { return true; }, [this, &sa, &values, &sbr](const logfmt::parser::kvpair &kvp) { auto value_frag = kvp.second.match( [this, &kvp, &values](const logfmt::parser::bool_value& bv) { auto lvm = logline_value_meta{ intern_string::lookup(kvp.first), value_kind_t::VALUE_INTEGER, 0, (log_format *) this } .with_struct_name(FIELDS_NAME); values.emplace_back(lvm, bv.bv_value); return bv.bv_str_value; }, [this, &kvp, &values](const logfmt::parser::int_value& iv) { auto lvm = logline_value_meta{ intern_string::lookup(kvp.first), value_kind_t::VALUE_INTEGER, 0, (log_format *) this } .with_struct_name(FIELDS_NAME); values.emplace_back(lvm, iv.iv_value); return iv.iv_str_value; }, [this, &kvp, &values](const logfmt::parser::float_value& fv) { auto lvm = logline_value_meta{ intern_string::lookup(kvp.first), value_kind_t::VALUE_INTEGER, 0, (log_format *) this } .with_struct_name(FIELDS_NAME); values.emplace_back(lvm, fv.fv_value); return fv.fv_str_value; }, [](const logfmt::parser::quoted_value &qv) { return qv.qv_value; }, [](const logfmt::parser::unquoted_value &uv) { return uv.uv_value; } ); auto value_lr = line_range{ value_frag.sf_begin, value_frag.sf_end }; if (kvp.first == "time" || kvp.first == "ts") { sa.emplace_back(value_lr, &logline::L_TIMESTAMP); } else if (kvp.first == "level") { } else if (kvp.first == "msg") { sa.emplace_back(value_lr, &SA_BODY); } else if (!kvp.second.is() && !kvp.second.is()) { auto lvm = logline_value_meta{ intern_string::lookup(kvp.first), value_frag.startswith("\"") ? 
value_kind_t::VALUE_JSON : value_kind_t::VALUE_TEXT, 0, (log_format *) this } .with_struct_name(FIELDS_NAME); shared_buffer_ref value_sbr; value_sbr.subset(sbr, value_frag.sf_begin, value_frag.length()); values.emplace_back(lvm, value_sbr); } return false; }, [line_number, &sbr](const logfmt::parser::error &err) { log_error("bad line %.*s", sbr.length(), sbr.get_data()); log_error("%lld:logfmt parse error: %s", line_number, err.e_msg.c_str()); return true; } ); } } shared_ptr specialized(int fmt_lock) override { return std::make_shared(*this); }; }; static auto format_binder = injector::bind_multiple() .add() .add() .add() .add();