[data_scanner] recognize XML comments

pull/1106/merge
Tim Stack 9 months ago
parent 5c0333fd64
commit 12c2718a16

@ -854,6 +854,11 @@ operator==(const string_fragment& left, const intern_string_t& right)
&& (memcmp(left.data(), right.get(), left.length()) == 0);
}
// User-defined literal suffix: "text"_frag yields a string_fragment for the
// literal.  The compiler supplies the literal's pointer and length, and they
// are forwarded to string_fragment::from_byte_range() with a start offset
// of 0 and `len` as the end.
inline string_fragment operator"" _frag(const char* str, std::size_t len)
{
return string_fragment::from_byte_range(str, 0, len);
}
namespace std {
inline string
to_string(const string_fragment& s)

@ -94,6 +94,7 @@ enum exttm_bits_t {
ETB_Z_FOR_UTC,
ETB_Z_COLON,
ETB_Z_IS_UTC,
ETB_Z_IS_GMT,
};
enum exttm_flags_t {
@ -113,6 +114,7 @@ enum exttm_flags_t {
ETF_Z_FOR_UTC = (1UL << ETB_Z_FOR_UTC),
ETF_Z_COLON = (1UL << ETB_Z_COLON),
ETF_Z_IS_UTC = (1UL << ETB_Z_IS_UTC),
ETF_Z_IS_GMT = (1UL << ETB_Z_IS_GMT),
};
struct exttm {

File diff suppressed because it is too large Load Diff

@ -140,9 +140,10 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
EOF { return nonstd::nullopt; }
("u"|"r")?'"'('\\'.|[^\x00\x1b"\\]|'""')*'"' {
("f"|"u"|"r")?'"'('\\'.|[^\x00\x1b"\\]|'""')*'"' {
CAPTURE(DT_QUOTED_STRING);
switch (this->ds_input[cap_inner.c_begin]) {
case 'f':
case 'u':
case 'r':
cap_inner.c_begin += 1;
@ -152,9 +153,10 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
("u"|"r")?'"""'[^\x00\x1b]*'"""' {
("f"|"u"|"r")?'"""'[^\x00\x1b]*'"""' {
CAPTURE(DT_QUOTED_STRING);
switch (this->ds_input[cap_inner.c_begin]) {
case 'f':
case 'u':
case 'r':
cap_inner.c_begin += 1;
@ -164,13 +166,16 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
"/" "*" ([^\x00*]|"*"+[^\x00/])* "*"+ "/" {
"/*" ([^\x00*]|"*"+[^\x00/])* "*"+ "/" {
RET(DT_COMMENT);
}
// XML comment: "<!--" up to the FIRST run of two or more dashes that is
// immediately followed by ">".  The dash is written as \x2d inside the
// character classes so it can never be read as a range operator.
// The body is built from three mutually exclusive pieces:
//   - any byte other than NUL or a dash (asterisks are ordinary text here),
//   - a lone dash followed by a non-dash, so "->" stays inside the comment,
//   - two or more dashes followed by something other than ">" or a dash.
// Because a dash can only be consumed by the last two pieces, the match
// cannot run past the first "-->" terminator into a later comment.
"<!--" ([^\x00\x2d]|"-"[^\x00\x2d]|"-"{2,}[^\x00>\x2d])* "-"{2,} ">" {
RET(DT_COMMENT);
}
[a-qstv-zA-QSTV-Z]"'" {
CAPTURE(DT_WORD);
}
("u"|"r")?"'"('\\'.|"''"|[^\x00\x1b'\\])*"'"/[^sS] {
("f"|"u"|"r")?"'"('\\'.|"''"|[^\x00\x1b'\\])*"'"/[^sS] {
CAPTURE(DT_QUOTED_STRING);
if (tf == text_format_t::TF_RUST) {
auto sf = this->to_string_fragment(cap_all);
@ -181,6 +186,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
return tokenize_result{DT_SYMBOL, cap_all, cap_inner, this->ds_input.data()};
}
switch (this->ds_input[cap_inner.c_begin]) {
case 'f':
case 'u':
case 'r':
cap_inner.c_begin += 1;

@ -280,6 +280,12 @@ public:
el.e_capture.c_begin,
el.e_capture.c_end,
section_types_t::comment);
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start + el.e_capture.c_begin,
this->sw_range.lr_start + el.e_capture.c_end,
},
VC_ROLE.value(role_t::VCR_COMMENT));
break;
case DT_XML_OPEN_TAG:
this->flush_values();
@ -395,6 +401,14 @@ public:
el.e_capture.c_begin,
el.e_capture.c_end,
section_types_t::multiline_string);
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start
+ el.e_capture.c_begin,
this->sw_range.lr_start
+ el.e_capture.c_end,
},
VC_ROLE.value(role_t::VCR_STRING));
}
}
this->sw_values.emplace_back(el);

@ -52,7 +52,7 @@ class generic_log_format : public log_format {
pcre_format(
"^(?:\\*\\*\\*\\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24})(.*)"),
pcre_format(
"^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\dTZ: +/\\-,\\.-]+)([^:]+)"),
R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{3,6})?)?|Z|[+\-]\d{2}:?\d{2}|[A-Z]{3,4})+)(?:\s+|:)([^:]+))"),
pcre_format(
"^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) (.*)"),

@ -335,7 +335,9 @@ logfile_sub_source::text_value_for_line(textview_curses& tc,
if (format->lf_timestamp_flags & ETF_ZONE_SET
&& format->lf_date_time.dts_zoned_to_local)
{
adjusted_tm.et_flags &= ~ETF_Z_IS_UTC;
if (format->lf_timestamp_flags & ETF_Z_IS_UTC) {
adjusted_tm.et_flags &= ~ETF_Z_IS_UTC;
}
}
adjusted_tm.et_gmtoff
= format->lf_date_time.dts_local_offset_cache;

@ -32,13 +32,38 @@
#include "base/attr_line.builder.hh"
#include "base/itertools.hh"
#include "base/lnav_log.hh"
#include "base/map_util.hh"
#include "document.sections.hh"
#include "pcrepp/pcre2pp.hh"
#include "pugixml/pugixml.hpp"
#include "readline_highlighters.hh"
#include "text_format.hh"
#include "textfile_highlighters.hh"
#include "view_curses.hh"
using namespace lnav::roles::literals;
// Lookup table from a language name, held as a string_fragment, to the
// text_format_t to use for text written in that language.  Both "c" and
// "c++" select TF_C_LIKE; names that are not listed here have no entry.
static const std::map<string_fragment, text_format_t> CODE_NAME_TO_TEXT_FORMAT
= {
{"c"_frag, text_format_t::TF_C_LIKE},
{"c++"_frag, text_format_t::TF_C_LIKE},
{"java"_frag, text_format_t::TF_JAVA},
{"python"_frag, text_format_t::TF_PYTHON},
{"rust"_frag, text_format_t::TF_RUST},
{"toml"_frag, text_format_t::TF_TOML},
{"yaml"_frag, text_format_t::TF_YAML},
{"xml"_frag, text_format_t::TF_XML},
};
/**
 * Create a highlight map and let setup_highlights() fill it in.
 *
 * @return The highlight_map_t as left by setup_highlights().
 */
static highlight_map_t
get_highlight_map()
{
    highlight_map_t highlights;

    setup_highlights(highlights);

    return highlights;
}
void
md2attr_line::flush_footnotes()
{
@ -191,7 +216,23 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl)
auto lang_sf = string_fragment::from_bytes(code_detail->lang.text,
code_detail->lang.size);
if (lang_sf == "lnav") {
auto tf_opt = lnav::map::find(CODE_NAME_TO_TEXT_FORMAT, lang_sf);
if (tf_opt) {
static const auto highlighters = get_highlight_map();
lnav::document::discover_structure(
block_text, line_range{0, -1}, tf_opt.value());
for (const auto& hl_pair : highlighters) {
const auto& hl = hl_pair.second;
if (!hl.h_text_formats.empty()
&& hl.h_text_formats.count(tf_opt.value()) == 0)
{
continue;
}
hl.annotate(block_text, 0);
}
} else if (lang_sf == "lnav") {
readline_lnav_highlighter(block_text, block_text.length());
} else if (lang_sf == "sql" || lang_sf == "sqlite") {
readline_sqlite_highlighter(block_text, block_text.length());

@ -201,16 +201,6 @@ ftime_a(char* dst, off_t& off_inout, ssize_t len, const struct exttm& tm)
}
}
// Formatter for the zone abbreviation: when the timestamp carries the
// ETF_Z_IS_UTC flag the characters 'U', 'T', 'C' are handed to PTIME_APPEND
// in order; for any other timestamp nothing is emitted.
// NOTE(review): PTIME_APPEND is a macro defined elsewhere -- presumably it
// writes into dst at off_inout and bounds-checks against len; confirm.
inline void
ftime_Z(char* dst, off_t& off_inout, ssize_t len, const struct exttm& tm)
{
    if (!(tm.et_flags & ETF_Z_IS_UTC)) {
        return;
    }

    PTIME_APPEND('U');
    PTIME_APPEND('T');
    PTIME_APPEND('C');
}
inline void
ftime_b(char* dst, off_t& off_inout, ssize_t len, const struct exttm& tm)
{
@ -898,6 +888,14 @@ ptime_Z_upto(struct exttm* dst,
&& str[off_inout + 2] == 'C')
{
PTIME_CONSUME(3, { dst->et_flags |= ETF_ZONE_SET | ETF_Z_IS_UTC; });
dst->et_gmtoff = 0;
return true;
}
if (avail >= 3 && str[off_inout + 0] == 'G' && str[off_inout + 1] == 'M'
&& str[off_inout + 2] == 'T')
{
PTIME_CONSUME(3, { dst->et_flags |= ETF_ZONE_SET | ETF_Z_IS_GMT; });
dst->et_gmtoff = 0;
return true;
}
@ -912,11 +910,18 @@ ptime_Z_upto_end(struct exttm* dst,
{
auto avail = len - off_inout;
if (avail == 3 && str[off_inout + 0] == 'U' && str[off_inout + 1] == 'T'
if (avail >= 3 && str[off_inout + 0] == 'U' && str[off_inout + 1] == 'T'
&& str[off_inout + 2] == 'C')
{
PTIME_CONSUME(3, { dst->et_flags |= ETF_ZONE_SET | ETF_Z_IS_UTC; });
dst->et_gmtoff = 0;
return true;
}
if (avail >= 3 && str[off_inout + 0] == 'G' && str[off_inout + 1] == 'M'
&& str[off_inout + 2] == 'T')
{
PTIME_CONSUME(3, { dst->et_flags |= ETF_ZONE_SET | ETF_Z_IS_GMT; });
dst->et_gmtoff = 0;
return true;
}
@ -1006,6 +1011,22 @@ ftime_z(char* dst, off_t& off_inout, ssize_t len, const struct exttm& tm)
PTIME_APPEND('0' + ((mins / 1) % 10));
}
// Formatter for the zone field.  The first matching case wins:
//   1. ETF_Z_IS_UTC set  -> 'U', 'T', 'C' are passed to PTIME_APPEND;
//   2. ETF_Z_IS_GMT set  -> 'G', 'M', 'T' are passed to PTIME_APPEND;
//   3. ETF_ZONE_SET set  -> the work is delegated to ftime_z();
//   4. otherwise         -> nothing is emitted.
// NOTE(review): PTIME_APPEND is a macro defined elsewhere -- presumably it
// writes into dst at off_inout and bounds-checks against len; confirm.
inline void
ftime_Z(char* dst, off_t& off_inout, ssize_t len, const struct exttm& tm)
{
    if (tm.et_flags & ETF_Z_IS_UTC) {
        PTIME_APPEND('U');
        PTIME_APPEND('T');
        PTIME_APPEND('C');
        return;
    }

    if (tm.et_flags & ETF_Z_IS_GMT) {
        PTIME_APPEND('G');
        PTIME_APPEND('M');
        PTIME_APPEND('T');
        return;
    }

    if (tm.et_flags & ETF_ZONE_SET) {
        ftime_z(dst, off_inout, len, tm);
    }
}
inline bool
ptime_f(struct exttm* dst, const char* str, off_t& off_inout, ssize_t len)
{

@ -37,7 +37,8 @@ template<typename T, std::size_t N>
static std::shared_ptr<lnav::pcre2pp::code>
xpcre_compile(const T (&pattern)[N], int options = 0)
{
return lnav::pcre2pp::code::from_const(pattern, options).to_shared();
return lnav::pcre2pp::code::from_const(pattern, options | PCRE2_MULTILINE)
.to_shared();
}
void
@ -422,7 +423,11 @@ setup_highlights(highlight_map_t& hm)
hm[{highlight_source_t::INTERNAL, "0.comment"}]
= highlighter(
xpcre_compile(
R"((?<=[\s;])//.*|/\*.*\*/|\(\*.*\*\)|^#\s*(?!include|if|ifndef|elif|else|endif|error|pragma|define|undef).*|\s+#.*|dnl.*)"))
R"((?<=[\s;])//.*|/\*.*\*/|\(\*.*\*\)|^#\s*(?!include|if|ifndef|elif|else|endif|error|pragma|define|undef).*|dnl.*)"))
.with_nestable(false)
.with_role(role_t::VCR_COMMENT);
hm[{highlight_source_t::INTERNAL, "z.comment"}]
= highlighter(xpcre_compile(R"(\s+#.*)"))
.with_nestable(false)
.with_role(role_t::VCR_COMMENT);
hm[{highlight_source_t::INTERNAL, "javadoc"}]

@ -3,10 +3,15 @@ TIME_FORMATS = \
"@%@" \
"%Y-%m-%d %H:%M:%S.%f%z" \
"%Y-%m-%d %H:%M:%S,%f%z" \
"%Y-%m-%d %H:%M:%S.%f %Z" \
"%Y-%m-%d %H:%M:%S,%f %Z" \
"%Y-%m-%d %H:%M:%S,%L%z" \
"%Y-%m-%d %H:%M:%S,%L %z" \
"%Y-%m-%d %H:%M:%S.%L%z" \
"%Y-%m-%d %H:%M:%S.%L %z" \
"%Y-%m-%d %H:%M:%S.%L %Z" \
"%Y-%m-%d %H:%M:%S,%L" \
"%Y-%m-%d %H:%M:%S.%L" \
"%Y-%m-%d %H:%M:%S%z" \
"%Y-%m-%d %H:%M:%S %z" \
"%Y-%m-%d %H:%M:%S:%L" \

@ -1,5 +1,8 @@
<?xml version="1.0"?>
<catalog>
<!--
- A comment for testing.
-->
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
@ -7,7 +10,8 @@
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
with XML.
</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
@ -17,7 +21,8 @@
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
of the world.
</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
@ -27,7 +32,8 @@
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
foundation for a new society.
</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
@ -38,7 +44,8 @@
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
Ascendant.
</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
@ -48,7 +55,8 @@
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
Oberon's Legacy.
</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
@ -57,7 +65,8 @@
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
conference, tempers fly as feathers get ruffled.
</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
@ -66,7 +75,8 @@
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
thousand leagues beneath the sea.
</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
@ -75,7 +85,8 @@
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
centipedes, scorpions and other insects.
</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
@ -85,7 +96,8 @@
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
of being quantum.
</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
@ -94,7 +106,8 @@
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
detail in this deep programmer's reference.
</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
@ -104,7 +117,8 @@
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
SAX and more.
</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
@ -115,6 +129,7 @@
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
environment.
</description>
</book>
</catalog>

@ -1 +1 @@
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!

@ -1 +1 @@
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!

@ -1,2 +1,2 @@
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!

@ -1,3 +1,3 @@
2009-07-20 22:59:27,672:DEBUG:Hello, World!
How are you today?
2009-07-20 22:59:30,221:ERROR:Goodbye, World!
2009-07-20 22:59:30,221:ERROR:Goodbye, World!

@ -1 +1 @@
2014-10-08 16:56:38,344:WARN:foo bar baz
2014-10-08 16:56:38,344:WARN:foo bar baz

@ -1 +1 @@
2014-10-08 16:56:38,344:WARN:foo bar baz
2014-10-08 16:56:38,344:WARN:foo bar baz

@ -14,3 +14,31 @@
Goodbye, ▌World╏!
foo bar bar 
baz "xyz" 
/* 
 * This program prints "Hello, World!" 
 */ 
 
#include <stdio.h> 
 
int main() { 
 printf("Hello, World!\n"); 
} 
def hw(name): 
 """ 
 This function prints "Hello, <name>!" 
 """ 
 
 print(f"Hello, {name}!") # test comment 
<?xml version="1.0" encoding="utf-8" ?> 
<books> 
 <!-- Line comment --> 
 <book id="100"> 
 <author>Finnegan</author> 
 </book> 
</books> 

@ -46,6 +46,8 @@ static const char* GOOD_TIMES[] = {
"2022-08-27T17:22:01.694554+00:00",
"2022-08-27T17:22:01.694554+0000",
"2022-08-27T17:22:01.694554Z",
"2022-08-27 17:22:01.694554 UTC",
"2022-08-27 17:22:01.694554 GMT",
"2017 May 08 Mon 18:57:57.578",
"May 01 00:00:01",
"May 10 12:00:01",

@ -264,10 +264,10 @@ run_test ${lnav_test} -n \
check_output "time_offset in lnav_file table is not reordering?" <<EOF
Wed May 19 12:00:01 2021 line 1
/abc/def
Wed May 19 12:00:02 2021 line 2
Wed May 19 12:00:02 +0000 2021 line 2
Wed May 19 12:00:03 2021 line 3
/ghi/jkl
Wed May 19 12:00:04 2021 line 4
Wed May 19 12:00:04 +0000 2021 line 4
EOF

@ -18,3 +18,39 @@
</pre>
Goodbye, <span style="border-left: solid cyan; border-right: dashed green">World</span>!
```foolang
foo bar bar
baz "xyz"
```
```c
/*
* This program prints "Hello, World!"
*/
#include <stdio.h>
int main() {
printf("Hello, World!\n");
}
```
```python
def hw(name):
"""
This function prints "Hello, <name>!"
"""
print(f"Hello, {name}!") # test comment
```
```xml
<?xml version="1.0" encoding="utf-8" ?>
<books>
<!-- Line comment -->
<book id="100">
<author>Finnegan</author>
</book>
</books>
```

Loading…
Cancel
Save