[doc.sections] recognize diff sections

pull/1205/head
Tim Stack 8 months ago
parent 8ed0eaf0e7
commit 9663b1f49f

@@ -108,6 +108,12 @@ Features:
* Added the `config file-options` management command that
can be used to examine the options that will be applied
to a given file.
* When viewing a diff, the sections of the diff for each
file are recognized and shown in the breadcrumb bar, so
you can see which file the focused line is in. You can
also jump to a particular file by focusing on the
breadcrumb bar, selecting the crumb, and then selecting
the desired file.
Bug Fixes:
* Binary data piped into stdin should now be treated the same
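
To illustrate the entry above (a hand-written sample, not taken from
this patch): in a unified diff such as

    diff --git a/NEWS b/NEWS
    --- a/NEWS
    +++ b/NEWS
    @@ -4,6 +4,8 @@ lnav v0.8.1:
     * Log formats can now create SQL views ...

the `--- a/NEWS` / `+++ b/NEWS` pair becomes the file-level crumb
("NEWS") and the `@@ ... @@` heading becomes a nested crumb for the
hunk; the scanner and section changes below are what recognize these.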

@@ -87,7 +87,7 @@ time_fmts.cc: ptimec$(BUILD_EXEEXT)
if HAVE_RE2C
%.cc: %.re
$(RE2C_V)$(RE2C_CMD) --bit-vectors -W --tags -8 -o $@ $<
$(RE2C_V)$(RE2C_CMD) --bit-vectors -W -8 -o $@ $<
$(RE2C_V)test $@ -ef $(srcdir)/$*.cc || cp $@ $(srcdir)/$*.cc
endif

@@ -206,6 +206,12 @@ static struct {
{
"zwsp",
},
{
"dffi",
},
{
"dfch",
},
};
const char* DNT_NAMES[DNT_MAX - DNT_KEY] = {

@@ -102,9 +102,12 @@ enum data_token_t {
DT_GARBAGE,
DT_ZERO_WIDTH_SPACE,
DT_TERMINAL_MAX = DT_ZERO_WIDTH_SPACE + 1,
DT_DIFF_FILE_HEADER,
DT_DIFF_HUNK_HEADING,
DNT_KEY = 50,
DT_TERMINAL_MAX = DT_DIFF_HUNK_HEADING + 1,
DNT_KEY = 52,
DNT_PAIR,
DNT_VALUE,
DNT_ROW,
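
The two hunks above move in lock-step: `DNT_NAMES` is sized by
`DNT_MAX - DNT_KEY`, so adding the two terminal tokens requires bumping
`DNT_KEY` from 50 to 52, and the `dffi`/`dfch` entries keep the
terminal-name table aligned. A minimal sketch of that invariant
(illustrative names and values, not lnav's actual header):

    // Sketch only: terminals sit below DNT_KEY, non-terminals start at
    // DNT_KEY, and each name table is sized from those bounds.
    enum data_token_t {
        DT_WORD,                     // ...existing terminals...
        DT_DIFF_FILE_HEADER,         // new terminal -> name entry "dffi"
        DT_DIFF_HUNK_HEADING,        // new terminal -> name entry "dfch"
        DT_TERMINAL_MAX = DT_DIFF_HUNK_HEADING + 1,

        DNT_KEY = 52,                // was 50: two terminals were added
        DNT_PAIR,
        DNT_MAX,
    };

    // One entry per non-terminal; the size is only right because
    // DNT_KEY was bumped along with the new terminals.
    const char* DNT_NAMES[DNT_MAX - DNT_KEY] = {"key", "pair"};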

File diff suppressed because it is too large

@@ -100,6 +100,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
_YYCURSOR yyt2;
_YYCURSOR yyt3;
_YYCURSOR yyt4;
_YYCURSOR hunk_heading;
const YYCTYPE *YYLIMIT = (const unsigned char *) this->ds_input.end();
const YYCTYPE *YYMARKER = YYCURSOR;
@@ -112,7 +113,7 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
/*!re2c
re2c:yyfill:enable = 0;
re2c:flags:tags = 1;
re2c:tags = 1;
SPACE = [ \t\r];
ALPHA = [a-zA-Z];
@@ -245,7 +246,30 @@ nonstd::optional<data_scanner::tokenize_result> data_scanner::tokenize2(text_for
}
"\n"[A-Z][A-Z _\-0-9]+"\n" {
RET(DT_H1);
CAPTURE(DT_H1);
cap_inner.c_begin += 1;
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
"\ndiff --git "[^\n\x00]+"\n" {
CAPTURE(DT_H1);
cap_inner.c_begin += 1;
cap_inner.c_end = cap_inner.c_begin;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
"--- "[^\n\x00]+"\n+++ "[^\n\x00]+"\n" {
CAPTURE(DT_DIFF_FILE_HEADER);
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
"@@ -"[0-9]+","[0-9]+" +"[0-9]+","[0-9]+" @@ " @hunk_heading ([^\n\x00]+)"\n" {
CAPTURE(DT_DIFF_HUNK_HEADING);
cap_inner.c_begin = hunk_heading.val - this->ds_input.udata();
cap_inner.c_end -= 1;
return tokenize_result{token_out, cap_all, cap_inner, this->ds_input.data()};
}
ESC"["[0-9=;?]*[a-zA-Z] {

@@ -173,16 +173,18 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb)
new_open_intervals.emplace_back(std::move(oi));
}
}
auto* parent_node = new_open_intervals.empty()
? root_node.get()
: new_open_intervals.back().oi_node.get();
new_open_intervals.emplace_back(role_num,
hdr_attr.sa_range.lr_start,
al.get_substring(hdr_attr.sa_range));
new_open_intervals.back().oi_node->hn_parent = parent_node;
new_open_intervals.back().oi_node->hn_start
= hdr_attr.sa_range.lr_start;
if (!hdr_attr.sa_range.empty()) {
auto* parent_node = new_open_intervals.empty()
? root_node.get()
: new_open_intervals.back().oi_node.get();
new_open_intervals.emplace_back(
role_num,
hdr_attr.sa_range.lr_start,
al.get_substring(hdr_attr.sa_range));
new_open_intervals.back().oi_node->hn_parent = parent_node;
new_open_intervals.back().oi_node->hn_start
= hdr_attr.sa_range.lr_start;
}
open_intervals = std::move(new_open_intervals);
}
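
A plausible reading of the new `empty()` guard (an inference from this
patch, not a comment in it): the `diff --git` scanner rule above
deliberately collapses its inner capture to zero length,

    // from the "diff --git" rule: the line itself carries no heading
    // text, so the inner capture is made empty...
    cap_inner.c_end = cap_inner.c_begin;

so `discover_metadata_int()` now skips opening a section node for a
zero-length heading range rather than creating one with a blank name.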
@@ -267,9 +269,8 @@ public:
metadata walk()
{
metadata_builder mb;
size_t garbage_count = 0;
while (garbage_count < 1000) {
while (true) {
auto tokenize_res
= this->sw_scanner.tokenize2(this->sw_text_format);
if (!tokenize_res) {
@@ -279,11 +280,12 @@ public:
auto dt = tokenize_res->tr_token;
element el(dt, tokenize_res->tr_capture);
const auto& inner_cap = tokenize_res->tr_inner_capture;
#if 0
log_debug("tok %s %s",
data_scanner::token2name(dt),
tokenize_res->to_string().c_str());
printf("tok %s %s\n",
data_scanner::token2name(dt),
tokenize_res->to_string().c_str());
#endif
if (dt != DT_WHITE) {
this->sw_at_start = false;
@@ -339,13 +341,54 @@ public:
case DT_H1: {
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start + el.e_capture.c_begin + 1,
this->sw_range.lr_start + el.e_capture.c_end - 1,
this->sw_range.lr_start + inner_cap.c_begin,
this->sw_range.lr_start + inner_cap.c_end,
},
VC_ROLE.value(role_t::VCR_H1));
this->sw_line_number += 2;
break;
}
case DT_DIFF_FILE_HEADER: {
auto sf = this->sw_scanner.to_string_fragment(inner_cap);
auto split_res = sf.split_pair(string_fragment::tag1{'\n'});
auto file1 = split_res->first.consume_n(4).value();
auto file2 = split_res->second.consume_n(4).value();
if ((file1 == "/dev/null" || file1.startswith("a/"))
&& file2.startswith("b/"))
{
if (file1 != "/dev/null") {
file1 = file1.consume_n(2).value();
}
file2 = file2.consume_n(2).value();
}
if (file1 == "/dev/null" || file1 == file2) {
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start + file2.sf_begin,
this->sw_range.lr_start + file2.sf_end,
},
VC_ROLE.value(role_t::VCR_H1));
} else {
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start + inner_cap.c_begin,
this->sw_range.lr_start + inner_cap.c_end,
},
VC_ROLE.value(role_t::VCR_H1));
}
this->sw_line_number += 2;
break;
}
case DT_DIFF_HUNK_HEADING: {
this->sw_line.get_attrs().emplace_back(
line_range{
this->sw_range.lr_start + inner_cap.c_begin,
this->sw_range.lr_start + inner_cap.c_end,
},
VC_ROLE.value(role_t::VCR_H2));
this->sw_line_number += 1;
break;
}
case DT_LCURLY:
case DT_LSQUARE:
case DT_LPAREN: {
@@ -426,9 +469,6 @@ public:
case DT_ZERO_WIDTH_SPACE:
break;
default:
if (dt == DT_GARBAGE) {
garbage_count += 1;
}
if (dt == DT_QUOTED_STRING) {
auto quoted_sf = tokenize_res->to_string_fragment();

@@ -1091,10 +1091,6 @@ external_log_format::scan(logfile& lf,
yajl_handle handle = this->jlf_yajl_handle.get();
json_log_userdata jlu(sbr, &sbc);
if (!this->lf_specialized && dst.size() >= 3) {
return log_format::scan_no_match{"file is not JSON-lines"};
}
if (li.li_partial) {
log_debug("skipping partial line at offset %d",
li.li_file_range.fr_offset);

@@ -435,7 +435,7 @@ logfile::process_prefix(shared_buffer_ref& sbr,
* written out at the same time as the last one, so we need to
* go back and update everything.
*/
auto& last_line = this->lf_index[this->lf_index.size() - 1];
auto& last_line = this->lf_index.back();
for (size_t lpc = 0; lpc < this->lf_index.size() - 1; lpc++) {
if (this->lf_format->lf_multiline) {

@@ -141,6 +141,59 @@ DESCRIPTION
});
}
TEST_CASE("lnav::document::sections::doc for diff")
{
attr_line_t INPUT = R"(
[sql] add json_group_object aggregate function
diff --git a/NEWS b/NEWS
index d239d2f..7a06070 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,8 @@ lnav v0.8.1:
* Log formats can now create SQL views and execute other statements
by adding '.sql' files to their format directories. The SQL scripts
will be executed on startup.
+ * Added a 'json_group_object' aggregate SQL function that collects values
+ from a GROUP BY query into a JSON object.
Interface Changes:
* The 'o/O' hotkeys have been reassigned to navigate through log
diff --git a/configure.ac b/configure.ac
index 718a2d4..10f5580 100644
--- a/configure.ac
+++ b/configure.ac
@@ -39,8 +39,8 @@ AC_PROG_CXX
CPPFLAGS="$CPPFLAGS -D_ISOC99_SOURCE -D__STDC_LIMIT_MACROS"
-# CFLAGS=`echo $CFLAGS | sed 's/-O2//g'`
-# CXXFLAGS=`echo $CXXFLAGS | sed 's/-O2//g'`
+CFLAGS=`echo $CFLAGS | sed 's/-O2//g'`
+CXXFLAGS=`echo $CXXFLAGS | sed 's/-O2//g'`
AC_ARG_VAR(SFTP_TEST_URL)
)";
auto meta = lnav::document::discover_structure(INPUT, line_range{0, -1});
meta.m_sections_tree.visit_all([](const auto& intv) {
auto ser = intv.value.match(
[](const std::string& name) { return name; },
[](const size_t index) { return fmt::format("{}", index); });
printf("interval %d:%d %s\n", intv.start, intv.stop, ser.c_str());
});
lnav::document::hier_node::depth_first(
meta.m_sections_root.get(), [](const auto* node) {
printf("node %p %d\n", node, node->hn_start);
for (const auto& pair : node->hn_named_children) {
printf(" child: %p %s\n", pair.second, pair.first.c_str());
}
});
CHECK(meta.m_sections_root->hn_named_children.size() == 2);
}
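
For orientation, the hierarchy this test expects looks roughly like the
following (file sections named from the `---`/`+++` headers with the
`a/`/`b/` prefixes stripped, hunk headings nested beneath; a sketch of
the expected shape, not the test's actual output):

    root
    +-- NEWS
    |   `-- lnav v0.8.1:
    `-- configure.ac
        `-- AC_PROG_CXX

which is why the final CHECK expects exactly two named children on the
root, one per file in the embedded diff.
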
TEST_CASE("lnav::document::sections::sql")
{
attr_line_t INPUT
