From 24b75e139a95e7f13df3bff274e8476407373fb8 Mon Sep 17 00:00:00 2001 From: rwxrob Date: Mon, 25 Apr 2022 22:51:29 -0400 Subject: [PATCH] Overhaul the Block parser --- z/mark.go | 156 ++++++++++++++++++++++++++----------------------- z/mark_test.go | 38 ++++++++---- 2 files changed, 111 insertions(+), 83 deletions(-) diff --git a/z/mark.go b/z/mark.go index dff87be..f5a6619 100644 --- a/z/mark.go +++ b/z/mark.go @@ -4,6 +4,7 @@ import ( "fmt" "regexp" "strconv" + "unicode" "github.com/rwxrob/scan" "github.com/rwxrob/term" @@ -67,109 +68,113 @@ func (s *Block) String() string { return string(s.V) } func Blocks(in string) []*Block { var blocks []*Block - verbpre := regexp.MustCompile(` {4,}`) + s := scan.R{Buf: []byte(to.Dedented(in))} + //s.Trace++ -MAIN: for s.Scan() { - if s.Rune == '*' { // bulleted list - if !s.Peek(" ") { - goto PARA - } - m := s.Pos - 1 + // bulleted list + if s.Peek("* ") { + var beg, end int + beg = s.Pos - 1 + for s.Scan() { if s.Peek("\n\n") { - blocks = append(blocks, &Block{Bulleted, s.Buf[m:s.Pos]}) - s.Pos += 2 - continue MAIN + end = s.Pos - 1 + s.Pos++ + break } } + + blocks = append(blocks, &Block{Bulleted, s.Buf[beg:end]}) + continue } - if s.Rune == '1' { // numbered list - if !s.Peek(". ") { - goto PARA - } - m := s.Pos - 1 + // numbered list + if s.Peek("1. ") { + var beg, end int + beg = s.Pos - 1 + for s.Scan() { if s.Peek("\n\n") { - blocks = append(blocks, &Block{Numbered, s.Buf[m:s.Pos]}) - s.Pos += 2 - continue MAIN + end = s.Pos - 1 + s.Pos++ + break } } + + blocks = append(blocks, &Block{Numbered, s.Buf[beg:end]}) + continue } - if s.Rune == ' ' { // verbatim - s.Pos -= 1 - ln := s.Match(verbpre) - s.Pos++ + // verbatim + if ln := s.Match(begVerbatim); ln >= 4 { + s.Pos-- - if ln < 0 { - continue - } - pre := s.Buf[s.Pos-1 : s.Pos+ln-1] - s.Pos += len(pre) - 1 + var beg, end int + beg = s.Pos - var block []byte for s.Scan() { - if s.Rune == '\n' { - - // add in indented lines - if s.Peek(string(pre)) { - block = append(block, '\n') - s.Pos += len(pre) - continue - } - - // end of the block - blocks = append(blocks, &Block{Verbatim, block}) - continue MAIN + if s.Peek("\n\n") { + s.Pos++ + end = s.Pos - 2 + break } - block = append(block, []byte(string(s.Rune))...) } - } - - if s.Rune == '\n' || s.Rune == '\r' || s.Rune == '\t' { + dedented := to.Dedented(string(s.Buf[beg:end])) + blocks = append(blocks, &Block{Verbatim, []byte(dedented)}) continue } - PARA: - { - var block []byte - block = append(block, []byte(string(s.Rune))...) + // paragraph (default) + if !unicode.IsSpace(s.Rune) { + + buf := []byte(string(s.Rune)) + for s.Scan() { if s.Peek("\n\n") { - block = append(block, []byte(string(s.Rune))...) - blocks = append(blocks, &Block{Paragraph, block}) - s.Scan() - s.Scan() - continue MAIN + s.Pos++ + break } - if s.Rune == '\n' || s.Rune == '\r' { - block = append(block, ' ') + if ln := s.Match(ws); ln > 0 { + buf = append(buf, ' ') + s.Pos += ln - 1 continue } - block = append(block, []byte(string(s.Rune))...) - } + buf = append(buf, []byte(string(s.Rune))...) - if len(block) > 0 { - blocks = append(blocks, &Block{Paragraph, block}) } - } // PARA + if len(buf) > 0 { + blocks = append(blocks, &Block{Paragraph, buf}) + } + continue + } } return blocks } +// don't expose these until mark has own package + +var begVerbatim = regexp.MustCompile(`^ {4,}`) +var ws = regexp.MustCompile(`^[\s\r\n]+`) +var begUnder = regexp.MustCompile(`^<\p{L}`) +var endUnder = regexp.MustCompile(`^\p{L}>`) +var begBoldItalic = regexp.MustCompile(`^\*{3}\p{L}`) +var endBoldItalic = regexp.MustCompile(`^\p{L}\*{3}`) +var begBold = regexp.MustCompile(`^\*{2}\p{L}`) +var endBold = regexp.MustCompile(`^\p{L}\*{2}`) +var begItalic = regexp.MustCompile(`^\*\p{L}`) +var endItalic = regexp.MustCompile(`^\p{L}\*`) + // Emph renders BonzaiMark emphasis spans specifically for // VT100-compatible terminals (which almost all are today): // @@ -187,13 +192,15 @@ func Emph[T string | []byte | []rune](buf T) string { for s.Scan() { // - if s.Rune == '<' { + if s.Match(begUnder) > 0 { nbuf = append(nbuf, '<') nbuf = append(nbuf, []rune(term.Under)...) for s.Scan() { - if s.Rune == '>' { + if s.Match(endUnder) > 0 { + nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, []rune(term.Reset)...) nbuf = append(nbuf, '>') + s.Pos++ break } nbuf = append(nbuf, s.Rune) @@ -202,13 +209,15 @@ func Emph[T string | []byte | []rune](buf T) string { } // ***BoldItalic*** - if s.Rune == '*' && s.Peek("**") { - s.Pos += 2 + if s.Match(begBoldItalic) > 0 { + s.Scan() + s.Scan() nbuf = append(nbuf, []rune(term.BoldItalic)...) for s.Scan() { - if s.Rune == '*' && s.Peek("**") { - s.Pos += 2 + if s.Match(endBoldItalic) > 0 { + nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, []rune(term.Reset)...) + s.Pos += 3 break } nbuf = append(nbuf, s.Rune) @@ -217,12 +226,13 @@ func Emph[T string | []byte | []rune](buf T) string { } // **Bold** - if s.Rune == '*' && s.Peek("*") { - s.Pos++ + if s.Match(begBold) > 0 { + s.Pos += 1 nbuf = append(nbuf, []rune(term.Bold)...) for s.Scan() { - if s.Rune == '*' && s.Peek("*") { - s.Pos++ + if s.Match(endBold) > 0 { + nbuf = append(nbuf, s.Rune) + s.Pos += 2 nbuf = append(nbuf, []rune(term.Reset)...) break } @@ -232,11 +242,13 @@ func Emph[T string | []byte | []rune](buf T) string { } // *Italic* - if s.Rune == '*' { + if s.Match(begItalic) > 0 { nbuf = append(nbuf, []rune(term.Italic)...) for s.Scan() { - if s.Rune == '*' { + if s.Match(endItalic) > 0 { + nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, []rune(term.Reset)...) + s.Pos++ break } nbuf = append(nbuf, s.Rune) diff --git a/z/mark_test.go b/z/mark_test.go index f092db1..96391f9 100644 --- a/z/mark_test.go +++ b/z/mark_test.go @@ -4,6 +4,7 @@ import ( "fmt" Z "github.com/rwxrob/bonzai/z" + // "github.com/rwxrob/scan" "github.com/rwxrob/term" ) @@ -13,6 +14,7 @@ func init() { term.BoldItalic = `` term.Under = `` term.Reset = `` + // scan.Trace++ } func ExampleLines() { @@ -37,12 +39,13 @@ func ExampleBlocks_bulleted() { ` blocks := Z.Blocks(in) - fmt.Printf("%v %q\n", blocks[1].T, blocks[1]) - fmt.Printf("%v %q\n", blocks[2].T, blocks[2]) + fmt.Printf("%q\n", blocks[0]) + fmt.Printf("%q\n", blocks[1]) //Output: - // 3 "* another block\n* here" - // 1 "*boldnotbullet*" + // "* some thing\n* another thing" + // "* another block\n* here" + } func ExampleBlocks_numbered() { @@ -56,10 +59,14 @@ func ExampleBlocks_numbered() { ` - fmt.Printf("%q\n", Z.Blocks(in)[1]) + blocks := Z.Blocks(in) + fmt.Printf("%q\n", blocks[0]) + fmt.Printf("%q\n", blocks[1]) //Output: + // "1. some thing\n2. another thing" // "1. another block\n2. here" + } func ExampleBlocks_paragraph() { @@ -73,17 +80,19 @@ func ExampleBlocks_paragraph() { here on multiple lines. - And another one here + And another one here with just a bit more. ` - fmt.Printf("%q\n", Z.Blocks(in)[0]) - fmt.Printf("%q\n", Z.Blocks(in)[1]) + blocks := Z.Blocks(in) + fmt.Printf("%q\n", blocks[0]) + fmt.Printf("%q\n", blocks[1]) // Output: // "Simple paragraph here on multiple lines." // "And another one here with just a bit more." + } func ExampleBlocks_verbatim() { @@ -107,9 +116,10 @@ func ExampleBlocks_verbatim() { ` - fmt.Printf("%q\n", Z.Blocks(in)[0]) - fmt.Printf("%q\n", Z.Blocks(in)[1]) - fmt.Printf("%q\n", Z.Blocks(in)[2]) + blocks := Z.Blocks(in) + fmt.Printf("%q\n", blocks[0]) + fmt.Printf("%q\n", blocks[1]) + fmt.Printf("%q\n", blocks[2]) //Output: // "Must have another block type first." @@ -124,8 +134,10 @@ func ExampleEmph_under() { term.Under = `` term.Reset = `` fmt.Println(Z.Emph("")) + fmt.Println(Z.Emph("< UNDER >")) // Output: // <UNDER> + // < UNDER > } func ExampleEmph_boldItalic() { @@ -140,16 +152,20 @@ func ExampleEmph_bold() { term.Bold = `` term.Reset = `` fmt.Println(Z.Emph("**Bold**")) + fmt.Println(Z.Emph("** Bold **")) // Output: // Bold + // ** Bold ** } func ExampleEmph_italic() { term.Italic = `` term.Reset = `` fmt.Println(Z.Emph("*Italic*")) + fmt.Println(Z.Emph("* Italic *")) // Output: // Italic + // * Italic * } func ExampleEmph_basics() {