Overhaul the Block parser

pull/97/head
rwxrob 2 years ago
parent a50f023f74
commit 24b75e139a
No known key found for this signature in database
GPG Key ID: 2B9111F33082AE77

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"regexp" "regexp"
"strconv" "strconv"
"unicode"
"github.com/rwxrob/scan" "github.com/rwxrob/scan"
"github.com/rwxrob/term" "github.com/rwxrob/term"
@ -67,109 +68,113 @@ func (s *Block) String() string { return string(s.V) }
func Blocks(in string) []*Block { func Blocks(in string) []*Block {
var blocks []*Block var blocks []*Block
verbpre := regexp.MustCompile(` {4,}`)
s := scan.R{Buf: []byte(to.Dedented(in))} s := scan.R{Buf: []byte(to.Dedented(in))}
//s.Trace++
MAIN:
for s.Scan() { for s.Scan() {
if s.Rune == '*' { // bulleted list // bulleted list
if !s.Peek(" ") { if s.Peek("* ") {
goto PARA var beg, end int
} beg = s.Pos - 1
m := s.Pos - 1
for s.Scan() { for s.Scan() {
if s.Peek("\n\n") { if s.Peek("\n\n") {
blocks = append(blocks, &Block{Bulleted, s.Buf[m:s.Pos]}) end = s.Pos - 1
s.Pos += 2 s.Pos++
continue MAIN break
} }
} }
blocks = append(blocks, &Block{Bulleted, s.Buf[beg:end]})
continue
} }
if s.Rune == '1' { // numbered list // numbered list
if !s.Peek(". ") { if s.Peek("1. ") {
goto PARA var beg, end int
} beg = s.Pos - 1
m := s.Pos - 1
for s.Scan() { for s.Scan() {
if s.Peek("\n\n") { if s.Peek("\n\n") {
blocks = append(blocks, &Block{Numbered, s.Buf[m:s.Pos]}) end = s.Pos - 1
s.Pos += 2 s.Pos++
continue MAIN break
} }
} }
blocks = append(blocks, &Block{Numbered, s.Buf[beg:end]})
continue
} }
if s.Rune == ' ' { // verbatim // verbatim
s.Pos -= 1 if ln := s.Match(begVerbatim); ln >= 4 {
ln := s.Match(verbpre) s.Pos--
s.Pos++
if ln < 0 { var beg, end int
continue beg = s.Pos
}
pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
s.Pos += len(pre) - 1
var block []byte
for s.Scan() { for s.Scan() {
if s.Rune == '\n' { if s.Peek("\n\n") {
s.Pos++
// add in indented lines end = s.Pos - 2
if s.Peek(string(pre)) { break
block = append(block, '\n')
s.Pos += len(pre)
continue
}
// end of the block
blocks = append(blocks, &Block{Verbatim, block})
continue MAIN
} }
block = append(block, []byte(string(s.Rune))...)
} }
} dedented := to.Dedented(string(s.Buf[beg:end]))
blocks = append(blocks, &Block{Verbatim, []byte(dedented)})
if s.Rune == '\n' || s.Rune == '\r' || s.Rune == '\t' {
continue continue
} }
PARA: // paragraph (default)
{ if !unicode.IsSpace(s.Rune) {
var block []byte
block = append(block, []byte(string(s.Rune))...) buf := []byte(string(s.Rune))
for s.Scan() { for s.Scan() {
if s.Peek("\n\n") { if s.Peek("\n\n") {
block = append(block, []byte(string(s.Rune))...) s.Pos++
blocks = append(blocks, &Block{Paragraph, block}) break
s.Scan()
s.Scan()
continue MAIN
} }
if s.Rune == '\n' || s.Rune == '\r' { if ln := s.Match(ws); ln > 0 {
block = append(block, ' ') buf = append(buf, ' ')
s.Pos += ln - 1
continue continue
} }
block = append(block, []byte(string(s.Rune))...) buf = append(buf, []byte(string(s.Rune))...)
}
if len(block) > 0 {
blocks = append(blocks, &Block{Paragraph, block})
} }
} // PARA if len(buf) > 0 {
blocks = append(blocks, &Block{Paragraph, buf})
}
continue
}
} }
return blocks return blocks
} }
// don't expose these until mark has its own package
var begVerbatim = regexp.MustCompile(`^ {4,}`)
var ws = regexp.MustCompile(`^[\s\r\n]+`)
var begUnder = regexp.MustCompile(`^<\p{L}`)
var endUnder = regexp.MustCompile(`^\p{L}>`)
var begBoldItalic = regexp.MustCompile(`^\*{3}\p{L}`)
var endBoldItalic = regexp.MustCompile(`^\p{L}\*{3}`)
var begBold = regexp.MustCompile(`^\*{2}\p{L}`)
var endBold = regexp.MustCompile(`^\p{L}\*{2}`)
var begItalic = regexp.MustCompile(`^\*\p{L}`)
var endItalic = regexp.MustCompile(`^\p{L}\*`)
// Emph renders BonzaiMark emphasis spans specifically for // Emph renders BonzaiMark emphasis spans specifically for
// VT100-compatible terminals (which almost all are today): // VT100-compatible terminals (which almost all are today):
// //
@ -187,13 +192,15 @@ func Emph[T string | []byte | []rune](buf T) string {
for s.Scan() { for s.Scan() {
// <under> // <under>
if s.Rune == '<' { if s.Match(begUnder) > 0 {
nbuf = append(nbuf, '<') nbuf = append(nbuf, '<')
nbuf = append(nbuf, []rune(term.Under)...) nbuf = append(nbuf, []rune(term.Under)...)
for s.Scan() { for s.Scan() {
if s.Rune == '>' { if s.Match(endUnder) > 0 {
nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...) nbuf = append(nbuf, []rune(term.Reset)...)
nbuf = append(nbuf, '>') nbuf = append(nbuf, '>')
s.Pos++
break break
} }
nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, s.Rune)
@ -202,13 +209,15 @@ func Emph[T string | []byte | []rune](buf T) string {
} }
// ***BoldItalic*** // ***BoldItalic***
if s.Rune == '*' && s.Peek("**") { if s.Match(begBoldItalic) > 0 {
s.Pos += 2 s.Scan()
s.Scan()
nbuf = append(nbuf, []rune(term.BoldItalic)...) nbuf = append(nbuf, []rune(term.BoldItalic)...)
for s.Scan() { for s.Scan() {
if s.Rune == '*' && s.Peek("**") { if s.Match(endBoldItalic) > 0 {
s.Pos += 2 nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...) nbuf = append(nbuf, []rune(term.Reset)...)
s.Pos += 3
break break
} }
nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, s.Rune)
@ -217,12 +226,13 @@ func Emph[T string | []byte | []rune](buf T) string {
} }
// **Bold** // **Bold**
if s.Rune == '*' && s.Peek("*") { if s.Match(begBold) > 0 {
s.Pos++ s.Pos += 1
nbuf = append(nbuf, []rune(term.Bold)...) nbuf = append(nbuf, []rune(term.Bold)...)
for s.Scan() { for s.Scan() {
if s.Rune == '*' && s.Peek("*") { if s.Match(endBold) > 0 {
s.Pos++ nbuf = append(nbuf, s.Rune)
s.Pos += 2
nbuf = append(nbuf, []rune(term.Reset)...) nbuf = append(nbuf, []rune(term.Reset)...)
break break
} }
@ -232,11 +242,13 @@ func Emph[T string | []byte | []rune](buf T) string {
} }
// *Italic* // *Italic*
if s.Rune == '*' { if s.Match(begItalic) > 0 {
nbuf = append(nbuf, []rune(term.Italic)...) nbuf = append(nbuf, []rune(term.Italic)...)
for s.Scan() { for s.Scan() {
if s.Rune == '*' { if s.Match(endItalic) > 0 {
nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...) nbuf = append(nbuf, []rune(term.Reset)...)
s.Pos++
break break
} }
nbuf = append(nbuf, s.Rune) nbuf = append(nbuf, s.Rune)

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
Z "github.com/rwxrob/bonzai/z" Z "github.com/rwxrob/bonzai/z"
// "github.com/rwxrob/scan"
"github.com/rwxrob/term" "github.com/rwxrob/term"
) )
@ -13,6 +14,7 @@ func init() {
term.BoldItalic = `<bolditalic>` term.BoldItalic = `<bolditalic>`
term.Under = `<under>` term.Under = `<under>`
term.Reset = `<reset>` term.Reset = `<reset>`
// scan.Trace++
} }
func ExampleLines() { func ExampleLines() {
@ -37,12 +39,13 @@ func ExampleBlocks_bulleted() {
` `
blocks := Z.Blocks(in) blocks := Z.Blocks(in)
fmt.Printf("%v %q\n", blocks[1].T, blocks[1]) fmt.Printf("%q\n", blocks[0])
fmt.Printf("%v %q\n", blocks[2].T, blocks[2]) fmt.Printf("%q\n", blocks[1])
//Output: //Output:
// 3 "* another block\n* here" // "* some thing\n* another thing"
// 1 "*boldnotbullet*" // "* another block\n* here"
} }
func ExampleBlocks_numbered() { func ExampleBlocks_numbered() {
@ -56,10 +59,14 @@ func ExampleBlocks_numbered() {
` `
fmt.Printf("%q\n", Z.Blocks(in)[1]) blocks := Z.Blocks(in)
fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
//Output: //Output:
// "1. some thing\n2. another thing"
// "1. another block\n2. here" // "1. another block\n2. here"
} }
func ExampleBlocks_paragraph() { func ExampleBlocks_paragraph() {
@ -73,17 +80,19 @@ func ExampleBlocks_paragraph() {
here on multiple here on multiple
lines. lines.
And another one here And another one here
with just a bit more. with just a bit more.
` `
fmt.Printf("%q\n", Z.Blocks(in)[0]) blocks := Z.Blocks(in)
fmt.Printf("%q\n", Z.Blocks(in)[1]) fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
// Output: // Output:
// "Simple paragraph here on multiple lines." // "Simple paragraph here on multiple lines."
// "And another one here with just a bit more." // "And another one here with just a bit more."
} }
func ExampleBlocks_verbatim() { func ExampleBlocks_verbatim() {
@ -107,9 +116,10 @@ func ExampleBlocks_verbatim() {
` `
fmt.Printf("%q\n", Z.Blocks(in)[0]) blocks := Z.Blocks(in)
fmt.Printf("%q\n", Z.Blocks(in)[1]) fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", Z.Blocks(in)[2]) fmt.Printf("%q\n", blocks[1])
fmt.Printf("%q\n", blocks[2])
//Output: //Output:
// "Must have another block type first." // "Must have another block type first."
@ -124,8 +134,10 @@ func ExampleEmph_under() {
term.Under = `<under>` term.Under = `<under>`
term.Reset = `<reset>` term.Reset = `<reset>`
fmt.Println(Z.Emph("<UNDER>")) fmt.Println(Z.Emph("<UNDER>"))
fmt.Println(Z.Emph("< UNDER >"))
// Output: // Output:
// <<under>UNDER<reset>> // <<under>UNDER<reset>>
// < UNDER >
} }
func ExampleEmph_boldItalic() { func ExampleEmph_boldItalic() {
@ -140,16 +152,20 @@ func ExampleEmph_bold() {
term.Bold = `<bold>` term.Bold = `<bold>`
term.Reset = `<reset>` term.Reset = `<reset>`
fmt.Println(Z.Emph("**Bold**")) fmt.Println(Z.Emph("**Bold**"))
fmt.Println(Z.Emph("** Bold **"))
// Output: // Output:
// <bold>Bold<reset> // <bold>Bold<reset>
// ** Bold **
} }
func ExampleEmph_italic() { func ExampleEmph_italic() {
term.Italic = `<italic>` term.Italic = `<italic>`
term.Reset = `<reset>` term.Reset = `<reset>`
fmt.Println(Z.Emph("*Italic*")) fmt.Println(Z.Emph("*Italic*"))
fmt.Println(Z.Emph("* Italic *"))
// Output: // Output:
// <italic>Italic<reset> // <italic>Italic<reset>
// * Italic *
} }
func ExampleEmph_basics() { func ExampleEmph_basics() {

Loading…
Cancel
Save