Overhaul the Block parser

pull/97/head
rwxrob 2 years ago
parent a50f023f74
commit 24b75e139a
No known key found for this signature in database
GPG Key ID: 2B9111F33082AE77

@ -4,6 +4,7 @@ import (
"fmt"
"regexp"
"strconv"
"unicode"
"github.com/rwxrob/scan"
"github.com/rwxrob/term"
@ -67,109 +68,113 @@ func (s *Block) String() string { return string(s.V) }
func Blocks(in string) []*Block {
var blocks []*Block
verbpre := regexp.MustCompile(` {4,}`)
s := scan.R{Buf: []byte(to.Dedented(in))}
//s.Trace++
MAIN:
for s.Scan() {
if s.Rune == '*' { // bulleted list
if !s.Peek(" ") {
goto PARA
}
m := s.Pos - 1
// bulleted list
if s.Peek("* ") {
var beg, end int
beg = s.Pos - 1
for s.Scan() {
if s.Peek("\n\n") {
blocks = append(blocks, &Block{Bulleted, s.Buf[m:s.Pos]})
s.Pos += 2
continue MAIN
end = s.Pos - 1
s.Pos++
break
}
}
blocks = append(blocks, &Block{Bulleted, s.Buf[beg:end]})
continue
}
if s.Rune == '1' { // numbered list
if !s.Peek(". ") {
goto PARA
}
m := s.Pos - 1
// numbered list
if s.Peek("1. ") {
var beg, end int
beg = s.Pos - 1
for s.Scan() {
if s.Peek("\n\n") {
blocks = append(blocks, &Block{Numbered, s.Buf[m:s.Pos]})
s.Pos += 2
continue MAIN
end = s.Pos - 1
s.Pos++
break
}
}
blocks = append(blocks, &Block{Numbered, s.Buf[beg:end]})
continue
}
if s.Rune == ' ' { // verbatim
s.Pos -= 1
ln := s.Match(verbpre)
s.Pos++
// verbatim
if ln := s.Match(begVerbatim); ln >= 4 {
s.Pos--
if ln < 0 {
continue
}
pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
s.Pos += len(pre) - 1
var beg, end int
beg = s.Pos
var block []byte
for s.Scan() {
if s.Rune == '\n' {
// add in indented lines
if s.Peek(string(pre)) {
block = append(block, '\n')
s.Pos += len(pre)
continue
}
// end of the block
blocks = append(blocks, &Block{Verbatim, block})
continue MAIN
if s.Peek("\n\n") {
s.Pos++
end = s.Pos - 2
break
}
block = append(block, []byte(string(s.Rune))...)
}
}
if s.Rune == '\n' || s.Rune == '\r' || s.Rune == '\t' {
dedented := to.Dedented(string(s.Buf[beg:end]))
blocks = append(blocks, &Block{Verbatim, []byte(dedented)})
continue
}
PARA:
{
var block []byte
block = append(block, []byte(string(s.Rune))...)
// paragraph (default)
if !unicode.IsSpace(s.Rune) {
buf := []byte(string(s.Rune))
for s.Scan() {
if s.Peek("\n\n") {
block = append(block, []byte(string(s.Rune))...)
blocks = append(blocks, &Block{Paragraph, block})
s.Scan()
s.Scan()
continue MAIN
s.Pos++
break
}
if s.Rune == '\n' || s.Rune == '\r' {
block = append(block, ' ')
if ln := s.Match(ws); ln > 0 {
buf = append(buf, ' ')
s.Pos += ln - 1
continue
}
block = append(block, []byte(string(s.Rune))...)
}
buf = append(buf, []byte(string(s.Rune))...)
if len(block) > 0 {
blocks = append(blocks, &Block{Paragraph, block})
}
} // PARA
if len(buf) > 0 {
blocks = append(blocks, &Block{Paragraph, buf})
}
continue
}
}
return blocks
}
// The patterns below are intentionally unexported; keep them private
// until mark has its own package. Every pattern is anchored with ^.
var (
	// begVerbatim matches a run of four or more spaces.
	begVerbatim = regexp.MustCompile(`^ {4,}`)

	// ws matches one or more whitespace characters, including carriage
	// returns and line feeds.
	ws = regexp.MustCompile(`^[\s\r\n]+`)

	// begUnder matches '<' followed by a letter; endUnder matches
	// a letter followed by '>'.
	begUnder = regexp.MustCompile(`^<\p{L}`)
	endUnder = regexp.MustCompile(`^\p{L}>`)

	// begBoldItalic matches three asterisks followed by a letter;
	// endBoldItalic matches a letter followed by three asterisks.
	begBoldItalic = regexp.MustCompile(`^\*{3}\p{L}`)
	endBoldItalic = regexp.MustCompile(`^\p{L}\*{3}`)

	// begBold matches two asterisks followed by a letter; endBold
	// matches a letter followed by two asterisks.
	begBold = regexp.MustCompile(`^\*{2}\p{L}`)
	endBold = regexp.MustCompile(`^\p{L}\*{2}`)

	// begItalic matches one asterisk followed by a letter; endItalic
	// matches a letter followed by one asterisk.
	begItalic = regexp.MustCompile(`^\*\p{L}`)
	endItalic = regexp.MustCompile(`^\p{L}\*`)
)
// Emph renders BonzaiMark emphasis spans specifically for
// VT100-compatible terminals (which almost all are today):
//
@ -187,13 +192,15 @@ func Emph[T string | []byte | []rune](buf T) string {
for s.Scan() {
// <under>
if s.Rune == '<' {
if s.Match(begUnder) > 0 {
nbuf = append(nbuf, '<')
nbuf = append(nbuf, []rune(term.Under)...)
for s.Scan() {
if s.Rune == '>' {
if s.Match(endUnder) > 0 {
nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...)
nbuf = append(nbuf, '>')
s.Pos++
break
}
nbuf = append(nbuf, s.Rune)
@ -202,13 +209,15 @@ func Emph[T string | []byte | []rune](buf T) string {
}
// ***BoldItalic***
if s.Rune == '*' && s.Peek("**") {
s.Pos += 2
if s.Match(begBoldItalic) > 0 {
s.Scan()
s.Scan()
nbuf = append(nbuf, []rune(term.BoldItalic)...)
for s.Scan() {
if s.Rune == '*' && s.Peek("**") {
s.Pos += 2
if s.Match(endBoldItalic) > 0 {
nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...)
s.Pos += 3
break
}
nbuf = append(nbuf, s.Rune)
@ -217,12 +226,13 @@ func Emph[T string | []byte | []rune](buf T) string {
}
// **Bold**
if s.Rune == '*' && s.Peek("*") {
s.Pos++
if s.Match(begBold) > 0 {
s.Pos += 1
nbuf = append(nbuf, []rune(term.Bold)...)
for s.Scan() {
if s.Rune == '*' && s.Peek("*") {
s.Pos++
if s.Match(endBold) > 0 {
nbuf = append(nbuf, s.Rune)
s.Pos += 2
nbuf = append(nbuf, []rune(term.Reset)...)
break
}
@ -232,11 +242,13 @@ func Emph[T string | []byte | []rune](buf T) string {
}
// *Italic*
if s.Rune == '*' {
if s.Match(begItalic) > 0 {
nbuf = append(nbuf, []rune(term.Italic)...)
for s.Scan() {
if s.Rune == '*' {
if s.Match(endItalic) > 0 {
nbuf = append(nbuf, s.Rune)
nbuf = append(nbuf, []rune(term.Reset)...)
s.Pos++
break
}
nbuf = append(nbuf, s.Rune)

@ -4,6 +4,7 @@ import (
"fmt"
Z "github.com/rwxrob/bonzai/z"
// "github.com/rwxrob/scan"
"github.com/rwxrob/term"
)
@ -13,6 +14,7 @@ func init() {
term.BoldItalic = `<bolditalic>`
term.Under = `<under>`
term.Reset = `<reset>`
// scan.Trace++
}
func ExampleLines() {
@ -37,12 +39,13 @@ func ExampleBlocks_bulleted() {
`
blocks := Z.Blocks(in)
fmt.Printf("%v %q\n", blocks[1].T, blocks[1])
fmt.Printf("%v %q\n", blocks[2].T, blocks[2])
fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
//Output:
// 3 "* another block\n* here"
// 1 "*boldnotbullet*"
// "* some thing\n* another thing"
// "* another block\n* here"
}
func ExampleBlocks_numbered() {
@ -56,10 +59,14 @@ func ExampleBlocks_numbered() {
`
fmt.Printf("%q\n", Z.Blocks(in)[1])
blocks := Z.Blocks(in)
fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
//Output:
// "1. some thing\n2. another thing"
// "1. another block\n2. here"
}
func ExampleBlocks_paragraph() {
@ -73,17 +80,19 @@ func ExampleBlocks_paragraph() {
here on multiple
lines.
And another one here
And another one here
with just a bit more.
`
fmt.Printf("%q\n", Z.Blocks(in)[0])
fmt.Printf("%q\n", Z.Blocks(in)[1])
blocks := Z.Blocks(in)
fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
// Output:
// "Simple paragraph here on multiple lines."
// "And another one here with just a bit more."
}
func ExampleBlocks_verbatim() {
@ -107,9 +116,10 @@ func ExampleBlocks_verbatim() {
`
fmt.Printf("%q\n", Z.Blocks(in)[0])
fmt.Printf("%q\n", Z.Blocks(in)[1])
fmt.Printf("%q\n", Z.Blocks(in)[2])
blocks := Z.Blocks(in)
fmt.Printf("%q\n", blocks[0])
fmt.Printf("%q\n", blocks[1])
fmt.Printf("%q\n", blocks[2])
//Output:
// "Must have another block type first."
@ -124,8 +134,10 @@ func ExampleEmph_under() {
term.Under = `<under>`
term.Reset = `<reset>`
fmt.Println(Z.Emph("<UNDER>"))
fmt.Println(Z.Emph("< UNDER >"))
// Output:
// <<under>UNDER<reset>>
// < UNDER >
}
func ExampleEmph_boldItalic() {
@ -140,16 +152,20 @@ func ExampleEmph_bold() {
term.Bold = `<bold>`
term.Reset = `<reset>`
fmt.Println(Z.Emph("**Bold**"))
fmt.Println(Z.Emph("** Bold **"))
// Output:
// <bold>Bold<reset>
// ** Bold **
}
// ExampleEmph_italic shows how Emph treats single-asterisk spans. With
// term.Italic and term.Reset set to visible marker strings, "*Italic*"
// is printed wrapped in those markers, while "* Italic *" (asterisks
// separated from the word by spaces) is printed unchanged.
func ExampleEmph_italic() {
// Use readable marker strings so the expected output below is plain text.
term.Italic = `<italic>`
term.Reset = `<reset>`
fmt.Println(Z.Emph("*Italic*"))
fmt.Println(Z.Emph("* Italic *"))
// Output:
// <italic>Italic<reset>
// * Italic *
}
func ExampleEmph_basics() {

Loading…
Cancel
Save