mirror of
https://github.com/rwxrob/bonzai
synced 2024-11-14 18:12:59 +00:00
Add scan.Hook support
This commit is contained in:
parent
bed5d82c50
commit
b86c1e15fb
6
scan/is/compound.go
Normal file
6
scan/is/compound.go
Normal file
@ -0,0 +1,6 @@
|
||||
package is
|
||||
|
||||
// keep only compound expressions here
|
||||
|
||||
var WS = Any{' ', '\n', '\t', '\r'}
|
||||
// Digit matches a single ASCII decimal digit, '0' through '9'. The
// bounds must be rune literals: the untyped integers 0 and 9 would
// select the control characters U+0000 through U+0009 instead.
var Digit = Rng{'0', '9'}
|
@ -33,7 +33,16 @@ Composites
|
||||
|
||||
Composites are compound expressions composed of others. They represent
|
||||
the tokens and classes from PEGN and other grammars and are designed to
|
||||
simplify grammar development at a higher level. Pull requests are welcome for missing, commonly used composite candidates.
|
||||
simplify grammar development at a higher level. Pull requests are
|
||||
welcome for missing, commonly used composite candidates.
|
||||
|
||||
Hooks
|
||||
|
||||
Hooks are not strictly an expression type and are declared in the scan
|
||||
package itself (to avoid a cyclical import dependency since it is passed
|
||||
a scan.R). A Hook is passed only the scanner struct and must return a bool
|
||||
indicating if the scan should proceed. See scan.Hook for more
|
||||
information.
|
||||
*/
|
||||
package is
|
||||
|
||||
@ -68,7 +77,7 @@ type Opt []any
|
||||
|
||||
// --------------------------- parameterized --------------------------
|
||||
|
||||
// MMx parameterized advancing expression scans for the inclusive
|
||||
// MMx is a parameterized advancing expression that matches an inclusive
|
||||
// minimum and maximum count of the given expression (This). Use within
|
||||
// is.Lk to disable advancement.
|
||||
type MMx struct {
|
||||
@ -77,34 +86,39 @@ type MMx struct {
|
||||
This any
|
||||
}
|
||||
|
||||
// Min parameterized advancing expression scans for the inclusive minimum
|
||||
// number of the given expression item (This). Use within is.Lk to
|
||||
// disable advancement.
|
||||
// Min is a parameterized advancing expression that matches an inclusive
|
||||
// minimum number of the given expression item (This). Use within is.Lk
|
||||
// to disable advancement.
|
||||
type Min struct {
|
||||
Min int
|
||||
This any
|
||||
}
|
||||
|
||||
// Mn1 parameterized advancing expression is shorthand for is.Min{1,This}.
|
||||
// Mn1 is shorthand for is.Min{1,This}.
|
||||
type Mn1 struct{ This any }
|
||||
|
||||
// N parameterized advancing expression scans for exactly N number of
|
||||
// the given expression (This). Use within is.Lk to disable advancement.
|
||||
// N is a parameterized advancing expression that matches exactly
|
||||
// N number of the given expression (This). Use within is.Lk to disable
|
||||
// advancement.
|
||||
type N struct {
|
||||
N int
|
||||
This any
|
||||
}
|
||||
|
||||
// Rng parameterized advancing expression scans for a single Unicode
|
||||
// code point (rune, uint32) from an inclusive consecutive set from
|
||||
// First to Last (First,Last). Use within is.Lk to disable advancement.
|
||||
// Rng is a parameterized advancing expression that matches a single
|
||||
// Unicode code point (rune, int32) from an inclusive consecutive set
|
||||
// from First to Last (First,Last). Use within is.Lk to disable
|
||||
// advancement.
|
||||
type Rng struct {
|
||||
First rune
|
||||
Last rune
|
||||
}
|
||||
|
||||
// ---------------------------- composites ----------------------------
|
||||
// (keep most common to the left)
|
||||
|
||||
var WS = In{' ', '\n', '\t', '\r'}
|
||||
// Digit matches a single ASCII decimal digit, '0' through '9'. The
// bounds must be rune literals: the untyped integers 0 and 9 would
// select the control characters U+0000 through U+0009 instead.
var Digit = Rng{'0', '9'}
|
||||
// Esc is a parameterized advancing expression that matches everything
|
||||
// in the given expression (This) except for an expression (Not) that
|
||||
// requires being immediately preceded by the escape expression (Esc).
|
||||
type Esc struct {
|
||||
Not any
|
||||
Esc any
|
||||
This any
|
||||
}
|
||||
|
112
scan/scan.go
112
scan/scan.go
@ -23,13 +23,30 @@ import (
|
||||
|
||||
"github.com/rwxrob/bonzai/scan/is"
|
||||
"github.com/rwxrob/bonzai/scan/tk"
|
||||
"github.com/rwxrob/bonzai/util"
|
||||
)
|
||||
|
||||
// Scanner implements a non-linear, rune-centric, buffered data scanner.
|
||||
// See New for creating a usable struct that implements Scanner. The
|
||||
// buffer and cursor are directly exposed to facilitate
|
||||
// higher-performance, direct access when needed.
|
||||
type Scanner struct {
|
||||
// Hook is a function expression that accepts a reference to the current
|
||||
// scanner and simply returns true or false. Hook functions are allowed
|
||||
// to do whatever they need and must advance the scan.R themselves (if
|
||||
// necessary) and should not be abused and are given the lowest priority
|
||||
// when searching for expressions. Static scanning expressions will
|
||||
// usually be faster than any Hook. Hook allows PEGN (and others) to
|
||||
// indicate Hook names for executable code that must be run during the
|
||||
// scanning of a specific grammar (indicated as "rhetorical" in some
|
||||
// grammars). In fact, scan.Rs can be converted into parsers relatively
|
||||
// easily by implementing a set of Hook functions to capture or
|
||||
// render scanned data at specific points during the scan process. Since
|
||||
// only the name of the Hook function is required, BPEGN remains
|
||||
// compatible with PEGN one-for-one transpiling.
|
||||
type Hook func(s *R) bool
|
||||
|
||||
// R (as in scan.R or "scanner") implements a non-linear, rune-centric,
|
||||
// buffered data scanner and provides full support for BPEGN. See New
|
||||
// for creating a usable struct that implements scan.R. The buffer and
|
||||
// cursor are directly exposed to facilitate higher-performance, direct
|
||||
// access when needed.
|
||||
type R struct {
|
||||
|
||||
// Buf is the data buffer providing infinite look-ahead and behind.
|
||||
Buf []byte
|
||||
@ -40,21 +57,14 @@ type Scanner struct {
|
||||
|
||||
// Snapped contains the latest Cur when Snap was called.
|
||||
Snapped *Cur
|
||||
|
||||
// ExtendExpect provides a hook to support additional custom
|
||||
// scannables for both Expect and Check Scanner methods. Take note of
|
||||
// the ErrorExpected errors in order to construct similar errors where
|
||||
// returning ErrorExpected itself would not provide clear error
|
||||
// messages.
|
||||
ExtendExpect func(s *Scanner, scannable ...any) (*Cur, error)
|
||||
}
|
||||
|
||||
// New returns a newly initialized non-linear, rune-centric, buffered
|
||||
// data scanner with support for parsing data from io.Reader, string,
|
||||
// and []byte types. Returns nil and the error if any encountered during
|
||||
// initialization. Also see the Init method.
|
||||
func New(i any) (*Scanner, error) {
|
||||
s := new(Scanner)
|
||||
func New(i any) (*R, error) {
|
||||
s := new(R)
|
||||
if err := s.Init(i); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -65,7 +75,7 @@ func New(i any) (*Scanner, error) {
|
||||
// into buffered memory, scans the first rune, and sets the internals of
|
||||
// scanner appropriately returning an error if anything happens while
|
||||
// attempting to read and buffer the data (OOM, etc.).
|
||||
func (s *Scanner) Init(i any) error {
|
||||
func (s *R) Init(i any) error {
|
||||
s.Cur = new(Cur)
|
||||
s.Cur.Pos = Pos{}
|
||||
s.Cur.Pos.Line = 1
|
||||
@ -91,7 +101,7 @@ func (s *Scanner) Init(i any) error {
|
||||
}
|
||||
|
||||
// reads and buffers io.Reader, string, or []byte types
|
||||
func (s *Scanner) buffer(i any) error {
|
||||
func (s *R) buffer(i any) error {
|
||||
var err error
|
||||
switch v := i.(type) {
|
||||
case io.Reader:
|
||||
@ -117,7 +127,7 @@ func (s *Scanner) buffer(i any) error {
|
||||
// The method of scanning isn't as optimized as other scanners (for
|
||||
// example, the scanner from the bonzai/json package), but it is
|
||||
// sufficient for most high level needs.
|
||||
func (s *Scanner) Scan() {
|
||||
func (s *R) Scan() {
|
||||
|
||||
if s.Cur.Next == s.BufLen {
|
||||
s.Cur.Rune = tk.EOD
|
||||
@ -144,24 +154,24 @@ func (s *Scanner) Scan() {
|
||||
|
||||
// ScanN scans the next n runes advancing n runes forward or returns
|
||||
// EOD if attempted after already at end of data.
|
||||
func (s *Scanner) ScanN(n int) {
|
||||
func (s *R) ScanN(n int) {
|
||||
for i := 0; i < n; i++ {
|
||||
s.Scan()
|
||||
}
|
||||
}
|
||||
|
||||
// String delegates to internal cursor String.
|
||||
func (s *Scanner) String() string { return s.Cur.String() }
|
||||
func (s *R) String() string { return s.Cur.String() }
|
||||
|
||||
// Print delegates to internal cursor Print.
|
||||
func (s *Scanner) Print() { s.Cur.Print() }
|
||||
func (s *R) Print() { s.Cur.Print() }
|
||||
|
||||
// Log delegates to internal cursor Log.
|
||||
func (s *Scanner) Log() { s.Cur.Log() }
|
||||
func (s *R) Log() { s.Cur.Log() }
|
||||
|
||||
// Mark returns a copy of the current scanner cursor to preserve like
|
||||
// a bookmark into the buffer data. See Cur, Look, LookSlice.
|
||||
func (s *Scanner) Mark() *Cur {
|
||||
func (s *R) Mark() *Cur {
|
||||
if s.Cur == nil {
|
||||
return nil
|
||||
}
|
||||
@ -171,23 +181,23 @@ func (s *Scanner) Mark() *Cur {
|
||||
}
|
||||
|
||||
// Snap sets an extra internal cursor to the current cursor. See Mark.
|
||||
func (s *Scanner) Snap() { s.Snapped = s.Mark() }
|
||||
func (s *R) Snap() { s.Snapped = s.Mark() }
|
||||
|
||||
// Back jumps the current cursor to the last Snap (Snapped).
|
||||
func (s *Scanner) Back() { s.Jump(s.Snapped) }
|
||||
func (s *R) Back() { s.Jump(s.Snapped) }
|
||||
|
||||
// Jump replaces the internal cursor with a copy of the one passed
|
||||
// effectively repositioning the scanner's current position in the
|
||||
// buffered data. Beware, however, that the new cursor must originate
|
||||
// from the same (or identical) data buffer or the values will be out of
|
||||
// sync.
|
||||
func (s *Scanner) Jump(c *Cur) { nc := *c; s.Cur = &nc }
|
||||
func (s *R) Jump(c *Cur) { nc := *c; s.Cur = &nc }
|
||||
|
||||
// Peek returns a string containing all the runes from the current
|
||||
// scanner cursor position forward to the number of runes passed.
|
||||
// If end of data is encountered it will return everything up until that
|
||||
// point. Also see Look and LookSlice.
|
||||
func (s *Scanner) Peek(n uint) string {
|
||||
func (s *R) Peek(n uint) string {
|
||||
buf := ""
|
||||
pos := s.Cur.Byte
|
||||
for c := uint(0); c < n; c++ {
|
||||
@ -205,7 +215,7 @@ func (s *Scanner) Peek(n uint) string {
|
||||
// scanner cursor position ahead or behind to the passed cursor
|
||||
// position. Neither the internal nor the passed cursor position is
|
||||
// changed. Also see Peek and LookSlice.
|
||||
func (s *Scanner) Look(to *Cur) string {
|
||||
func (s *R) Look(to *Cur) string {
|
||||
if to.Byte < s.Cur.Byte {
|
||||
return string(s.Buf[to.Byte:s.Cur.Next])
|
||||
}
|
||||
@ -214,7 +224,7 @@ func (s *Scanner) Look(to *Cur) string {
|
||||
|
||||
// LookSlice returns a string containing all the bytes from the first
|
||||
// cursor up to the second cursor. Neither cursor position is changed.
|
||||
func (s *Scanner) LookSlice(beg *Cur, end *Cur) string {
|
||||
func (s *R) LookSlice(beg *Cur, end *Cur) string {
|
||||
return string(s.Buf[beg.Byte:end.Next])
|
||||
}
|
||||
|
||||
@ -224,7 +234,7 @@ func (s *Scanner) LookSlice(beg *Cur, end *Cur) string {
|
||||
// string - "foo" simple string
|
||||
// rune - 'f' int32, but "rune" in errors
|
||||
// is.Not{any...} - negative look-ahead set (slice)
|
||||
// is.In{any...} - one positive look-ahead from set (slice)
|
||||
// is.Any{any...} - one positive look-ahead from set (slice)
|
||||
// is.Seq{any...} - required positive look-ahead sequence (slice)
|
||||
// is.Opt{any...} - optional positive look-ahead set (slice)
|
||||
// is.Min{n,any} - minimum positive look-aheads
|
||||
@ -237,7 +247,7 @@ func (s *Scanner) LookSlice(beg *Cur, end *Cur) string {
|
||||
// allows for very readable functional grammar parsers to be created
|
||||
// quickly without exceptional overhead from additional function calls
|
||||
// and indirection. As some have said, "it's regex without the regex."
|
||||
func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
func (s *R) Expect(scannables ...any) (*Cur, error) {
|
||||
var beg, end *Cur
|
||||
beg = s.Cur
|
||||
|
||||
@ -272,7 +282,7 @@ func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
case is.Lk: // ----------------------------------------------------
|
||||
var m *Cur
|
||||
for _, i := range v {
|
||||
m, _ = s.Check(i)
|
||||
m, _ = s.check(i)
|
||||
if m != nil {
|
||||
break
|
||||
}
|
||||
@ -284,7 +294,7 @@ func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
|
||||
case is.Not: // ----------------------------------------------------
|
||||
for _, i := range v {
|
||||
if _, e := s.Check(i); e == nil {
|
||||
if _, e := s.check(i); e == nil {
|
||||
err := s.ErrorExpected(v, i)
|
||||
s.Jump(beg)
|
||||
return nil, err
|
||||
@ -292,7 +302,7 @@ func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
}
|
||||
end = s.Mark()
|
||||
|
||||
case is.In: // -----------------------------------------------------
|
||||
case is.Any: // -----------------------------------------------------
|
||||
var m *Cur
|
||||
for _, i := range v {
|
||||
var err error
|
||||
@ -397,11 +407,29 @@ func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
end = s.Mark()
|
||||
s.Scan()
|
||||
|
||||
default: // --------------------------------------------------------
|
||||
if s.ExtendExpect != nil {
|
||||
return s.ExtendExpect(s, scannables...)
|
||||
case is.Esc: // ----------------------------------------------------
|
||||
// TODO
|
||||
|
||||
case Hook: // ------------------------------------------------------
|
||||
if !v(s) {
|
||||
return nil, fmt.Errorf(
|
||||
"expect: hook function failed (%v)",
|
||||
util.FuncName(v),
|
||||
)
|
||||
}
|
||||
return nil, fmt.Errorf("expect: unscannable type (%T)", m)
|
||||
end = s.Mark()
|
||||
|
||||
case func(r *R) bool:
|
||||
if !v(s) {
|
||||
return nil, fmt.Errorf(
|
||||
"expect: hook function failed (%v)",
|
||||
util.FuncName(v),
|
||||
)
|
||||
}
|
||||
end = s.Mark()
|
||||
|
||||
default: // --------------------------------------------------------
|
||||
return nil, fmt.Errorf("expect: unscannable expression (%T)", m)
|
||||
}
|
||||
}
|
||||
return end, nil
|
||||
@ -410,7 +438,7 @@ func (s *Scanner) Expect(scannables ...any) (*Cur, error) {
|
||||
// ErrorExpected returns a verbose, one-line error describing what was
|
||||
// expected when it encountered whatever the scanner last scanned. All
|
||||
// scannable types are supported. See Expect.
|
||||
func (s *Scanner) ErrorExpected(this any, args ...any) error {
|
||||
func (s *R) ErrorExpected(this any, args ...any) error {
|
||||
var msg string
|
||||
but := fmt.Sprintf(` at %v`, s)
|
||||
if s.Cur != nil && s.Cur.Rune == tk.EOD && s.Cur.Len == 0 {
|
||||
@ -428,7 +456,7 @@ func (s *Scanner) ErrorExpected(this any, args ...any) error {
|
||||
msg = fmt.Sprintf(`expected %q`, v)
|
||||
case is.Not:
|
||||
msg = fmt.Sprintf(`unexpected %q`, args[0])
|
||||
case is.In:
|
||||
case is.Any:
|
||||
str := `expected one of %q`
|
||||
msg = fmt.Sprintf(str, v)
|
||||
case is.Seq:
|
||||
@ -459,11 +487,11 @@ func (s *Scanner) ErrorExpected(this any, args ...any) error {
|
||||
}
|
||||
|
||||
// NewLine delegates to internal Cur.NewLine.
|
||||
func (s *Scanner) NewLine() { s.Cur.NewLine() }
|
||||
func (s *R) NewLine() { s.Cur.NewLine() }
|
||||
|
||||
// Check behaves exactly like Expect but jumps back to the original
|
||||
// check behaves exactly like Expect but jumps back to the original
|
||||
// cursor position after scanning for expected scannable values.
|
||||
func (s *Scanner) Check(scannables ...any) (*Cur, error) {
|
||||
func (s *R) check(scannables ...any) (*Cur, error) {
|
||||
defer s.Jump(s.Mark())
|
||||
return s.Expect(scannables...)
|
||||
}
|
||||
|
@ -186,16 +186,6 @@ func ExampleExpect_basic() {
|
||||
// <EOD>
|
||||
}
|
||||
|
||||
func ExampleCheck() {
|
||||
s, _ := scan.New("some thing")
|
||||
c, _ := s.Check("some", ' ', "thin") // same as Expect ...
|
||||
c.Print() // ... with cur return ...
|
||||
s.Print() // ... just doesn't advance
|
||||
// Output:
|
||||
// U+006E 'n' 1,9-9 (9-9)
|
||||
// U+0073 's' 1,1-1 (1-1)
|
||||
}
|
||||
|
||||
func ExampleExpect_lk() {
|
||||
s, _ := scan.New("some thing")
|
||||
_, e := s.Expect(is.Lk{"foo"})
|
||||
@ -206,7 +196,7 @@ func ExampleExpect_lk() {
|
||||
s.Print()
|
||||
c, _ = s.Expect(is.Lk{is.Rng{'l', 'p'}})
|
||||
s.Print() // not advanced
|
||||
c, _ = s.Expect(is.In{is.Rng{'l', 'p'}})
|
||||
c, _ = s.Expect(is.Any{is.Rng{'l', 'p'}})
|
||||
s.Print() // advanced
|
||||
// Output:
|
||||
// expected ["foo"] at U+0073 's' 1,1-1 (1-1)
|
||||
@ -231,13 +221,13 @@ func ExampleExpect_not() {
|
||||
// unexpected "some" at U+0073 's' 1,1-1 (1-1)
|
||||
}
|
||||
|
||||
func ExampleExpect_in() {
|
||||
func ExampleExpect_any() {
|
||||
s, _ := scan.New("some thing")
|
||||
s.Scan()
|
||||
c, _ := s.Expect(is.In{'O', 'o', "ome"})
|
||||
c, _ := s.Expect(is.Any{'O', 'o', "ome"})
|
||||
c.Print()
|
||||
s.Print()
|
||||
_, err := s.Expect(is.In{'x', 'y', "zee"})
|
||||
_, err := s.Expect(is.Any{'x', 'y', "zee"})
|
||||
fmt.Println(err)
|
||||
// Output:
|
||||
// U+006F 'o' 1,2-2 (2-2)
|
||||
@ -309,7 +299,7 @@ func ExampleExpect_min() {
|
||||
// U+006F 'o' 1,2-2 (2-2)
|
||||
}
|
||||
|
||||
func ExampleExpect_min_Max() {
|
||||
func ExampleExpect_mMx() {
|
||||
s, _ := scan.New("sommme thing")
|
||||
s.Snap()
|
||||
s.ScanN(2)
|
||||
@ -328,7 +318,7 @@ func ExampleExpect_min_Max() {
|
||||
// expected min 1, max 3 of 'X' at U+006F 'o' 1,2-2 (2-2)
|
||||
}
|
||||
|
||||
func ExampleExpect_count() {
|
||||
func ExampleExpect_n() {
|
||||
s, _ := scan.New("sommme thing")
|
||||
s.Snap()
|
||||
s.ScanN(2)
|
||||
@ -347,7 +337,7 @@ func ExampleExpect_count() {
|
||||
// expected exactly 3 of 'X' at U+006F 'o' 1,2-2 (2-2)
|
||||
}
|
||||
|
||||
func ExampleExpect_in_Range() {
|
||||
func ExampleExpect_rng() {
|
||||
s, _ := scan.New("some thing")
|
||||
s.Scan()
|
||||
c1, _ := s.Expect(is.Rng{'l', 'p'})
|
||||
@ -358,19 +348,43 @@ func ExampleExpect_in_Range() {
|
||||
// U+006D 'm' 1,3-3 (3-3)
|
||||
}
|
||||
|
||||
func ExampleExtendExpect() {
|
||||
func FailHook(s *scan.R) bool { return false }
|
||||
|
||||
func ExampleExpect_hook() {
|
||||
|
||||
// as scan.Hook; only reports, does not advance the scanner
|
||||
WouldSave := scan.Hook(func(s *scan.R) bool {
|
||||
fmt.Println("would save")
|
||||
return true
|
||||
})
|
||||
|
||||
// as scan.Hook
|
||||
WouldScan := scan.Hook(func(s *scan.R) bool {
|
||||
s.Scan()
|
||||
return true
|
||||
})
|
||||
|
||||
// FailHook defined outside of Example function (see source)
|
||||
|
||||
s, _ := scan.New("some thing")
|
||||
s.ExtendExpect = func(s *scan.Scanner, a ...any) (*scan.Cur, error) {
|
||||
return s.Cur, fmt.Errorf("custom error for type %T handled at %v",
|
||||
a[0], s.Cur,
|
||||
)
|
||||
}
|
||||
_, e := s.Expect([]byte{'0'})
|
||||
s.Scan()
|
||||
s.Expect(WouldSave)
|
||||
s.Print() // hook didn't advance
|
||||
s.Expect(WouldScan)
|
||||
s.Print() // hook advanced scan by one
|
||||
_, e := s.Expect(FailHook)
|
||||
fmt.Println(e)
|
||||
|
||||
// Output:
|
||||
// custom error for type []uint8 handled at U+0073 's' 1,1-1 (1-1)
|
||||
// would save
|
||||
// U+006F 'o' 1,2-2 (2-2)
|
||||
// U+006D 'm' 1,3-3 (3-3)
|
||||
// expect: hook function failed (FailHook)
|
||||
|
||||
}
|
||||
|
||||
// TODO Esc
|
||||
|
||||
func ExampleSnap() {
|
||||
s, _ := scan.New("some thing")
|
||||
s.ScanN(3)
|
||||
|
Loading…
Reference in New Issue
Block a user