Expose Buf and Cur in scanner for perf

pull/53/head
rwxrob 2 years ago
parent 27a334b282
commit 796adcd061
No known key found for this signature in database
GPG Key ID: 2B9111F33082AE77

@ -9,13 +9,13 @@ import (
"github.com/rwxrob/bonzai/scanner/tk"
)
// Scanner implements a non-linear, rune-centric, buffered data
// scanner. See New for a creating a usable struct that implements
// Scanner.
// Scanner implements a non-linear, rune-centric, buffered data scanner.
// See New for creating a usable struct that implements Scanner. The
// buffer and cursor are directly exposed to facilitate
// higher-performance, direct access when needed.
type Scanner struct {
in io.Reader
buf []byte
cur *Cur
Buf []byte
Cur *Cur
}
// New returns a newly initialized non-linear, rune-centric, buffered
@ -29,27 +29,27 @@ func New(i interface{}) *Scanner {
return nil
}
// Init reads all of passed parsable (io.Reader, string, []byte) into
// memory, parses the first rune, and sets the internals of scanner
// appropriately returning an error if anything happens while attempting
// to read and buffer the data (OOM, etc.).
// Init reads all of passed parsable data (io.Reader, string, []byte)
// into buffered memory, scans the first rune, and sets the internals of
// scanner appropriately returning an error if anything happens while
// attempting to read and buffer the data (OOM, etc.).
func (p *Scanner) Init(i interface{}) error {
if err := p.buffer(i); err != nil {
return err
}
r, ln := utf8.DecodeRune(p.buf) // scan first
r, ln := utf8.DecodeRune(p.Buf) // scan first
if ln == 0 {
r = tk.EOD
return fmt.Errorf("scanner: failed to scan first rune")
}
p.cur = new(Cur)
p.cur.Rune = r
p.cur.Len = ln
p.cur.Next = ln
p.cur.Pos.Line = 1
p.cur.Pos.LineRune = 1
p.cur.Pos.LineByte = 1
p.cur.Pos.Rune = 1
p.Cur = new(Cur)
p.Cur.Rune = r
p.Cur.Len = ln
p.Cur.Next = ln
p.Cur.Pos.Line = 1
p.Cur.Pos.LineRune = 1
p.Cur.Pos.LineByte = 1
p.Cur.Pos.Rune = 1
return nil
}
@ -58,137 +58,135 @@ func (p *Scanner) buffer(i interface{}) error {
var err error
switch in := i.(type) {
case io.Reader:
p.buf, err = io.ReadAll(in)
p.Buf, err = io.ReadAll(in)
if err != nil {
return err
}
case string:
p.buf = []byte(in)
p.Buf = []byte(in)
case []byte:
p.buf = in
p.Buf = in
default:
return fmt.Errorf("scanner: unsupported input type: %t", i)
}
if len(p.buf) == 0 {
if len(p.Buf) == 0 {
return fmt.Errorf("scanner: no input")
}
return err
}
// Next parses the next rune advancing a single rune forward or sets
// current cursor rune to tk.EOD if at end of data. Returns p.Done() if
// attempted after already at end of data.
func (p *Scanner) Next() {
// Scan decodes the next rune and advances the scanner cursor by one.
func (p *Scanner) Scan() {
if p.Done() {
return
}
r, ln := utf8.DecodeRune(p.buf[p.cur.Next:])
r, ln := utf8.DecodeRune(p.Buf[p.Cur.Next:])
if ln != 0 {
p.cur.Byte = p.cur.Next
p.cur.Pos.LineByte += p.cur.Len
p.Cur.Byte = p.Cur.Next
p.Cur.Pos.LineByte += p.Cur.Len
} else {
r = tk.EOD
}
p.cur.Rune = r
p.cur.Pos.Rune += 1
p.cur.Next += ln
p.cur.Pos.LineRune += 1
p.cur.Len = ln
p.Cur.Rune = r
p.Cur.Pos.Rune += 1
p.Cur.Next += ln
p.Cur.Pos.LineRune += 1
p.Cur.Len = ln
}
func (p *Scanner) Move(n int) {
// ScanN scans the next n runes advancing n runes forward or returns
// p.Done() if attempted after already at end of data.
func (p *Scanner) ScanN(n int) {
for i := 0; i < n; i++ {
p.Next()
p.Scan()
}
}
// Done returns true if current cursor rune is tk.EOD and the cursor length
// is also zero.
func (p *Scanner) Done() bool {
return p.cur.Rune == tk.EOD && p.cur.Len == 0
return p.Cur.Rune == tk.EOD && p.Cur.Len == 0
}
// String delegates to internal cursor String.
func (p *Scanner) String() string { return p.cur.String() }
func (p *Scanner) String() string { return p.Cur.String() }
// Print delegates to internal cursor Print.
func (p *Scanner) Print() { p.cur.Print() }
// Cur returns exact cursor used by Scanner. See CopyCur and Cur struct.
func (p *Scanner) Cur() *Cur { return p.cur }
func (p *Scanner) Print() { p.Cur.Print() }
// CopyCur returns a copy of the current scanner cursor. See Cur.
func (p *Scanner) CopyCur() *Cur {
if p.cur == nil {
if p.Cur == nil {
return nil
}
// force a copy
cp := *p.cur
cp := *p.Cur
return &cp
}
// Jump replaces the internal cursor with a copy of the one passed
// effectively repositioning the scanner's current position in the
// buffered data.
func (p *Scanner) Jump(c *Cur) { nc := *c; p.cur = &nc }
func (p *Scanner) Jump(c *Cur) { nc := *c; p.Cur = &nc }
// Look returns a string containing all the bytes from the current
// scanner cursor position up to the passed cursor position, forward or
// backward. Neither the internal nor the passed cursor position is
// changed.
func (p *Scanner) Look(to *Cur) string {
if to.Byte < p.cur.Byte {
return string(p.buf[to.Byte:p.cur.Next])
if to.Byte < p.Cur.Byte {
return string(p.Buf[to.Byte:p.Cur.Next])
}
return string(p.buf[p.cur.Byte:to.Next])
return string(p.Buf[p.Cur.Byte:to.Next])
}
// LookSlice returns a string containing all the bytes from the first
// cursor up to the second cursor. Neither cursor position is changed.
func (p *Scanner) LookSlice(beg *Cur, end *Cur) string {
return string(p.buf[beg.Byte:end.Next])
return string(p.Buf[beg.Byte:end.Next])
}
// Expect takes a variable list of parsable types including rune,
// string, Class, Check, Opt, Not, Seq, One, Min, MinMax, Count. This
// allows grammars to be represented simply and parsed easily without
// exceptional overhead from additional function calls and indirection.
func (p *Scanner) Expect(ms ...interface{}) (*Cur, error) {
func (p *Scanner) Expect(scannable ...interface{}) (*Cur, error) {
var beg, end *Cur
beg = p.Cur()
for _, m := range ms {
beg = p.Cur
for _, m := range scannable {
switch v := m.(type) {
case rune:
if p.cur.Rune != v {
if p.Cur.Rune != v {
err := p.ErrorExpected(m)
p.Jump(beg)
return nil, err
}
end = p.CopyCur()
p.Next()
p.Scan()
case string:
if v == "" {
return nil, fmt.Errorf("expect: cannot parse empty string")
}
for _, r := range []rune(v) {
if p.cur.Rune != r {
if p.Cur.Rune != r {
err := p.ErrorExpected(r)
p.Jump(beg)
return nil, err
}
end = p.CopyCur()
p.Next()
p.Scan()
}
/*
case Class:
if !v.Check(p.cur.Rune) {
if !v.Check(p.Cur.Rune) {
err := p.ErrorExpected(v)
p.Jump(beg)
return nil, err
}
end = p.CopyCur()
p.Next()
p.Scan()
case Check:
rv, err := v.Check(p)
@ -198,7 +196,7 @@ func (p *Scanner) Expect(ms ...interface{}) (*Cur, error) {
}
end = rv
p.Jump(rv)
p.Next()
p.Scan()
case is.Opt:
m, err := p.Expect(is.MinMax{v.This, 0, 1})
@ -305,10 +303,10 @@ func (p *Scanner) ErrorExpected(this interface{}) error {
but := fmt.Sprintf(` but got %v`, p)
if p.Done() {
runes := `runes`
if p.cur.Pos.Rune == 1 {
if p.Cur.Pos.Rune == 1 {
runes = `rune`
}
but = fmt.Sprintf(` but exceeded data length (%v %v)`, p.cur.Pos.Rune, runes)
but = fmt.Sprintf(` but exceeded data length (%v %v)`, p.Cur.Pos.Rune, runes)
}
// TODO add verbose errors for *all* types in Grammar
switch v := this.(type) {
@ -327,7 +325,7 @@ func (p *Scanner) ErrorExpected(this interface{}) error {
}
// NewLine delegates to interval Curs.NewLine.
func (p *Scanner) NewLine() { p.cur.NewLine() }
func (p *Scanner) NewLine() { p.Cur.NewLine() }
func (p *Scanner) Check(ms ...interface{}) (*Cur, error) {
defer p.Jump(p.CopyCur())

Loading…
Cancel
Save