Expose Buf and Cur in scanner for perf

2 years ago · 796adcd061
parent 27a334b282
commit 796adcd061
1 changed files with 61 additions and 63 deletions
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@ -9,13 +9,13 @@ import (
 	"github.com/rwxrob/bonzai/scanner/tk"
 )

-// Scanner implements a non-linear, rune-centric, buffered data
-// scanner. See New for a creating a usable struct that implements
-// Scanner.
+// Scanner implements a non-linear, rune-centric, buffered data scanner.
+// See New for creating a usable struct that implements Scanner. The
+// buffer and cursor are directly exposed to facilitate
+// higher-performance, direct access when needed.
 type Scanner struct {
-	in  io.Reader
-	buf []byte
-	cur *Cur
+	Buf []byte
+	Cur *Cur
 }

 // New returns a newly initialized non-linear, rune-centric, buffered
@ -29,27 +29,27 @@ func New(i interface{}) *Scanner {
 	return nil
 }

-// Init reads all of passed parsable (io.Reader, string, []byte) into
-// memory, parses the first rune, and sets the internals of scanner
-// appropriately returning an error if anything happens while attempting
-// to read and buffer the data (OOM, etc.).
+// Init reads all of passed parsable data (io.Reader, string, []byte)
+// into buffered memory, scans the first rune, and sets the internals of
+// scanner appropriately, returning an error if anything happens while
+// attempting to read and buffer the data (OOM, etc.).
 func (p *Scanner) Init(i interface{}) error {
 	if err := p.buffer(i); err != nil {
 		return err
 	}
-	r, ln := utf8.DecodeRune(p.buf) // scan first
+	r, ln := utf8.DecodeRune(p.Buf) // scan first
 	if ln == 0 {
 		r = tk.EOD
 		return fmt.Errorf("scanner: failed to scan first rune")
 	}
-	p.cur = new(Cur)
-	p.cur.Rune = r
-	p.cur.Len = ln
-	p.cur.Next = ln
-	p.cur.Pos.Line = 1
-	p.cur.Pos.LineRune = 1
-	p.cur.Pos.LineByte = 1
-	p.cur.Pos.Rune = 1
+	p.Cur = new(Cur)
+	p.Cur.Rune = r
+	p.Cur.Len = ln
+	p.Cur.Next = ln
+	p.Cur.Pos.Line = 1
+	p.Cur.Pos.LineRune = 1
+	p.Cur.Pos.LineByte = 1
+	p.Cur.Pos.Rune = 1
 	return nil
 }

@ -58,137 +58,135 @@ func (p *Scanner) buffer(i interface{}) error {
 	var err error
 	switch in := i.(type) {
 	case io.Reader:
-		p.buf, err = io.ReadAll(in)
+		p.Buf, err = io.ReadAll(in)
 		if err != nil {
 			return err
 		}
 	case string:
-		p.buf = []byte(in)
+		p.Buf = []byte(in)
 	case []byte:
-		p.buf = in
+		p.Buf = in
 	default:
 		return fmt.Errorf("scanner: unsupported input type: %t", i)
 	}
-	if len(p.buf) == 0 {
+	if len(p.Buf) == 0 {
 		return fmt.Errorf("scanner: no input")
 	}
 	return err
 }

-// Next parses the next rune advancing a single rune forward or sets
-// current cursor rune to tk.EOD if at end of data. Returns p.Done() if
-// attempted after already at end of data.
-func (p *Scanner) Next() {
+// Scan decodes the next rune and advances the scanner cursor by one.
+func (p *Scanner) Scan() {
 	if p.Done() {
 		return
 	}
-	r, ln := utf8.DecodeRune(p.buf[p.cur.Next:])
+	r, ln := utf8.DecodeRune(p.Buf[p.Cur.Next:])
 	if ln != 0 {
-		p.cur.Byte = p.cur.Next
-		p.cur.Pos.LineByte += p.cur.Len
+		p.Cur.Byte = p.Cur.Next
+		p.Cur.Pos.LineByte += p.Cur.Len
 	} else {
 		r = tk.EOD
 	}
-	p.cur.Rune = r
-	p.cur.Pos.Rune += 1
-	p.cur.Next += ln
-	p.cur.Pos.LineRune += 1
-	p.cur.Len = ln
+	p.Cur.Rune = r
+	p.Cur.Pos.Rune += 1
+	p.Cur.Next += ln
+	p.Cur.Pos.LineRune += 1
+	p.Cur.Len = ln
 }
-func (p *Scanner) Move(n int) {
+
+// ScanN calls Scan n times, advancing the cursor up to n runes
+// forward. Once p.Done() is true the remaining calls have no effect.
+func (p *Scanner) ScanN(n int) {
 	for i := 0; i < n; i++ {
-		p.Next()
+		p.Scan()
 	}
 }

 // Done returns true if current cursor rune is tk.EOD and the cursor length
 // is also zero.
 func (p *Scanner) Done() bool {
-	return p.cur.Rune == tk.EOD && p.cur.Len == 0
+	return p.Cur.Rune == tk.EOD && p.Cur.Len == 0
 }

 // String delegates to internal cursor String.
-func (p *Scanner) String() string { return p.cur.String() }
+func (p *Scanner) String() string { return p.Cur.String() }

 // Print delegates to internal cursor Print.
-func (p *Scanner) Print() { p.cur.Print() }
-
-// Cur returns exact cursor used by Scanner. See CopyCur and Cur struct.
-func (p *Scanner) Cur() *Cur { return p.cur }
+func (p *Scanner) Print() { p.Cur.Print() }

 // CopyCur returns a copy of the current scanner cursor. See Cur.
 func (p *Scanner) CopyCur() *Cur {
-	if p.cur == nil {
+	if p.Cur == nil {
 		return nil
 	}
 	// force a copy
-	cp := *p.cur
+	cp := *p.Cur
 	return &cp
 }

 // Jump replaces the internal cursor with a copy of the one passed
 // effectively repositioning the scanner's current position in the
 // buffered data.
-func (p *Scanner) Jump(c *Cur) { nc := *c; p.cur = &nc }
+func (p *Scanner) Jump(c *Cur) { nc := *c; p.Cur = &nc }

 // Look returns a string containing all the bytes from the current
 // scanner cursor position up to the passed cursor position, forward or
 // backward. Neither the internal nor the passed cursor position is
 // changed.
 func (p *Scanner) Look(to *Cur) string {
-	if to.Byte < p.cur.Byte {
-		return string(p.buf[to.Byte:p.cur.Next])
+	if to.Byte < p.Cur.Byte {
+		return string(p.Buf[to.Byte:p.Cur.Next])
 	}
-	return string(p.buf[p.cur.Byte:to.Next])
+	return string(p.Buf[p.Cur.Byte:to.Next])
 }

 // LookSlice returns a string containing all the bytes from the first
 // cursor up to the second cursor. Neither cursor position is changed.
 func (p *Scanner) LookSlice(beg *Cur, end *Cur) string {
-	return string(p.buf[beg.Byte:end.Next])
+	return string(p.Buf[beg.Byte:end.Next])
 }

 // Expect takes a variable list of parsable types including rune,
 // string, Class, Check, Opt, Not, Seq, One, Min, MinMax, Count. This
 // allows grammars to be represented simply and parsed easily without
 // exceptional overhead from additional function calls and indirection.
-func (p *Scanner) Expect(ms ...interface{}) (*Cur, error) {
+func (p *Scanner) Expect(scannable ...interface{}) (*Cur, error) {
 	var beg, end *Cur
-	beg = p.Cur()
-	for _, m := range ms {
+	beg = p.Cur
+	for _, m := range scannable {
 		switch v := m.(type) {

 		case rune:
-			if p.cur.Rune != v {
+			if p.Cur.Rune != v {
 				err := p.ErrorExpected(m)
 				p.Jump(beg)
 				return nil, err
 			}
 			end = p.CopyCur()
-			p.Next()
+			p.Scan()

 		case string:
 			if v == "" {
 				return nil, fmt.Errorf("expect: cannot parse empty string")
 			}
 			for _, r := range []rune(v) {
-				if p.cur.Rune != r {
+				if p.Cur.Rune != r {
 					err := p.ErrorExpected(r)
 					p.Jump(beg)
 					return nil, err
 				}
 				end = p.CopyCur()
-				p.Next()
+				p.Scan()
 			}
 			/*
 				case Class:
-					if !v.Check(p.cur.Rune) {
+					if !v.Check(p.Cur.Rune) {
 						err := p.ErrorExpected(v)
 						p.Jump(beg)
 						return nil, err
 					}
 					end = p.CopyCur()
-					p.Next()
+					p.Scan()

 				case Check:
 					rv, err := v.Check(p)
@ -198,7 +196,7 @@ func (p *Scanner) Expect(ms ...interface{}) (*Cur, error) {
 					}
 					end = rv
 					p.Jump(rv)
-					p.Next()
+					p.Scan()

 				case is.Opt:
 					m, err := p.Expect(is.MinMax{v.This, 0, 1})
@ -305,10 +303,10 @@ func (p *Scanner) ErrorExpected(this interface{}) error {
 	but := fmt.Sprintf(` but got %v`, p)
 	if p.Done() {
 		runes := `runes`
-		if p.cur.Pos.Rune == 1 {
+		if p.Cur.Pos.Rune == 1 {
 			runes = `rune`
 		}
-		but = fmt.Sprintf(` but exceeded data length (%v %v)`, p.cur.Pos.Rune, runes)
+		but = fmt.Sprintf(` but exceeded data length (%v %v)`, p.Cur.Pos.Rune, runes)
 	}
 	// TODO add verbose errors for *all* types in Grammar
 	switch v := this.(type) {
@ -327,7 +325,7 @@ func (p *Scanner) ErrorExpected(this interface{}) error {
 }

 // NewLine delegates to internal Cur.NewLine.
-func (p *Scanner) NewLine() { p.cur.NewLine() }
+func (p *Scanner) NewLine() { p.Cur.NewLine() }

 func (p *Scanner) Check(ms ...interface{}) (*Cur, error) {
 	defer p.Jump(p.CopyCur())