// A modified version of Go's JSON implementation. // Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package json import ( "strconv" "unicode" "unicode/utf16" "unicode/utf8" "github.com/d5/tengo/v2" ) // Decode parses the JSON-encoded data and returns the result object. func Decode(data []byte) (tengo.Object, error) { var d decodeState err := checkValid(data, &d.scan) if err != nil { return nil, err } d.init(data) d.scan.reset() d.scanWhile(scanSkipSpace) return d.value() } // decodeState represents the state while decoding a JSON value. type decodeState struct { data []byte off int // next read offset in data opcode int // last read result scan scanner } // readIndex returns the position of the last byte read. func (d *decodeState) readIndex() int { return d.off - 1 } const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" func (d *decodeState) init(data []byte) *decodeState { d.data = data d.off = 0 return d } // scanNext processes the byte at d.data[d.off]. func (d *decodeState) scanNext() { if d.off < len(d.data) { d.opcode = d.scan.step(&d.scan, d.data[d.off]) d.off++ } else { d.opcode = d.scan.eof() d.off = len(d.data) + 1 // mark processed EOF with len+1 } } // scanWhile processes bytes in d.data[d.off:] until it // receives a scan code not equal to op. func (d *decodeState) scanWhile(op int) { s, data, i := &d.scan, d.data, d.off for i < len(data) { newOp := s.step(s, data[i]) i++ if newOp != op { d.opcode = newOp d.off = i return } } d.off = len(data) + 1 // mark processed EOF with len+1 d.opcode = d.scan.eof() } func (d *decodeState) value() (tengo.Object, error) { switch d.opcode { default: panic(phasePanicMsg) case scanBeginArray: o, err := d.array() if err != nil { return nil, err } d.scanNext() return o, nil case scanBeginObject: o, err := d.object() if err != nil { return nil, err } d.scanNext() return o, nil case scanBeginLiteral: return d.literal() } } func (d *decodeState) array() (tengo.Object, error) { var arr []tengo.Object for { // Look ahead for ] - can only happen on first iteration. d.scanWhile(scanSkipSpace) if d.opcode == scanEndArray { break } o, err := d.value() if err != nil { return nil, err } arr = append(arr, o) // Next token must be , or ]. if d.opcode == scanSkipSpace { d.scanWhile(scanSkipSpace) } if d.opcode == scanEndArray { break } if d.opcode != scanArrayValue { panic(phasePanicMsg) } } return &tengo.Array{Value: arr}, nil } func (d *decodeState) object() (tengo.Object, error) { m := make(map[string]tengo.Object) for { // Read opening " of string key or closing }. d.scanWhile(scanSkipSpace) if d.opcode == scanEndObject { // closing } - can only happen on first iteration. break } if d.opcode != scanBeginLiteral { panic(phasePanicMsg) } // Read string key. start := d.readIndex() d.scanWhile(scanContinue) item := d.data[start:d.readIndex()] key, ok := unquote(item) if !ok { panic(phasePanicMsg) } // Read : before value. if d.opcode == scanSkipSpace { d.scanWhile(scanSkipSpace) } if d.opcode != scanObjectKey { panic(phasePanicMsg) } d.scanWhile(scanSkipSpace) // Read value. o, err := d.value() if err != nil { return nil, err } m[key] = o // Next token must be , or }. if d.opcode == scanSkipSpace { d.scanWhile(scanSkipSpace) } if d.opcode == scanEndObject { break } if d.opcode != scanObjectValue { panic(phasePanicMsg) } } return &tengo.Map{Value: m}, nil } func (d *decodeState) literal() (tengo.Object, error) { // All bytes inside literal return scanContinue op code. start := d.readIndex() d.scanWhile(scanContinue) item := d.data[start:d.readIndex()] switch c := item[0]; c { case 'n': // null return tengo.UndefinedValue, nil case 't', 'f': // true, false if c == 't' { return tengo.TrueValue, nil } return tengo.FalseValue, nil case '"': // string s, ok := unquote(item) if !ok { panic(phasePanicMsg) } return &tengo.String{Value: s}, nil default: // number if c != '-' && (c < '0' || c > '9') { panic(phasePanicMsg) } n, _ := strconv.ParseFloat(string(item), 10) return &tengo.Float{Value: n}, nil } } // getu4 decodes \uXXXX from the beginning of s, returning the hex value, // or it returns -1. func getu4(s []byte) rune { if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { return -1 } var r rune for _, c := range s[2:6] { switch { case '0' <= c && c <= '9': c = c - '0' case 'a' <= c && c <= 'f': c = c - 'a' + 10 case 'A' <= c && c <= 'F': c = c - 'A' + 10 default: return -1 } r = r*16 + rune(c) } return r } // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. func unquote(s []byte) (t string, ok bool) { s, ok = unquoteBytes(s) t = string(s) return } func unquoteBytes(s []byte) (t []byte, ok bool) { if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] // Check for unusual characters. If there are none, then no unquoting is // needed, so return a slice of the original bytes. r := 0 for r < len(s) { c := s[r] if c == '\\' || c == '"' || c < ' ' { break } if c < utf8.RuneSelf { r++ continue } rr, size := utf8.DecodeRune(s[r:]) if rr == utf8.RuneError && size == 1 { break } r += size } if r == len(s) { return s, true } b := make([]byte, len(s)+2*utf8.UTFMax) w := copy(b, s[0:r]) for r < len(s) { // Out of room? Can only happen if s is full of // malformed UTF-8 and we're replacing each // byte with RuneError. if w >= len(b)-2*utf8.UTFMax { nb := make([]byte, (len(b)+utf8.UTFMax)*2) copy(nb, b[0:w]) b = nb } switch c := s[r]; { case c == '\\': r++ if r >= len(s) { return } switch s[r] { default: return case '"', '\\', '/', '\'': b[w] = s[r] r++ w++ case 'b': b[w] = '\b' r++ w++ case 'f': b[w] = '\f' r++ w++ case 'n': b[w] = '\n' r++ w++ case 'r': b[w] = '\r' r++ w++ case 't': b[w] = '\t' r++ w++ case 'u': r-- rr := getu4(s[r:]) if rr < 0 { return } r += 6 if utf16.IsSurrogate(rr) { rr1 := getu4(s[r:]) dec := utf16.DecodeRune(rr, rr1) if dec != unicode.ReplacementChar { // A valid pair; consume. r += 6 w += utf8.EncodeRune(b[w:], dec) break } // Invalid surrogate; fall back to replacement rune. rr = unicode.ReplacementChar } w += utf8.EncodeRune(b[w:], rr) } // Quote, control characters are invalid. case c == '"', c < ' ': return // ASCII case c < utf8.RuneSelf: b[w] = c r++ w++ // Coerce to well-formed UTF-8. default: rr, size := utf8.DecodeRune(s[r:]) r += size w += utf8.EncodeRune(b[w:], rr) } } return b[0:w], true }