867 lines
18 KiB
Go
867 lines
18 KiB
Go
|
package parser
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"regexp"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
|
||
|
"github.com/robertkrimen/otto/ast"
|
||
|
"github.com/robertkrimen/otto/file"
|
||
|
"github.com/robertkrimen/otto/token"
|
||
|
)
|
||
|
|
||
|
type _chr struct {
|
||
|
value rune
|
||
|
width int
|
||
|
}
|
||
|
|
||
|
var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
|
||
|
|
||
|
func isDecimalDigit(chr rune) bool {
|
||
|
return '0' <= chr && chr <= '9'
|
||
|
}
|
||
|
|
||
|
func digitValue(chr rune) int {
|
||
|
switch {
|
||
|
case '0' <= chr && chr <= '9':
|
||
|
return int(chr - '0')
|
||
|
case 'a' <= chr && chr <= 'f':
|
||
|
return int(chr - 'a' + 10)
|
||
|
case 'A' <= chr && chr <= 'F':
|
||
|
return int(chr - 'A' + 10)
|
||
|
}
|
||
|
return 16 // Larger than any legal digit value
|
||
|
}
|
||
|
|
||
|
func isDigit(chr rune, base int) bool {
|
||
|
return digitValue(chr) < base
|
||
|
}
|
||
|
|
||
|
func isIdentifierStart(chr rune) bool {
|
||
|
return chr == '$' || chr == '_' || chr == '\\' ||
|
||
|
'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
|
||
|
chr >= utf8.RuneSelf && unicode.IsLetter(chr)
|
||
|
}
|
||
|
|
||
|
func isIdentifierPart(chr rune) bool {
|
||
|
return chr == '$' || chr == '_' || chr == '\\' ||
|
||
|
'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
|
||
|
'0' <= chr && chr <= '9' ||
|
||
|
chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr))
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanIdentifier() (string, error) {
|
||
|
offset := self.chrOffset
|
||
|
parse := false
|
||
|
for isIdentifierPart(self.chr) {
|
||
|
if self.chr == '\\' {
|
||
|
distance := self.chrOffset - offset
|
||
|
self.read()
|
||
|
if self.chr != 'u' {
|
||
|
return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
|
||
|
}
|
||
|
parse = true
|
||
|
var value rune
|
||
|
for j := 0; j < 4; j++ {
|
||
|
self.read()
|
||
|
decimal, ok := hex2decimal(byte(self.chr))
|
||
|
if !ok {
|
||
|
return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
|
||
|
}
|
||
|
value = value<<4 | decimal
|
||
|
}
|
||
|
if value == '\\' {
|
||
|
return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
|
||
|
} else if distance == 0 {
|
||
|
if !isIdentifierStart(value) {
|
||
|
return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
|
||
|
}
|
||
|
} else if distance > 0 {
|
||
|
if !isIdentifierPart(value) {
|
||
|
return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
self.read()
|
||
|
}
|
||
|
literal := string(self.str[offset:self.chrOffset])
|
||
|
if parse {
|
||
|
return parseStringLiteral(literal)
|
||
|
}
|
||
|
return literal, nil
|
||
|
}
|
||
|
|
||
|
// 7.2
|
||
|
func isLineWhiteSpace(chr rune) bool {
|
||
|
switch chr {
|
||
|
case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
|
||
|
return true
|
||
|
case '\u000a', '\u000d', '\u2028', '\u2029':
|
||
|
return false
|
||
|
case '\u0085':
|
||
|
return false
|
||
|
}
|
||
|
return unicode.IsSpace(chr)
|
||
|
}
|
||
|
|
||
|
// 7.3
|
||
|
func isLineTerminator(chr rune) bool {
|
||
|
switch chr {
|
||
|
case '\u000a', '\u000d', '\u2028', '\u2029':
|
||
|
return true
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scan() (tkn token.Token, literal string, idx file.Idx) {
|
||
|
|
||
|
self.implicitSemicolon = false
|
||
|
|
||
|
for {
|
||
|
self.skipWhiteSpace()
|
||
|
|
||
|
idx = self.idxOf(self.chrOffset)
|
||
|
insertSemicolon := false
|
||
|
|
||
|
switch chr := self.chr; {
|
||
|
case isIdentifierStart(chr):
|
||
|
var err error
|
||
|
literal, err = self.scanIdentifier()
|
||
|
if err != nil {
|
||
|
tkn = token.ILLEGAL
|
||
|
break
|
||
|
}
|
||
|
if len(literal) > 1 {
|
||
|
// Keywords are longer than 1 character, avoid lookup otherwise
|
||
|
var strict bool
|
||
|
tkn, strict = token.IsKeyword(literal)
|
||
|
|
||
|
switch tkn {
|
||
|
|
||
|
case 0: // Not a keyword
|
||
|
if literal == "true" || literal == "false" {
|
||
|
self.insertSemicolon = true
|
||
|
tkn = token.BOOLEAN
|
||
|
return
|
||
|
} else if literal == "null" {
|
||
|
self.insertSemicolon = true
|
||
|
tkn = token.NULL
|
||
|
return
|
||
|
}
|
||
|
|
||
|
case token.KEYWORD:
|
||
|
tkn = token.KEYWORD
|
||
|
if strict {
|
||
|
// TODO If strict and in strict mode, then this is not a break
|
||
|
break
|
||
|
}
|
||
|
return
|
||
|
|
||
|
case
|
||
|
token.THIS,
|
||
|
token.BREAK,
|
||
|
token.THROW, // A newline after a throw is not allowed, but we need to detect it
|
||
|
token.RETURN,
|
||
|
token.CONTINUE,
|
||
|
token.DEBUGGER:
|
||
|
self.insertSemicolon = true
|
||
|
return
|
||
|
|
||
|
default:
|
||
|
return
|
||
|
|
||
|
}
|
||
|
}
|
||
|
self.insertSemicolon = true
|
||
|
tkn = token.IDENTIFIER
|
||
|
return
|
||
|
case '0' <= chr && chr <= '9':
|
||
|
self.insertSemicolon = true
|
||
|
tkn, literal = self.scanNumericLiteral(false)
|
||
|
return
|
||
|
default:
|
||
|
self.read()
|
||
|
switch chr {
|
||
|
case -1:
|
||
|
if self.insertSemicolon {
|
||
|
self.insertSemicolon = false
|
||
|
self.implicitSemicolon = true
|
||
|
}
|
||
|
tkn = token.EOF
|
||
|
case '\r', '\n', '\u2028', '\u2029':
|
||
|
self.insertSemicolon = false
|
||
|
self.implicitSemicolon = true
|
||
|
self.comments.AtLineBreak()
|
||
|
continue
|
||
|
case ':':
|
||
|
tkn = token.COLON
|
||
|
case '.':
|
||
|
if digitValue(self.chr) < 10 {
|
||
|
insertSemicolon = true
|
||
|
tkn, literal = self.scanNumericLiteral(true)
|
||
|
} else {
|
||
|
tkn = token.PERIOD
|
||
|
}
|
||
|
case ',':
|
||
|
tkn = token.COMMA
|
||
|
case ';':
|
||
|
tkn = token.SEMICOLON
|
||
|
case '(':
|
||
|
tkn = token.LEFT_PARENTHESIS
|
||
|
case ')':
|
||
|
tkn = token.RIGHT_PARENTHESIS
|
||
|
insertSemicolon = true
|
||
|
case '[':
|
||
|
tkn = token.LEFT_BRACKET
|
||
|
case ']':
|
||
|
tkn = token.RIGHT_BRACKET
|
||
|
insertSemicolon = true
|
||
|
case '{':
|
||
|
tkn = token.LEFT_BRACE
|
||
|
case '}':
|
||
|
tkn = token.RIGHT_BRACE
|
||
|
insertSemicolon = true
|
||
|
case '+':
|
||
|
tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
|
||
|
if tkn == token.INCREMENT {
|
||
|
insertSemicolon = true
|
||
|
}
|
||
|
case '-':
|
||
|
tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
|
||
|
if tkn == token.DECREMENT {
|
||
|
insertSemicolon = true
|
||
|
}
|
||
|
case '*':
|
||
|
tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
|
||
|
case '/':
|
||
|
if self.chr == '/' {
|
||
|
if self.mode&StoreComments != 0 {
|
||
|
literal := string(self.readSingleLineComment())
|
||
|
self.comments.AddComment(ast.NewComment(literal, self.idx))
|
||
|
continue
|
||
|
}
|
||
|
self.skipSingleLineComment()
|
||
|
continue
|
||
|
} else if self.chr == '*' {
|
||
|
if self.mode&StoreComments != 0 {
|
||
|
literal = string(self.readMultiLineComment())
|
||
|
self.comments.AddComment(ast.NewComment(literal, self.idx))
|
||
|
continue
|
||
|
}
|
||
|
self.skipMultiLineComment()
|
||
|
continue
|
||
|
} else {
|
||
|
// Could be division, could be RegExp literal
|
||
|
tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
|
||
|
insertSemicolon = true
|
||
|
}
|
||
|
case '%':
|
||
|
tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
|
||
|
case '^':
|
||
|
tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
|
||
|
case '<':
|
||
|
tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
|
||
|
case '>':
|
||
|
tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
|
||
|
case '=':
|
||
|
tkn = self.switch2(token.ASSIGN, token.EQUAL)
|
||
|
if tkn == token.EQUAL && self.chr == '=' {
|
||
|
self.read()
|
||
|
tkn = token.STRICT_EQUAL
|
||
|
}
|
||
|
case '!':
|
||
|
tkn = self.switch2(token.NOT, token.NOT_EQUAL)
|
||
|
if tkn == token.NOT_EQUAL && self.chr == '=' {
|
||
|
self.read()
|
||
|
tkn = token.STRICT_NOT_EQUAL
|
||
|
}
|
||
|
case '&':
|
||
|
if self.chr == '^' {
|
||
|
self.read()
|
||
|
tkn = self.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
|
||
|
} else {
|
||
|
tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
|
||
|
}
|
||
|
case '|':
|
||
|
tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
|
||
|
case '~':
|
||
|
tkn = token.BITWISE_NOT
|
||
|
case '?':
|
||
|
tkn = token.QUESTION_MARK
|
||
|
case '"', '\'':
|
||
|
insertSemicolon = true
|
||
|
tkn = token.STRING
|
||
|
var err error
|
||
|
literal, err = self.scanString(self.chrOffset - 1)
|
||
|
if err != nil {
|
||
|
tkn = token.ILLEGAL
|
||
|
}
|
||
|
default:
|
||
|
self.errorUnexpected(idx, chr)
|
||
|
tkn = token.ILLEGAL
|
||
|
}
|
||
|
}
|
||
|
self.insertSemicolon = insertSemicolon
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn1
|
||
|
}
|
||
|
return tkn0
|
||
|
}
|
||
|
|
||
|
func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn1
|
||
|
}
|
||
|
if self.chr == chr2 {
|
||
|
self.read()
|
||
|
return tkn2
|
||
|
}
|
||
|
return tkn0
|
||
|
}
|
||
|
|
||
|
func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn1
|
||
|
}
|
||
|
if self.chr == chr2 {
|
||
|
self.read()
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn3
|
||
|
}
|
||
|
return tkn2
|
||
|
}
|
||
|
return tkn0
|
||
|
}
|
||
|
|
||
|
func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn1
|
||
|
}
|
||
|
if self.chr == chr2 {
|
||
|
self.read()
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn3
|
||
|
}
|
||
|
if self.chr == chr3 {
|
||
|
self.read()
|
||
|
if self.chr == '=' {
|
||
|
self.read()
|
||
|
return tkn5
|
||
|
}
|
||
|
return tkn4
|
||
|
}
|
||
|
return tkn2
|
||
|
}
|
||
|
return tkn0
|
||
|
}
|
||
|
|
||
|
func (self *_parser) chrAt(index int) _chr {
|
||
|
value, width := utf8.DecodeRuneInString(self.str[index:])
|
||
|
return _chr{
|
||
|
value: value,
|
||
|
width: width,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) _peek() rune {
|
||
|
if self.offset+1 < self.length {
|
||
|
return rune(self.str[self.offset+1])
|
||
|
}
|
||
|
return -1
|
||
|
}
|
||
|
|
||
|
func (self *_parser) read() {
|
||
|
if self.offset < self.length {
|
||
|
self.chrOffset = self.offset
|
||
|
chr, width := rune(self.str[self.offset]), 1
|
||
|
if chr >= utf8.RuneSelf { // !ASCII
|
||
|
chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
|
||
|
if chr == utf8.RuneError && width == 1 {
|
||
|
self.error(self.chrOffset, "Invalid UTF-8 character")
|
||
|
}
|
||
|
}
|
||
|
self.offset += width
|
||
|
self.chr = chr
|
||
|
} else {
|
||
|
self.chrOffset = self.length
|
||
|
self.chr = -1 // EOF
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is here since the functions are so similar
|
||
|
func (self *_RegExp_parser) read() {
|
||
|
if self.offset < self.length {
|
||
|
self.chrOffset = self.offset
|
||
|
chr, width := rune(self.str[self.offset]), 1
|
||
|
if chr >= utf8.RuneSelf { // !ASCII
|
||
|
chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
|
||
|
if chr == utf8.RuneError && width == 1 {
|
||
|
self.error(self.chrOffset, "Invalid UTF-8 character")
|
||
|
}
|
||
|
}
|
||
|
self.offset += width
|
||
|
self.chr = chr
|
||
|
} else {
|
||
|
self.chrOffset = self.length
|
||
|
self.chr = -1 // EOF
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) readSingleLineComment() (result []rune) {
|
||
|
for self.chr != -1 {
|
||
|
self.read()
|
||
|
if isLineTerminator(self.chr) {
|
||
|
return
|
||
|
}
|
||
|
result = append(result, self.chr)
|
||
|
}
|
||
|
|
||
|
// Get rid of the trailing -1
|
||
|
result = result[:len(result)-1]
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (self *_parser) readMultiLineComment() (result []rune) {
|
||
|
self.read()
|
||
|
for self.chr >= 0 {
|
||
|
chr := self.chr
|
||
|
self.read()
|
||
|
if chr == '*' && self.chr == '/' {
|
||
|
self.read()
|
||
|
return
|
||
|
}
|
||
|
|
||
|
result = append(result, chr)
|
||
|
}
|
||
|
|
||
|
self.errorUnexpected(0, self.chr)
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (self *_parser) skipSingleLineComment() {
|
||
|
for self.chr != -1 {
|
||
|
self.read()
|
||
|
if isLineTerminator(self.chr) {
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) skipMultiLineComment() {
|
||
|
self.read()
|
||
|
for self.chr >= 0 {
|
||
|
chr := self.chr
|
||
|
self.read()
|
||
|
if chr == '*' && self.chr == '/' {
|
||
|
self.read()
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
self.errorUnexpected(0, self.chr)
|
||
|
}
|
||
|
|
||
|
func (self *_parser) skipWhiteSpace() {
|
||
|
for {
|
||
|
switch self.chr {
|
||
|
case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
|
||
|
self.read()
|
||
|
continue
|
||
|
case '\r':
|
||
|
if self._peek() == '\n' {
|
||
|
self.comments.AtLineBreak()
|
||
|
self.read()
|
||
|
}
|
||
|
fallthrough
|
||
|
case '\u2028', '\u2029', '\n':
|
||
|
if self.insertSemicolon {
|
||
|
return
|
||
|
}
|
||
|
self.comments.AtLineBreak()
|
||
|
self.read()
|
||
|
continue
|
||
|
}
|
||
|
if self.chr >= utf8.RuneSelf {
|
||
|
if unicode.IsSpace(self.chr) {
|
||
|
self.read()
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) skipLineWhiteSpace() {
|
||
|
for isLineWhiteSpace(self.chr) {
|
||
|
self.read()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanMantissa(base int) {
|
||
|
for digitValue(self.chr) < base {
|
||
|
self.read()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanEscape(quote rune) {
|
||
|
|
||
|
var length, base uint32
|
||
|
switch self.chr {
|
||
|
//case '0', '1', '2', '3', '4', '5', '6', '7':
|
||
|
// Octal:
|
||
|
// length, base, limit = 3, 8, 255
|
||
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0':
|
||
|
self.read()
|
||
|
return
|
||
|
case '\r', '\n', '\u2028', '\u2029':
|
||
|
self.scanNewline()
|
||
|
return
|
||
|
case 'x':
|
||
|
self.read()
|
||
|
length, base = 2, 16
|
||
|
case 'u':
|
||
|
self.read()
|
||
|
length, base = 4, 16
|
||
|
default:
|
||
|
self.read() // Always make progress
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var value uint32
|
||
|
for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
|
||
|
digit := uint32(digitValue(self.chr))
|
||
|
if digit >= base {
|
||
|
break
|
||
|
}
|
||
|
value = value*base + digit
|
||
|
self.read()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanString(offset int) (string, error) {
|
||
|
// " ' /
|
||
|
quote := rune(self.str[offset])
|
||
|
|
||
|
for self.chr != quote {
|
||
|
chr := self.chr
|
||
|
if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
|
||
|
goto newline
|
||
|
}
|
||
|
self.read()
|
||
|
if chr == '\\' {
|
||
|
if quote == '/' {
|
||
|
if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
|
||
|
goto newline
|
||
|
}
|
||
|
self.read()
|
||
|
} else {
|
||
|
self.scanEscape(quote)
|
||
|
}
|
||
|
} else if chr == '[' && quote == '/' {
|
||
|
// Allow a slash (/) in a bracket character class ([...])
|
||
|
// TODO Fix this, this is hacky...
|
||
|
quote = -1
|
||
|
} else if chr == ']' && quote == -1 {
|
||
|
quote = '/'
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// " ' /
|
||
|
self.read()
|
||
|
|
||
|
return string(self.str[offset:self.chrOffset]), nil
|
||
|
|
||
|
newline:
|
||
|
self.scanNewline()
|
||
|
err := "String not terminated"
|
||
|
if quote == '/' {
|
||
|
err = "Invalid regular expression: missing /"
|
||
|
self.error(self.idxOf(offset), err)
|
||
|
}
|
||
|
return "", errors.New(err)
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanNewline() {
|
||
|
if self.chr == '\r' {
|
||
|
self.read()
|
||
|
if self.chr != '\n' {
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
self.read()
|
||
|
}
|
||
|
|
||
|
func hex2decimal(chr byte) (value rune, ok bool) {
|
||
|
{
|
||
|
chr := rune(chr)
|
||
|
switch {
|
||
|
case '0' <= chr && chr <= '9':
|
||
|
return chr - '0', true
|
||
|
case 'a' <= chr && chr <= 'f':
|
||
|
return chr - 'a' + 10, true
|
||
|
case 'A' <= chr && chr <= 'F':
|
||
|
return chr - 'A' + 10, true
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func parseNumberLiteral(literal string) (value interface{}, err error) {
|
||
|
// TODO Is Uint okay? What about -MAX_UINT
|
||
|
value, err = strconv.ParseInt(literal, 0, 64)
|
||
|
if err == nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
parseIntErr := err // Save this first error, just in case
|
||
|
|
||
|
value, err = strconv.ParseFloat(literal, 64)
|
||
|
if err == nil {
|
||
|
return
|
||
|
} else if err.(*strconv.NumError).Err == strconv.ErrRange {
|
||
|
// Infinity, etc.
|
||
|
return value, nil
|
||
|
}
|
||
|
|
||
|
err = parseIntErr
|
||
|
|
||
|
if err.(*strconv.NumError).Err == strconv.ErrRange {
|
||
|
if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
|
||
|
// Could just be a very large number (e.g. 0x8000000000000000)
|
||
|
var value float64
|
||
|
literal = literal[2:]
|
||
|
for _, chr := range literal {
|
||
|
digit := digitValue(chr)
|
||
|
if digit >= 16 {
|
||
|
goto error
|
||
|
}
|
||
|
value = value*16 + float64(digit)
|
||
|
}
|
||
|
return value, nil
|
||
|
}
|
||
|
}
|
||
|
|
||
|
error:
|
||
|
return nil, errors.New("Illegal numeric literal")
|
||
|
}
|
||
|
|
||
|
func parseStringLiteral(literal string) (string, error) {
|
||
|
// Best case scenario...
|
||
|
if literal == "" {
|
||
|
return "", nil
|
||
|
}
|
||
|
|
||
|
// Slightly less-best case scenario...
|
||
|
if !strings.ContainsRune(literal, '\\') {
|
||
|
return literal, nil
|
||
|
}
|
||
|
|
||
|
str := literal
|
||
|
buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2))
|
||
|
|
||
|
for len(str) > 0 {
|
||
|
switch chr := str[0]; {
|
||
|
// We do not explicitly handle the case of the quote
|
||
|
// value, which can be: " ' /
|
||
|
// This assumes we're already passed a partially well-formed literal
|
||
|
case chr >= utf8.RuneSelf:
|
||
|
chr, size := utf8.DecodeRuneInString(str)
|
||
|
buffer.WriteRune(chr)
|
||
|
str = str[size:]
|
||
|
continue
|
||
|
case chr != '\\':
|
||
|
buffer.WriteByte(chr)
|
||
|
str = str[1:]
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if len(str) <= 1 {
|
||
|
panic("len(str) <= 1")
|
||
|
}
|
||
|
chr := str[1]
|
||
|
var value rune
|
||
|
if chr >= utf8.RuneSelf {
|
||
|
str = str[1:]
|
||
|
var size int
|
||
|
value, size = utf8.DecodeRuneInString(str)
|
||
|
str = str[size:] // \ + <character>
|
||
|
} else {
|
||
|
str = str[2:] // \<character>
|
||
|
switch chr {
|
||
|
case 'b':
|
||
|
value = '\b'
|
||
|
case 'f':
|
||
|
value = '\f'
|
||
|
case 'n':
|
||
|
value = '\n'
|
||
|
case 'r':
|
||
|
value = '\r'
|
||
|
case 't':
|
||
|
value = '\t'
|
||
|
case 'v':
|
||
|
value = '\v'
|
||
|
case 'x', 'u':
|
||
|
size := 0
|
||
|
switch chr {
|
||
|
case 'x':
|
||
|
size = 2
|
||
|
case 'u':
|
||
|
size = 4
|
||
|
}
|
||
|
if len(str) < size {
|
||
|
return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
|
||
|
}
|
||
|
for j := 0; j < size; j++ {
|
||
|
decimal, ok := hex2decimal(str[j])
|
||
|
if !ok {
|
||
|
return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
|
||
|
}
|
||
|
value = value<<4 | decimal
|
||
|
}
|
||
|
str = str[size:]
|
||
|
if chr == 'x' {
|
||
|
break
|
||
|
}
|
||
|
if value > utf8.MaxRune {
|
||
|
panic("value > utf8.MaxRune")
|
||
|
}
|
||
|
case '0':
|
||
|
if len(str) == 0 || '0' > str[0] || str[0] > '7' {
|
||
|
value = 0
|
||
|
break
|
||
|
}
|
||
|
fallthrough
|
||
|
case '1', '2', '3', '4', '5', '6', '7':
|
||
|
// TODO strict
|
||
|
value = rune(chr) - '0'
|
||
|
j := 0
|
||
|
for ; j < 2; j++ {
|
||
|
if len(str) < j+1 {
|
||
|
break
|
||
|
}
|
||
|
chr := str[j]
|
||
|
if '0' > chr || chr > '7' {
|
||
|
break
|
||
|
}
|
||
|
decimal := rune(str[j]) - '0'
|
||
|
value = (value << 3) | decimal
|
||
|
}
|
||
|
str = str[j:]
|
||
|
case '\\':
|
||
|
value = '\\'
|
||
|
case '\'', '"':
|
||
|
value = rune(chr)
|
||
|
case '\r':
|
||
|
if len(str) > 0 {
|
||
|
if str[0] == '\n' {
|
||
|
str = str[1:]
|
||
|
}
|
||
|
}
|
||
|
fallthrough
|
||
|
case '\n':
|
||
|
continue
|
||
|
default:
|
||
|
value = rune(chr)
|
||
|
}
|
||
|
}
|
||
|
buffer.WriteRune(value)
|
||
|
}
|
||
|
|
||
|
return buffer.String(), nil
|
||
|
}
|
||
|
|
||
|
func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
|
||
|
|
||
|
offset := self.chrOffset
|
||
|
tkn := token.NUMBER
|
||
|
|
||
|
if decimalPoint {
|
||
|
offset--
|
||
|
self.scanMantissa(10)
|
||
|
goto exponent
|
||
|
}
|
||
|
|
||
|
if self.chr == '0' {
|
||
|
offset := self.chrOffset
|
||
|
self.read()
|
||
|
if self.chr == 'x' || self.chr == 'X' {
|
||
|
// Hexadecimal
|
||
|
self.read()
|
||
|
if isDigit(self.chr, 16) {
|
||
|
self.read()
|
||
|
} else {
|
||
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
||
|
}
|
||
|
self.scanMantissa(16)
|
||
|
|
||
|
if self.chrOffset-offset <= 2 {
|
||
|
// Only "0x" or "0X"
|
||
|
self.error(0, "Illegal hexadecimal number")
|
||
|
}
|
||
|
|
||
|
goto hexadecimal
|
||
|
} else if self.chr == '.' {
|
||
|
// Float
|
||
|
goto float
|
||
|
} else {
|
||
|
// Octal, Float
|
||
|
if self.chr == 'e' || self.chr == 'E' {
|
||
|
goto exponent
|
||
|
}
|
||
|
self.scanMantissa(8)
|
||
|
if self.chr == '8' || self.chr == '9' {
|
||
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
||
|
}
|
||
|
goto octal
|
||
|
}
|
||
|
}
|
||
|
|
||
|
self.scanMantissa(10)
|
||
|
|
||
|
float:
|
||
|
if self.chr == '.' {
|
||
|
self.read()
|
||
|
self.scanMantissa(10)
|
||
|
}
|
||
|
|
||
|
exponent:
|
||
|
if self.chr == 'e' || self.chr == 'E' {
|
||
|
self.read()
|
||
|
if self.chr == '-' || self.chr == '+' {
|
||
|
self.read()
|
||
|
}
|
||
|
if isDecimalDigit(self.chr) {
|
||
|
self.read()
|
||
|
self.scanMantissa(10)
|
||
|
} else {
|
||
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
||
|
}
|
||
|
}
|
||
|
|
||
|
hexadecimal:
|
||
|
octal:
|
||
|
if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
|
||
|
return token.ILLEGAL, self.str[offset:self.chrOffset]
|
||
|
}
|
||
|
|
||
|
return tkn, self.str[offset:self.chrOffset]
|
||
|
}
|