Beego/vendor/github.com/Knetic/govaluate/parsing.go

451 lines
9.1 KiB
Go

package govaluate
import (
"bytes"
"errors"
"fmt"
"regexp"
"strconv"
"time"
"unicode"
)
func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) {
var ret []ExpressionToken
var token ExpressionToken
var stream *lexerStream
var state lexerState
var err error
var found bool
stream = newLexerStream(expression)
state = validLexerStates[0]
for stream.canRead() {
token, err, found = readToken(stream, state, functions)
if err != nil {
return ret, err
}
if !found {
break
}
state, err = getLexerStateForToken(token.Kind)
if err != nil {
return ret, err
}
// append this valid token
ret = append(ret, token)
}
err = checkBalance(ret)
if err != nil {
return nil, err
}
return ret, nil
}
func readToken(stream *lexerStream, state lexerState, functions map[string]ExpressionFunction) (ExpressionToken, error, bool) {
var function ExpressionFunction
var ret ExpressionToken
var tokenValue interface{}
var tokenTime time.Time
var tokenString string
var kind TokenKind
var character rune
var found bool
var completed bool
var err error
// numeric is 0-9, or .
// string starts with '
// variable is alphanumeric, always starts with a letter
// bracket always means variable
// symbols are anything non-alphanumeric
// all others read into a buffer until they reach the end of the stream
for stream.canRead() {
character = stream.readCharacter()
if unicode.IsSpace(character) {
continue
}
kind = UNKNOWN
// numeric constant
if isNumeric(character) {
tokenString = readTokenUntilFalse(stream, isNumeric)
tokenValue, err = strconv.ParseFloat(tokenString, 64)
if err != nil {
errorMsg := fmt.Sprintf("Unable to parse numeric value '%v' to float64\n", tokenString)
return ExpressionToken{}, errors.New(errorMsg), false
}
kind = NUMERIC
break
}
// comma, separator
if character == ',' {
tokenValue = ","
kind = SEPARATOR
break
}
// escaped variable
if character == '[' {
tokenValue, completed = readUntilFalse(stream, true, false, true, isNotClosingBracket)
kind = VARIABLE
if !completed {
return ExpressionToken{}, errors.New("Unclosed parameter bracket"), false
}
// above method normally rewinds us to the closing bracket, which we want to skip.
stream.rewind(-1)
break
}
// regular variable - or function?
if unicode.IsLetter(character) {
tokenString = readTokenUntilFalse(stream, isVariableName)
tokenValue = tokenString
kind = VARIABLE
// boolean?
if tokenValue == "true" {
kind = BOOLEAN
tokenValue = true
} else {
if tokenValue == "false" {
kind = BOOLEAN
tokenValue = false
}
}
// textual operator?
if tokenValue == "in" || tokenValue == "IN" {
// force lower case for consistency
tokenValue = "in"
kind = COMPARATOR
}
// function?
function, found = functions[tokenString]
if found {
kind = FUNCTION
tokenValue = function
}
break
}
if !isNotQuote(character) {
tokenValue, completed = readUntilFalse(stream, true, false, true, isNotQuote)
if !completed {
return ExpressionToken{}, errors.New("Unclosed string literal"), false
}
// advance the stream one position, since reading until false assumes the terminator is a real token
stream.rewind(-1)
// check to see if this can be parsed as a time.
tokenTime, found = tryParseTime(tokenValue.(string))
if found {
kind = TIME
tokenValue = tokenTime
} else {
kind = STRING
}
break
}
if character == '(' {
tokenValue = character
kind = CLAUSE
break
}
if character == ')' {
tokenValue = character
kind = CLAUSE_CLOSE
break
}
// must be a known symbol
tokenString = readTokenUntilFalse(stream, isNotAlphanumeric)
tokenValue = tokenString
// quick hack for the case where "-" can mean "prefixed negation" or "minus", which are used
// very differently.
if state.canTransitionTo(PREFIX) {
_, found = prefixSymbols[tokenString]
if found {
kind = PREFIX
break
}
}
_, found = modifierSymbols[tokenString]
if found {
kind = MODIFIER
break
}
_, found = logicalSymbols[tokenString]
if found {
kind = LOGICALOP
break
}
_, found = comparatorSymbols[tokenString]
if found {
kind = COMPARATOR
break
}
_, found = ternarySymbols[tokenString]
if found {
kind = TERNARY
break
}
errorMessage := fmt.Sprintf("Invalid token: '%s'", tokenString)
return ret, errors.New(errorMessage), false
}
ret.Kind = kind
ret.Value = tokenValue
return ret, nil, (kind != UNKNOWN)
}
func readTokenUntilFalse(stream *lexerStream, condition func(rune) bool) string {
var ret string
stream.rewind(1)
ret, _ = readUntilFalse(stream, false, true, true, condition)
return ret
}
/*
Returns the string that was read until the given [condition] was false, or whitespace was broken.
Returns false if the stream ended before whitespace was broken or condition was met.
*/
func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
var tokenBuffer bytes.Buffer
var character rune
var conditioned bool
conditioned = false
for stream.canRead() {
character = stream.readCharacter()
// Use backslashes to escape anything
if allowEscaping && character == '\\' {
character = stream.readCharacter()
tokenBuffer.WriteString(string(character))
continue
}
if unicode.IsSpace(character) {
if breakWhitespace && tokenBuffer.Len() > 0 {
conditioned = true
break
}
if !includeWhitespace {
continue
}
}
if condition(character) {
tokenBuffer.WriteString(string(character))
} else {
conditioned = true
stream.rewind(1)
break
}
}
return tokenBuffer.String(), conditioned
}
/*
Checks to see if any optimizations can be performed on the given [tokens], which form a complete, valid expression.
The returns slice will represent the optimized (or unmodified) list of tokens to use.
*/
func optimizeTokens(tokens []ExpressionToken) ([]ExpressionToken, error) {
var token ExpressionToken
var symbol OperatorSymbol
var err error
var index int
for index, token = range tokens {
// if we find a regex operator, and the right-hand value is a constant, precompile and replace with a pattern.
if token.Kind != COMPARATOR {
continue
}
symbol = comparatorSymbols[token.Value.(string)]
if symbol != REQ && symbol != NREQ {
continue
}
index++
token = tokens[index]
if token.Kind == STRING {
token.Kind = PATTERN
token.Value, err = regexp.Compile(token.Value.(string))
if err != nil {
return tokens, err
}
tokens[index] = token
}
}
return tokens, nil
}
/*
Checks the balance of tokens which have multiple parts, such as parenthesis.
*/
func checkBalance(tokens []ExpressionToken) error {
var stream *tokenStream
var token ExpressionToken
var parens int
stream = newTokenStream(tokens)
for stream.hasNext() {
token = stream.next()
if token.Kind == CLAUSE {
parens++
continue
}
if token.Kind == CLAUSE_CLOSE {
parens--
continue
}
}
if parens != 0 {
return errors.New("Unbalanced parenthesis")
}
return nil
}
func isNumeric(character rune) bool {
return unicode.IsDigit(character) || character == '.'
}
func isNotQuote(character rune) bool {
return character != '\'' && character != '"'
}
func isNotAlphanumeric(character rune) bool {
return !(unicode.IsDigit(character) ||
unicode.IsLetter(character) ||
character == '(' ||
character == ')' ||
!isNotQuote(character))
}
func isVariableName(character rune) bool {
return unicode.IsLetter(character) ||
unicode.IsDigit(character) ||
character == '_'
}
func isNotClosingBracket(character rune) bool {
return character != ']'
}
/*
Attempts to parse the [candidate] as a Time.
Tries a series of standardized date formats, returns the Time if one applies,
otherwise returns false through the second return.
*/
func tryParseTime(candidate string) (time.Time, bool) {
var ret time.Time
var found bool
timeFormats := [...]string{
time.ANSIC,
time.UnixDate,
time.RubyDate,
time.Kitchen,
time.RFC3339,
time.RFC3339Nano,
"2006-01-02", // RFC 3339
"2006-01-02 15:04", // RFC 3339 with minutes
"2006-01-02 15:04:05", // RFC 3339 with seconds
"2006-01-02 15:04:05-07:00", // RFC 3339 with seconds and timezone
"2006-01-02T15Z0700", // ISO8601 with hour
"2006-01-02T15:04Z0700", // ISO8601 with minutes
"2006-01-02T15:04:05Z0700", // ISO8601 with seconds
"2006-01-02T15:04:05.999999999Z0700", // ISO8601 with nanoseconds
}
for _, format := range timeFormats {
ret, found = tryParseExactTime(candidate, format)
if found {
return ret, true
}
}
return time.Now(), false
}
func tryParseExactTime(candidate string, format string) (time.Time, bool) {
var ret time.Time
var err error
ret, err = time.ParseInLocation(format, candidate, time.Local)
if err != nil {
return time.Now(), false
}
return ret, true
}