bee/vendor/github.com/cosiner/argv/scanner.go

283 lines
5.7 KiB
Go

package argv
import "unicode"
// Scanner is a cmdline string scanner.
//
// It split cmdline string to tokens: space, string, pipe, reverse quote string.
type Scanner struct {
env map[string]string
text []rune
rpos int
dollarBuf []rune
}
// NewScanner create a scanner and init it's internal states.
func NewScanner(text []rune, env map[string]string) *Scanner {
return &Scanner{
text: text,
env: env,
}
}
func (s *Scanner) envs() map[string]string {
return s.env
}
const _RuneEOF = 0
func (s *Scanner) nextRune() rune {
if s.rpos >= len(s.text) {
return _RuneEOF
}
r := s.text[s.rpos]
s.rpos++
return r
}
func (s *Scanner) unreadRune(r rune) {
if r != _RuneEOF {
s.rpos--
}
}
func (s *Scanner) isEscapeChars(r rune) (rune, bool) {
switch r {
case 'a':
return '\a', true
case 'b':
return '\b', true
case 'f':
return '\f', true
case 'n':
return '\n', true
case 'r':
return '\r', true
case 't':
return '\t', true
case 'v':
return '\v', true
case '\\':
return '\\', true
case '$':
return '$', true
}
return r, false
}
func (s *Scanner) endEnv(r rune) bool {
if r == '_' || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') {
return false
}
return true
}
// TokenType is the type of tokens recognized by the scanner.
type TokenType uint32
// Token is generated by the scanner with a type and value.
type Token struct {
Type TokenType
Value []rune
}
const (
// TokString for string, single quoted string and double quoted string
TokString TokenType = iota + 1
// TokPipe is the '|' character
TokPipe
// TokReversequote is reverse quoted string
TokReversequote
// TokSpace represent space character sequence
TokSpace
// TokEOF means the input end.
TokEOF
)
func (s *Scanner) getEnv(name string) string {
return s.env[name]
}
func (s *Scanner) specialVar(r rune) (string, bool) {
switch r {
case '0', '*', '#', '@', '?', '$':
v, has := s.env[string(r)]
return v, has
default:
return "", false
}
}
func (s *Scanner) checkDollarStart(tok *Token, r rune, from, switchTo uint8) uint8 {
state := from
nr := s.nextRune()
if val, has := s.specialVar(nr); has {
if val != "" {
tok.Value = append(tok.Value, []rune(val)...)
}
} else if s.endEnv(nr) {
tok.Value = append(tok.Value, r)
s.unreadRune(nr)
} else {
state = switchTo
s.dollarBuf = append(s.dollarBuf[:0], nr)
}
return state
}
func (s *Scanner) checkDollarEnd(tok *Token, r rune, from, switchTo uint8) uint8 {
var state = from
if s.endEnv(r) {
tok.Value = append(tok.Value, []rune(s.getEnv(string(s.dollarBuf)))...)
state = switchTo
s.unreadRune(r)
} else {
s.dollarBuf = append(s.dollarBuf, r)
}
return state
}
// Next return next token, if it reach the end, TOK_EOF will be returned.
//
// Error is returned for invalid syntax such as unpaired quotes.
func (s *Scanner) Next() (Token, error) {
const (
Initial = iota + 1
Space
ReverseQuote
String
StringDollar
StringQuoteSingle
StringQuoteDouble
StringQuoteDoubleDollar
)
var (
tok Token
state uint8 = Initial
)
s.dollarBuf = s.dollarBuf[:0]
for {
r := s.nextRune()
switch state {
case Initial:
switch {
case r == _RuneEOF:
tok.Type = TokEOF
return tok, nil
case r == '|':
tok.Type = TokPipe
return tok, nil
case r == '`':
state = ReverseQuote
case unicode.IsSpace(r):
state = Space
s.unreadRune(r)
default:
state = String
s.unreadRune(r)
}
case Space:
if r == _RuneEOF || !unicode.IsSpace(r) {
s.unreadRune(r)
tok.Type = TokSpace
return tok, nil
}
case ReverseQuote:
switch r {
case _RuneEOF:
return tok, ErrInvalidSyntax
case '`':
tok.Type = TokReversequote
return tok, nil
default:
tok.Value = append(tok.Value, r)
}
case String:
switch {
case r == _RuneEOF || r == '|' || r == '`' || unicode.IsSpace(r):
tok.Type = TokString
s.unreadRune(r)
return tok, nil
case r == '\'':
state = StringQuoteSingle
case r == '"':
state = StringQuoteDouble
case r == '\\':
nr := s.nextRune()
if nr == _RuneEOF {
return tok, ErrInvalidSyntax
}
tok.Value = append(tok.Value, nr)
case r == '$':
state = s.checkDollarStart(&tok, r, state, StringDollar)
default:
tok.Value = append(tok.Value, r)
}
case StringDollar:
state = s.checkDollarEnd(&tok, r, state, String)
case StringQuoteSingle:
switch r {
case _RuneEOF:
return tok, ErrInvalidSyntax
case '\'':
state = String
case '\\':
nr := s.nextRune()
if escape, ok := s.isEscapeChars(nr); ok {
tok.Value = append(tok.Value, escape)
} else {
tok.Value = append(tok.Value, r)
s.unreadRune(nr)
}
default:
tok.Value = append(tok.Value, r)
}
case StringQuoteDouble:
switch r {
case _RuneEOF:
return tok, ErrInvalidSyntax
case '"':
state = String
case '\\':
nr := s.nextRune()
if nr == _RuneEOF {
return tok, ErrInvalidSyntax
}
if escape, ok := s.isEscapeChars(nr); ok {
tok.Value = append(tok.Value, escape)
} else {
tok.Value = append(tok.Value, r)
s.unreadRune(nr)
}
case '$':
state = s.checkDollarStart(&tok, r, state, StringQuoteDoubleDollar)
default:
tok.Value = append(tok.Value, r)
}
case StringQuoteDoubleDollar:
state = s.checkDollarEnd(&tok, r, state, StringQuoteDouble)
}
}
}
// Scan is a utility function help split input text as tokens.
func Scan(text []rune, env map[string]string) ([]Token, error) {
s := NewScanner(text, env)
var tokens []Token
for {
tok, err := s.Next()
if err != nil {
return nil, err
}
tokens = append(tokens, tok)
if tok.Type == TokEOF {
break
}
}
return tokens, nil
}