Files
services/search/handler/lexer.go
2022-01-07 09:58:10 +00:00

288 lines
5.4 KiB
Go

package handler
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// itemType identifies the kind of token carried by an item.
type itemType int
// Token kinds produced by the lexer. itemError is the zero value of
// itemType.
const (
itemError itemType = iota // lexing failed; val holds the formatted message (see errorf)
itemNumber // numeric literal scanned by lexNumber
itemIdentifier // name on the left-hand side of a statement, scanned by lexIdent
itemBoolean // the literal true or false
itemBooleanOp // and, AND, or, OR joining two statements
itemString // quoted string contents, without the surrounding quotes
itemOperator // comparison operator: ==, >= or <=
itemLeftParen // "("
itemRightParen // ")"
)
// eof is the sentinel returned by next once the input is exhausted. It is
// negative so it can never collide with a decoded rune.
const eof = -1
// item is a single token sent on the lexer's items channel.
type item struct {
typ itemType // kind of token
val string // token text; for itemError, the formatted error message
}
// stateFn is one state of the lexer: it scans from the lexer's current
// position and returns the next state, or nil to make run stop.
type stateFn func(*lexer) stateFn
// run drives the state machine: it starts in lexStartStatement and keeps
// invoking whatever state the previous one returned until a state
// returns nil. The items channel is closed afterwards so receivers can
// detect the end of the token stream.
func (l *lexer) run() {
	var state stateFn = lexStartStatement
	for state != nil {
		state = state(l)
	}
	close(l.items)
}
// lexer holds the scanning state shared by all state functions.
// start, pos and width are byte offsets/lengths into input, not rune counts.
type lexer struct {
name string // label supplied to lex, intended for error reports; NOTE(review): not read by any function visible in this file
input string // the string being scanned
start int // byte offset where the pending (not yet emitted) item begins
pos int // byte offset of the next rune to read
width int // byte width of the last rune returned by next (0 after eof)
items chan item // channel on which scanned items are delivered; closed by run
}
// lex creates a lexer for input and starts scanning it in a new goroutine.
//
// It returns the lexer together with the channel on which tokens arrive.
// The channel is unbuffered, so the scanning goroutine blocks on every
// item until it is received; the channel is closed once scanning stops.
func lex(name, input string) (*lexer, chan item) {
	items := make(chan item)
	l := &lexer{name: name, input: input, items: items}
	go l.run()
	return l, items
}
// emit sends the pending text input[start:pos] to the items channel as a
// token of kind t, then moves start up to pos so the next token begins
// where this one ended.
func (l *lexer) emit(t itemType) {
	tok := item{typ: t, val: l.input[l.start:l.pos]}
	l.items <- tok
	l.start = l.pos
}
// errorf formats a message fmt.Sprintf-style, sends it to the items
// channel as an itemError token, and returns nil so that the caller can
// `return l.errorf(...)` to terminate the run loop.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	msg := fmt.Sprintf(format, args...)
	l.items <- item{typ: itemError, val: msg}
	return nil
}
// consumeSpace skips any run of whitespace at the current position and
// discards it, leaving start == pos at the first non-space rune (or at
// the end of input).
//
// Whitespace is defined by unicode.IsSpace, the same predicate the state
// functions use to decide where a token ends. Skipping only ' ' here
// would leave a tab or newline unconsumed after a token, which the
// following state would then report as an unexpected operator or treat
// as the end of the statement.
func (l *lexer) consumeSpace() {
	for {
		r := l.next()
		if r == eof || !unicode.IsSpace(r) {
			break
		}
	}
	// Un-read the rune that ended the run (a no-op at eof, where width is 0).
	l.backup()
	l.ignore()
}
// next consumes and returns the next rune of the input, recording its
// byte width so that backup can undo the read. At the end of the input
// it returns eof and sets width to 0.
func (l *lexer) next() rune {
	if l.pos == len(l.input) {
		l.width = 0
		return eof
	}
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return r
}
// ignore discards the pending input between start and pos by moving
// start up to pos, so that text is left out of the next emitted item.
func (l *lexer) ignore() {
l.start = l.pos
}
// backup steps back one rune by subtracting the width recorded by the
// most recent call to next.
// It can be called only once per call of next: width is not cleared, so
// a second backup would move pos back again by the same amount.
// After next has returned eof the width is 0 and backup is a no-op.
func (l *lexer) backup() {
l.pos -= l.width
}
// peek reports the rune that the next call to next would return, without
// consuming it. At the end of the input it reports eof.
func (l *lexer) peek() rune {
	defer l.backup()
	return l.next()
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did. When the rune is not in valid (or the input is
// exhausted) the position is left unchanged.
func (l *lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}
	return true
}
// acceptRun consumes runes for as long as they belong to valid, stopping
// in front of the first rune that does not (or at the end of the input).
func (l *lexer) acceptRun(valid string) {
	for {
		if r := l.next(); !strings.ContainsRune(valid, r) {
			break
		}
	}
	// Un-read the rune that ended the run; a no-op after eof.
	l.backup()
}
// lexIdent scans the identifier on the left-hand side of a statement.
//
// After skipping leading spaces it reads runes until it meets whitespace
// or one of the operator characters '=', '>' or '<', emits the text read
// as itemIdentifier (nothing is emitted when it is empty) and continues
// with lexOperator. Running out of input before an operator character or
// whitespace is reported as an error.
func lexIdent(l *lexer) stateFn {
	l.consumeSpace()

scan:
	for {
		switch r := l.next(); {
		case r == eof:
			return l.errorf("Unexpected end of input %q", l.input[l.start:])
		case unicode.IsSpace(r), strings.ContainsRune("=><", r):
			// The delimiter belongs to the next token.
			l.backup()
			break scan
		}
	}

	if l.start < l.pos {
		l.emit(itemIdentifier)
	}
	return lexOperator(l)
}
// lexValue decides how to scan the right-hand side of a statement.
//
// After skipping leading spaces it dispatches on the upcoming text: a
// single or double quote goes to lexString, text beginning with "true"
// or "false" goes to lexBool, and everything else is tried as a number.
// Nothing is consumed here beyond the leading spaces.
func lexValue(l *lexer) stateFn {
	l.consumeSpace()

	if r := l.peek(); r == '"' || r == '\'' {
		return lexString(l)
	}

	rest := l.input[l.start:]
	if strings.HasPrefix(rest, "true") || strings.HasPrefix(rest, "false") {
		return lexBool(l)
	}

	// try it as a number
	return lexNumber(l)
}
// lexString scans a quoted string value.
//
// The opening quote may be ' or "; the string ends at the next occurrence
// of that same quote that is not directly preceded by a backslash. The
// text between the quotes is emitted verbatim as itemString (backslashes
// are kept, nothing is unescaped) and scanning continues with
// lexEndStatement. Reaching the end of input before the closing quote is
// reported as an error.
//
// TODO support single and double quotes with escaping
func lexString(l *lexer) stateFn {
	if !l.accept(`"'`) {
		return l.errorf("Unexpected value %v, expected a quote", l.peek())
	}
	// Remember which quote opened the string, then drop it from the token.
	quote := l.input[l.start:l.pos]
	l.ignore()

	prev := ""
	for r := l.next(); r != eof; r = l.next() {
		cur := string(r)
		if cur != quote || prev == `\` {
			prev = cur
			continue
		}
		// Closing quote: emit what precedes it, then skip the quote itself.
		l.backup()
		l.emit(itemString)
		l.next()
		l.ignore()
		return lexEndStatement(l)
	}
	// should only happen in error case
	return l.errorf("Unexpected value %v, incorrectly terminated value %s %v", l.input[l.start:], quote, prev)
}
// Fixed token spellings recognised by the lexer.
const (
	// Comparison operators accepted by lexOperator; each is two bytes long.
	operatorEquals  = "=="
	operatorGreater = ">="
	operatorLess    = "<="

	// Grouping parentheses handled by lexStartStatement and lexEndStatement.
	parenLeft  = "("
	parenRight = ")"
)
// lexOperator scans the comparison operator that follows an identifier.
//
// After skipping leading whitespace it looks at the next two bytes. If
// they spell one of ==, >= or <= the operator is emitted as itemOperator
// and scanning continues with lexValue; anything else is reported as an
// error quoting the offending bytes.
//
// The two-byte window is clamped to the end of the input: slicing
// input[pos:pos+2] directly would panic on a query that stops right after
// the identifier or in the middle of an operator (e.g. `foo` or `foo=`).
func lexOperator(l *lexer) stateFn {
	l.consumeSpace()

	end := l.pos + 2
	if end > len(l.input) {
		end = len(l.input)
	}
	candidate := l.input[l.pos:end]

	switch candidate {
	case operatorEquals, operatorGreater, operatorLess:
		l.pos = end
		l.emit(itemOperator)
		return lexValue(l)
	}
	return l.errorf("Unexpected operator %q", candidate)
}
// lexNumber scans a numeric literal: an optional sign, then decimal
// digits or a 0x/0X-prefixed run of hex digits, an optional fractional
// part and an optional exponent. The literal is emitted as itemNumber and
// scanning continues with lexEndStatement.
//
// The mantissa (everything between the sign and the exponent) must
// contain at least one digit. Without that check a lone sign, ".", or a
// bare exponent such as "e" or "e5" would be emitted as a number just
// because some characters were consumed. Such input is reported with the
// same error as any other non-numeric value.
func lexNumber(l *lexer) stateFn {
	const decimal = "0123456789"

	l.consumeSpace()
	// optional leading sign
	l.accept("+-")
	mantissaStart := l.pos

	digits := decimal
	if l.accept("0") && l.accept("xX") {
		digits = "0123456789abcdefABCDEF"
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	// A hex mantissa always contains the '0' of its prefix, so testing
	// for a decimal digit covers both notations.
	if !strings.ContainsAny(l.input[mantissaStart:l.pos], decimal) {
		return l.errorf("Unexpected value %s", l.input[l.start:])
	}

	if l.accept("eE") {
		l.accept("+-")
		l.acceptRun(decimal)
	}
	l.emit(itemNumber)
	return lexEndStatement(l)
}
// lexStartStatement is the initial state and the state entered after a
// boolean operator. It emits an itemLeftParen for every opening
// parenthesis in front of the statement (spaces between them are
// skipped) and then hands over to lexIdent.
func lexStartStatement(l *lexer) stateFn {
	for {
		l.consumeSpace()
		if string(l.peek()) != parenLeft {
			return lexIdent(l)
		}
		l.next()
		l.emit(itemLeftParen)
	}
}
// lexEndStatement runs after a value has been scanned and decides whether
// the input ends here or another statement follows.
//
// It first emits an itemRightParen for every closing parenthesis (spaces
// between them are skipped). It then reads the next word, up to
// whitespace or the end of input:
//   - no word at all ends lexing (nil state);
//   - and, AND, or, OR is emitted as itemBooleanOp and lexing continues
//     with lexStartStatement;
//   - any other word is reported as an error.
func lexEndStatement(l *lexer) stateFn {
	for {
		l.consumeSpace()
		if string(l.peek()) != parenRight {
			break
		}
		l.next()
		l.emit(itemRightParen)
	}

	for r := l.next(); r != eof; r = l.next() {
		if unicode.IsSpace(r) {
			l.backup()
			break
		}
	}

	word := l.input[l.start:l.pos]
	switch word {
	case "":
		return nil
	case "and", "AND", "or", "OR":
		l.emit(itemBooleanOp)
		return lexStartStatement(l)
	}
	return l.errorf("Unexpected input %v", word)
}
// lexBool scans a boolean literal. It is entered from lexValue when the
// upcoming text starts with "true" or "false".
//
// The word is read up to whitespace, a closing parenthesis or the end of
// input. If it is exactly true or false it is emitted as itemBoolean and
// scanning continues with lexEndStatement; any other word (for example
// "truex") is reported as an error.
//
// Stopping at ")" lets a boolean close a parenthesised group, as in
// `(flag == true)`, the same way numbers and strings can. Reading up to
// whitespace only would swallow the parenthesis into the word and reject
// "true)" as an invalid boolean.
func lexBool(l *lexer) stateFn {
	for {
		r := l.next()
		if r == eof {
			break
		}
		if unicode.IsSpace(r) || string(r) == parenRight {
			// The delimiter is handled by lexEndStatement.
			l.backup()
			break
		}
	}

	switch word := l.input[l.start:l.pos]; word {
	case "true", "false":
		l.emit(itemBoolean)
		return lexEndStatement(l)
	default:
		return l.errorf("Unexpected value %q, expecting a boolean", word)
	}
}