TDL/lexer.go
2020-11-04 22:27:06 +01:00

232 lines
3.7 KiB
Go

package main
import (
"fmt"
"io"
"unicode"
)
// SymType identifies the kind of a symbol produced by the lexer.
type SymType int

// Symbol kinds, numbered consecutively from zero in declaration order.
const (
	// SymNone is the zero value and stands for "no symbol".
	SymNone SymType = iota
	// SymUnknown marks a character that does not start any known symbol.
	SymUnknown
	// SymEOI marks the end of the input.
	SymEOI
	// SymPreamble is an identifier immediately followed by ':'.
	SymPreamble
	// SymComma is the ',' character.
	SymComma
	// SymPercent is the '%' character.
	SymPercent
	// SymTimes is the 'x' character.
	SymTimes
	// SymDash is the '-' character.
	SymDash
	// SymAt is the '@' character.
	SymAt
	// SymPlus is the '+' character.
	SymPlus
	// SymNotice is text enclosed in '(' and ')' on a single line.
	SymNotice
	// SymBrokenNotice is a '(' whose ')' is missing before the end of the
	// line or of the input.
	SymBrokenNotice
	// SymString is text enclosed in double quotes on a single line.
	SymString
	// SymBrokenString is a '"' whose closing quote is missing before the end
	// of the line or of the input.
	SymBrokenString
	// SymInteger is a run of ASCII digits.
	SymInteger
	// SymFloat is a run of ASCII digits followed by '.' and further digits.
	SymFloat
	// SymIdent is a letter followed by letters and digits.
	SymIdent
	// SymReps is the keyword "reps".
	SymReps
	// SymIncrease is the keyword "increase".
	SymIncrease
)

// symTypeNames holds the display text of every declared SymType, indexed by
// the numeric value of the constant.
var symTypeNames = [...]string{
	SymNone:         "none",
	SymUnknown:      "unknown",
	SymEOI:          "EOI",
	SymPreamble:     "preamble",
	SymComma:        ",",
	SymPercent:      "%",
	SymTimes:        "x",
	SymDash:         "-",
	SymAt:           "@",
	SymPlus:         "+",
	SymNotice:       "notice",
	SymBrokenNotice: "broken notice",
	SymString:       "string",
	SymBrokenString: "broken string",
	SymInteger:      "integer",
	SymFloat:        "float",
	SymIdent:        "identifier",
	SymReps:         "reps",
	SymIncrease:     "increase",
}

// String returns the display text for t. Values outside the declared range
// yield a fixed placeholder message instead.
func (t SymType) String() string {
	if t < 0 || int(t) >= len(symTypeNames) {
		return "(no description for token, this should not have happened)"
	}
	return symTypeNames[t]
}
// Sym is a single symbol produced by the lexer: its kind together with the
// exact input text it was read from.
type Sym struct {
	Type  SymType // kind of the symbol
	Value string  // input text covered by the symbol
}

// String formats s as "(<type> - '<value>')", mainly for debug output.
func (s Sym) String() string {
	return fmt.Sprintf("(%s - '%s')", s.Type.String(), s.Value)
}
// Lexer scans a slice of runes and splits it into symbols.
type Lexer struct {
	input    []rune // complete text being scanned
	symStart int    // index in input where the most recent symbol starts
	pos      int    // index of ch in input; -1 before the first read, len(input) once exhausted
	line     int    // line counter, starts at 1 and grows with every newline skipWhitespace consumes
	ch       rune   // current rune; 0 when there is none
}

// NewLexer returns a Lexer positioned just before the first rune of input.
func NewLexer(input []rune) *Lexer {
	return &Lexer{input: input, pos: -1, line: 1}
}

// getCh advances to the next rune of the input. Once the input is exhausted
// pos is parked at len(input) and ch is set to 0.
func (l *Lexer) getCh() {
	if next := l.pos + 1; next < len(l.input) {
		l.pos = next
		l.ch = l.input[next]
		return
	}
	l.pos = len(l.input)
	l.ch = 0
}

// isEOI reports whether the lexer has reached the end of the input. A NUL
// rune in the last position of the input also counts as end of input.
func (l *Lexer) isEOI() bool {
	return l.ch == 0 && l.pos >= len(l.input)-1
}

// symbolValue returns the input text between symStart and the current
// position.
//
// Before the first rune has been read pos (and, for empty input, symStart)
// still holds the -1 sentinel from NewLexer. Negative bounds are treated as 0
// and an empty range yields "", so the call never panics on a fresh lexer or
// on empty input.
func (l *Lexer) symbolValue() string {
	from, to := l.symStart, l.pos
	if from < 0 {
		from = 0
	}
	if to > len(l.input) {
		to = len(l.input)
	}
	if to <= from {
		return ""
	}
	return string(l.input[from:to])
}

// skipWhitespace consumes everything that separates symbols: Unicode white
// space, NUL runes that are not the end of the input, and comments enclosed
// in '[' and ']'. Every newline consumed, including those inside comments,
// increments the line counter.
//
// A comment that is never closed extends to the end of the input; the loop
// stops there instead of waiting forever for the missing ']'.
func (l *Lexer) skipWhitespace() {
	inComment := false
	for !l.isEOI() {
		if !inComment && l.ch != '[' && l.ch != 0 && !unicode.IsSpace(l.ch) {
			return
		}
		switch l.ch {
		case '[':
			inComment = true
		case ']':
			inComment = false
		case '\n':
			l.line++
		}
		l.getCh()
	}
}
// Lex scans and returns the next symbol of the input.
//
// Separators (white space, NUL runes and bracketed comments) in front of the
// symbol are skipped first. At the end of the input a SymEOI symbol with an
// empty value is returned; this also holds for empty input. For every other
// symbol Value is the exact input text it covers, delimiters included.
func (l *Lexer) Lex() Sym {
	l.skipWhitespace()

	// skipWhitespace already consumes NUL runes inside the input; this loop
	// only keeps Lex correct should that ever change.
	for l.ch == 0 && !l.isEOI() {
		l.getCh()
	}

	if l.isEOI() {
		// On empty input pos is still the -1 sentinel set by NewLexer, which
		// must never be used as a slice bound. The end-of-input symbol is
		// always empty, so it is built without slicing the input at all.
		l.symStart = l.pos
		if l.symStart < 0 {
			l.symStart = 0
		}
		return Sym{SymEOI, ""}
	}

	start := l.pos
	var sym SymType
	switch l.ch {
	case ',':
		sym = SymComma
		l.getCh()
	case '%':
		sym = SymPercent
		l.getCh()
	case 'x':
		// Matched before the identifier rule below, so a word that starts
		// with 'x' lexes as SymTimes followed by the rest of the word.
		sym = SymTimes
		l.getCh()
	case '-':
		sym = SymDash
		l.getCh()
	case '@':
		sym = SymAt
		l.getCh()
	case '+':
		sym = SymPlus
		l.getCh()
	case '(':
		sym = l.scanDelimited(')', SymNotice, SymBrokenNotice)
	case '"':
		sym = l.scanDelimited('"', SymString, SymBrokenString)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		sym = l.scanNumber()
	default:
		if unicode.IsLetter(l.ch) {
			sym = l.scanWord(start)
		} else {
			sym = SymUnknown
			l.getCh()
		}
	}

	l.symStart = start
	return Sym{sym, l.symbolValue()}
}

// scanDelimited consumes a symbol that begins with the current (opening)
// rune and ends with closer on the same line. It returns complete when closer
// was found and consumed, and broken when a newline or the end of the input
// came first; in that case the newline itself is left unread.
func (l *Lexer) scanDelimited(closer rune, complete, broken SymType) SymType {
	l.getCh() // opening delimiter
	for l.ch != closer && l.ch != '\n' && !l.isEOI() {
		l.getCh()
	}
	if l.ch != closer {
		return broken
	}
	l.getCh() // closing delimiter
	return complete
}

// scanNumber consumes a run of ASCII digits starting at the current rune and
// returns SymInteger, or SymFloat when the run is followed by '.' (and any
// further digits, which are consumed as well).
func (l *Lexer) scanNumber() SymType {
	for l.ch >= '0' && l.ch <= '9' {
		l.getCh()
	}
	if l.ch != '.' {
		return SymInteger
	}
	l.getCh()
	for l.ch >= '0' && l.ch <= '9' {
		l.getCh()
	}
	return SymFloat
}

// scanWord consumes a letter followed by letters and digits, where start is
// the index of that first letter. A directly following ':' is consumed too
// and turns the word into SymPreamble; otherwise the words "reps" and
// "increase" are reported as keywords and everything else as SymIdent.
func (l *Lexer) scanWord(start int) SymType {
	l.getCh()
	for unicode.IsLetter(l.ch) || unicode.IsDigit(l.ch) {
		l.getCh()
	}
	if l.ch == ':' {
		l.getCh()
		return SymPreamble
	}
	switch string(l.input[start:l.pos]) {
	case "reps":
		return SymReps
	case "increase":
		return SymIncrease
	}
	return SymIdent
}
// DebugLexer lexes input from start to end and writes one line per symbol to
// writer, formatted as "line <n>: <symbol>", where <n> is the lexer's line
// counter after the symbol was read. The end-of-input symbol is not printed.
func DebugLexer(input []rune, writer io.Writer) {
	lexer := NewLexer(input)
	for {
		sym := lexer.Lex()
		if sym.Type == SymEOI {
			return
		}
		fmt.Fprintf(writer, "line %d: %v\n", lexer.line, sym)
	}
}