package main

import (
	"fmt"
	"io"
	"unicode"
)

// SymType identifies the kind of a symbol produced by the Lexer.
type SymType int

// Symbol kinds. The order is significant because the values come from iota.
const (
	SymNone          SymType = iota // zero value; Lex never returns it
	SymUnknown                      // a character that starts no known symbol
	SymEOI                          // end of input
	SymPreamble                     // identifier immediately followed by ':'
	SymComma                        // ','
	SymPercent                      // '%'
	SymTimes                        // 'x'
	SymDash                         // '-'
	SymAt                           // '@'
	SymPlus                         // '+'
	SymNotice                       // '(' ... ')' closed on the same line
	SymBrokenNotice                 // '(' with no ')' before newline or end of input
	SymString                       // '"' ... '"' closed on the same line
	SymBrokenString                 // '"' with no closing '"' before newline or end of input
	SymInteger                      // run of ASCII digits
	SymFloat                        // digits, '.', then zero or more digits
	SymIdent                        // letter followed by letters/digits
	SymReps                         // the keyword "reps"
	SymIncrease                     // the keyword "increase"
)

// String returns a short human-readable description of the symbol kind.
func (t SymType) String() string {
	switch t {
	case SymNone:
		return "none"
	case SymUnknown:
		return "unknown"
	case SymEOI:
		return "EOI"
	case SymPreamble:
		return "preamble"
	case SymComma:
		return ","
	case SymPercent:
		return "%"
	case SymTimes:
		return "x"
	case SymDash:
		return "-"
	case SymAt:
		return "@"
	case SymPlus:
		return "+"
	case SymNotice:
		return "notice"
	case SymBrokenNotice:
		return "broken notice"
	case SymString:
		return "string"
	case SymBrokenString:
		return "broken string"
	case SymInteger:
		return "integer"
	case SymFloat:
		return "float"
	case SymIdent:
		return "identifier"
	case SymReps:
		return "reps"
	case SymIncrease:
		return "increase"
	default:
		return "(no description for token, this should not have happened)"
	}
}

// Sym is a single lexed symbol: its kind plus the exact source text it covers.
type Sym struct {
	Type  SymType
	Value string
}

// String formats the symbol as "(<kind> - '<text>')".
func (s Sym) String() string {
	return "(" + s.Type.String() + " - '" + s.Value + "')"
}

// Lexer splits a rune slice into symbols, one per call to Lex.
type Lexer struct {
	input    []rune
	symStart int  // index of the first rune of the current symbol
	pos      int  // index of ch; -1 before the first read, len(input) at end of input
	line     int  // 1-based count of newlines skipped so far, plus one
	ch       rune // current rune; 0 before the first read and at end of input
}

// NewLexer returns a Lexer positioned before the first rune of input.
func NewLexer(input []rune) *Lexer {
	return &Lexer{input: input, pos: -1, line: 1}
}

// getCh advances to the next rune. At end of input it parks pos at
// len(input) and sets ch to 0; further calls are no-ops.
func (l *Lexer) getCh() {
	if l.pos >= len(l.input)-1 {
		l.pos = len(l.input)
		l.ch = 0
		return
	}
	l.pos++
	l.ch = l.input[l.pos]
}

// isEOI reports whether the lexer has read past the last rune.
//
// It deliberately looks only at pos: the initial state (pos == -1) and a
// literal NUL rune inside the input both have ch == 0 but are not end of
// input.
func (l *Lexer) isEOI() bool {
	return l.pos >= len(l.input)
}

// symbolValue returns the source text of the current symbol, i.e. the runes
// from symStart up to (not including) the current position.
func (l *Lexer) symbolValue() string {
	return string(l.input[l.symStart:l.pos])
}

// skipWhitespace skips Unicode whitespace, NUL runes and bracketed comments
// ("[" ... "]", which may span lines and do not nest), counting newlines as
// it goes. It also performs the very first read after NewLexer, because the
// initial ch is 0. An unterminated comment simply runs to end of input.
func (l *Lexer) skipWhitespace() {
	inComment := false
	for !l.isEOI() && (inComment || l.ch == 0 || l.ch == '[' || unicode.IsSpace(l.ch)) {
		switch l.ch {
		case '[':
			inComment = true
		case ']':
			inComment = false
		case '\n':
			l.line++
		}
		l.getCh()
	}
}

// scanDelimited consumes the opening delimiter under the cursor and then
// everything up to and including closer. It reports false, leaving the
// cursor on the offending position, if a newline or end of input is reached
// before closer.
func (l *Lexer) scanDelimited(closer rune) bool {
	l.getCh() // opening delimiter
	for l.ch != closer && l.ch != '\n' && !l.isEOI() {
		l.getCh()
	}
	if l.ch != closer {
		return false
	}
	l.getCh()
	return true
}

// skipDigits consumes a (possibly empty) run of ASCII digits.
func (l *Lexer) skipDigits() {
	for l.ch >= '0' && l.ch <= '9' {
		l.getCh()
	}
}

// Lex skips whitespace and comments and returns the next symbol. Once the
// input is exhausted it returns SymEOI (with an empty Value) on every call.
func (l *Lexer) Lex() Sym {
	l.skipWhitespace()
	l.symStart = l.pos

	var sym SymType
	switch l.ch {
	case 0:
		// skipWhitespace consumes embedded NULs, so 0 here means end of input.
		sym = SymEOI
	case ',':
		sym = SymComma
		l.getCh()
	case '%':
		sym = SymPercent
		l.getCh()
	case 'x':
		// Checked before identifiers: any word starting with 'x' yields
		// SymTimes for the 'x' alone.
		sym = SymTimes
		l.getCh()
	case '-':
		sym = SymDash
		l.getCh()
	case '@':
		sym = SymAt
		l.getCh()
	case '+':
		sym = SymPlus
		l.getCh()
	case '(':
		sym = SymNotice
		if !l.scanDelimited(')') {
			sym = SymBrokenNotice
		}
	case '"':
		sym = SymString
		if !l.scanDelimited('"') {
			sym = SymBrokenString
		}
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		sym = SymInteger
		l.skipDigits()
		if l.ch == '.' {
			sym = SymFloat
			l.getCh()
			l.skipDigits()
		}
	default:
		if !unicode.IsLetter(l.ch) {
			sym = SymUnknown
			l.getCh()
			break
		}
		sym = SymIdent
		for unicode.IsLetter(l.ch) || unicode.IsDigit(l.ch) {
			l.getCh()
		}
		if l.ch == ':' {
			sym = SymPreamble
			l.getCh()
		}
		// A preamble's text includes the ':' and so never matches a keyword.
		switch l.symbolValue() {
		case "reps":
			sym = SymReps
		case "increase":
			sym = SymIncrease
		}
	}

	return Sym{sym, l.symbolValue()}
}

// DebugLexer lexes input and writes one "line N: <symbol>" line per symbol to
// writer, stopping before the end-of-input symbol. Write errors are ignored.
func DebugLexer(input []rune, writer io.Writer) {
	l := NewLexer(input)
	for tok := l.Lex(); tok.Type != SymEOI; tok = l.Lex() {
		fmt.Fprintf(writer, "line %d: %v\n", l.line, tok)
	}
}