From 9bf668e7114bb92a0b072db5d4e092c0b8f964c4 Mon Sep 17 00:00:00 2001
From: Marc Vertes
Date: Thu, 22 Jan 2026 11:33:32 +0100
Subject: chore: refactor token types to avoid mutating scanner tokens

Mutating scanner tokens, or reusing scanner token attributes to store
other metadata, is a hack. Introduce a new parser token type that can
carry arbitrary args. The next step will be to use the Arg field
instead of the scanner token fields.
---
 comp/compiler.go |  3 +--
 parser/decl.go   |  5 ++---
 parser/expr.go   | 38 +++++++++++++++++-----------------
 parser/parse.go  | 63 ++++++++++++++++++++++++++++++--------------------
 parser/tokens.go | 16 +++++++++++++-
 5 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/comp/compiler.go b/comp/compiler.go
index 55a3166..4fa34de 100644
--- a/comp/compiler.go
+++ b/comp/compiler.go
@@ -13,7 +13,6 @@ import (
 
 	"github.com/mvertes/parscan/lang"
 	"github.com/mvertes/parscan/parser"
-	"github.com/mvertes/parscan/scanner"
 	"github.com/mvertes/parscan/symbol"
 	"github.com/mvertes/parscan/vm"
 )
@@ -66,7 +65,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) {
 	stack := []*symbol.Symbol{} // for symbolic evaluation and type checking
 	flen := []int{}             // stack length according to function scopes
 
-	emit := func(t scanner.Token, op vm.Op, arg ...int) {
+	emit := func(t parser.Token, op vm.Op, arg ...int) {
 		_, file, line, _ := runtime.Caller(1)
 		fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg)
 		c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg})
diff --git a/parser/decl.go b/parser/decl.go
index 8833580..807fd72 100644
--- a/parser/decl.go
+++ b/parser/decl.go
@@ -9,7 +9,6 @@ import (
 	"strings"
 
 	"github.com/mvertes/parscan/lang"
-	"github.com/mvertes/parscan/scanner"
 	"github.com/mvertes/parscan/symbol"
 	"github.com/mvertes/parscan/vm"
 )
@@ -357,9 +356,9 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) {
 		if v, err = p.parseExpr(v, ""); err != nil {
 			return out, err
 		}
-		out = append(out, scanner.Token{Tok: lang.Ident, Str: vars[i]})
+		out = append(out, newIdent(vars[i], 0))
 		out = append(out, v...)
-		out = append(out, scanner.Token{Tok: lang.Assign})
+		out = append(out, newToken(lang.Assign, 0))
 	}
 	return out, err
 }
diff --git a/parser/expr.go b/parser/expr.go
index 149b5e2..835ae28 100644
--- a/parser/expr.go
+++ b/parser/expr.go
@@ -17,7 +17,7 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 	var ops Tokens
 	var ctype string
 
-	popop := func() scanner.Token {
+	popop := func() Token {
 		l := len(ops) - 1
 		t := ops[l]
 		ops = ops[:l]
@@ -28,7 +28,7 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 	}
 
 	// addop adds an operator to the operator stack.
-	addop := func(t scanner.Token) {
+	addop := func(t Token) {
 		// Operators on stack with a lower precedence are popped out and output first.
 		for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) {
 			out = append(out, popop())
@@ -82,14 +82,14 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 			addop(t)
 			xp := strconv.Itoa(p.labelCount[p.scope])
 			p.labelCount[p.scope]++
-			out = append(out, scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp}})
 			ops[len(ops)-1].Str = p.scope + "x" + xp
 
 		case lang.Lor:
 			addop(t)
 			xp := strconv.Itoa(p.labelCount[p.scope])
 			p.labelCount[p.scope]++
-			out = append(out, scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp}})
 			ops[len(ops)-1].Str = p.scope + "x" + xp
 
 		case lang.Ident:
@@ -114,12 +114,12 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 			if i == 0 || in[i-1].Tok.IsOperator() {
 				out = append(out, toks...)
 			} else {
-				prec := p.precedence(scanner.Token{Tok: lang.Call})
+				prec := p.precedence(Token{Token: scanner.Token{Tok: lang.Call}})
 				for len(ops) > 0 && prec < p.precedence(ops[len(ops)-1]) {
 					out = append(out, popop())
 				}
 				// func call: ensure that the func token is on the top of the stack, after args.
-				ops = append(ops, scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)})
+				ops = append(ops, Token{Token: scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}})
 				out = append(out, toks...)
 			}
 
@@ -129,14 +129,14 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 				typ := p.Symbols[typeStr].Type.Elem()
 				ctype = typ.String()
 				p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "")
-				out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype})
+				out = append(out, Token{Token: scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype}})
 			}
 			toks, err := p.parseComposite(t.Block(), ctype)
 			out = append(out, toks...)
 			if err != nil {
 				return out, err
 			}
-			ops = append(ops, scanner.Token{Tok: lang.Composite, Pos: t.Pos, Str: ctype})
+			ops = append(ops, Token{Token: scanner.Token{Tok: lang.Composite, Pos: t.Pos, Str: ctype}})
 
 		case lang.BracketBlock:
 			if i == 0 || in[i-1].Tok.IsOperator() {
@@ -148,7 +148,7 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 				ctype = typ.String()
 				// p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "")
 				p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, false)
-				out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype})
+				out = append(out, Token{Token: scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype}})
 				i += n - 1
 				break
 			}
@@ -163,10 +163,10 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 			if i < len(in)-2 && in[i+1].Tok == lang.Assign {
 				// A bracket block followed by assign implies an IndexAssign token,
 				// as assignment to a map element cannot be implemented through a normal Assign.
-				ops = append(ops, scanner.Token{Tok: lang.IndexAssign, Pos: t.Pos})
+				ops = append(ops, Token{Token: scanner.Token{Tok: lang.IndexAssign, Pos: t.Pos}})
 				i++
 			} else if toks[len(toks)-1].Tok != lang.Slice {
-				ops = append(ops, scanner.Token{Tok: lang.Index, Pos: t.Pos})
+				ops = append(ops, Token{Token: scanner.Token{Tok: lang.Index, Pos: t.Pos}})
 			}
 
 		case lang.Struct:
@@ -176,7 +176,7 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 			}
 			ctype = typ.String()
 			p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "")
-			out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype}})
 			i++
 
 		case lang.Map:
@@ -186,7 +186,7 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) {
 			}
 			ctype = typ.String()
 			p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "")
-			out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype}})
 			i += n - 1
 
 		case lang.Comment:
@@ -218,9 +218,9 @@ func (p *Parser) parseComposite(s, typ string) (Tokens, error) {
 		}
 		if noColon {
 			// Insert a numeric index key and a colon operator.
-			result = append(result, scanner.Token{Tok: lang.Int, Str: strconv.Itoa(i)})
+			result = append(result, Token{Token: scanner.Token{Tok: lang.Int, Str: strconv.Itoa(i)}})
 			result = append(result, toks...)
-			result = append(result, scanner.Token{Tok: lang.Colon, Str: ":"})
+			result = append(result, Token{Token: scanner.Token{Tok: lang.Colon, Str: ":"}})
 			sliceLen++
 		} else {
 			result = append(result, toks...)
@@ -231,7 +231,7 @@ func (p *Parser) parseComposite(s, typ string) (Tokens, error) {
 	return result, nil
 }
 
-func (p *Parser) parseBlock(t scanner.Token, typ string) (result Tokens, err error) {
+func (p *Parser) parseBlock(t Token, typ string) (result Tokens, err error) {
 	tokens, err := p.Scan(t.Block(), false)
 	if err != nil {
 		return tokens, err
@@ -245,12 +245,12 @@ func (p *Parser) parseBlock(t scanner.Token, typ string) (result Tokens, err err
 		}
 		if len(sub) == 0 {
 			if i == 0 {
-				result = append(result, scanner.Token{Tok: lang.Int, Str: "0"})
+				result = append(result, Token{Token: scanner.Token{Tok: lang.Int, Str: "0"}})
 				continue
 			} else if i == 2 {
 				return nil, errors.New("final index required in 3-index slice")
 			}
-			result = append(result, scanner.Token{Tok: lang.Len, Beg: 1})
+			result = append(result, Token{Token: scanner.Token{Tok: lang.Len, Beg: 1}})
 			continue
 		}
 		toks, err := p.parseExpr(sub, typ)
@@ -259,7 +259,7 @@ func (p *Parser) parseBlock(t scanner.Token, typ string) (result Tokens, err err
 		}
 		result = append(result, toks...)
 	}
-	result = append(result, scanner.Token{Tok: lang.Slice, Pos: t.Pos})
+	result = append(result, Token{Token: scanner.Token{Tok: lang.Slice, Pos: t.Pos}})
 	return result, err
 }
 
diff --git a/parser/parse.go b/parser/parse.go
index 67d8657..89163cf 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -55,8 +55,15 @@ func NewParser(spec *lang.Spec, noPkg bool) *Parser {
 }
 
 // Scan performs lexical analysis on s and returns Tokens or an error.
-func (p *Parser) Scan(s string, endSemi bool) (Tokens, error) {
-	return p.Scanner.Scan(s, endSemi)
+func (p *Parser) Scan(s string, endSemi bool) (out Tokens, err error) {
+	toks, err := p.Scanner.Scan(s, endSemi)
+	if err != nil {
+		return out, err
+	}
+	for _, t := range toks {
+		out = append(out, Token{Token: t})
+	}
+	return out, err
 }
 
 // Parse performs syntax analysis on s and returns Tokens or an error.
@@ -161,7 +168,7 @@ func (p *Parser) parseBreak(in Tokens) (out Tokens, err error) {
 	default:
 		return nil, ErrBreak
 	}
-	out = Tokens{{Tok: lang.Goto, Str: label}}
+	out = Tokens{{Token: scanner.Token{Tok: lang.Goto, Str: label}}}
 	return out, err
 }
 
@@ -179,7 +186,7 @@ func (p *Parser) parseContinue(in Tokens) (out Tokens, err error) {
 	default:
 		return nil, ErrContinue
 	}
-	out = Tokens{{Tok: lang.Goto, Str: label}}
+	out = Tokens{{Token: scanner.Token{Tok: lang.Goto, Str: label}}}
 	return out, err
 }
 
@@ -188,7 +195,7 @@ func (p *Parser) parseGoto(in Tokens) (out Tokens, err error) {
 		return nil, ErrGoto
 	}
 	// TODO: check validity of user provided label
-	return Tokens{{Tok: lang.Goto, Str: p.funcScope + "/" + in[1].Str}}, nil
+	return Tokens{{Token: scanner.Token{Tok: lang.Goto, Str: p.funcScope + "/" + in[1].Str}}}, nil
 }
 
 func (p *Parser) parseFor(in Tokens) (out Tokens, err error) {
@@ -208,10 +215,8 @@ func (p *Parser) parseFor(in Tokens) (out Tokens, err error) {
 	case 1:
 		if in.Index(lang.Range) >= 0 {
 			init = pre[0]
-			// cond = Tokens{{Tok: lang.Next, Str: p.scope + "c"}}
-			// final = Tokens{{Tok: lang.Stop, Str: p.scope + "f"}}
-			cond = Tokens{{Tok: lang.Next}}
-			final = Tokens{{Tok: lang.Stop}}
+			cond = Tokens{{Token: scanner.Token{Tok: lang.Next}}}
+			final = Tokens{{Token: scanner.Token{Tok: lang.Stop}}}
 		} else {
 			cond = pre[0]
 		}
@@ -226,13 +231,13 @@ func (p *Parser) parseFor(in Tokens) (out Tokens, err error) {
 		}
 		out = init
 	}
-	out = append(out, scanner.Token{Tok: lang.Label, Str: p.scope + "b"})
+	out = append(out, Token{Token: scanner.Token{Tok: lang.Label, Str: p.scope + "b"}})
 	if len(cond) > 0 {
 		if cond, err = p.parseExpr(cond, ""); err != nil {
 			return nil, err
 		}
 		out = append(out, cond...)
-		out = append(out, scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e"})
+		out = append(out, Token{Token: scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e"}})
 	}
 	if body, err = p.Parse(in[len(in)-1].Block()); err != nil {
 		return nil, err
@@ -245,8 +250,8 @@ func (p *Parser) parseFor(in Tokens) (out Tokens, err error) {
 		out = append(out, post...)
 	}
 	out = append(out,
-		scanner.Token{Tok: lang.Goto, Str: p.scope + "b"},
-		scanner.Token{Tok: lang.Label, Str: p.scope + "e"})
+		Token{Token: scanner.Token{Tok: lang.Goto, Str: p.scope + "b"}},
+		Token{Token: scanner.Token{Tok: lang.Label, Str: p.scope + "e"}})
 	out = append(out, final...)
 	return out, err
 }
@@ -282,8 +287,8 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) {
 	}()
 
 	out = Tokens{
-		{Tok: lang.Goto, Str: fname + "_end"}, // Skip function definition.
-		{Tok: lang.Label, Pos: in[0].Pos, Str: fname},
+		{Token: scanner.Token{Tok: lang.Goto, Str: fname + "_end"}}, // Skip function definition.
+		{Token: scanner.Token{Tok: lang.Label, Pos: in[0].Pos, Str: fname}},
 	}
 
 	bi := in.Index(lang.BraceBlock)
@@ -303,7 +308,7 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) {
 		return out, err
 	}
 	if l := p.framelen[p.funcScope] - 1; l > 0 {
-		out = append(out, scanner.Token{Tok: lang.Grow, Beg: l})
+		out = append(out, Token{Token: scanner.Token{Tok: lang.Grow, Beg: l}})
 	}
 	out = append(out, toks...)
 	if out[len(out)-1].Tok != lang.Return {
@@ -315,7 +320,7 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) {
 		}
 		out = append(out, x...)
 	}
-	out = append(out, scanner.Token{Tok: lang.Label, Str: fname + "_end"})
+	out = append(out, Token{Token: scanner.Token{Tok: lang.Label, Str: fname + "_end"}})
 	return out, err
 }
 
@@ -336,9 +341,9 @@ func (p *Parser) parseIf(in Tokens) (out Tokens, err error) {
 			return nil, err
 		}
 		if sc > 0 {
-			pre = append(pre, scanner.Token{Tok: lang.Goto, Str: p.scope + "e0"})
+			pre = append(pre, Token{Token: scanner.Token{Tok: lang.Goto, Str: p.scope + "e0"}})
 		}
-		pre = append(pre, scanner.Token{Tok: lang.Label, Str: p.scope + "e" + ssc})
+		pre = append(pre, Token{Token: scanner.Token{Tok: lang.Label, Str: p.scope + "e" + ssc}})
 		out = append(pre, out...)
 		i--
 
@@ -366,7 +371,7 @@ func (p *Parser) parseIf(in Tokens) (out Tokens, err error) {
 			return nil, err
 		}
 		pre = append(pre, cond...)
-		pre = append(pre, scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e" + ssc})
+		pre = append(pre, Token{Token: scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e" + ssc}})
 		out = append(pre, out...)
 		i = ifp
 		if i > 1 && in[i].Tok == lang.If && in[i-1].Tok == lang.Else { // Step over 'else if'.
@@ -428,12 +433,12 @@ func (p *Parser) parseSwitch(in Tokens) (out Tokens, err error) {
 		}
 		out = append(out, co...)
 	}
-	out = append(out, scanner.Token{Tok: lang.Label, Str: p.breakLabel})
+	out = append(out, Token{Token: scanner.Token{Tok: lang.Label, Str: p.breakLabel}})
 	return out, err
 }
 
 func (p *Parser) parseCaseClause(in Tokens, index, maximum int, condSwitch bool) (out Tokens, err error) {
-	in = append(in, scanner.Token{Tok: lang.Semicolon}) // Force a ';' at the end of the body clause.
+	in = append(in, Token{Token: scanner.Token{Tok: lang.Semicolon}}) // Force a ';' at the end of the body clause.
 	var conds, body Tokens
 	tl := in.Split(lang.Colon)
 	if len(tl) != 2 {
@@ -459,24 +464,24 @@ func (p *Parser) parseCaseClause(in Tokens, index, maximum int, condSwitch bool)
 		} else {
 			next = fmt.Sprintf("%sc%d.%d", p.scope, index, i+1)
 		}
-		out = append(out, scanner.Token{Tok: lang.Label, Str: txt})
+		out = append(out, Token{Token: scanner.Token{Tok: lang.Label, Str: txt}})
 		if len(cond) > 0 {
 			out = append(out, cond...)
 			if condSwitch {
-				out = append(out, scanner.Token{Tok: lang.EqualSet})
+				out = append(out, Token{Token: scanner.Token{Tok: lang.EqualSet}})
 			}
-			out = append(out, scanner.Token{Tok: lang.JumpFalse, Str: next})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.JumpFalse, Str: next}})
 		}
 		out = append(out, body...)
 		if i != len(lcond)-1 || index != maximum {
-			out = append(out, scanner.Token{Tok: lang.Goto, Str: p.scope + "e"})
+			out = append(out, Token{Token: scanner.Token{Tok: lang.Goto, Str: p.scope + "e"}})
 		}
 	}
 	return out, err
 }
 
 func (p *Parser) parseLabel(in Tokens) (out Tokens, err error) {
-	return Tokens{{Tok: lang.Label, Str: p.funcScope + "/" + in[0].Str}}, nil
+	return Tokens{{Token: scanner.Token{Tok: lang.Label, Str: p.funcScope + "/" + in[0].Str}}}, nil
 }
 
 func (p *Parser) parseReturn(in Tokens) (out Tokens, err error) {
@@ -485,7 +490,7 @@ func (p *Parser) parseReturn(in Tokens) (out Tokens, err error) {
 			return out, err
 		}
 	} else if l == 0 {
-		in = Tokens{{Tok: lang.Return}} // Implicit return in functions with no return parameters.
+		in = Tokens{{Token: scanner.Token{Tok: lang.Return}}} // Implicit return in functions with no return parameters.
 	}
 
 	// TODO: the function symbol should already be present in the parser context.
@@ -527,6 +532,6 @@ func (p *Parser) popScope() {
 	p.scope = p.scope[:j]
 }
 
-func (p *Parser) precedence(t scanner.Token) int {
+func (p *Parser) precedence(t Token) int {
 	return p.TokenProps[t.Tok].Precedence
 }
diff --git a/parser/tokens.go b/parser/tokens.go
index ac08e2b..9ae655b 100644
--- a/parser/tokens.go
+++ b/parser/tokens.go
@@ -7,8 +7,14 @@ import (
 	"github.com/mvertes/parscan/scanner"
 )
 
+// Token represents a parser token.
+type Token struct {
+	scanner.Token
+	Arg []any
+}
+
 // Tokens represents a slice of tokens.
-type Tokens []scanner.Token
+type Tokens []Token
 
 func (toks Tokens) String() (s string) {
 	var sb strings.Builder
@@ -62,3 +68,11 @@ func (toks Tokens) SplitStart(tok lang.Token) (result []Tokens) {
 		toks = toks[i+1:]
 	}
 }
+
+func newIdent(name string, pos int, arg ...any) Token {
+	return Token{Token: scanner.Token{Tok: lang.Ident, Pos: pos, Str: name}, Arg: arg}
+}
+
+func newToken(tok lang.Token, pos int, arg ...any) Token {
+	return Token{Token: scanner.Token{Tok: tok, Pos: pos}, Arg: arg}
+}
-- 
cgit v1.2.3
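
As a minimal usage sketch of the intent stated in the commit message (not part of the patch itself): the new parser Token embeds a scanner.Token and is meant to carry extra metadata in its Arg field instead of overloading scanner fields such as Beg, which currently holds the call argument count. The helpers callTokenCurrent, callTokenNext and callArgCount below are hypothetical names used only for illustration, assuming the parser package as modified by this patch.

package parser

import (
	"github.com/mvertes/parscan/lang"
	"github.com/mvertes/parscan/scanner"
)

// callTokenCurrent mirrors today's pattern: the number of call arguments
// is stored in the scanner token's Beg field, overloading its meaning.
func callTokenCurrent(pos, numArgs int) Token {
	return Token{Token: scanner.Token{Tok: lang.Call, Pos: pos, Beg: numArgs}}
}

// callTokenNext sketches the announced next step: the same metadata is
// passed through newToken and lands in the parser token's Arg field,
// leaving the embedded scanner.Token untouched.
func callTokenNext(pos, numArgs int) Token {
	return newToken(lang.Call, pos, numArgs) // numArgs becomes Arg[0]
}

// callArgCount reads the metadata back from Arg with a type assertion.
func callArgCount(t Token) int {
	if len(t.Arg) > 0 {
		if n, ok := t.Arg[0].(int); ok {
			return n
		}
	}
	return 0
}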