diff options
| author | Marc Vertes <mvertes@free.fr> | 2026-01-06 19:02:29 +0100 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2026-01-06 19:02:29 +0100 |
| commit | bffc031ea83c7176aac3d3828de0060c6630140c (patch) | |
| tree | 32e30f3bec94884936c2cfc2d53d3ae496e13d61 /parser | |
| parent | f07fc0178831432b68f1b9bd6c96b257aa2e9abe (diff) | |
fix: correct and simplify parsing of expressions.
The expressions were parsed from right to left, and it was incorrect and
cumbersome. Now they are processed from left to right, with a simpler
and correct handling of precedence rules.
The vm function call syntax has been changed to set the function
before the input arguments on the stack, as to follow the declaring
order in languages.
Diffstat (limited to 'parser')
| -rw-r--r-- | parser/decl.go | 15 | ||||
| -rw-r--r-- | parser/expr.go | 327 | ||||
| -rw-r--r-- | parser/parse.go | 14 | ||||
| -rw-r--r-- | parser/type.go | 14 |
4 files changed, 133 insertions, 237 deletions
diff --git a/parser/decl.go b/parser/decl.go index f6e8102..015f4bd 100644 --- a/parser/decl.go +++ b/parser/decl.go @@ -69,7 +69,7 @@ func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { values = nil } for i, v := range values { - if v, err = p.parseExpr(v); err != nil { + if v, err = p.parseExpr(v, ""); err != nil { return out, err } cval, _, err := p.evalConstExpr(v) @@ -99,11 +99,11 @@ func (p *Parser) evalConstExpr(in Tokens) (cval constant.Value, length int, err id := t.Tok switch { case id.IsBinaryOp(): - op1, l1, err := p.evalConstExpr(in[:l]) + op2, l2, err := p.evalConstExpr(in[:l]) if err != nil { return nil, 0, err } - op2, l2, err := p.evalConstExpr(in[:l-l1]) + op1, l1, err := p.evalConstExpr(in[:l-l2]) if err != nil { return nil, 0, err } @@ -164,6 +164,8 @@ func constValue(c constant.Value) any { return nil } +// Correspondence between language independent parscan tokens and Go stdlib tokens, +// To enable the use of the Go constant expression evaluator. var gotok = map[lang.Token]token.Token{ lang.Char: token.CHAR, lang.Imag: token.IMAG, @@ -352,13 +354,12 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) { values = nil } for i, v := range values { - if v, err = p.parseExpr(v); err != nil { + if v, err = p.parseExpr(v, ""); err != nil { return out, err } + out = append(out, scanner.Token{Tok: lang.Ident, Str: vars[i]}) out = append(out, v...) - out = append(out, - scanner.Token{Tok: lang.Ident, Str: vars[i]}, - scanner.Token{Tok: lang.Assign}) + out = append(out, scanner.Token{Tok: lang.Assign}) } return out, err } diff --git a/parser/expr.go b/parser/expr.go index 5958279..5267aa1 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -1,7 +1,6 @@ package parser import ( - "fmt" "log" "strconv" @@ -11,194 +10,167 @@ import ( "github.com/mvertes/parscan/vm" ) -func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { - log.Println("parseExpr in:", in) - var ops, selectors Tokens - var vl int - var selectorIndex string - - // - // Process tokens from last to first, the goal is to reorder the tokens in - // a stack machine processing order, so it can be directly interpreted. - // - if len(in) > 1 && in[0].Tok == lang.Func { - // Function as value (i.e closure). - if out, err = p.parseFunc(in); err != nil { - return out, err +// parseExpr transform an infix expression into a postfix notation. +func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { + log.Println("parseExpr2 in:", in) + var ops Tokens + + popop := func() (t scanner.Token) { + l := len(ops) - 1 + t = ops[l] + ops = ops[:l] + if t.Tok.IsLogicalOp() { + t.Tok = lang.Label // Implement conditional branching directly. } - // Get function label and use it as a symbol ident. - fid := out[1] - fid.Tok = lang.Ident - out = append(out, fid) - return out, err + return t } - for i := len(in) - 1; i >= 0; i-- { - t := in[i] - // temporary assumptions: binary operators, returning 1 value - switch t.Tok { - case lang.Ident: - if i > 0 && in[i-1].Tok == lang.Period { - selectorIndex = t.Str - continue - } - // resolve symbol if not a selector rhs. - _, sc, ok := p.Symbols.Get(t.Str, p.scope) - if ok { - if sc != "" { - t.Str = sc + "/" + t.Str - } - } + lin := len(in) + for i := 0; i < lin; i++ { + switch t := in[i]; t.Tok { + case lang.Int, lang.String: out = append(out, t) - vl++ - case lang.Colon: - // Make ':' a key-value operator for literal composite. - ops = append(ops, t) + case lang.Func: + // Function as value (i.e closure). + if out, err = p.parseFunc(in); err != nil { + return out, err + } + // Get function label and use it as a symbol ident. + fid := out[1] + fid.Tok = lang.Ident + out = append(out, fid) + return out, err case lang.Period: - t.Str += selectorIndex - selectors = append(Tokens{t}, selectors...) - continue + // TODO: fail if next is not an ident. + t.Str += in[i+1].Str // Hardwire selector argument. + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + ops = append(ops, t) + i++ // Skip over next ident. - case lang.Int, lang.String: - out = append(out, t) - vl++ + case lang.Colon: + t.Str = typeStr + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + ops = append(ops, t) - case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, - lang.Mul, lang.Land, lang.Lor, lang.Shl, lang.Shr, lang.Not, lang.And: + case lang.Add, lang.And, lang.Assign, lang.Define, lang.Equal, lang.Greater, lang.Less, lang.Mul, lang.Not, lang.Sub, lang.Shl, lang.Shr: if i == 0 || in[i-1].Tok.IsOperator() { // An operator preceded by an operator or no token is unary. t.Tok = lang.UnaryOp[t.Tok] - j := len(out) - 1 - l := out[j] - if p.precedence(l) > 0 { - out = append(out[:j], t, l) - break - } - out = append(out, t) - break } - if vl < 2 { - ops = append(ops, t) + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + ops = append(ops, t) + + case lang.Land: + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + xp := strconv.Itoa(p.labelCount[p.scope]) + p.labelCount[p.scope]++ + out = append(out, scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp}) + t.Str = p.scope + "x" + xp + ops = append(ops, t) + + case lang.Lor: + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + xp := strconv.Itoa(p.labelCount[p.scope]) + p.labelCount[p.scope]++ + out = append(out, scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp}) + t.Str = p.scope + "x" + xp + ops = append(ops, t) + + case lang.Ident: + if i < lin-1 && in[i].Tok == lang.Colon { + continue + } + s, sc, ok := p.Symbols.Get(t.Str, p.scope) + if ok && sc != "" { + t.Str = sc + "/" + t.Str + } + if s != nil && s.Kind == symbol.Type { + typeStr = s.Type.String() + } + out = append(out, t) + if i+1 < len(in) && in[i+1].Tok == lang.Define { + // Ident is to be assigned next. Define it as a var. + p.Symbols.Add(symbol.UnsetAddr, t.Str, vm.Value{}, symbol.Var, nil, false) } case lang.ParenBlock: - // If the previous token is an arithmetic, logic or assign operator then - // this parenthesis block is an enclosed expr, otherwise a call expr. + toks, err := p.parseExprStr(t.Block(), typeStr) + if err != nil { + return out, err + } if i == 0 || in[i-1].Tok.IsOperator() { - out = append(out, t) - vl++ - break + out = append(out, toks...) + } else { + prec := p.precedence(scanner.Token{Tok: lang.Call}) + for len(ops) > 0 && prec < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + // func call: ensure that the func token in on the top of the stack, after args. + ops = append(ops, scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) + out = append(out, toks...) } - // The call expression can be a function call, a conversion, - // a type assersion (including for type switch) - // func call: push args and func address then call - out = append(out, t) - vl++ - ops = append(ops, scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) case lang.BraceBlock: - // the block can be a func body or a composite type content. - // In both cases it is preceded by a type definition. - // We must determine the starting token of type def, - // parse the type def, and substitute the type def by a single ident. - // TODO: handle implicit type in composite expression. - ti := p.typeStartIndex(in[:len(in)-1]) - if ti == -1 { - return out, ErrInvalidType - } - typ, err := p.parseTypeExpr(in[ti : len(in)-1]) + toks, err := p.parseExprStr(t.Block(), typeStr) + out = append(out, toks...) if err != nil { - return out, ErrInvalidType + return out, err } - p.Symbols.Add(symbol.UnsetAddr, typ.String(), vm.NewValue(typ), symbol.Type, typ, p.funcScope != "") - out = append(out, t, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: typ.String()}) - i = ti - vl += 2 - ops = append(ops, scanner.Token{Tok: lang.Composite, Pos: t.Pos}) + ops = append(ops, scanner.Token{Tok: lang.Composite, Pos: t.Pos, Str: typeStr}) case lang.BracketBlock: - out = append(out, t) - vl++ + toks, err := p.parseExprStr(t.Block(), typeStr) + if err != nil { + return out, err + } + out = append(out, toks...) ops = append(ops, scanner.Token{Tok: lang.Index, Pos: t.Pos}) case lang.Comment: - return out, nil - - default: - return nil, fmt.Errorf("invalid expression: %v: %q", t.Tok, t.Str) - } - - if len(selectors) > 0 { - out = append(out, selectors...) - selectors = nil - } + // return out, nil - if lops, lout := len(ops), len(out); lops > 0 && vl > lops { - op := ops[lops-1] - ops = ops[:lops-1] - // Reorder tokens according to operator precedence rules. - if p.precedence(out[lout-2]) > p.precedence(op) { - op, out[lout-1], out[lout-2] = out[lout-2], op, out[lout-1] - if p.precedence(out[lout-3]) > p.precedence(out[lout-1]) { - out[lout-1], out[lout-2], out[lout-3] = out[lout-3], out[lout-1], out[lout-2] - } - } - out = append(out, op) - vl-- - } - } - out = append(out, ops...) - - log.Println("parseExpr out:", out, "vl:", vl, "ops:", ops) - // A logical operator (&&, ||) involves additional control flow operations. - if out, err = p.parseLogical(out); err != nil { - return out, err - } - - if l := len(out) - 1; l >= 0 && (out[l].Tok == lang.Define || out[l].Tok == lang.Assign) { - // Handle the assignment of a logical expression. - s1 := p.subExprLen(out[:l]) - head, err := p.parseLogical(out[:l-s1]) - if err != nil { - return out, err - } - out = append(head, out[l-s1:]...) - } - - // The tokens are now properly ordered, process nested blocks. - for i := len(out) - 1; i >= 0; i-- { - t := out[i] - var toks Tokens - switch t.Tok { - case lang.ParenBlock, lang.BracketBlock, lang.BraceBlock: - if toks, err = p.parseExprStr(t.Block()); err != nil { + case lang.Struct: + typ, err := p.parseTypeExpr(in[i : i+2]) + if err != nil { return out, err } + typeStr = typ.String() + p.Symbols.Add(symbol.UnsetAddr, typ.String(), vm.NewValue(typ), symbol.Type, typ, p.funcScope != "") + out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: typeStr}) + log.Println("### typ:", typ) + i++ + default: - continue + log.Println("unxexpected token:", t) } - - // replace block token by its parsed result. - log.Println("toks:", toks) - out2 := append(Tokens{}, out[:i]...) - out2 = append(out2, toks...) - out = append(out2, out[i+1:]...) } - + for len(ops) > 0 { + out = append(out, popop()) + } log.Println("Final out:", out) return out, err } -func (p *Parser) parseExprStr(s string) (tokens Tokens, err error) { +func (p *Parser) parseExprStr(s, typ string) (tokens Tokens, err error) { if tokens, err = p.Scan(s, false); err != nil { return tokens, err } var result Tokens for _, sub := range tokens.Split(lang.Comma) { - toks, err := p.parseExpr(sub) + toks, err := p.parseExpr(sub, typ) if err != nil { return result, err } @@ -207,66 +179,3 @@ func (p *Parser) parseExprStr(s string) (tokens Tokens, err error) { return result, err } - -// parseLogical handles logical expressions with control flow (&& and ||) by -// ensuring the left hand side is evaluated unconditionally first, then the -// right hand side can be skipped or not by inserting a conditional jump and label. -// If the last token is not a logical operator then the function is idempotent. -func (p *Parser) parseLogical(in Tokens) (out Tokens, err error) { - l := len(in) - 1 - if l < 0 || !in[l].Tok.IsLogicalOp() { - return in, nil - } - - xp := strconv.Itoa(p.labelCount[p.scope]) - p.labelCount[p.scope]++ - rhsIndex := p.subExprLen(in[:l]) - - lhs, err := p.parseLogical(in[l-rhsIndex : l]) - if err != nil { - return out, err - } - - rhs, err := p.parseLogical(in[:l-rhsIndex]) - if err != nil { - return out, err - } - out = append(out, lhs...) - - if in[l].Tok == lang.Lor { - out = append(out, scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp}) - } else { - out = append(out, scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp}) - } - - out = append(out, rhs...) - out = append(out, scanner.Token{Tok: lang.Label, Str: p.scope + "x" + xp}) - return out, err -} - -// subExprLen returns the length of the first complete sub-expression starting from the input end. -func (p *Parser) subExprLen(in Tokens) int { - l := len(in) - 1 - last := in[l] - - switch last.Tok { - case lang.Int, lang.Float, lang.String, lang.Char, lang.Ident, lang.ParenBlock, lang.BracketBlock: - return 1 - - case lang.Call: - s1 := p.subExprLen(in[:l]) - return 1 + s1 + p.subExprLen(in[:l-s1]) - // TODO: add selector and index operators when ready - } - - if last.Tok.IsBinaryOp() { - s1 := p.subExprLen(in[:l]) - return 1 + s1 + p.subExprLen(in[:l-s1]) - } - - if last.Tok.IsUnaryOp() { - return 1 + p.subExprLen(in[:l]) - } - - return 0 // should not occur. TODO: diplay some error here. -} diff --git a/parser/parse.go b/parser/parse.go index e7d5399..60311dc 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -143,7 +143,7 @@ func (p *Parser) parseStmt(in Tokens) (out Tokens, err error) { } fallthrough default: - return p.parseExpr(in) + return p.parseExpr(in, "") } } @@ -220,7 +220,7 @@ func (p *Parser) parseFor(in Tokens) (out Tokens, err error) { } out = append(out, scanner.Token{Tok: lang.Label, Str: p.scope + "b"}) if len(cond) > 0 { - if cond, err = p.parseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond, ""); err != nil { return nil, err } out = append(out, cond...) @@ -353,7 +353,7 @@ func (p *Parser) parseIf(in Tokens) (out Tokens, err error) { } pre = append(pre, init...) } - if cond, err = p.parseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond, ""); err != nil { return nil, err } pre = append(pre, cond...) @@ -393,7 +393,7 @@ func (p *Parser) parseSwitch(in Tokens) (out Tokens, err error) { } condSwitch := false if len(cond) > 0 { - if cond, err = p.parseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond, ""); err != nil { return nil, err } out = append(out, cond...) @@ -436,7 +436,7 @@ func (p *Parser) parseCaseClause(in Tokens, index, maximum int, condSwitch bool) } lcond := conds.Split(lang.Comma) for i, cond := range lcond { - if cond, err = p.parseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond, ""); err != nil { return out, err } txt := fmt.Sprintf("%sc%d.%d", p.scope, index, i) @@ -472,7 +472,7 @@ func (p *Parser) parseLabel(in Tokens) (out Tokens, err error) { func (p *Parser) parseReturn(in Tokens) (out Tokens, err error) { if l := len(in); l > 1 { - if out, err = p.parseExpr(in[1:]); err != nil { + if out, err = p.parseExpr(in[1:], ""); err != nil { return out, err } } else if l == 0 { @@ -519,5 +519,5 @@ func (p *Parser) popScope() { } func (p *Parser) precedence(t scanner.Token) int { - return p.TokenProps[t.Str].Precedence + return p.TokenProps[t.Tok].Precedence } diff --git a/parser/type.go b/parser/type.go index 12bc06b..70ed4d1 100644 --- a/parser/type.go +++ b/parser/type.go @@ -197,17 +197,3 @@ func (p *Parser) hasFirstParam(in Tokens) bool { s, _, ok := p.Symbols.Get(in[0].Str, p.scope) return !ok || s.Kind != symbol.Type } - -// typeStartIndex returns the index of the start of type expression in tokens, or -1. -func (p *Parser) typeStartIndex(in Tokens) int { - index := len(in) - 1 - for i := index; i >= 0; i-- { - switch in[i].Tok { - case lang.Ident, lang.Struct, lang.Map, lang.Func, lang.Interface, lang.Mul, lang.BraceBlock, lang.BracketBlock, lang.ParenBlock: - index = i - default: - return index - } - } - return -1 -} |
