From 6e2349e875e77d8af8b7d6f00f718db6813b40c1 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Fri, 3 Nov 2023 19:05:14 +0100 Subject: feat: add support for control flow operators in expressions Logical operators `&&` (and), `||` (or) are now parsed in expressions. The control flow tokens (labels, conditional jumps) are added accordingly. --- parser/compiler.go | 36 ++++++++++- parser/expr.go | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++ parser/parse.go | 110 ------------------------------- parser/symbol.go | 21 +++--- 4 files changed, 234 insertions(+), 120 deletions(-) create mode 100644 parser/expr.go (limited to 'parser') diff --git a/parser/compiler.go b/parser/compiler.go index 4791069..6bf9312 100644 --- a/parser/compiler.go +++ b/parser/compiler.go @@ -124,6 +124,11 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { if s.local { c.Emit(int64(t.Pos), vm.Fdup, int64(s.index)) } else { + if s.index < 0 { + // This global symbol is defined but not yet used. Add it to data. + s.index = len(c.Data) + c.Data = append(c.Data, s.value) + } c.Emit(int64(t.Pos), vm.Dup, int64(s.index)) } @@ -155,6 +160,30 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } c.Emit(int64(t.Pos), vm.JumpFalse, int64(i)) + case lang.JumpSetFalse: + label := t.Str[13:] + i := 0 + if s, ok := c.symbols[label]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) - len(c.Code) + } + c.Emit(int64(t.Pos), vm.JumpSetFalse, int64(i)) + + case lang.JumpSetTrue: + label := t.Str[12:] + i := 0 + if s, ok := c.symbols[label]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) - len(c.Code) + } + c.Emit(int64(t.Pos), vm.JumpSetTrue, int64(i)) + case lang.Goto: label := t.Str[5:] i := 0 @@ -177,11 +206,16 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { // Finally we fix unresolved labels for jump destinations. for _, t := range fixList { var label string + // TODO: this could be simplified. switch t.Id { case lang.Goto: label = t.Str[5:] case lang.JumpFalse: label = t.Str[10:] + case lang.JumpSetFalse: + label = t.Str[13:] + case lang.JumpSetTrue: + label = t.Str[12:] } s, ok := c.symbols[label] if !ok { @@ -213,7 +247,7 @@ func (c *Compiler) PrintCode() { } extra := "" switch l[1] { - case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.Calli: + case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: if d, ok := labels[i+(int)(l[2])]; ok { extra = "// " + d } diff --git a/parser/expr.go b/parser/expr.go new file mode 100644 index 0000000..18c97a5 --- /dev/null +++ b/parser/expr.go @@ -0,0 +1,187 @@ +package parser + +import ( + "log" + "strconv" + + "github.com/gnolang/parscan/lang" + "github.com/gnolang/parscan/scanner" +) + +func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { + log.Println("ParseExpr in:", in) + var ops Tokens + var vl int + // + // Process tokens from last to first, the goal is to reorder the tokens in + // a stack machine processing order, so it can be directly interpreted. + // + for i := len(in) - 1; i >= 0; i-- { + t := in[i] + // temporary assumptions: binary operators, returning 1 value + switch t.Id { + case lang.Ident: + // resolve symbol if not a selector rhs. + // TODO: test for selector expr. + _, sc, ok := p.getSym(t.Str, p.scope) + if ok { + if sc != "" { + t.Str = sc + "/" + t.Str + } + } + out = append(out, t) + vl++ + case lang.Int, lang.String: + out = append(out, t) + vl++ + case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, lang.Mul, lang.Land, lang.Lor: + if vl < 2 { + ops = append(ops, t) + break + } + case lang.ParenBlock: + // If the previous token is an arithmetic, logic or assign operator then + // this parenthesis block is an enclosed expr, otherwise a call expr. + if i == 0 || in[i-1].Id.IsOperator() { + out = append(out, t) + vl++ + break + } + // The call expression can be a function call, a conversion, + // a type assersion (including for type switch) + // func call: push args and func address then call + out = append(out, t) + vl++ + if t2 := in[i-1]; t2.Id == lang.Ident { + if s, sc, ok := p.getSym(t2.Str, p.scope); ok { + log.Println("callExpr:", t2.Str, p.scope, s, ok, sc) + if s.kind == symValue { + // Store the number of input parameters in the token Beg field. + ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) + break + } + } + } + ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos}) + } + if lops, lout := len(ops), len(out); lops > 0 && vl > lops { + op := ops[lops-1] + ops = ops[:lops-1] + // Reorder tokens according to operator precedence rules. + if p.precedence(out[lout-2]) > p.precedence(op) { + op, out[lout-1], out[lout-2] = out[lout-2], op, out[lout-1] + if p.precedence(out[lout-3]) > p.precedence(out[lout-1]) { + out[lout-1], out[lout-2], out[lout-3] = out[lout-3], out[lout-1], out[lout-2] + } + } + out = append(out, op) + vl-- + } + } + out = append(out, ops...) + + log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) + // A logical operator (&&, ||) involves additional control flow operations. + if out, err = p.ParseLogical(out); err != nil { + return out, err + } + if l := len(out) - 1; out[l].Id == lang.Define || out[l].Id == lang.Assign { + // Handle the assignment of a logical expression. + s1 := p.subExprLen(out[:l]) + head, err := p.ParseLogical(out[:l-s1]) + if err != nil { + return out, err + } + out = append(head, out[l-s1:]...) + } + + // The tokens are now properly ordered, process nested blocks. + for i := len(out) - 1; i >= 0; i-- { + t := out[i] + var toks Tokens + switch t.Id { + case lang.ParenBlock, lang.BracketBlock: + if toks, err = p.ParseExprStr(t.Block()); err != nil { + return out, err + } + default: + continue + } + + // replace block token by its parsed result. + log.Println("toks:", toks) + out2 := append(Tokens{}, out[:i]...) + out2 = append(out2, toks...) + out = append(out2, out[i+1:]...) + } + log.Println("Final out:", out) + return out, err +} + +func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { + if tokens, err = p.Scan(s, false); err != nil { + return + } + var result Tokens + for _, sub := range tokens.Split(lang.Comma) { + toks, err := p.ParseExpr(sub) + if err != nil { + return result, err + } + result = append(toks, result...) + } + return result, err +} + +// ParseLogical handles logical expressions with control flow (&& and ||) by +// ensuring the left hand side is evaluated unconditionally first, then the +// right hand side can be skipped or not by inserting a conditional jump and label. +// If the last token is not a logical operator then the function is idempotent. +func (p *Parser) ParseLogical(in Tokens) (out Tokens, err error) { + l := len(in) - 1 + if !in[l].Id.IsLogicalOp() { + return in, nil + } + xp := strconv.Itoa(p.labelCount[p.scope]) + p.labelCount[p.scope]++ + rhsIndex := p.subExprLen(in[:l]) + lhs, err := p.ParseLogical(in[l-rhsIndex : l]) + if err != nil { + return out, err + } + rhs, err := p.ParseLogical(in[:l-rhsIndex]) + if err != nil { + return out, err + } + out = append(out, lhs...) + if in[l].Id == lang.Lor { + out = append(out, scanner.Token{Id: lang.JumpSetTrue, Str: "JumpSetTrue " + p.scope + "x" + xp}) + } else { + out = append(out, scanner.Token{Id: lang.JumpSetFalse, Str: "JumpSetFalse " + p.scope + "x" + xp}) + } + out = append(out, rhs...) + out = append(out, scanner.Token{Id: lang.Label, Str: p.scope + "x" + xp}) + return out, err +} + +// subExprLen returns the length of the first complete sub-expression starting from the input end. +func (p *Parser) subExprLen(in Tokens) int { + l := len(in) - 1 + last := in[l] + switch last.Id { + case lang.Int, lang.Float, lang.String, lang.Char, lang.Ident, lang.ParenBlock, lang.BracketBlock: + return 1 + case lang.Call: + s1 := p.subExprLen(in[:l]) + return 1 + s1 + p.subExprLen(in[:l-s1]) + // TODO: add selector and index operators when ready + } + if last.Id.IsBinaryOp() { + s1 := p.subExprLen(in[:l]) + return 1 + s1 + p.subExprLen(in[:l-s1]) + } + if last.Id.IsUnaryOp() { + return 1 + p.subExprLen(in[:l]) + } + return 0 // should not occur. TODO: diplay some error here. +} diff --git a/parser/parse.go b/parser/parse.go index 757468c..fb8ae2c 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -441,116 +441,6 @@ func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { - log.Println("ParseExpr in:", in) - var ops Tokens - var vl int - // - // Process tokens from last to first, the goal is to reorder the tokens in - // a stack machine processing order, so it can be directly interpreted. - // - for i := len(in) - 1; i >= 0; i-- { - t := in[i] - // temporary assumptions: binary operators, returning 1 value - switch t.Id { - case lang.Ident: - // resolve symbol if not a selector rhs. - // TODO: test for selector expr. - _, sc, ok := p.getSym(t.Str, p.scope) - if ok && sc != "" { - t.Str = sc + "/" + t.Str - } - out = append(out, t) - vl++ - case lang.Int, lang.String: - out = append(out, t) - vl++ - case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, lang.Mul: - // TODO: handle operator precedence to swap operators / operands if necessary - if vl < 2 { - ops = append(ops, t) - break - } - case lang.ParenBlock: - // If the previous token is an arithmetic, logic or assign operator then - // this parenthesis block is an enclosed expr, otherwise a call expr. - if i == 0 || in[i-1].Id.IsOperator() { - out = append(out, t) - vl++ - break - } - // The call expression can be a function call, a conversion, - // a type assersion (including for type switch) - // func call: push args and func address then call - out = append(out, t) - vl++ - if t2 := in[i-1]; t2.Id == lang.Ident { - if s, sc, ok := p.getSym(t2.Str, p.scope); ok { - log.Println("callExpr:", t2.Str, p.scope, s, ok, sc) - if s.kind == symValue { - // Store the number of input parameters in the token Beg field. - ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) - break - } - } - } - ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos}) - } - if lops, lout := len(ops), len(out); lops > 0 && vl > lops { - op := ops[lops-1] - ops = ops[:lops-1] - // Reorder tokens according to operator precedence rules. - if p.precedence(out[lout-2]) > p.precedence(op) { - op, out[lout-1], out[lout-2] = out[lout-2], op, out[lout-1] - if p.precedence(out[lout-3]) > p.precedence(out[lout-1]) { - out[lout-1], out[lout-2], out[lout-3] = out[lout-3], out[lout-1], out[lout-2] - } - } - out = append(out, op) - vl-- - } - } - out = append(out, ops...) // TODO: verify that ops should be added in this order. - - log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) - // The tokens are now properly ordered, process nested blocks. - for i := len(out) - 1; i >= 0; i-- { - t := out[i] - var toks Tokens - switch t.Id { - case lang.ParenBlock, lang.BracketBlock: - if toks, err = p.ParseExprStr(t.Block()); err != nil { - return out, err - } - default: - continue - } - - // replace block token by its parsed result. - log.Println("toks:", toks) - out2 := append(Tokens{}, out[:i]...) - out2 = append(out2, toks...) - out = append(out2, out[i+1:]...) - } - log.Println("Final out:", out) - return out, err -} - -func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { - if tokens, err = p.Scan(s, false); err != nil { - return - } - var result Tokens - for _, sub := range tokens.Split(lang.Comma) { - toks, err := p.ParseExpr(sub) - if err != nil { - return result, err - } - result = append(toks, result...) - } - return result, err -} - func (p *Parser) numItems(s string, sep lang.TokenId) int { tokens, err := p.Scan(s, false) if err != nil { diff --git a/parser/symbol.go b/parser/symbol.go index f83ec4e..d32bb59 100644 --- a/parser/symbol.go +++ b/parser/symbol.go @@ -1,6 +1,7 @@ package parser import ( + "fmt" "reflect" "strings" ) @@ -51,15 +52,17 @@ func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) { func initUniverse() map[string]*symbol { return map[string]*symbol{ - "any": {kind: symType, Type: reflect.TypeOf((*any)(nil)).Elem()}, - "bool": {kind: symType, Type: reflect.TypeOf((*bool)(nil)).Elem()}, - "error": {kind: symType, Type: reflect.TypeOf((*error)(nil)).Elem()}, - "int": {kind: symType, Type: reflect.TypeOf((*int)(nil)).Elem()}, - "string": {kind: symType, Type: reflect.TypeOf((*string)(nil)).Elem()}, + "any": {kind: symType, index: -1, Type: reflect.TypeOf((*any)(nil)).Elem()}, + "bool": {kind: symType, index: -1, Type: reflect.TypeOf((*bool)(nil)).Elem()}, + "error": {kind: symType, index: -1, Type: reflect.TypeOf((*error)(nil)).Elem()}, + "int": {kind: symType, index: -1, Type: reflect.TypeOf((*int)(nil)).Elem()}, + "string": {kind: symType, index: -1, Type: reflect.TypeOf((*string)(nil)).Elem()}, - "nil": {}, - "iota": {value: 0}, - "true": {value: true}, - "false": {value: false}, + "nil": {index: -1}, + "iota": {index: -1, value: 0}, + "true": {index: -1, value: true, Type: reflect.TypeOf(true)}, + "false": {index: -1, value: false, Type: reflect.TypeOf(false)}, + + "println": {index: -1, value: func(v ...any) { fmt.Println(v...) }}, } } -- cgit v1.2.3