diff options
| author | Marc Vertes <mvertes@free.fr> | 2023-11-03 19:05:14 +0100 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2023-11-03 19:05:14 +0100 |
| commit | 6e2349e875e77d8af8b7d6f00f718db6813b40c1 (patch) | |
| tree | 74b476bc508264378fcb11bc7849658d76955f68 | |
| parent | 20d331813cf05f42961f0f6df531c2880f753a07 (diff) | |
feat: add support for control flow operators in expressions
Logical operators `&&` (and), `||` (or) are now parsed in expressions.
The control flow tokens (labels, conditional jumps) are added
accordingly.
| -rw-r--r-- | lang/token.go | 63 | ||||
| -rw-r--r-- | main.go | 1 | ||||
| -rw-r--r-- | parser/compiler.go | 36 | ||||
| -rw-r--r-- | parser/expr.go | 187 | ||||
| -rw-r--r-- | parser/parse.go | 110 | ||||
| -rw-r--r-- | parser/symbol.go | 21 | ||||
| -rw-r--r-- | vm/vm.go | 114 |
7 files changed, 338 insertions, 194 deletions
diff --git a/lang/token.go b/lang/token.go index 995d621..880f474 100644 --- a/lang/token.go +++ b/lang/token.go @@ -13,6 +13,7 @@ const ( String // Binary operators (except indicated) + // Arithmetic and bitwise binary operators Add // + Sub // - Mul // * @@ -24,7 +25,21 @@ const ( Shl // << Shr // >> AndNot // &^ + Period // . + // Binary operators returning a boolean + Equal // == + Greater // > + GreaterEqual // >= + Land // && + Less // < + LessEqual // <= + Lor // || + NotEqual // != + + // Assigment operators (arithmetic and bitwise) + Define // := + Assign // = AddAssign // += SubAssign // -= MulAssign // *= @@ -36,31 +51,21 @@ const ( ShlAssign // <<= ShrAssign // >>= AndNotAssign // &^= - - Land // && - Lor // || - Arrow // unary -> Inc // ++ Dec // -- - Equal // == - Less // < - Greater // > - Assign // = - Not // unary ! - Plus // unary + - Minus // unary - - Address // unary & - Deref // unary * - BitComp // unary ^ - NotEqual // != - LessEqual // <= - GreaterEqual // >= - Define // := - Ellipsis // unary ... - Period // . - Tilde // unary ~ - // Separators + // Unary operations + Plus // unary + + Minus // unary - + Address // unary & + Deref // unary * + BitComp // unary ^ + Arrow // unary -> + Ellipsis // unary ... + Not // unary ! + Tilde // unary ~ + + // Separators (punctuation) Comma // , Semicolon // ; Colon // : @@ -97,13 +102,19 @@ const ( Type Var - // Internal tokens (no corresponding keyword) + // Internal virtual machine tokens (no corresponding keyword) Call CallX Label JumpFalse + JumpSetFalse + JumpSetTrue ) -func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var } -func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde } -func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } +func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var } +func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde } +func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } +func (t TokenId) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not } +func (t TokenId) IsBinaryOp() bool { return t >= Add && t <= NotEqual } +func (t TokenId) IsUnaryOp() bool { return t >= Plus && t <= Tilde } +func (t TokenId) IsLogicalOp() bool { return t == Land || t == Lor } @@ -72,7 +72,6 @@ func run(arg []string) (err error) { args := rflag.Args() interp := parser.NewInterpreter(scanner.NewScanner(golang.GoSpec)) - interp.AddSym("println", fmt.Println) in := os.Stdin if len(args) > 0 { diff --git a/parser/compiler.go b/parser/compiler.go index 4791069..6bf9312 100644 --- a/parser/compiler.go +++ b/parser/compiler.go @@ -124,6 +124,11 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { if s.local { c.Emit(int64(t.Pos), vm.Fdup, int64(s.index)) } else { + if s.index < 0 { + // This global symbol is defined but not yet used. Add it to data. + s.index = len(c.Data) + c.Data = append(c.Data, s.value) + } c.Emit(int64(t.Pos), vm.Dup, int64(s.index)) } @@ -155,6 +160,30 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } c.Emit(int64(t.Pos), vm.JumpFalse, int64(i)) + case lang.JumpSetFalse: + label := t.Str[13:] + i := 0 + if s, ok := c.symbols[label]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) - len(c.Code) + } + c.Emit(int64(t.Pos), vm.JumpSetFalse, int64(i)) + + case lang.JumpSetTrue: + label := t.Str[12:] + i := 0 + if s, ok := c.symbols[label]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) - len(c.Code) + } + c.Emit(int64(t.Pos), vm.JumpSetTrue, int64(i)) + case lang.Goto: label := t.Str[5:] i := 0 @@ -177,11 +206,16 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { // Finally we fix unresolved labels for jump destinations. for _, t := range fixList { var label string + // TODO: this could be simplified. switch t.Id { case lang.Goto: label = t.Str[5:] case lang.JumpFalse: label = t.Str[10:] + case lang.JumpSetFalse: + label = t.Str[13:] + case lang.JumpSetTrue: + label = t.Str[12:] } s, ok := c.symbols[label] if !ok { @@ -213,7 +247,7 @@ func (c *Compiler) PrintCode() { } extra := "" switch l[1] { - case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.Calli: + case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: if d, ok := labels[i+(int)(l[2])]; ok { extra = "// " + d } diff --git a/parser/expr.go b/parser/expr.go new file mode 100644 index 0000000..18c97a5 --- /dev/null +++ b/parser/expr.go @@ -0,0 +1,187 @@ +package parser + +import ( + "log" + "strconv" + + "github.com/gnolang/parscan/lang" + "github.com/gnolang/parscan/scanner" +) + +func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { + log.Println("ParseExpr in:", in) + var ops Tokens + var vl int + // + // Process tokens from last to first, the goal is to reorder the tokens in + // a stack machine processing order, so it can be directly interpreted. + // + for i := len(in) - 1; i >= 0; i-- { + t := in[i] + // temporary assumptions: binary operators, returning 1 value + switch t.Id { + case lang.Ident: + // resolve symbol if not a selector rhs. + // TODO: test for selector expr. + _, sc, ok := p.getSym(t.Str, p.scope) + if ok { + if sc != "" { + t.Str = sc + "/" + t.Str + } + } + out = append(out, t) + vl++ + case lang.Int, lang.String: + out = append(out, t) + vl++ + case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, lang.Mul, lang.Land, lang.Lor: + if vl < 2 { + ops = append(ops, t) + break + } + case lang.ParenBlock: + // If the previous token is an arithmetic, logic or assign operator then + // this parenthesis block is an enclosed expr, otherwise a call expr. + if i == 0 || in[i-1].Id.IsOperator() { + out = append(out, t) + vl++ + break + } + // The call expression can be a function call, a conversion, + // a type assersion (including for type switch) + // func call: push args and func address then call + out = append(out, t) + vl++ + if t2 := in[i-1]; t2.Id == lang.Ident { + if s, sc, ok := p.getSym(t2.Str, p.scope); ok { + log.Println("callExpr:", t2.Str, p.scope, s, ok, sc) + if s.kind == symValue { + // Store the number of input parameters in the token Beg field. + ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) + break + } + } + } + ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos}) + } + if lops, lout := len(ops), len(out); lops > 0 && vl > lops { + op := ops[lops-1] + ops = ops[:lops-1] + // Reorder tokens according to operator precedence rules. + if p.precedence(out[lout-2]) > p.precedence(op) { + op, out[lout-1], out[lout-2] = out[lout-2], op, out[lout-1] + if p.precedence(out[lout-3]) > p.precedence(out[lout-1]) { + out[lout-1], out[lout-2], out[lout-3] = out[lout-3], out[lout-1], out[lout-2] + } + } + out = append(out, op) + vl-- + } + } + out = append(out, ops...) + + log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) + // A logical operator (&&, ||) involves additional control flow operations. + if out, err = p.ParseLogical(out); err != nil { + return out, err + } + if l := len(out) - 1; out[l].Id == lang.Define || out[l].Id == lang.Assign { + // Handle the assignment of a logical expression. + s1 := p.subExprLen(out[:l]) + head, err := p.ParseLogical(out[:l-s1]) + if err != nil { + return out, err + } + out = append(head, out[l-s1:]...) + } + + // The tokens are now properly ordered, process nested blocks. + for i := len(out) - 1; i >= 0; i-- { + t := out[i] + var toks Tokens + switch t.Id { + case lang.ParenBlock, lang.BracketBlock: + if toks, err = p.ParseExprStr(t.Block()); err != nil { + return out, err + } + default: + continue + } + + // replace block token by its parsed result. + log.Println("toks:", toks) + out2 := append(Tokens{}, out[:i]...) + out2 = append(out2, toks...) + out = append(out2, out[i+1:]...) + } + log.Println("Final out:", out) + return out, err +} + +func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { + if tokens, err = p.Scan(s, false); err != nil { + return + } + var result Tokens + for _, sub := range tokens.Split(lang.Comma) { + toks, err := p.ParseExpr(sub) + if err != nil { + return result, err + } + result = append(toks, result...) + } + return result, err +} + +// ParseLogical handles logical expressions with control flow (&& and ||) by +// ensuring the left hand side is evaluated unconditionally first, then the +// right hand side can be skipped or not by inserting a conditional jump and label. +// If the last token is not a logical operator then the function is idempotent. +func (p *Parser) ParseLogical(in Tokens) (out Tokens, err error) { + l := len(in) - 1 + if !in[l].Id.IsLogicalOp() { + return in, nil + } + xp := strconv.Itoa(p.labelCount[p.scope]) + p.labelCount[p.scope]++ + rhsIndex := p.subExprLen(in[:l]) + lhs, err := p.ParseLogical(in[l-rhsIndex : l]) + if err != nil { + return out, err + } + rhs, err := p.ParseLogical(in[:l-rhsIndex]) + if err != nil { + return out, err + } + out = append(out, lhs...) + if in[l].Id == lang.Lor { + out = append(out, scanner.Token{Id: lang.JumpSetTrue, Str: "JumpSetTrue " + p.scope + "x" + xp}) + } else { + out = append(out, scanner.Token{Id: lang.JumpSetFalse, Str: "JumpSetFalse " + p.scope + "x" + xp}) + } + out = append(out, rhs...) + out = append(out, scanner.Token{Id: lang.Label, Str: p.scope + "x" + xp}) + return out, err +} + +// subExprLen returns the length of the first complete sub-expression starting from the input end. +func (p *Parser) subExprLen(in Tokens) int { + l := len(in) - 1 + last := in[l] + switch last.Id { + case lang.Int, lang.Float, lang.String, lang.Char, lang.Ident, lang.ParenBlock, lang.BracketBlock: + return 1 + case lang.Call: + s1 := p.subExprLen(in[:l]) + return 1 + s1 + p.subExprLen(in[:l-s1]) + // TODO: add selector and index operators when ready + } + if last.Id.IsBinaryOp() { + s1 := p.subExprLen(in[:l]) + return 1 + s1 + p.subExprLen(in[:l-s1]) + } + if last.Id.IsUnaryOp() { + return 1 + p.subExprLen(in[:l]) + } + return 0 // should not occur. TODO: diplay some error here. +} diff --git a/parser/parse.go b/parser/parse.go index 757468c..fb8ae2c 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -441,116 +441,6 @@ func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { - log.Println("ParseExpr in:", in) - var ops Tokens - var vl int - // - // Process tokens from last to first, the goal is to reorder the tokens in - // a stack machine processing order, so it can be directly interpreted. - // - for i := len(in) - 1; i >= 0; i-- { - t := in[i] - // temporary assumptions: binary operators, returning 1 value - switch t.Id { - case lang.Ident: - // resolve symbol if not a selector rhs. - // TODO: test for selector expr. - _, sc, ok := p.getSym(t.Str, p.scope) - if ok && sc != "" { - t.Str = sc + "/" + t.Str - } - out = append(out, t) - vl++ - case lang.Int, lang.String: - out = append(out, t) - vl++ - case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, lang.Mul: - // TODO: handle operator precedence to swap operators / operands if necessary - if vl < 2 { - ops = append(ops, t) - break - } - case lang.ParenBlock: - // If the previous token is an arithmetic, logic or assign operator then - // this parenthesis block is an enclosed expr, otherwise a call expr. - if i == 0 || in[i-1].Id.IsOperator() { - out = append(out, t) - vl++ - break - } - // The call expression can be a function call, a conversion, - // a type assersion (including for type switch) - // func call: push args and func address then call - out = append(out, t) - vl++ - if t2 := in[i-1]; t2.Id == lang.Ident { - if s, sc, ok := p.getSym(t2.Str, p.scope); ok { - log.Println("callExpr:", t2.Str, p.scope, s, ok, sc) - if s.kind == symValue { - // Store the number of input parameters in the token Beg field. - ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) - break - } - } - } - ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos}) - } - if lops, lout := len(ops), len(out); lops > 0 && vl > lops { - op := ops[lops-1] - ops = ops[:lops-1] - // Reorder tokens according to operator precedence rules. - if p.precedence(out[lout-2]) > p.precedence(op) { - op, out[lout-1], out[lout-2] = out[lout-2], op, out[lout-1] - if p.precedence(out[lout-3]) > p.precedence(out[lout-1]) { - out[lout-1], out[lout-2], out[lout-3] = out[lout-3], out[lout-1], out[lout-2] - } - } - out = append(out, op) - vl-- - } - } - out = append(out, ops...) // TODO: verify that ops should be added in this order. - - log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) - // The tokens are now properly ordered, process nested blocks. - for i := len(out) - 1; i >= 0; i-- { - t := out[i] - var toks Tokens - switch t.Id { - case lang.ParenBlock, lang.BracketBlock: - if toks, err = p.ParseExprStr(t.Block()); err != nil { - return out, err - } - default: - continue - } - - // replace block token by its parsed result. - log.Println("toks:", toks) - out2 := append(Tokens{}, out[:i]...) - out2 = append(out2, toks...) - out = append(out2, out[i+1:]...) - } - log.Println("Final out:", out) - return out, err -} - -func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { - if tokens, err = p.Scan(s, false); err != nil { - return - } - var result Tokens - for _, sub := range tokens.Split(lang.Comma) { - toks, err := p.ParseExpr(sub) - if err != nil { - return result, err - } - result = append(toks, result...) - } - return result, err -} - func (p *Parser) numItems(s string, sep lang.TokenId) int { tokens, err := p.Scan(s, false) if err != nil { diff --git a/parser/symbol.go b/parser/symbol.go index f83ec4e..d32bb59 100644 --- a/parser/symbol.go +++ b/parser/symbol.go @@ -1,6 +1,7 @@ package parser import ( + "fmt" "reflect" "strings" ) @@ -51,15 +52,17 @@ func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) { func initUniverse() map[string]*symbol { return map[string]*symbol{ - "any": {kind: symType, Type: reflect.TypeOf((*any)(nil)).Elem()}, - "bool": {kind: symType, Type: reflect.TypeOf((*bool)(nil)).Elem()}, - "error": {kind: symType, Type: reflect.TypeOf((*error)(nil)).Elem()}, - "int": {kind: symType, Type: reflect.TypeOf((*int)(nil)).Elem()}, - "string": {kind: symType, Type: reflect.TypeOf((*string)(nil)).Elem()}, + "any": {kind: symType, index: -1, Type: reflect.TypeOf((*any)(nil)).Elem()}, + "bool": {kind: symType, index: -1, Type: reflect.TypeOf((*bool)(nil)).Elem()}, + "error": {kind: symType, index: -1, Type: reflect.TypeOf((*error)(nil)).Elem()}, + "int": {kind: symType, index: -1, Type: reflect.TypeOf((*int)(nil)).Elem()}, + "string": {kind: symType, index: -1, Type: reflect.TypeOf((*string)(nil)).Elem()}, - "nil": {}, - "iota": {value: 0}, - "true": {value: true}, - "false": {value: false}, + "nil": {index: -1}, + "iota": {index: -1, value: 0}, + "true": {index: -1, value: true, Type: reflect.TypeOf(true)}, + "false": {index: -1, value: false, Type: reflect.TypeOf(false)}, + + "println": {index: -1, value: func(v ...any) { fmt.Println(v...) }}, } } @@ -12,55 +12,59 @@ const debug = true // Byte-code instruction set. const ( // instruction effect on stack: values consumed -- values produced - Nop = iota // -- - Add // n1 n2 -- sum ; sum = n1+n2 - Assign // val -- ; mem[$1] = val - Fassign // val -- ; mem[$1] = val - Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) - Calli // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) - CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...) - Dup // addr -- value ; value = mem[addr] - Fdup // addr -- value ; value = mem[addr] - Equal // n1 n2 -- cond ; cond = n1 == n2 - Exit // -- ; - Greater // n1 n2 -- cond; cond = n1 > n2 - Jump // -- ; ip += $1 - JumpTrue // cond -- ; if cond { ip += $1 } - JumpFalse // cond -- ; if cond { ip += $1 } - Lower // n1 n2 -- cond ; cond = n1 < n2 - Loweri // n1 -- cond ; cond = n1 < $1 - Mul // n1 n2 -- prod ; prod = n1*n2 - Pop // v -- - Push // -- v - Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop - Sub // n1 n2 -- diff ; diff = n1 - n2 - Subi // n1 -- diff ; diff = n1 - $1 + Nop = iota // -- + Add // n1 n2 -- sum ; sum = n1+n2 + Assign // val -- ; mem[$1] = val + Fassign // val -- ; mem[$1] = val + Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) + Calli // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) + CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...) + Dup // addr -- value ; value = mem[addr] + Fdup // addr -- value ; value = mem[addr] + Equal // n1 n2 -- cond ; cond = n1 == n2 + Exit // -- ; + Greater // n1 n2 -- cond; cond = n1 > n2 + Jump // -- ; ip += $1 + JumpTrue // cond -- ; if cond { ip += $1 } + JumpFalse // cond -- ; if cond { ip += $1 } + JumpSetTrue // + JumpSetFalse // + Lower // n1 n2 -- cond ; cond = n1 < n2 + Loweri // n1 -- cond ; cond = n1 < $1 + Mul // n1 n2 -- prod ; prod = n1*n2 + Pop // v -- + Push // -- v + Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop + Sub // n1 n2 -- diff ; diff = n1 - n2 + Subi // n1 -- diff ; diff = n1 - $1 ) var strop = [...]string{ // for VM tracing. - Nop: "Nop", - Add: "Add", - Assign: "Assign", - Call: "Call", - Calli: "Calli", - CallX: "CallX", - Dup: "Dup", - Equal: "Equal", - Exit: "Exit", - Fassign: "Fassign", - Fdup: "Fdup", - Greater: "Greater", - Jump: "Jump", - JumpTrue: "JumpTrue", - JumpFalse: "JumpFalse", - Lower: "Lower", - Loweri: "Loweri", - Mul: "Mul", - Pop: "Pop", - Push: "Push", - Return: "Return", - Sub: "Sub", - Subi: "Subi", + Nop: "Nop", + Add: "Add", + Assign: "Assign", + Call: "Call", + Calli: "Calli", + CallX: "CallX", + Dup: "Dup", + Equal: "Equal", + Exit: "Exit", + Fassign: "Fassign", + Fdup: "Fdup", + Greater: "Greater", + Jump: "Jump", + JumpTrue: "JumpTrue", + JumpFalse: "JumpFalse", + JumpSetTrue: "JumpSetTrue", + JumpSetFalse: "JumpSetFalse", + Lower: "Lower", + Loweri: "Loweri", + Mul: "Mul", + Pop: "Pop", + Push: "Push", + Return: "Return", + Sub: "Sub", + Subi: "Subi", } // Machine represents a virtual machine. @@ -90,7 +94,7 @@ func (m *Machine) Run() (err error) { op3 = strconv.Itoa(int(c[3])) } } - log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem) + log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-12s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem) } for { @@ -158,6 +162,22 @@ func (m *Machine) Run() (err error) { ip += int(op[2]) continue } + case JumpSetTrue: + cond := mem[sp-1].(bool) + if cond { + ip += int(op[2]) + // Note that stack is not modified if cond is true + continue + } + mem = mem[:sp-1] + case JumpSetFalse: + cond := mem[sp-1].(bool) + if !cond { + ip += int(op[2]) + // Note that stack is not modified if cond is false + continue + } + mem = mem[:sp-1] case Greater: mem[sp-2] = mem[sp-1].(int) > mem[sp-2].(int) mem = mem[:sp-1] |
