From 851c793da43be9e4d3319afe440d603c85834045 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Thu, 31 Aug 2023 17:53:54 +0200 Subject: codegen: fix interpreter re-entrance So multiple successive incremental Evals function correctly. Also improve the following: - Apply the same Eval API to vm0 and vm1 - parser: dot diagram display is now synchronous - codegen: outsource complex code generation for readability - vm1: Pop take the number of values to pop as operand --- cmd/gint/main.go | 17 ++++++----- codegen/compiler.go | 75 ++++++++++++++++++++---------------------------- codegen/compiler_test.go | 8 +++--- codegen/expression.go | 41 ++++++++++++++++++++++++++ codegen/interpreter.go | 24 ++++++++++------ parser/dot.go | 7 +++-- vm0/func.go | 2 +- vm0/vm.go | 8 +++--- vm0/vm_test.go | 4 +-- vm1/vm.go | 14 +++++++-- 10 files changed, 124 insertions(+), 76 deletions(-) create mode 100644 codegen/expression.go diff --git a/cmd/gint/main.go b/cmd/gint/main.go index 5b2b3a2..037614c 100644 --- a/cmd/gint/main.go +++ b/cmd/gint/main.go @@ -15,7 +15,7 @@ import ( ) type Interpreter interface { - Eval(string) error + Eval(string) (any, error) } func main() { @@ -23,7 +23,7 @@ func main() { var interp Interpreter = vm0.New(golang.GoParser) if len(os.Args) > 1 && os.Args[1] == "1" { interp = codegen.NewInterpreter(golang.GoParser) - interp.(*codegen.Interpreter).AddSym(fmt.Println, "println", false) + interp.(*codegen.Interpreter).AddSym(fmt.Println, "println") } in := os.Stdin @@ -31,19 +31,22 @@ func main() { // Provide an interactive line oriented Read Eval Print Loop (REPL). liner := bufio.NewScanner(in) text, prompt := "", "> " - fmt.Printf(prompt) + fmt.Print(prompt) for liner.Scan() { text += liner.Text() - err := interp.Eval(text + "\n") + res, err := interp.Eval(text + "\n") if err == nil { + if res != nil { + fmt.Println(": ", res) + } text, prompt = "", "> " } else if errors.Is(err, scanner.ErrBlock) { prompt = ">> " } else { - text, prompt = "", "> " fmt.Println("Error:", err) + text, prompt = "", "> " } - fmt.Printf(prompt) + fmt.Print(prompt) } return } @@ -52,7 +55,7 @@ func main() { if err != nil { log.Fatal(err) } - if err := interp.Eval(string(buf)); err != nil { + if _, err := interp.Eval(string(buf)); err != nil { log.Fatal(err) } } diff --git a/codegen/compiler.go b/codegen/compiler.go index 6f41850..48d3d41 100644 --- a/codegen/compiler.go +++ b/codegen/compiler.go @@ -9,24 +9,32 @@ import ( ) type symbol struct { - index int // address of symbol in frame - local bool // if true address is relative to local frame, otherwise global + index int // address of symbol in frame + local bool // if true address is relative to local frame, otherwise global + node *parser.Node // symbol definition in AST } type Compiler struct { Code [][]int64 // produced code, to fill VM with Data []any // produced data, will be at the bottom of VM stack - Entry int + Entry int // offset in Code to start execution from (skip function defintions) - symbols map[string]symbol + symbols map[string]*symbol } -func NewCompiler() *Compiler { return &Compiler{symbols: map[string]symbol{}, Entry: -1} } +func NewCompiler() *Compiler { return &Compiler{symbols: map[string]*symbol{}, Entry: -1} } type nodedata struct { ipstart, ipend, fsp int // CFG and symbol node annotations } +type extNode struct { + *Compiler + *parser.Node + anc *parser.Node // node ancestor + rank int // node rank in ancestor's children +} + func (c *Compiler) CodeGen(node *parser.Node) (err error) { notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ... scope := "" @@ -41,13 +49,13 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { switch n.Kind { case parser.FuncDecl: fname := n.Child[0].Content() - c.AddSym(len(c.Code), scope+fname, false) + c.addSym(len(c.Code), scope+fname, false, n) scope = pushScope(scope, fname) frameNode = append(frameNode, n) fnote = notes[n] for j, child := range n.Child[1].Child { vname := child.Content() - c.AddSym(-j-2, scope+vname, true) + c.addSym(-j-2, scope+vname, true, child) fnote.fsp++ } @@ -61,27 +69,18 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { }, func(n, a *parser.Node, k int) (ok bool) { // Node post-order processing callback. nd := notes[n] + x := extNode{c, n, a, k} switch n.Kind { case parser.AddOp: c.Emit(n, vm1.Add) case parser.CallExpr: - if c.isExternalSymbol(n.Child[0].Content()) { - // External call, using absolute addr in symtable - c.Emit(n, vm1.CallX, int64(len(n.Child[1].Child))) - break - } - // Internal call is always relative to instruction pointer. - i, ok := c.symInt(n.Child[0].Content()) - if !ok { - err = fmt.Errorf("invalid symbol %s", n.Child[0].Content()) - } - c.Emit(n, vm1.Call, int64(i-len(c.Code))) + err = postCallExpr(x) case parser.DefOp: // Define operation, global vars only. TODO: on local frame too - l := c.AddSym(nil, n.Child[0].Content(), false) + l := c.addSym(nil, n.Child[0].Content(), false, n) c.Emit(n, vm1.Assign, int64(l)) case parser.FuncDecl: @@ -98,7 +97,7 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { c.Emit(n, vm1.Fdup, int64(s.index)) } else if a != nil && a.Kind == parser.AssignOp { c.Emit(n, vm1.Push, int64(s.index)) - } else if c.isExternalSymbol(ident) { + } else if _, ok := c.Data[s.index].(int); !ok { c.Emit(n, vm1.Dup, int64(s.index)) } } @@ -116,10 +115,8 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { c.Emit(n, vm1.Push, v) case error: err = v - return false default: err = fmt.Errorf("type not supported: %T\n", v) - return false } case parser.ReturnStmt: @@ -141,6 +138,10 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { c.Emit(n, vm1.Sub) } + if err != nil { + return false + } + // TODO: Fix this temporary hack to compute an entry point if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.FuncDecl { c.Entry = len(c.Code) - 1 @@ -153,14 +154,16 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { return } -func (c *Compiler) AddSym(v any, name string, local bool) int { +func (c *Compiler) AddSym(v any, name string) int { return c.addSym(v, name, false, nil) } + +func (c *Compiler) addSym(v any, name string, local bool, n *parser.Node) int { l := len(c.Data) if local { l = v.(int) } else { c.Data = append(c.Data, v) } - c.symbols[name] = symbol{index: l, local: local} + c.symbols[name] = &symbol{index: l, local: local, node: n} return l } @@ -171,25 +174,9 @@ func (c *Compiler) Emit(n *parser.Node, op ...int64) int { return l } -func (c *Compiler) isExternalSymbol(name string) bool { - s, ok := c.symbols[name] - if !ok { - return false - } - _, isInt := c.Data[s.index].(int) - return !isInt -} - -func (c *Compiler) symInt(name string) (int, bool) { - s, ok := c.symbols[name] - if !ok { - return 0, false - } - j, ok := c.Data[s.index].(int) - if !ok { - return 0, false - } - return j, true +func (c *Compiler) codeIndex(s *symbol) (i int, ok bool) { + i, ok = c.Data[s.index].(int) + return } func pushScope(scope, name string) string { @@ -206,7 +193,7 @@ func popScope(scope string) string { } // getSym searches for an existing symbol starting from the deepest scope. -func (c *Compiler) getSym(name, scope string) (sym symbol, sc string, ok bool) { +func (c *Compiler) getSym(name, scope string) (sym *symbol, sc string, ok bool) { for { if sym, ok = c.symbols[scope+name]; ok { return sym, scope, ok diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go index 5989210..a876d52 100644 --- a/codegen/compiler_test.go +++ b/codegen/compiler_test.go @@ -16,7 +16,7 @@ func TestCodeGen(t *testing.T) { test := test t.Run("", func(t *testing.T) { c := NewCompiler() - c.AddSym(fmt.Println, "println", false) + c.AddSym(fmt.Println, "println") n := &parser.Node{} var err error if n.Child, err = golang.GoParser.Parse(test.src); err != nil { @@ -45,13 +45,13 @@ var tests = []struct { asm: "Push 1\nPush 2\nAdd\n", }, { // #01 src: `println("Hello")`, - asm: "Dup 0\nDup 1\nCallX 1\n", + asm: "Dup 0\nDup 1\nCallX 1\nPop 2\n", }, { // #02 src: `a := 2; println(a)`, - asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\n", + asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\nPop 2\n", }, { // #03 src: `a := 2; if a < 3 {println(a)}; println("bye")`, - asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 4\nDup 0\nDup 1\nCallX 1\nDup 0\nDup 2\nCallX 1\n", + asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 5\nDup 0\nDup 1\nCallX 1\nPop 2\nDup 0\nDup 2\nCallX 1\nPop 2\n", }, { // #04 src: "func add(a int, b int) int { return a + b }", asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n", diff --git a/codegen/expression.go b/codegen/expression.go new file mode 100644 index 0000000..b73f4da --- /dev/null +++ b/codegen/expression.go @@ -0,0 +1,41 @@ +package codegen + +import ( + "fmt" + "reflect" + + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/vm1" +) + +func postCallExpr(x extNode) error { + switch x.Child[0].Kind { + case parser.Ident: + var numOut int + s, _, ok := x.getSym(x.Child[0].Content(), "") + if !ok { + return fmt.Errorf("invalid symbol %s", x.Child[0].Content()) + } + if i, ok := x.codeIndex(s); ok { + // Internal call is always relative to instruction pointer. + x.Emit(x.Node, vm1.Call, int64(i-len(x.Code))) + } else { + // External call, using absolute addr in symtable. + x.Emit(x.Node, vm1.CallX, int64(len(x.Child[1].Child))) + numOut = reflect.TypeOf(x.Data[s.index]).NumOut() + } + if !usedRet(x.anc) { + x.Emit(x.Node, vm1.Pop, int64(numOut)) + } + } + return nil +} + +func usedRet(n *parser.Node) bool { + switch n.Kind { + case parser.Undefined, parser.StmtBloc: + return false + default: + return true + } +} diff --git a/codegen/interpreter.go b/codegen/interpreter.go index b26b403..a3268d9 100644 --- a/codegen/interpreter.go +++ b/codegen/interpreter.go @@ -19,24 +19,30 @@ func NewInterpreter(p *parser.Parser) *Interpreter { return &Interpreter{p, NewCompiler(), &vm1.Machine{}} } -func (i *Interpreter) Eval(src string) (err error) { +func (i *Interpreter) Eval(src string) (res any, err error) { n := &parser.Node{} if n.Child, err = i.Parse(src); err != nil { - return err + return res, err } if debug { n.Dot(os.Getenv("DOT"), "") } - offset := len(i.Code) - i.PopExit() // Remove last exit from previous run. + codeOffset := len(i.Code) + dataOffset := 0 + if codeOffset > 0 { + // All data must be copied to the VM the first time only (re-entrance). + dataOffset = len(i.Data) + } + i.PopExit() // Remove last exit from previous run (re-entrance). if err = i.CodeGen(n); err != nil { - return err + return res, err } - i.Push(i.Data...) - i.PushCode(i.Code[offset:]...) + i.Push(i.Data[dataOffset:]...) + i.PushCode(i.Code[codeOffset:]...) i.PushCode([]int64{0, vm1.Exit}) - i.SetIP(max(offset, i.Entry)) - return i.Run() + i.SetIP(max(codeOffset, i.Entry)) + err = i.Run() + return i.Top(), err } func max(a, b int) int { diff --git a/parser/dot.go b/parser/dot.go index aae5c5f..f486cd5 100644 --- a/parser/dot.go +++ b/parser/dot.go @@ -20,8 +20,11 @@ func (*Parser) Adot(nodes []*Node, c string) { func (n *Node) Dot(c, s string) { dw, cmd := dotWriter(c) n.astDot(dw, s) - if cmd != nil { - cmd.Wait() + if cmd == nil { + return + } + if err := cmd.Wait(); err != nil { + log.Fatal(err) } } diff --git a/vm0/func.go b/vm0/func.go index 6976530..ee503bc 100644 --- a/vm0/func.go +++ b/vm0/func.go @@ -60,7 +60,7 @@ func (i *Interp) declareFunc(r *parser.Node, scope string) { for _, arg := range args { i.push(arg.Interface()) } - if _, err := i.Run(r.Child[len(r.Child)-1], fscope); err != nil { + if err := i.Run(r.Child[len(r.Child)-1], fscope); err != nil { panic(err) } b := len(i.stack) - len(out) diff --git a/vm0/vm.go b/vm0/vm.go index 8d0b49e..e26e20f 100644 --- a/vm0/vm.go +++ b/vm0/vm.go @@ -23,7 +23,7 @@ func New(p *parser.Parser) (i *Interp) { return i } -func (i *Interp) Eval(src string) (err error) { +func (i *Interp) Eval(src string) (res any, err error) { n := &parser.Node{} if n.Child, err = i.Parse(src); err != nil { return @@ -32,7 +32,7 @@ func (i *Interp) Eval(src string) (err error) { n.Dot(os.Getenv("DOT"), "") } for _, nod := range n.Child { - if _, err = i.Run(nod, ""); err != nil { + if err = i.Run(nod, ""); err != nil { break } } @@ -40,7 +40,7 @@ func (i *Interp) Eval(src string) (err error) { } // Run implements a stack based virtual machine which directly walks the AST. -func (i *Interp) Run(node *parser.Node, scope string) (res []any, err error) { +func (i *Interp) Run(node *parser.Node, scope string) (err error) { stop := false node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) { @@ -129,7 +129,7 @@ func (i *Interp) Run(node *parser.Node, scope string) (res []any, err error) { } return true }) - return nil, nil + return nil } // getSym searches for an existing symbol starting from the deepest scope. diff --git a/vm0/vm_test.go b/vm0/vm_test.go index 0e8896b..dc0829b 100644 --- a/vm0/vm_test.go +++ b/vm0/vm_test.go @@ -20,7 +20,7 @@ func TestEval(t *testing.T) { } i.Adot(nodes, os.Getenv("DOT")) for _, n := range nodes { - v, err := i.Run(n, "") - t.Log(v, err) + err := i.Run(n, "") + t.Log(err) } } diff --git a/vm1/vm.go b/vm1/vm.go index c2f4882..572a171 100644 --- a/vm1/vm.go +++ b/vm1/vm.go @@ -57,14 +57,15 @@ type Machine struct { code [][]int64 // code to execute mem []any // memory, as a stack ip, fp int // instruction and frame pointer + ic uint64 // instruction counter, incremented at each instruction executed // flags uint // to set options such as restrict CallX, etc... } // Run runs a program. func (m *Machine) Run() (err error) { - code, mem, ip, fp, sp := m.code, m.mem, m.ip, m.fp, 0 + code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic - defer func() { m.mem, m.ip, m.fp = mem, ip, fp }() + defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }() trace := func() { if !debug { @@ -84,6 +85,7 @@ func (m *Machine) Run() (err error) { for { sp = len(mem) // stack pointer trace() + ic++ switch op := code[ip]; op[1] { case Add: mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int) @@ -136,7 +138,7 @@ func (m *Machine) Run() (err error) { case Loweri: mem[sp-1] = mem[sp-1].(int) < int(op[2]) case Pop: - mem = mem[:sp-1] + mem = mem[:sp-int(op[2])] case Push: mem = append(mem, int(op[2])) case Return: @@ -164,6 +166,12 @@ func (m *Machine) PushCode(code ...[]int64) (p int) { func (m *Machine) SetIP(ip int) { m.ip = ip } func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return } func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return } +func (m *Machine) Top() (v any) { + if l := len(m.mem); l > 0 { + v = m.mem[l-1] + } + return +} func (m *Machine) PopExit() { if l := len(m.code); l > 0 && m.code[l-1][1] == Exit { -- cgit v1.2.3