From 9fdef50606a2942389189cd61397e17c0a0ccfd7 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Thu, 24 Aug 2023 17:16:39 +0200 Subject: codegen: add Interpreter struct This makes the code easier to use. --- cmd/gint/main.go | 26 +----- codegen/codegen.go | 226 ----------------------------------------------- codegen/codegen_test.go | 58 ------------ codegen/compiler.go | 226 +++++++++++++++++++++++++++++++++++++++++++++++ codegen/compiler_test.go | 58 ++++++++++++ codegen/interpreter.go | 38 ++++++++ vm0/README.md | 27 ++++++ vm1/README.md | 2 +- vm1/vm.go | 19 ++-- 9 files changed, 365 insertions(+), 315 deletions(-) delete mode 100644 codegen/codegen.go delete mode 100644 codegen/codegen_test.go create mode 100644 codegen/compiler.go create mode 100644 codegen/compiler_test.go create mode 100644 codegen/interpreter.go create mode 100644 vm0/README.md diff --git a/cmd/gint/main.go b/cmd/gint/main.go index 85fdba0..15a79b0 100644 --- a/cmd/gint/main.go +++ b/cmd/gint/main.go @@ -7,9 +7,7 @@ import ( "github.com/gnolang/parscan/codegen" "github.com/gnolang/parscan/lang/golang" - "github.com/gnolang/parscan/parser" "github.com/gnolang/parscan/vm0" - "github.com/gnolang/parscan/vm1" ) func main() { @@ -50,25 +48,7 @@ func run0(src string) error { } func run1(src string) (err error) { - m := &vm1.Machine{} - c := codegen.New() - c.AddSym(fmt.Println, "println", false) - n := &parser.Node{} - if n.Child, err = golang.GoParser.Parse(src); err != nil { - return err - } - n.Dot(os.Getenv("DOT"), "") - if err = c.CodeGen(n); err != nil { - return err - } - c.Emit(n, vm1.Exit) - log.Println("data:", c.Data) - log.Println("code:", vm1.Disassemble(c.Code)) - for _, v := range c.Data { - m.Push(v) - } - m.PushCode(c.Code) - m.SetIP(c.Entry) - m.Run() - return + i := codegen.NewInterpreter(golang.GoParser) + i.AddSym(fmt.Println, "println", false) + return i.Eval(src) } diff --git a/codegen/codegen.go b/codegen/codegen.go deleted file mode 100644 index d7702cd..0000000 --- a/codegen/codegen.go +++ /dev/null @@ -1,226 +0,0 @@ -package codegen - -import ( - "fmt" - "strings" - - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -type symbol struct { - index int // address of symbol in frame - local bool // if true address is relative to local frame, otherwise global -} - -type Compiler struct { - Code [][]int64 // produced code, to fill VM with - Data []any // produced data, will be at the bottom of VM stack - Entry int - - symbols map[string]symbol -} - -func New() *Compiler { return &Compiler{symbols: map[string]symbol{}, Entry: -1} } - -type nodedata struct { - ipstart, ipend, symind, fsp int // CFG and symbol node annotations -} - -func (c *Compiler) CodeGen(node *parser.Node) (err error) { - notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ... - scope := "" - frameNode := []*parser.Node{node} - fnote := notes[node] - - node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) { - // Node pre-order processing callback. - notes[n] = &nodedata{} - nd := notes[n] - - switch n.Kind { - case parser.FuncDecl: - fname := n.Child[0].Content() - c.AddSym(len(c.Code), scope+fname, false) - scope = pushScope(scope, fname) - frameNode = append(frameNode, n) - fnote = notes[n] - for j, child := range n.Child[1].Child { - vname := child.Content() - c.AddSym(-j-2, scope+vname, true) - fnote.fsp++ - } - - case parser.StmtBloc: - nd.ipstart = len(c.Code) - if a != nil && a.Kind == parser.IfStmt && k == 1 { - c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt - } - } - return true - }, func(n, a *parser.Node, k int) (ok bool) { - // Node post-order processing callback. - nd := notes[n] - - switch n.Kind { - case parser.AddOp: - c.Emit(n, vm1.Add) - - case parser.CallExpr: - if c.isExternalSymbol(n.Child[0].Content()) { - // External call, using absolute addr in symtable - c.Emit(n, vm1.CallX, int64(len(n.Child[1].Child))) - break - } - // Internal call is always relative to instruction pointer. - i, ok := c.symInt(n.Child[0].Content()) - if !ok { - err = fmt.Errorf("invalid symbol %s", n.Child[0].Content()) - } - c.Emit(n, vm1.Call, int64(i-len(c.Code))) - - case parser.DefOp: - // Define operation, global vars only. TODO: on local frame too - l := c.AddSym(nil, n.Child[0].Content(), false) - c.Emit(n, vm1.Assign, int64(l)) - - case parser.FuncDecl: - scope = popScope(scope) - fnote = notes[frameNode[len(frameNode)-1]] - - case parser.Ident: - ident := n.Content() - if len(n.Child) > 0 || a.Kind == parser.FuncDecl { - break - } - if s, _, ok := c.getSym(ident, scope); ok { - if s.local { - c.Emit(n, vm1.Fdup, int64(s.index)) - } else if a != nil && a.Kind == parser.AssignOp { - c.Emit(n, vm1.Push, int64(s.index)) - } else if c.isExternalSymbol(ident) { - c.Emit(n, vm1.Dup, int64(s.index)) - } - } - - case parser.IfStmt: - ifBodyStart := notes[n.Child[1]].ipstart - ifBodyEnd := notes[n.Child[1]].ipend - c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart) - // TODO: handle 'else' - - case parser.NumberLit: - // A literal number can be a float or an integer, or a big number - switch v := n.Value().(type) { - case int64: - c.Emit(n, vm1.Push, v) - case error: - err = v - return false - default: - err = fmt.Errorf("type not supported: %T\n", v) - return false - } - - case parser.ReturnStmt: - fun := frameNode[len(frameNode)-1] - c.Emit(n, vm1.Return, int64(len(n.Child)), int64(len(fun.Child[1].Child))) - - case parser.StmtBloc: - nd.ipend = len(c.Code) - - case parser.StringLit: - p := len(c.Data) - c.Data = append(c.Data, n.Block()) - c.Emit(n, vm1.Dup, int64(p)) - - case parser.InfOp: - c.Emit(n, vm1.Lower) - - case parser.SubOp: - c.Emit(n, vm1.Sub) - } - - // TODO: Fix this temporary hack to compute an entry point - if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.FuncDecl { - c.Entry = len(c.Code) - 1 - if c.Entry >= 0 && len(c.Code) > c.Entry && c.Code[c.Entry][1] == vm1.Return { - c.Entry++ - } - } - return true - }) - return -} - -func (c *Compiler) AddSym(v any, name string, local bool) int { - l := len(c.Data) - if local { - l = v.(int) - } else { - c.Data = append(c.Data, v) - } - c.symbols[name] = symbol{index: l, local: local} - return l -} - -func (c *Compiler) Emit(n *parser.Node, op ...int64) int { - op = append([]int64{int64(n.Pos())}, op...) - l := len(c.Code) - c.Code = append(c.Code, op) - return l -} - -func (c *Compiler) isExternalSymbol(name string) bool { - s, ok := c.symbols[name] - if !ok { - return false - } - _, isInt := c.Data[s.index].(int) - return !isInt -} - -func (c *Compiler) symInt(name string) (int, bool) { - s, ok := c.symbols[name] - if !ok { - return 0, false - } - j, ok := c.Data[s.index].(int) - if !ok { - return 0, false - } - return j, true -} - -func pushScope(scope, name string) string { - return strings.TrimPrefix(scope+"/"+name+"/", "/") -} - -func popScope(scope string) string { - scope = strings.TrimSuffix(scope, "/") - j := strings.LastIndex(scope, "/") - if j == -1 { - return "" - } - return scope[:j] -} - -// getSym searches for an existing symbol starting from the deepest scope. -func (c *Compiler) getSym(name, scope string) (sym symbol, sc string, ok bool) { - for { - if sym, ok = c.symbols[scope+name]; ok { - return sym, scope, ok - } - scope = strings.TrimSuffix(scope, "/") - i := strings.LastIndex(scope, "/") - if i == -1 { - scope = "" - break - } - if scope = scope[:i]; scope == "" { - break - } - } - sym, ok = c.symbols[name] - return sym, scope, ok -} diff --git a/codegen/codegen_test.go b/codegen/codegen_test.go deleted file mode 100644 index 7262ddc..0000000 --- a/codegen/codegen_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package codegen - -import ( - "fmt" - "log" - "testing" - - "github.com/gnolang/parscan/lang/golang" - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -func TestCodeGen(t *testing.T) { - log.SetFlags(log.Lshortfile) - for _, test := range tests { - test := test - t.Run("", func(t *testing.T) { - c := New() - c.AddSym(fmt.Println, "println", false) - n := &parser.Node{} - var err error - if n.Child, err = golang.GoParser.Parse(test.src); err != nil { - t.Error(err) - } - errStr := "" - if err = c.CodeGen(n); err != nil { - errStr = err.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - t.Log("data:", c.Data) - t.Log("code:", vm1.Disassemble(c.Code)) - if s := vm1.Disassemble(c.Code); s != test.asm { - t.Errorf("got error %#v, want error %#v", s, test.asm) - } - }) - } -} - -var tests = []struct { - src, asm, sym, err string -}{{ // #00 - src: "1+2", - asm: "Push 1\nPush 2\nAdd\n", -}, { // #01 - src: `println("Hello")`, - asm: "Dup 0\nDup 1\nCallX 1\n", -}, { // #02 - src: `a := 2; println(a)`, - asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\n", -}, { // #03 - src: `a := 2; if a < 3 {println(a)}; println("bye")`, - asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 4\nDup 0\nDup 1\nCallX 1\nDup 0\nDup 2\nCallX 1\n", -}, { // #04 - src: "func add(a int, b int) int { return a + b }", - asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n", -}, {}} diff --git a/codegen/compiler.go b/codegen/compiler.go new file mode 100644 index 0000000..e0e97ab --- /dev/null +++ b/codegen/compiler.go @@ -0,0 +1,226 @@ +package codegen + +import ( + "fmt" + "strings" + + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/vm1" +) + +type symbol struct { + index int // address of symbol in frame + local bool // if true address is relative to local frame, otherwise global +} + +type Compiler struct { + Code [][]int64 // produced code, to fill VM with + Data []any // produced data, will be at the bottom of VM stack + Entry int + + symbols map[string]symbol +} + +func NewCompiler() *Compiler { return &Compiler{symbols: map[string]symbol{}, Entry: -1} } + +type nodedata struct { + ipstart, ipend, symind, fsp int // CFG and symbol node annotations +} + +func (c *Compiler) CodeGen(node *parser.Node) (err error) { + notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ... + scope := "" + frameNode := []*parser.Node{node} + fnote := notes[node] + + node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) { + // Node pre-order processing callback. + notes[n] = &nodedata{} + nd := notes[n] + + switch n.Kind { + case parser.FuncDecl: + fname := n.Child[0].Content() + c.AddSym(len(c.Code), scope+fname, false) + scope = pushScope(scope, fname) + frameNode = append(frameNode, n) + fnote = notes[n] + for j, child := range n.Child[1].Child { + vname := child.Content() + c.AddSym(-j-2, scope+vname, true) + fnote.fsp++ + } + + case parser.StmtBloc: + nd.ipstart = len(c.Code) + if a != nil && a.Kind == parser.IfStmt && k == 1 { + c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt + } + } + return true + }, func(n, a *parser.Node, k int) (ok bool) { + // Node post-order processing callback. + nd := notes[n] + + switch n.Kind { + case parser.AddOp: + c.Emit(n, vm1.Add) + + case parser.CallExpr: + if c.isExternalSymbol(n.Child[0].Content()) { + // External call, using absolute addr in symtable + c.Emit(n, vm1.CallX, int64(len(n.Child[1].Child))) + break + } + // Internal call is always relative to instruction pointer. + i, ok := c.symInt(n.Child[0].Content()) + if !ok { + err = fmt.Errorf("invalid symbol %s", n.Child[0].Content()) + } + c.Emit(n, vm1.Call, int64(i-len(c.Code))) + + case parser.DefOp: + // Define operation, global vars only. TODO: on local frame too + l := c.AddSym(nil, n.Child[0].Content(), false) + c.Emit(n, vm1.Assign, int64(l)) + + case parser.FuncDecl: + scope = popScope(scope) + fnote = notes[frameNode[len(frameNode)-1]] + + case parser.Ident: + ident := n.Content() + if len(n.Child) > 0 || a.Kind == parser.FuncDecl { + break + } + if s, _, ok := c.getSym(ident, scope); ok { + if s.local { + c.Emit(n, vm1.Fdup, int64(s.index)) + } else if a != nil && a.Kind == parser.AssignOp { + c.Emit(n, vm1.Push, int64(s.index)) + } else if c.isExternalSymbol(ident) { + c.Emit(n, vm1.Dup, int64(s.index)) + } + } + + case parser.IfStmt: + ifBodyStart := notes[n.Child[1]].ipstart + ifBodyEnd := notes[n.Child[1]].ipend + c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart) + // TODO: handle 'else' + + case parser.NumberLit: + // A literal number can be a float or an integer, or a big number + switch v := n.Value().(type) { + case int64: + c.Emit(n, vm1.Push, v) + case error: + err = v + return false + default: + err = fmt.Errorf("type not supported: %T\n", v) + return false + } + + case parser.ReturnStmt: + fun := frameNode[len(frameNode)-1] + c.Emit(n, vm1.Return, int64(len(n.Child)), int64(len(fun.Child[1].Child))) + + case parser.StmtBloc: + nd.ipend = len(c.Code) + + case parser.StringLit: + p := len(c.Data) + c.Data = append(c.Data, n.Block()) + c.Emit(n, vm1.Dup, int64(p)) + + case parser.InfOp: + c.Emit(n, vm1.Lower) + + case parser.SubOp: + c.Emit(n, vm1.Sub) + } + + // TODO: Fix this temporary hack to compute an entry point + if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.FuncDecl { + c.Entry = len(c.Code) - 1 + if c.Entry >= 0 && len(c.Code) > c.Entry && c.Code[c.Entry][1] == vm1.Return { + c.Entry++ + } + } + return true + }) + return +} + +func (c *Compiler) AddSym(v any, name string, local bool) int { + l := len(c.Data) + if local { + l = v.(int) + } else { + c.Data = append(c.Data, v) + } + c.symbols[name] = symbol{index: l, local: local} + return l +} + +func (c *Compiler) Emit(n *parser.Node, op ...int64) int { + op = append([]int64{int64(n.Pos())}, op...) + l := len(c.Code) + c.Code = append(c.Code, op) + return l +} + +func (c *Compiler) isExternalSymbol(name string) bool { + s, ok := c.symbols[name] + if !ok { + return false + } + _, isInt := c.Data[s.index].(int) + return !isInt +} + +func (c *Compiler) symInt(name string) (int, bool) { + s, ok := c.symbols[name] + if !ok { + return 0, false + } + j, ok := c.Data[s.index].(int) + if !ok { + return 0, false + } + return j, true +} + +func pushScope(scope, name string) string { + return strings.TrimPrefix(scope+"/"+name+"/", "/") +} + +func popScope(scope string) string { + scope = strings.TrimSuffix(scope, "/") + j := strings.LastIndex(scope, "/") + if j == -1 { + return "" + } + return scope[:j] +} + +// getSym searches for an existing symbol starting from the deepest scope. +func (c *Compiler) getSym(name, scope string) (sym symbol, sc string, ok bool) { + for { + if sym, ok = c.symbols[scope+name]; ok { + return sym, scope, ok + } + scope = strings.TrimSuffix(scope, "/") + i := strings.LastIndex(scope, "/") + if i == -1 { + scope = "" + break + } + if scope = scope[:i]; scope == "" { + break + } + } + sym, ok = c.symbols[name] + return sym, scope, ok +} diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go new file mode 100644 index 0000000..5989210 --- /dev/null +++ b/codegen/compiler_test.go @@ -0,0 +1,58 @@ +package codegen + +import ( + "fmt" + "log" + "testing" + + "github.com/gnolang/parscan/lang/golang" + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/vm1" +) + +func TestCodeGen(t *testing.T) { + log.SetFlags(log.Lshortfile) + for _, test := range tests { + test := test + t.Run("", func(t *testing.T) { + c := NewCompiler() + c.AddSym(fmt.Println, "println", false) + n := &parser.Node{} + var err error + if n.Child, err = golang.GoParser.Parse(test.src); err != nil { + t.Error(err) + } + errStr := "" + if err = c.CodeGen(n); err != nil { + errStr = err.Error() + } + if errStr != test.err { + t.Errorf("got error %#v, want error %#v", errStr, test.err) + } + t.Log("data:", c.Data) + t.Log("code:", vm1.Disassemble(c.Code)) + if s := vm1.Disassemble(c.Code); s != test.asm { + t.Errorf("got error %#v, want error %#v", s, test.asm) + } + }) + } +} + +var tests = []struct { + src, asm, sym, err string +}{{ // #00 + src: "1+2", + asm: "Push 1\nPush 2\nAdd\n", +}, { // #01 + src: `println("Hello")`, + asm: "Dup 0\nDup 1\nCallX 1\n", +}, { // #02 + src: `a := 2; println(a)`, + asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\n", +}, { // #03 + src: `a := 2; if a < 3 {println(a)}; println("bye")`, + asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 4\nDup 0\nDup 1\nCallX 1\nDup 0\nDup 2\nCallX 1\n", +}, { // #04 + src: "func add(a int, b int) int { return a + b }", + asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n", +}, {}} diff --git a/codegen/interpreter.go b/codegen/interpreter.go new file mode 100644 index 0000000..8527631 --- /dev/null +++ b/codegen/interpreter.go @@ -0,0 +1,38 @@ +package codegen + +import ( + "os" + + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/vm1" +) + +const debug = true + +type Interpreter struct { + *parser.Parser + *Compiler + *vm1.Machine +} + +func NewInterpreter(p *parser.Parser) *Interpreter { + return &Interpreter{p, NewCompiler(), &vm1.Machine{}} +} + +func (i *Interpreter) Eval(src string) (err error) { + n := &parser.Node{} + if n.Child, err = i.Parse(src); err != nil { + return err + } + if debug { + n.Dot(os.Getenv("DOT"), "") + } + if err = i.CodeGen(n); err != nil { + return err + } + i.Emit(n, vm1.Exit) + i.Push(i.Data...) + i.PushCode(i.Code) + i.SetIP(i.Entry) + return i.Run() +} diff --git a/vm0/README.md b/vm0/README.md new file mode 100644 index 0000000..fc89429 --- /dev/null +++ b/vm0/README.md @@ -0,0 +1,27 @@ +# vm0 + +vm0 is a virtual machine executing directly the syntax tree. + +```mermaid +graph LR +s[ ] --> |source| a(scanner) +--> |tokens| b(parser) +--> |AST| c(vm) +subgraph vm0 + c +end +style s height:0px; +``` + +The execution is performed by walking the AST and evaluating each +visited node. + + +## Motivation + +- have a reference execution model for each defined language +- usable for compilation time evaluation +- to modelize similar VMs (i.e. gnovm) +- to validate and compare with other VMs (once it is itself validated) +- could serve as a basis for AST based symbolic execution (to be + investigated) diff --git a/vm1/README.md b/vm1/README.md index 7c3cd01..a06dd34 100644 --- a/vm1/README.md +++ b/vm1/README.md @@ -1,6 +1,6 @@ # vm1 -`vm1` is bytecode based stack machine. +`vm1` is a bytecode based stack machine. The purpose of `vm1` is to provide a simple, fast, embeddable and portable Go execution environment. diff --git a/vm1/vm.go b/vm1/vm.go index 6b74e7a..0e9c7f7 100644 --- a/vm1/vm.go +++ b/vm1/vm.go @@ -6,6 +6,8 @@ import ( "strconv" // for tracing only ) +const debug = false + // Byte-code instruction set. const ( // instruction effect on stack: values consumed -- values produced @@ -55,16 +57,19 @@ type Machine struct { code [][]int64 // code to execute mem []any // memory, as a stack ip, fp int // instruction and frame pointer - // flags uint // to set debug mode, restrict CallX, etc... + // flags uint // to set options such as restrict CallX, etc... } // Run runs a program. -func (m *Machine) Run() { +func (m *Machine) Run() (err error) { code, mem, ip, fp, sp := m.code, m.mem, m.ip, m.fp, 0 defer func() { m.mem, m.ip, m.fp = mem, ip, fp }() trace := func() { + if !debug { + return + } var op2, op3 string c := code[ip] if l := len(c); l > 2 { @@ -75,7 +80,6 @@ func (m *Machine) Run() { } fmt.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem) } - _ = trace for { sp = len(mem) // stack pointer @@ -149,17 +153,18 @@ func (m *Machine) Run() { } ip++ } + return } func (m *Machine) PushCode(code [][]int64) (p int) { p = len(m.code) m.code = append(m.code, code...) - return + return p } -func (m *Machine) SetIP(ip int) { m.ip = ip } -func (m *Machine) Push(v any) (l int) { l = len(m.mem); m.mem = append(m.mem, v); return } -func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return } +func (m *Machine) SetIP(ip int) { m.ip = ip } +func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return } +func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return } // Disassemble returns the code as a readable string. func Disassemble(code [][]int64) (asm string) { -- cgit v1.2.3