diff options
| author | Marc Vertes <marc.vertes@tendermint.com> | 2023-10-12 10:51:58 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-10-12 10:51:58 +0200 |
| commit | 37b9da32d3b911091deb254f6cba2a137c471287 (patch) | |
| tree | b4451de0fa0473a937a77d39fd1f8a4f87c8f60d /codegen | |
| parent | a21b9b12ad865a19ff687645082f9093c4101039 (diff) | |
move to a direct byte code compiler (#8)
* chore: refactor to keep only the new parser and bytecode vm
* scanner: remove Token.value field
* scanner: remove scanner.kind field
* chore: move language specification in lang package
This avoid a cyclic dependency in scanner_test which can now use
the golang/GoSpec language specification for Go.
* clean code
* scanner: export scanner fields
Also parser now generate function calls, including externals.
* chore: fix lint issues
* parser: handle strings
* wip
* parser: implement support for 'if, else, else if' statements
Resolving labels in the compiler still in progress.
* parser: support if statements, improve compiler
* improve handling of functions
* improve support of local variables
* scanner: trim leading and trailing spaces
* fixes to make fibonacci work
* parser: improve README, fix function parameters parsing
Diffstat (limited to 'codegen')
| -rw-r--r-- | codegen/compiler.go | 242 | ||||
| -rw-r--r-- | codegen/compiler_test.go | 58 | ||||
| -rw-r--r-- | codegen/expression.go | 41 | ||||
| -rw-r--r-- | codegen/interpreter.go | 53 | ||||
| -rw-r--r-- | codegen/interpreter_test.go | 48 |
5 files changed, 0 insertions, 442 deletions
diff --git a/codegen/compiler.go b/codegen/compiler.go deleted file mode 100644 index 02b67cb..0000000 --- a/codegen/compiler.go +++ /dev/null @@ -1,242 +0,0 @@ -package codegen - -import ( - "fmt" - "log" - "strings" - - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -type symbol struct { - index int // address of symbol in frame - local bool // if true address is relative to local frame, otherwise global - node *parser.Node // symbol definition in AST -} - -type Compiler struct { - Code [][]int64 // produced code, to fill VM with - Data []any // produced data, will be at the bottom of VM stack - Entry int // offset in Code to start execution from (skip function defintions) - - symbols map[string]*symbol -} - -func NewCompiler() *Compiler { return &Compiler{symbols: map[string]*symbol{}, Entry: -1} } - -type nodedata struct { - ipstart, ipend, fsp int // CFG and symbol node annotations -} - -type extNode struct { - *Compiler - *parser.Node - anc *parser.Node // node ancestor - rank int // node rank in ancestor's children -} - -func (c *Compiler) CodeGen(node *parser.Node) (err error) { - notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ... - scope := "" - frameNode := []*parser.Node{node} - fnote := notes[node] - - node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) { - // Node pre-order processing callback. - notes[n] = &nodedata{} - nd := notes[n] - - switch n.Kind { - case parser.DeclFunc: - fname := n.Child[0].Content() - c.addSym(len(c.Code), scope+fname, false, n) - scope = pushScope(scope, fname) - frameNode = append(frameNode, n) - fnote = notes[n] - for j, child := range n.Child[1].Child { - vname := child.Content() - c.addSym(-j-2, scope+vname, true, child) - fnote.fsp++ - } - - case parser.BlockStmt: - nd.ipstart = len(c.Code) - if a != nil && a.Kind == parser.StmtIf && k == 1 { - c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt - } - } - return true - }, func(n, a *parser.Node, k int) (ok bool) { - // Node post-order processing callback. - nd := notes[n] - x := extNode{c, n, a, k} - - switch n.Kind { - case parser.OpAdd: - c.Emit(n, vm1.Add) - - case parser.ExprCall: - err = postCallExpr(x) - - case parser.OpDefine: - // Define operation, global vars only. TODO: on local frame too - l := c.addSym(nil, n.Child[0].Content(), false, n) - c.Emit(n, vm1.Assign, int64(l)) - - case parser.DeclFunc: - fun := frameNode[len(frameNode)-1] - if len(fun.Child) == 3 { // no return values - if c.Code[len(c.Code)-1][1] != vm1.Return { - c.Emit(n, vm1.Return, 0, int64(len(fun.Child[1].Child))) - } - } - scope = popScope(scope) - fnote = notes[fun] - - case parser.Ident: - ident := n.Content() - if len(n.Child) > 0 || a.Kind == parser.DeclFunc { - break - } - if s, _, ok := c.getSym(ident, scope); ok { - if s.local { - c.Emit(n, vm1.Fdup, int64(s.index)) - } else if a != nil && a.Kind == parser.OpAssign { - c.Emit(n, vm1.Push, int64(s.index)) - } else if _, ok := c.Data[s.index].(int); !ok { - c.Emit(n, vm1.Dup, int64(s.index)) - } - } - - case parser.StmtIf: - ifBodyStart := notes[n.Child[1]].ipstart - ifBodyEnd := notes[n.Child[1]].ipend - c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart) - // TODO: handle 'else' - - case parser.LiteralNumber: - // A literal number can be a float or an integer, or a big number - switch v := n.Value().(type) { - case int64: - c.Emit(n, vm1.Push, v) - case error: - err = v - default: - err = fmt.Errorf("type not supported: %T\n", v) - } - - case parser.StmtReturn: - fun := frameNode[len(frameNode)-1] - nret := 0 - if len(fun.Child) > 3 { - if ret := fun.Child[2]; ret.Kind == parser.BlockParen { - nret = len(ret.Child) - } else { - nret = 1 - } - } - c.Emit(n, vm1.Return, int64(nret), int64(len(fun.Child[1].Child))) - - case parser.BlockStmt: - nd.ipend = len(c.Code) - - case parser.LiteralString: - p := len(c.Data) - c.Data = append(c.Data, n.Block()) - c.Emit(n, vm1.Dup, int64(p)) - - case parser.OpInferior: - c.Emit(n, vm1.Lower) - - case parser.OpSubtract: - c.Emit(n, vm1.Sub) - } - - if err != nil { - return false - } - - // TODO: Fix this temporary hack to compute an entry point - if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.DeclFunc { - c.Entry = len(c.Code) - 1 - if c.Entry >= 0 && len(c.Code) > c.Entry && c.Code[c.Entry][1] == vm1.Return { - c.Entry++ - } - } - return true - }) - - if s, _, ok := c.getSym("main", ""); ok { - if i, ok := c.codeIndex(s); ok { - // Internal call is always relative to instruction pointer. - c.Emit(nil, vm1.Call, int64(i-len(c.Code))) - c.Entry = len(c.Code) - 1 - } - log.Println(vm1.Disassemble(c.Code)) - } - - return -} - -func (c *Compiler) AddSym(v any, name string) int { return c.addSym(v, name, false, nil) } - -func (c *Compiler) addSym(v any, name string, local bool, n *parser.Node) int { - l := len(c.Data) - if local { - l = v.(int) - } else { - c.Data = append(c.Data, v) - } - c.symbols[name] = &symbol{index: l, local: local, node: n} - return l -} - -func (c *Compiler) Emit(n *parser.Node, op ...int64) int { - var pos int64 - if n != nil { - pos = int64(n.Pos()) - } - op = append([]int64{pos}, op...) - l := len(c.Code) - c.Code = append(c.Code, op) - return l -} - -func (c *Compiler) codeIndex(s *symbol) (i int, ok bool) { - i, ok = c.Data[s.index].(int) - return -} - -func pushScope(scope, name string) string { - return strings.TrimPrefix(scope+"/"+name+"/", "/") -} - -func popScope(scope string) string { - scope = strings.TrimSuffix(scope, "/") - j := strings.LastIndex(scope, "/") - if j == -1 { - return "" - } - return scope[:j] -} - -// getSym searches for an existing symbol starting from the deepest scope. -func (c *Compiler) getSym(name, scope string) (sym *symbol, sc string, ok bool) { - for { - if sym, ok = c.symbols[scope+name]; ok { - return sym, scope, ok - } - scope = strings.TrimSuffix(scope, "/") - i := strings.LastIndex(scope, "/") - if i == -1 { - scope = "" - break - } - if scope = scope[:i]; scope == "" { - break - } - } - sym, ok = c.symbols[name] - return sym, scope, ok -} diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go deleted file mode 100644 index 5c7e9d3..0000000 --- a/codegen/compiler_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package codegen - -import ( - "fmt" - "log" - "testing" - - "github.com/gnolang/parscan/lang/golang" - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -func TestCodeGen(t *testing.T) { - log.SetFlags(log.Lshortfile) - for _, test := range codeGenTests { - test := test - t.Run("", func(t *testing.T) { - c := NewCompiler() - c.AddSym(fmt.Println, "println") - n := &parser.Node{} - var err error - if n.Child, err = golang.GoParser.Parse(test.src, n); err != nil { - t.Error(err) - } - errStr := "" - if err = c.CodeGen(n); err != nil { - errStr = err.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - t.Log("data:", c.Data) - t.Log("code:", vm1.Disassemble(c.Code)) - if s := vm1.Disassemble(c.Code); s != test.asm { - t.Errorf("got %#v, want %#v", s, test.asm) - } - }) - } -} - -var codeGenTests = []struct { - src, asm, sym, err string -}{{ // #00 - src: "1+2", - asm: "Push 1\nPush 2\nAdd\n", -}, { // #01 - src: `println("Hello")`, - asm: "Dup 0\nDup 1\nCallX 1\nPop 2\n", -}, { // #02 - src: `a := 2; println(a)`, - asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\nPop 2\n", -}, { // #03 - src: `a := 2; if a < 3 {println(a)}; println("bye")`, - asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 5\nDup 0\nDup 1\nCallX 1\nPop 2\nDup 0\nDup 2\nCallX 1\nPop 2\n", -}, { // #04 - src: "func add(a int, b int) int { return a + b }", - asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n", -}} diff --git a/codegen/expression.go b/codegen/expression.go deleted file mode 100644 index f3eb38a..0000000 --- a/codegen/expression.go +++ /dev/null @@ -1,41 +0,0 @@ -package codegen - -import ( - "fmt" - "reflect" - - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -func postCallExpr(x extNode) error { - switch x.Child[0].Kind { - case parser.Ident: - var numOut int - s, _, ok := x.getSym(x.Child[0].Content(), "") - if !ok { - return fmt.Errorf("invalid symbol %s", x.Child[0].Content()) - } - if i, ok := x.codeIndex(s); ok { - // Internal call is always relative to instruction pointer. - x.Emit(x.Node, vm1.Call, int64(i-len(x.Code))) - } else { - // External call, using absolute addr in symtable. - x.Emit(x.Node, vm1.CallX, int64(len(x.Child[1].Child))) - numOut = reflect.TypeOf(x.Data[s.index]).NumOut() - } - if !usedRet(x.anc) { - x.Emit(x.Node, vm1.Pop, int64(numOut)) - } - } - return nil -} - -func usedRet(n *parser.Node) bool { - switch n.Kind { - case parser.Undefined, parser.BlockStmt: - return false - default: - return true - } -} diff --git a/codegen/interpreter.go b/codegen/interpreter.go deleted file mode 100644 index 18cc5f8..0000000 --- a/codegen/interpreter.go +++ /dev/null @@ -1,53 +0,0 @@ -package codegen - -import ( - "os" - - "github.com/gnolang/parscan/parser" - "github.com/gnolang/parscan/vm1" -) - -const debug = true - -type Interpreter struct { - *parser.Parser - *Compiler - *vm1.Machine -} - -func NewInterpreter(p *parser.Parser) *Interpreter { - return &Interpreter{p, NewCompiler(), &vm1.Machine{}} -} - -func (i *Interpreter) Eval(src string) (res any, err error) { - n := &parser.Node{} - if n.Child, err = i.Parse(src, n); err != nil { - return res, err - } - if debug { - n.Dot(os.Getenv("DOT"), "") - } - codeOffset := len(i.Code) - dataOffset := 0 - if codeOffset > 0 { - // All data must be copied to the VM the first time only (re-entrance). - dataOffset = len(i.Data) - } - i.PopExit() // Remove last exit from previous run (re-entrance). - if err = i.CodeGen(n); err != nil { - return res, err - } - i.Push(i.Data[dataOffset:]...) - i.PushCode(i.Code[codeOffset:]...) - i.PushCode([]int64{0, vm1.Exit}) - i.SetIP(max(codeOffset, i.Entry)) - err = i.Run() - return i.Top(), err -} - -func max(a, b int) int { - if a > b { - return a - } - return b -} diff --git a/codegen/interpreter_test.go b/codegen/interpreter_test.go deleted file mode 100644 index 2563677..0000000 --- a/codegen/interpreter_test.go +++ /dev/null @@ -1,48 +0,0 @@ -package codegen - -import ( - "fmt" - "testing" - - "github.com/gnolang/parscan/lang/golang" -) - -func TestEval(t *testing.T) { - for _, test := range evalTests { - test := test - t.Run("", func(t *testing.T) { - interp := NewInterpreter(golang.GoParser) - errStr := "" - r, e := interp.Eval(test.src) - if e != nil { - errStr = e.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - res := fmt.Sprintf("%v", r) - if res != test.res { - t.Errorf("got %#v, want %#v", res, test.res) - } - }) - } -} - -var evalTests = []struct { - name, src, res, err string -}{{ // #00 - src: "1 + 2", - res: "3", -}, { // #01 - src: "a := 2; a = a + 3", - res: "5", -}, { // #02 - src: "func f(a int) int { return a + 1 }; f(5)", - res: "6", -}, { // #03 - src: "func f(a int) (b int) { b = a + 1; return b }; f(5)", - res: "6", -}, { // #04 - src: "func f(a int) (b int) { b = a + 1; return }; f(5)", - res: "6", -}} |
