summaryrefslogtreecommitdiff
path: root/codegen
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-10-12 10:51:58 +0200
committerGitHub <noreply@github.com>2023-10-12 10:51:58 +0200
commit37b9da32d3b911091deb254f6cba2a137c471287 (patch)
treeb4451de0fa0473a937a77d39fd1f8a4f87c8f60d /codegen
parenta21b9b12ad865a19ff687645082f9093c4101039 (diff)
move to a direct byte code compiler (#8)
* chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification in lang package This avoid a cyclic dependency in scanner_test which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also parser now generate function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing
Diffstat (limited to 'codegen')
-rw-r--r--codegen/compiler.go242
-rw-r--r--codegen/compiler_test.go58
-rw-r--r--codegen/expression.go41
-rw-r--r--codegen/interpreter.go53
-rw-r--r--codegen/interpreter_test.go48
5 files changed, 0 insertions, 442 deletions
diff --git a/codegen/compiler.go b/codegen/compiler.go
deleted file mode 100644
index 02b67cb..0000000
--- a/codegen/compiler.go
+++ /dev/null
@@ -1,242 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "log"
- "strings"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-type symbol struct {
- index int // address of symbol in frame
- local bool // if true address is relative to local frame, otherwise global
- node *parser.Node // symbol definition in AST
-}
-
-type Compiler struct {
- Code [][]int64 // produced code, to fill VM with
- Data []any // produced data, will be at the bottom of VM stack
- Entry int // offset in Code to start execution from (skip function defintions)
-
- symbols map[string]*symbol
-}
-
-func NewCompiler() *Compiler { return &Compiler{symbols: map[string]*symbol{}, Entry: -1} }
-
-type nodedata struct {
- ipstart, ipend, fsp int // CFG and symbol node annotations
-}
-
-type extNode struct {
- *Compiler
- *parser.Node
- anc *parser.Node // node ancestor
- rank int // node rank in ancestor's children
-}
-
-func (c *Compiler) CodeGen(node *parser.Node) (err error) {
- notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ...
- scope := ""
- frameNode := []*parser.Node{node}
- fnote := notes[node]
-
- node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) {
- // Node pre-order processing callback.
- notes[n] = &nodedata{}
- nd := notes[n]
-
- switch n.Kind {
- case parser.DeclFunc:
- fname := n.Child[0].Content()
- c.addSym(len(c.Code), scope+fname, false, n)
- scope = pushScope(scope, fname)
- frameNode = append(frameNode, n)
- fnote = notes[n]
- for j, child := range n.Child[1].Child {
- vname := child.Content()
- c.addSym(-j-2, scope+vname, true, child)
- fnote.fsp++
- }
-
- case parser.BlockStmt:
- nd.ipstart = len(c.Code)
- if a != nil && a.Kind == parser.StmtIf && k == 1 {
- c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt
- }
- }
- return true
- }, func(n, a *parser.Node, k int) (ok bool) {
- // Node post-order processing callback.
- nd := notes[n]
- x := extNode{c, n, a, k}
-
- switch n.Kind {
- case parser.OpAdd:
- c.Emit(n, vm1.Add)
-
- case parser.ExprCall:
- err = postCallExpr(x)
-
- case parser.OpDefine:
- // Define operation, global vars only. TODO: on local frame too
- l := c.addSym(nil, n.Child[0].Content(), false, n)
- c.Emit(n, vm1.Assign, int64(l))
-
- case parser.DeclFunc:
- fun := frameNode[len(frameNode)-1]
- if len(fun.Child) == 3 { // no return values
- if c.Code[len(c.Code)-1][1] != vm1.Return {
- c.Emit(n, vm1.Return, 0, int64(len(fun.Child[1].Child)))
- }
- }
- scope = popScope(scope)
- fnote = notes[fun]
-
- case parser.Ident:
- ident := n.Content()
- if len(n.Child) > 0 || a.Kind == parser.DeclFunc {
- break
- }
- if s, _, ok := c.getSym(ident, scope); ok {
- if s.local {
- c.Emit(n, vm1.Fdup, int64(s.index))
- } else if a != nil && a.Kind == parser.OpAssign {
- c.Emit(n, vm1.Push, int64(s.index))
- } else if _, ok := c.Data[s.index].(int); !ok {
- c.Emit(n, vm1.Dup, int64(s.index))
- }
- }
-
- case parser.StmtIf:
- ifBodyStart := notes[n.Child[1]].ipstart
- ifBodyEnd := notes[n.Child[1]].ipend
- c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart)
- // TODO: handle 'else'
-
- case parser.LiteralNumber:
- // A literal number can be a float or an integer, or a big number
- switch v := n.Value().(type) {
- case int64:
- c.Emit(n, vm1.Push, v)
- case error:
- err = v
- default:
- err = fmt.Errorf("type not supported: %T\n", v)
- }
-
- case parser.StmtReturn:
- fun := frameNode[len(frameNode)-1]
- nret := 0
- if len(fun.Child) > 3 {
- if ret := fun.Child[2]; ret.Kind == parser.BlockParen {
- nret = len(ret.Child)
- } else {
- nret = 1
- }
- }
- c.Emit(n, vm1.Return, int64(nret), int64(len(fun.Child[1].Child)))
-
- case parser.BlockStmt:
- nd.ipend = len(c.Code)
-
- case parser.LiteralString:
- p := len(c.Data)
- c.Data = append(c.Data, n.Block())
- c.Emit(n, vm1.Dup, int64(p))
-
- case parser.OpInferior:
- c.Emit(n, vm1.Lower)
-
- case parser.OpSubtract:
- c.Emit(n, vm1.Sub)
- }
-
- if err != nil {
- return false
- }
-
- // TODO: Fix this temporary hack to compute an entry point
- if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.DeclFunc {
- c.Entry = len(c.Code) - 1
- if c.Entry >= 0 && len(c.Code) > c.Entry && c.Code[c.Entry][1] == vm1.Return {
- c.Entry++
- }
- }
- return true
- })
-
- if s, _, ok := c.getSym("main", ""); ok {
- if i, ok := c.codeIndex(s); ok {
- // Internal call is always relative to instruction pointer.
- c.Emit(nil, vm1.Call, int64(i-len(c.Code)))
- c.Entry = len(c.Code) - 1
- }
- log.Println(vm1.Disassemble(c.Code))
- }
-
- return
-}
-
-func (c *Compiler) AddSym(v any, name string) int { return c.addSym(v, name, false, nil) }
-
-func (c *Compiler) addSym(v any, name string, local bool, n *parser.Node) int {
- l := len(c.Data)
- if local {
- l = v.(int)
- } else {
- c.Data = append(c.Data, v)
- }
- c.symbols[name] = &symbol{index: l, local: local, node: n}
- return l
-}
-
-func (c *Compiler) Emit(n *parser.Node, op ...int64) int {
- var pos int64
- if n != nil {
- pos = int64(n.Pos())
- }
- op = append([]int64{pos}, op...)
- l := len(c.Code)
- c.Code = append(c.Code, op)
- return l
-}
-
-func (c *Compiler) codeIndex(s *symbol) (i int, ok bool) {
- i, ok = c.Data[s.index].(int)
- return
-}
-
-func pushScope(scope, name string) string {
- return strings.TrimPrefix(scope+"/"+name+"/", "/")
-}
-
-func popScope(scope string) string {
- scope = strings.TrimSuffix(scope, "/")
- j := strings.LastIndex(scope, "/")
- if j == -1 {
- return ""
- }
- return scope[:j]
-}
-
-// getSym searches for an existing symbol starting from the deepest scope.
-func (c *Compiler) getSym(name, scope string) (sym *symbol, sc string, ok bool) {
- for {
- if sym, ok = c.symbols[scope+name]; ok {
- return sym, scope, ok
- }
- scope = strings.TrimSuffix(scope, "/")
- i := strings.LastIndex(scope, "/")
- if i == -1 {
- scope = ""
- break
- }
- if scope = scope[:i]; scope == "" {
- break
- }
- }
- sym, ok = c.symbols[name]
- return sym, scope, ok
-}
diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go
deleted file mode 100644
index 5c7e9d3..0000000
--- a/codegen/compiler_test.go
+++ /dev/null
@@ -1,58 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "log"
- "testing"
-
- "github.com/gnolang/parscan/lang/golang"
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-func TestCodeGen(t *testing.T) {
- log.SetFlags(log.Lshortfile)
- for _, test := range codeGenTests {
- test := test
- t.Run("", func(t *testing.T) {
- c := NewCompiler()
- c.AddSym(fmt.Println, "println")
- n := &parser.Node{}
- var err error
- if n.Child, err = golang.GoParser.Parse(test.src, n); err != nil {
- t.Error(err)
- }
- errStr := ""
- if err = c.CodeGen(n); err != nil {
- errStr = err.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- t.Log("data:", c.Data)
- t.Log("code:", vm1.Disassemble(c.Code))
- if s := vm1.Disassemble(c.Code); s != test.asm {
- t.Errorf("got %#v, want %#v", s, test.asm)
- }
- })
- }
-}
-
-var codeGenTests = []struct {
- src, asm, sym, err string
-}{{ // #00
- src: "1+2",
- asm: "Push 1\nPush 2\nAdd\n",
-}, { // #01
- src: `println("Hello")`,
- asm: "Dup 0\nDup 1\nCallX 1\nPop 2\n",
-}, { // #02
- src: `a := 2; println(a)`,
- asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\nPop 2\n",
-}, { // #03
- src: `a := 2; if a < 3 {println(a)}; println("bye")`,
- asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 5\nDup 0\nDup 1\nCallX 1\nPop 2\nDup 0\nDup 2\nCallX 1\nPop 2\n",
-}, { // #04
- src: "func add(a int, b int) int { return a + b }",
- asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n",
-}}
diff --git a/codegen/expression.go b/codegen/expression.go
deleted file mode 100644
index f3eb38a..0000000
--- a/codegen/expression.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "reflect"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-func postCallExpr(x extNode) error {
- switch x.Child[0].Kind {
- case parser.Ident:
- var numOut int
- s, _, ok := x.getSym(x.Child[0].Content(), "")
- if !ok {
- return fmt.Errorf("invalid symbol %s", x.Child[0].Content())
- }
- if i, ok := x.codeIndex(s); ok {
- // Internal call is always relative to instruction pointer.
- x.Emit(x.Node, vm1.Call, int64(i-len(x.Code)))
- } else {
- // External call, using absolute addr in symtable.
- x.Emit(x.Node, vm1.CallX, int64(len(x.Child[1].Child)))
- numOut = reflect.TypeOf(x.Data[s.index]).NumOut()
- }
- if !usedRet(x.anc) {
- x.Emit(x.Node, vm1.Pop, int64(numOut))
- }
- }
- return nil
-}
-
-func usedRet(n *parser.Node) bool {
- switch n.Kind {
- case parser.Undefined, parser.BlockStmt:
- return false
- default:
- return true
- }
-}
diff --git a/codegen/interpreter.go b/codegen/interpreter.go
deleted file mode 100644
index 18cc5f8..0000000
--- a/codegen/interpreter.go
+++ /dev/null
@@ -1,53 +0,0 @@
-package codegen
-
-import (
- "os"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-const debug = true
-
-type Interpreter struct {
- *parser.Parser
- *Compiler
- *vm1.Machine
-}
-
-func NewInterpreter(p *parser.Parser) *Interpreter {
- return &Interpreter{p, NewCompiler(), &vm1.Machine{}}
-}
-
-func (i *Interpreter) Eval(src string) (res any, err error) {
- n := &parser.Node{}
- if n.Child, err = i.Parse(src, n); err != nil {
- return res, err
- }
- if debug {
- n.Dot(os.Getenv("DOT"), "")
- }
- codeOffset := len(i.Code)
- dataOffset := 0
- if codeOffset > 0 {
- // All data must be copied to the VM the first time only (re-entrance).
- dataOffset = len(i.Data)
- }
- i.PopExit() // Remove last exit from previous run (re-entrance).
- if err = i.CodeGen(n); err != nil {
- return res, err
- }
- i.Push(i.Data[dataOffset:]...)
- i.PushCode(i.Code[codeOffset:]...)
- i.PushCode([]int64{0, vm1.Exit})
- i.SetIP(max(codeOffset, i.Entry))
- err = i.Run()
- return i.Top(), err
-}
-
-func max(a, b int) int {
- if a > b {
- return a
- }
- return b
-}
diff --git a/codegen/interpreter_test.go b/codegen/interpreter_test.go
deleted file mode 100644
index 2563677..0000000
--- a/codegen/interpreter_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "testing"
-
- "github.com/gnolang/parscan/lang/golang"
-)
-
-func TestEval(t *testing.T) {
- for _, test := range evalTests {
- test := test
- t.Run("", func(t *testing.T) {
- interp := NewInterpreter(golang.GoParser)
- errStr := ""
- r, e := interp.Eval(test.src)
- if e != nil {
- errStr = e.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- res := fmt.Sprintf("%v", r)
- if res != test.res {
- t.Errorf("got %#v, want %#v", res, test.res)
- }
- })
- }
-}
-
-var evalTests = []struct {
- name, src, res, err string
-}{{ // #00
- src: "1 + 2",
- res: "3",
-}, { // #01
- src: "a := 2; a = a + 3",
- res: "5",
-}, { // #02
- src: "func f(a int) int { return a + 1 }; f(5)",
- res: "6",
-}, { // #03
- src: "func f(a int) (b int) { b = a + 1; return b }; f(5)",
- res: "6",
-}, { // #04
- src: "func f(a int) (b int) { b = a + 1; return }; f(5)",
- res: "6",
-}}