move to a direct byte code compiler (#8)

* chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification in lang package This avoid a cyclic dependency in scanner_test which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also parser now generate function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing
author: Marc Vertes <marc.vertes@tendermint.com> 2023-10-12 10:51:58 +0200
committer: GitHub <noreply@github.com> 2023-10-12 10:51:58 +0200
commit: 37b9da32d3b911091deb254f6cba2a137c471287 (patch)
tree: b4451de0fa0473a937a77d39fd1f8a4f87c8f60d /vm
parent: a21b9b12ad865a19ff687645082f9093c4101039 (diff)
3 files changed, 422 insertions, 0 deletions
diff --git a/vm/README.md b/vm/README.md
new file mode 100644
index 0000000..92b1ac8
--- /dev/null
+++ b/vm/README.md
@@ -0,0 +1,38 @@
+# vm
+
+`vm` is a bytecode based stack machine.
+
+The purpose of `vm` is to provide a simple, fast, embeddable and
+portable Go execution environment.
+
+```mermaid
+graph LR
+s[ ] --> |source| a(scanner)
+--> |tokens| b(parser)
+--> |AST| c(codegen)
+--> |bytecode| d[vm]
+subgraph vm
+    d
+end
+style s height:0px;
+```
+
+The bytecode consists of a dozen of instructions, each taking 0 or 1
+immediate argument (non-immediate arguments are only passed through the
+stack). Only a few operators for a few types are implemented. I expect
+to have 1 instruction per operator per numerical type, all with the same
+pattern, which would be generated from a template. Estimate is around 20
+operators and 10 numerical types, so around 200 instructions in final.
+
+Structurally, the vm implements logical and arithmetic operators,
+condional jumps for `if`, `for` and `switch` control flow, and function
+call, return and frame management.
+
+the memory state of the vm is a slice of Go interfaces (`[]any`).
+
+The whole vm is coded in a single function of 80 lines with no
+dependencies. The size will grow as we add missing instructions, but the
+code complexity will remain the same.
+
+the vm1 package is totally standalone and could be used for any purpose
+outside of parscan and/or gno.
diff --git a/vm/vm.go b/vm/vm.go
new file mode 100644
index 0000000..4057fc1
--- /dev/null
+++ b/vm/vm.go
@@ -0,0 +1,229 @@
+package vm
+
+import (
+	"fmt"     // for tracing only
+	"log"     // for tracing only
+	"reflect" // for optional CallX only
+	"strconv" // for tracing only
+)
+
+const debug = true
+
+// Byte-code instruction set.
+const (
+	// instruction effect on stack: values consumed -- values produced
+	Nop       = iota // --
+	Add              // n1 n2 -- sum ; sum = n1+n2
+	Assign           // val -- ; mem[$1] = val
+	Fassign          // val -- ; mem[$1] = val
+	Call             // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+	Calli            // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+	CallX            // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
+	Dup              // addr -- value ; value = mem[addr]
+	Fdup             // addr -- value ; value = mem[addr]
+	Equal            // n1 n2 -- cond ; cond = n1 == n2
+	Exit             // -- ;
+	Jump             // -- ; ip += $1
+	JumpTrue         // cond -- ; if cond { ip += $1 }
+	JumpFalse        // cond -- ; if cond { ip += $1 }
+	Lower            // n1 n2 -- cond ; cond = n1 < n2
+	Loweri           // n1 -- cond ; cond = n1 < $1
+	Pop              // v --
+	Push             // -- v
+	Return           // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
+	Sub              // n1 n2 -- diff ; diff = n1 - n2
+	Subi             // n1 -- diff ; diff = n1 - $1
+)
+
+var strop = [...]string{ // for VM tracing.
+	Nop:       "Nop",
+	Add:       "Add",
+	Assign:    "Assign",
+	Call:      "Call",
+	Calli:     "Calli",
+	CallX:     "CallX",
+	Dup:       "Dup",
+	Equal:     "Equal",
+	Exit:      "Exit",
+	Fassign:   "Fassign",
+	Fdup:      "Fdup",
+	Jump:      "Jump",
+	JumpTrue:  "JumpTrue",
+	JumpFalse: "JumpFalse",
+	Lower:     "Lower",
+	Loweri:    "Loweri",
+	Pop:       "Pop",
+	Push:      "Push",
+	Return:    "Return",
+	Sub:       "Sub",
+	Subi:      "Subi",
+}
+
+// Machine represents a virtual machine.
+type Machine struct {
+	code   [][]int64 // code to execute
+	mem    []any     // memory, as a stack
+	ip, fp int       // instruction and frame pointer
+	ic     uint64    // instruction counter, incremented at each instruction executed
+	// flags  uint      // to set options such as restrict CallX, etc...
+}
+
+// Run runs a program.
+func (m *Machine) Run() (err error) {
+	code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic
+
+	defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }()
+
+	trace := func() {
+		if !debug {
+			return
+		}
+		var op2, op3 string
+		c := code[ip]
+		if l := len(c); l > 2 {
+			op2 = strconv.Itoa(int(c[2]))
+			if l > 3 {
+				op3 = strconv.Itoa(int(c[3]))
+			}
+		}
+		log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem)
+	}
+
+	for {
+		sp = len(mem) // stack pointer
+		trace()
+		ic++
+		switch op := code[ip]; op[1] {
+		case Add:
+			mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int)
+			mem = mem[:sp-1]
+		case Assign:
+			mem[op[2]] = mem[sp-1]
+			mem = mem[:sp-1]
+		case Fassign:
+			mem[fp+int(op[2])-1] = mem[sp-1]
+			mem = mem[:sp-1]
+		case Call:
+			nip := mem[sp-1].(int)
+			mem = append(mem[:sp-1], ip+1, fp)
+			ip = nip
+			fp = sp + 1
+			continue
+		case Calli:
+			mem = append(mem, ip+1, fp)
+			fp = sp + 2
+			ip += int(op[2])
+			continue
+		case CallX: // Should be made optional.
+			l := int(op[2])
+			in := make([]reflect.Value, l)
+			for i := range in {
+				in[i] = reflect.ValueOf(mem[sp-2-i])
+			}
+			f := reflect.ValueOf(mem[sp-1])
+			mem = mem[:sp-l-1]
+			for _, v := range f.Call(in) {
+				mem = append(mem, v.Interface())
+			}
+		case Dup:
+			mem = append(mem, mem[int(op[2])])
+		case Equal:
+			mem[sp-2] = mem[sp-2].(int) == mem[sp-1].(int)
+			mem = mem[:sp-1]
+		case Exit:
+			return err
+		case Fdup:
+			mem = append(mem, mem[int(op[2])+fp-1])
+		case Jump:
+			ip += int(op[2])
+			continue
+		case JumpTrue:
+			cond := mem[sp-1].(bool)
+			mem = mem[:sp-1]
+			if cond {
+				ip += int(op[2])
+				continue
+			}
+		case JumpFalse:
+			cond := mem[sp-1].(bool)
+			mem = mem[:sp-1]
+			if !cond {
+				ip += int(op[2])
+				continue
+			}
+		case Lower:
+			mem[sp-2] = mem[sp-1].(int) < mem[sp-2].(int)
+			mem = mem[:sp-1]
+		case Loweri:
+			mem[sp-1] = mem[sp-1].(int) < int(op[2])
+		case Pop:
+			mem = mem[:sp-int(op[2])]
+		case Push:
+			mem = append(mem, int(op[2]))
+		case Return:
+			ip = mem[fp-2].(int)
+			ofp := fp
+			fp = mem[fp-1].(int)
+			mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...)
+			continue
+		case Sub:
+			mem[sp-2] = mem[sp-1].(int) - mem[sp-2].(int)
+			mem = mem[:sp-1]
+		case Subi:
+			mem[sp-1] = mem[sp-1].(int) - int(op[2])
+		}
+		ip++
+	}
+}
+
+func (m *Machine) PushCode(code ...[]int64) (p int) {
+	p = len(m.code)
+	m.code = append(m.code, code...)
+	return p
+}
+
+func (m *Machine) SetIP(ip int)          { m.ip = ip }
+func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return l }
+func (m *Machine) Pop() (v any)          { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return v }
+func (m *Machine) Top() (v any) {
+	if l := len(m.mem); l > 0 {
+		v = m.mem[l-1]
+	}
+	return v
+}
+
+func (m *Machine) PopExit() {
+	if l := len(m.code); l > 0 && m.code[l-1][1] == Exit {
+		m.code = m.code[:l-1]
+	}
+}
+
+func CodeString(op []int64) string {
+	switch len(op) {
+	case 2:
+		return strop[op[1]]
+	case 3:
+		return fmt.Sprintf("%s %d", strop[op[1]], op[2])
+	case 4:
+		return fmt.Sprintf("%s %d %d", strop[op[1]], op[2], op[3])
+	}
+	return ""
+}
+
+// Disassemble returns the code as a readable string.
+func Disassemble(code [][]int64) (asm string) {
+	for _, op := range code {
+		asm += CodeString(op) + "\n"
+		/*
+			switch len(op) {
+			case 2:
+				asm += strop[op[1]] + "\n"
+			case 3:
+				asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
+			case 4:
+				asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3])
+			}
+		*/
+	}
+	return asm
+}
diff --git a/vm/vm_test.go b/vm/vm_test.go
new file mode 100644
index 0000000..08e8054
--- /dev/null
+++ b/vm/vm_test.go
@@ -0,0 +1,155 @@
+package vm
+
+import (
+	"fmt"
+	"log"
+	"testing"
+)
+
+func init() {
+	log.SetFlags(log.Lshortfile)
+}
+
+func TestVM(t *testing.T) {
+	for _, test := range tests {
+		test := test
+		t.Run("", func(t *testing.T) {
+			m := &Machine{}
+			for _, v := range test.sym {
+				m.Push(v)
+			}
+			m.PushCode(test.code...)
+			if err := m.Run(); err != nil {
+				t.Errorf("run error: %v", err)
+			}
+			t.Log(m.mem)
+			r := fmt.Sprintf("%v", m.mem[test.start:test.end])
+			if r != test.mem {
+				t.Errorf("got %v, want %v", r, test.mem)
+			}
+		})
+	}
+}
+
+func BenchmarkVM(b *testing.B) {
+	for _, test := range tests {
+		test := test
+		b.Run("", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				b.StopTimer()
+				m := &Machine{}
+				m.PushCode(test.code...)
+				b.StartTimer()
+
+				if err := m.Run(); err != nil {
+					b.Errorf("run error: %v", err)
+				}
+			}
+		})
+	}
+}
+
+var tests = []struct {
+	sym        []any     // initial memory values
+	code       [][]int64 // bytecode to execute
+	start, end int       //
+	mem        string    // expected memory content
+}{{ // #00 -- A simple addition.
+	code: [][]int64{
+		{0, Push, 1},
+		{0, Push, 2},
+		{0, Add},
+		{0, Exit},
+	},
+	start: 0, end: 1, mem: "[3]",
+}, { // #01 -- Calling a function defined outside the VM.
+	sym: []any{fmt.Println, "Hello"},
+	code: [][]int64{
+		{0, Dup, 0},
+		{0, CallX, 1},
+		{0, Exit},
+	},
+	start: 1, end: 3, mem: "[6 <nil>]",
+}, { // #02 -- Defining and calling a function in VM.
+	code: [][]int64{
+		{0, Jump, 3},      // 0
+		{0, Push, 3},      // 1
+		{0, Return, 1, 1}, // 2
+		{0, Push, 1},      // 3
+		{0, Calli, -3},    // 4
+		{0, Exit},         // 5
+	},
+	start: 0, end: 1, mem: "[3]",
+}, { // #03 -- Defining and calling a function in VM.
+	code: [][]int64{
+		{0, Jump, 3},      // 0
+		{0, Push, 3},      // 1
+		{0, Return, 1, 1}, // 2
+		{0, Push, 1},      // 3
+		{0, Push, 1},      // 4
+		{0, Call},         // 5
+		{0, Exit},         // 6
+	},
+	start: 0, end: 1, mem: "[3]",
+}, { // #04 -- Defining and calling a function in VM.
+	code: [][]int64{
+		{0, Jump, 5},      // 0
+		{0, Push, 3},      // 1
+		{0, Fassign, -2},  // 2
+		{0, Fdup, -2},     // 3
+		{0, Return, 1, 1}, // 4
+		{0, Push, 1},      // 5
+		{0, Push, 1},      // 6
+		{0, Call},         // 7
+		{0, Exit},         // 8
+	},
+	start: 0, end: 1, mem: "[3]",
+}, { // #05 -- Fibonacci numbers, hand written. Showcase recursivity.
+	code: [][]int64{
+		{0, Jump, 19},     // 0
+		{0, Push, 2},      // 2  [2]
+		{0, Fdup, -2},     // 1  [2 i]
+		{0, Lower},        // 3  [true/false]
+		{0, JumpTrue, 13}, // 4  [], goto 17
+		{0, Push, 2},      // 6  [i 2]
+		{0, Fdup, -2},     // 5  [i]
+		{0, Sub},          // 7  [(i-2)]
+		{0, Push, 1},      // 8
+		{0, Call},         // 9  [fib(i-2)]
+		{0, Push, 1},      // 11 [(i-2) i 1]
+		{0, Fdup, -2},     // 10 [fib(i-2) i]
+		{0, Sub},          // 12 [(i-2) (i-1)]
+		{0, Push, 1},      // 13
+		{0, Call},         // 14 [fib(i-2) fib(i-1)]
+		{0, Add},          // 15 [fib(i-2)+fib(i-1)]
+		{0, Return, 1, 1}, // 16 return i
+		{0, Fdup, -2},     // 17 [i]
+		{0, Return, 1, 1}, // 18 return i
+		{0, Push, 6},      // 19 [1]
+		{0, Push, 1},      // 20
+		{0, Call},         // 21 [fib(*1)]
+		{0, Exit},         // 22
+	},
+	start: 0, end: 1, mem: "[8]",
+}, { // #06 -- Fibonacci with some immediate instructions.
+	code: [][]int64{
+		{0, Jump, 14},     // 0
+		{0, Fdup, -2},     // 1  [i]
+		{0, Loweri, 2},    // 2  [true/false]
+		{0, JumpTrue, 9},  // 3  [], goto 12
+		{0, Fdup, -2},     // 4  [i]
+		{0, Subi, 2},      // 5  [(i-2)]
+		{0, Calli, -5},    // 6  [fib(i-2)]
+		{0, Fdup, -2},     // 7  [fib(i-2) i]
+		{0, Subi, 1},      // 8  [(i-2) (i-1)]
+		{0, Calli, -8},    // 9  [fib(i-2) fib(i-1)], call 1
+		{0, Add},          // 10 [fib(i-2)+fib(i-1)]
+		{0, Return, 1, 1}, // 11 return i
+		{0, Fdup, -2},     // 12 [i]
+		{0, Return, 1, 1}, // 13 return i
+		{0, Push, 6},      // 14 [1]
+		{0, Calli, -14},   // 15 [fib(*1)], call 1
+		{0, Exit},         // 16
+	},
+	start: 0, end: 1, mem: "[8]",
+}}
author	Marc Vertes <marc.vertes@tendermint.com>	2023-10-12 10:51:58 +0200
committer	GitHub <noreply@github.com>	2023-10-12 10:51:58 +0200
commit	37b9da32d3b911091deb254f6cba2a137c471287 (patch)
tree	b4451de0fa0473a937a77d39fd1f8a4f87c8f60d /vm
parent	a21b9b12ad865a19ff687645082f9093c4101039 (diff)