summaryrefslogtreecommitdiff
path: root/vm
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-10-12 10:51:58 +0200
committerGitHub <noreply@github.com>2023-10-12 10:51:58 +0200
commit37b9da32d3b911091deb254f6cba2a137c471287 (patch)
treeb4451de0fa0473a937a77d39fd1f8a4f87c8f60d /vm
parenta21b9b12ad865a19ff687645082f9093c4101039 (diff)
move to a direct byte code compiler (#8)
* chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification in lang package This avoid a cyclic dependency in scanner_test which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also parser now generate function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing
Diffstat (limited to 'vm')
-rw-r--r--vm/README.md38
-rw-r--r--vm/vm.go229
-rw-r--r--vm/vm_test.go155
3 files changed, 422 insertions, 0 deletions
diff --git a/vm/README.md b/vm/README.md
new file mode 100644
index 0000000..92b1ac8
--- /dev/null
+++ b/vm/README.md
@@ -0,0 +1,38 @@
+# vm
+
+`vm` is a bytecode based stack machine.
+
+The purpose of `vm` is to provide a simple, fast, embeddable and
+portable Go execution environment.
+
+```mermaid
+graph LR
+s[ ] --> |source| a(scanner)
+--> |tokens| b(parser)
+--> |AST| c(codegen)
+--> |bytecode| d[vm]
+subgraph vm
+ d
+end
+style s height:0px;
+```
+
+The bytecode consists of a dozen of instructions, each taking 0 or 1
+immediate argument (non-immediate arguments are only passed through the
+stack). Only a few operators for a few types are implemented. I expect
+to have 1 instruction per operator per numerical type, all with the same
+pattern, which would be generated from a template. Estimate is around 20
+operators and 10 numerical types, so around 200 instructions in final.
+
+Structurally, the vm implements logical and arithmetic operators,
+condional jumps for `if`, `for` and `switch` control flow, and function
+call, return and frame management.
+
+the memory state of the vm is a slice of Go interfaces (`[]any`).
+
+The whole vm is coded in a single function of 80 lines with no
+dependencies. The size will grow as we add missing instructions, but the
+code complexity will remain the same.
+
+the vm1 package is totally standalone and could be used for any purpose
+outside of parscan and/or gno.
diff --git a/vm/vm.go b/vm/vm.go
new file mode 100644
index 0000000..4057fc1
--- /dev/null
+++ b/vm/vm.go
@@ -0,0 +1,229 @@
+package vm
+
+import (
+ "fmt" // for tracing only
+ "log" // for tracing only
+ "reflect" // for optional CallX only
+ "strconv" // for tracing only
+)
+
+const debug = true
+
+// Byte-code instruction set.
+const (
+ // instruction effect on stack: values consumed -- values produced
+ Nop = iota // --
+ Add // n1 n2 -- sum ; sum = n1+n2
+ Assign // val -- ; mem[$1] = val
+ Fassign // val -- ; mem[$1] = val
+ Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+ Calli // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+ CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
+ Dup // addr -- value ; value = mem[addr]
+ Fdup // addr -- value ; value = mem[addr]
+ Equal // n1 n2 -- cond ; cond = n1 == n2
+ Exit // -- ;
+ Jump // -- ; ip += $1
+ JumpTrue // cond -- ; if cond { ip += $1 }
+ JumpFalse // cond -- ; if cond { ip += $1 }
+ Lower // n1 n2 -- cond ; cond = n1 < n2
+ Loweri // n1 -- cond ; cond = n1 < $1
+ Pop // v --
+ Push // -- v
+ Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
+ Sub // n1 n2 -- diff ; diff = n1 - n2
+ Subi // n1 -- diff ; diff = n1 - $1
+)
+
+var strop = [...]string{ // for VM tracing.
+ Nop: "Nop",
+ Add: "Add",
+ Assign: "Assign",
+ Call: "Call",
+ Calli: "Calli",
+ CallX: "CallX",
+ Dup: "Dup",
+ Equal: "Equal",
+ Exit: "Exit",
+ Fassign: "Fassign",
+ Fdup: "Fdup",
+ Jump: "Jump",
+ JumpTrue: "JumpTrue",
+ JumpFalse: "JumpFalse",
+ Lower: "Lower",
+ Loweri: "Loweri",
+ Pop: "Pop",
+ Push: "Push",
+ Return: "Return",
+ Sub: "Sub",
+ Subi: "Subi",
+}
+
+// Machine represents a virtual machine.
+type Machine struct {
+ code [][]int64 // code to execute
+ mem []any // memory, as a stack
+ ip, fp int // instruction and frame pointer
+ ic uint64 // instruction counter, incremented at each instruction executed
+ // flags uint // to set options such as restrict CallX, etc...
+}
+
+// Run runs a program.
+func (m *Machine) Run() (err error) {
+ code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic
+
+ defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }()
+
+ trace := func() {
+ if !debug {
+ return
+ }
+ var op2, op3 string
+ c := code[ip]
+ if l := len(c); l > 2 {
+ op2 = strconv.Itoa(int(c[2]))
+ if l > 3 {
+ op3 = strconv.Itoa(int(c[3]))
+ }
+ }
+ log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem)
+ }
+
+ for {
+ sp = len(mem) // stack pointer
+ trace()
+ ic++
+ switch op := code[ip]; op[1] {
+ case Add:
+ mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int)
+ mem = mem[:sp-1]
+ case Assign:
+ mem[op[2]] = mem[sp-1]
+ mem = mem[:sp-1]
+ case Fassign:
+ mem[fp+int(op[2])-1] = mem[sp-1]
+ mem = mem[:sp-1]
+ case Call:
+ nip := mem[sp-1].(int)
+ mem = append(mem[:sp-1], ip+1, fp)
+ ip = nip
+ fp = sp + 1
+ continue
+ case Calli:
+ mem = append(mem, ip+1, fp)
+ fp = sp + 2
+ ip += int(op[2])
+ continue
+ case CallX: // Should be made optional.
+ l := int(op[2])
+ in := make([]reflect.Value, l)
+ for i := range in {
+ in[i] = reflect.ValueOf(mem[sp-2-i])
+ }
+ f := reflect.ValueOf(mem[sp-1])
+ mem = mem[:sp-l-1]
+ for _, v := range f.Call(in) {
+ mem = append(mem, v.Interface())
+ }
+ case Dup:
+ mem = append(mem, mem[int(op[2])])
+ case Equal:
+ mem[sp-2] = mem[sp-2].(int) == mem[sp-1].(int)
+ mem = mem[:sp-1]
+ case Exit:
+ return err
+ case Fdup:
+ mem = append(mem, mem[int(op[2])+fp-1])
+ case Jump:
+ ip += int(op[2])
+ continue
+ case JumpTrue:
+ cond := mem[sp-1].(bool)
+ mem = mem[:sp-1]
+ if cond {
+ ip += int(op[2])
+ continue
+ }
+ case JumpFalse:
+ cond := mem[sp-1].(bool)
+ mem = mem[:sp-1]
+ if !cond {
+ ip += int(op[2])
+ continue
+ }
+ case Lower:
+ mem[sp-2] = mem[sp-1].(int) < mem[sp-2].(int)
+ mem = mem[:sp-1]
+ case Loweri:
+ mem[sp-1] = mem[sp-1].(int) < int(op[2])
+ case Pop:
+ mem = mem[:sp-int(op[2])]
+ case Push:
+ mem = append(mem, int(op[2]))
+ case Return:
+ ip = mem[fp-2].(int)
+ ofp := fp
+ fp = mem[fp-1].(int)
+ mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...)
+ continue
+ case Sub:
+ mem[sp-2] = mem[sp-1].(int) - mem[sp-2].(int)
+ mem = mem[:sp-1]
+ case Subi:
+ mem[sp-1] = mem[sp-1].(int) - int(op[2])
+ }
+ ip++
+ }
+}
+
+func (m *Machine) PushCode(code ...[]int64) (p int) {
+ p = len(m.code)
+ m.code = append(m.code, code...)
+ return p
+}
+
+func (m *Machine) SetIP(ip int) { m.ip = ip }
+func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return l }
+func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return v }
+func (m *Machine) Top() (v any) {
+ if l := len(m.mem); l > 0 {
+ v = m.mem[l-1]
+ }
+ return v
+}
+
+func (m *Machine) PopExit() {
+ if l := len(m.code); l > 0 && m.code[l-1][1] == Exit {
+ m.code = m.code[:l-1]
+ }
+}
+
+func CodeString(op []int64) string {
+ switch len(op) {
+ case 2:
+ return strop[op[1]]
+ case 3:
+ return fmt.Sprintf("%s %d", strop[op[1]], op[2])
+ case 4:
+ return fmt.Sprintf("%s %d %d", strop[op[1]], op[2], op[3])
+ }
+ return ""
+}
+
+// Disassemble returns the code as a readable string.
+func Disassemble(code [][]int64) (asm string) {
+ for _, op := range code {
+ asm += CodeString(op) + "\n"
+ /*
+ switch len(op) {
+ case 2:
+ asm += strop[op[1]] + "\n"
+ case 3:
+ asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
+ case 4:
+ asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3])
+ }
+ */
+ }
+ return asm
+}
diff --git a/vm/vm_test.go b/vm/vm_test.go
new file mode 100644
index 0000000..08e8054
--- /dev/null
+++ b/vm/vm_test.go
@@ -0,0 +1,155 @@
+package vm
+
+import (
+ "fmt"
+ "log"
+ "testing"
+)
+
+func init() {
+ log.SetFlags(log.Lshortfile)
+}
+
+func TestVM(t *testing.T) {
+ for _, test := range tests {
+ test := test
+ t.Run("", func(t *testing.T) {
+ m := &Machine{}
+ for _, v := range test.sym {
+ m.Push(v)
+ }
+ m.PushCode(test.code...)
+ if err := m.Run(); err != nil {
+ t.Errorf("run error: %v", err)
+ }
+ t.Log(m.mem)
+ r := fmt.Sprintf("%v", m.mem[test.start:test.end])
+ if r != test.mem {
+ t.Errorf("got %v, want %v", r, test.mem)
+ }
+ })
+ }
+}
+
+func BenchmarkVM(b *testing.B) {
+ for _, test := range tests {
+ test := test
+ b.Run("", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ b.StopTimer()
+ m := &Machine{}
+ m.PushCode(test.code...)
+ b.StartTimer()
+
+ if err := m.Run(); err != nil {
+ b.Errorf("run error: %v", err)
+ }
+ }
+ })
+ }
+}
+
+var tests = []struct {
+ sym []any // initial memory values
+ code [][]int64 // bytecode to execute
+ start, end int //
+ mem string // expected memory content
+}{{ // #00 -- A simple addition.
+ code: [][]int64{
+ {0, Push, 1},
+ {0, Push, 2},
+ {0, Add},
+ {0, Exit},
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #01 -- Calling a function defined outside the VM.
+ sym: []any{fmt.Println, "Hello"},
+ code: [][]int64{
+ {0, Dup, 0},
+ {0, CallX, 1},
+ {0, Exit},
+ },
+ start: 1, end: 3, mem: "[6 <nil>]",
+}, { // #02 -- Defining and calling a function in VM.
+ code: [][]int64{
+ {0, Jump, 3}, // 0
+ {0, Push, 3}, // 1
+ {0, Return, 1, 1}, // 2
+ {0, Push, 1}, // 3
+ {0, Calli, -3}, // 4
+ {0, Exit}, // 5
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #03 -- Defining and calling a function in VM.
+ code: [][]int64{
+ {0, Jump, 3}, // 0
+ {0, Push, 3}, // 1
+ {0, Return, 1, 1}, // 2
+ {0, Push, 1}, // 3
+ {0, Push, 1}, // 4
+ {0, Call}, // 5
+ {0, Exit}, // 6
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #04 -- Defining and calling a function in VM.
+ code: [][]int64{
+ {0, Jump, 5}, // 0
+ {0, Push, 3}, // 1
+ {0, Fassign, -2}, // 2
+ {0, Fdup, -2}, // 3
+ {0, Return, 1, 1}, // 4
+ {0, Push, 1}, // 5
+ {0, Push, 1}, // 6
+ {0, Call}, // 7
+ {0, Exit}, // 8
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #05 -- Fibonacci numbers, hand written. Showcase recursivity.
+ code: [][]int64{
+ {0, Jump, 19}, // 0
+ {0, Push, 2}, // 2 [2]
+ {0, Fdup, -2}, // 1 [2 i]
+ {0, Lower}, // 3 [true/false]
+ {0, JumpTrue, 13}, // 4 [], goto 17
+ {0, Push, 2}, // 6 [i 2]
+ {0, Fdup, -2}, // 5 [i]
+ {0, Sub}, // 7 [(i-2)]
+ {0, Push, 1}, // 8
+ {0, Call}, // 9 [fib(i-2)]
+ {0, Push, 1}, // 11 [(i-2) i 1]
+ {0, Fdup, -2}, // 10 [fib(i-2) i]
+ {0, Sub}, // 12 [(i-2) (i-1)]
+ {0, Push, 1}, // 13
+ {0, Call}, // 14 [fib(i-2) fib(i-1)]
+ {0, Add}, // 15 [fib(i-2)+fib(i-1)]
+ {0, Return, 1, 1}, // 16 return i
+ {0, Fdup, -2}, // 17 [i]
+ {0, Return, 1, 1}, // 18 return i
+ {0, Push, 6}, // 19 [1]
+ {0, Push, 1}, // 20
+ {0, Call}, // 21 [fib(*1)]
+ {0, Exit}, // 22
+ },
+ start: 0, end: 1, mem: "[8]",
+}, { // #06 -- Fibonacci with some immediate instructions.
+ code: [][]int64{
+ {0, Jump, 14}, // 0
+ {0, Fdup, -2}, // 1 [i]
+ {0, Loweri, 2}, // 2 [true/false]
+ {0, JumpTrue, 9}, // 3 [], goto 12
+ {0, Fdup, -2}, // 4 [i]
+ {0, Subi, 2}, // 5 [(i-2)]
+ {0, Calli, -5}, // 6 [fib(i-2)]
+ {0, Fdup, -2}, // 7 [fib(i-2) i]
+ {0, Subi, 1}, // 8 [(i-2) (i-1)]
+ {0, Calli, -8}, // 9 [fib(i-2) fib(i-1)], call 1
+ {0, Add}, // 10 [fib(i-2)+fib(i-1)]
+ {0, Return, 1, 1}, // 11 return i
+ {0, Fdup, -2}, // 12 [i]
+ {0, Return, 1, 1}, // 13 return i
+ {0, Push, 6}, // 14 [1]
+ {0, Calli, -14}, // 15 [fib(*1)], call 1
+ {0, Exit}, // 16
+ },
+ start: 0, end: 1, mem: "[8]",
+}}