diff options
Diffstat (limited to 'vm')
| -rw-r--r-- | vm/README.md | 38 | ||||
| -rw-r--r-- | vm/vm.go | 229 | ||||
| -rw-r--r-- | vm/vm_test.go | 155 |
3 files changed, 422 insertions, 0 deletions
diff --git a/vm/README.md b/vm/README.md new file mode 100644 index 0000000..92b1ac8 --- /dev/null +++ b/vm/README.md @@ -0,0 +1,38 @@ +# vm + +`vm` is a bytecode based stack machine. + +The purpose of `vm` is to provide a simple, fast, embeddable and +portable Go execution environment. + +```mermaid +graph LR +s[ ] --> |source| a(scanner) +--> |tokens| b(parser) +--> |AST| c(codegen) +--> |bytecode| d[vm] +subgraph vm + d +end +style s height:0px; +``` + +The bytecode consists of a dozen of instructions, each taking 0 or 1 +immediate argument (non-immediate arguments are only passed through the +stack). Only a few operators for a few types are implemented. I expect +to have 1 instruction per operator per numerical type, all with the same +pattern, which would be generated from a template. Estimate is around 20 +operators and 10 numerical types, so around 200 instructions in final. + +Structurally, the vm implements logical and arithmetic operators, +condional jumps for `if`, `for` and `switch` control flow, and function +call, return and frame management. + +the memory state of the vm is a slice of Go interfaces (`[]any`). + +The whole vm is coded in a single function of 80 lines with no +dependencies. The size will grow as we add missing instructions, but the +code complexity will remain the same. + +the vm1 package is totally standalone and could be used for any purpose +outside of parscan and/or gno. diff --git a/vm/vm.go b/vm/vm.go new file mode 100644 index 0000000..4057fc1 --- /dev/null +++ b/vm/vm.go @@ -0,0 +1,229 @@ +package vm + +import ( + "fmt" // for tracing only + "log" // for tracing only + "reflect" // for optional CallX only + "strconv" // for tracing only +) + +const debug = true + +// Byte-code instruction set. +const ( + // instruction effect on stack: values consumed -- values produced + Nop = iota // -- + Add // n1 n2 -- sum ; sum = n1+n2 + Assign // val -- ; mem[$1] = val + Fassign // val -- ; mem[$1] = val + Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) + Calli // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) + CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...) + Dup // addr -- value ; value = mem[addr] + Fdup // addr -- value ; value = mem[addr] + Equal // n1 n2 -- cond ; cond = n1 == n2 + Exit // -- ; + Jump // -- ; ip += $1 + JumpTrue // cond -- ; if cond { ip += $1 } + JumpFalse // cond -- ; if cond { ip += $1 } + Lower // n1 n2 -- cond ; cond = n1 < n2 + Loweri // n1 -- cond ; cond = n1 < $1 + Pop // v -- + Push // -- v + Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop + Sub // n1 n2 -- diff ; diff = n1 - n2 + Subi // n1 -- diff ; diff = n1 - $1 +) + +var strop = [...]string{ // for VM tracing. + Nop: "Nop", + Add: "Add", + Assign: "Assign", + Call: "Call", + Calli: "Calli", + CallX: "CallX", + Dup: "Dup", + Equal: "Equal", + Exit: "Exit", + Fassign: "Fassign", + Fdup: "Fdup", + Jump: "Jump", + JumpTrue: "JumpTrue", + JumpFalse: "JumpFalse", + Lower: "Lower", + Loweri: "Loweri", + Pop: "Pop", + Push: "Push", + Return: "Return", + Sub: "Sub", + Subi: "Subi", +} + +// Machine represents a virtual machine. +type Machine struct { + code [][]int64 // code to execute + mem []any // memory, as a stack + ip, fp int // instruction and frame pointer + ic uint64 // instruction counter, incremented at each instruction executed + // flags uint // to set options such as restrict CallX, etc... +} + +// Run runs a program. +func (m *Machine) Run() (err error) { + code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic + + defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }() + + trace := func() { + if !debug { + return + } + var op2, op3 string + c := code[ip] + if l := len(c); l > 2 { + op2 = strconv.Itoa(int(c[2])) + if l > 3 { + op3 = strconv.Itoa(int(c[3])) + } + } + log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem) + } + + for { + sp = len(mem) // stack pointer + trace() + ic++ + switch op := code[ip]; op[1] { + case Add: + mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int) + mem = mem[:sp-1] + case Assign: + mem[op[2]] = mem[sp-1] + mem = mem[:sp-1] + case Fassign: + mem[fp+int(op[2])-1] = mem[sp-1] + mem = mem[:sp-1] + case Call: + nip := mem[sp-1].(int) + mem = append(mem[:sp-1], ip+1, fp) + ip = nip + fp = sp + 1 + continue + case Calli: + mem = append(mem, ip+1, fp) + fp = sp + 2 + ip += int(op[2]) + continue + case CallX: // Should be made optional. + l := int(op[2]) + in := make([]reflect.Value, l) + for i := range in { + in[i] = reflect.ValueOf(mem[sp-2-i]) + } + f := reflect.ValueOf(mem[sp-1]) + mem = mem[:sp-l-1] + for _, v := range f.Call(in) { + mem = append(mem, v.Interface()) + } + case Dup: + mem = append(mem, mem[int(op[2])]) + case Equal: + mem[sp-2] = mem[sp-2].(int) == mem[sp-1].(int) + mem = mem[:sp-1] + case Exit: + return err + case Fdup: + mem = append(mem, mem[int(op[2])+fp-1]) + case Jump: + ip += int(op[2]) + continue + case JumpTrue: + cond := mem[sp-1].(bool) + mem = mem[:sp-1] + if cond { + ip += int(op[2]) + continue + } + case JumpFalse: + cond := mem[sp-1].(bool) + mem = mem[:sp-1] + if !cond { + ip += int(op[2]) + continue + } + case Lower: + mem[sp-2] = mem[sp-1].(int) < mem[sp-2].(int) + mem = mem[:sp-1] + case Loweri: + mem[sp-1] = mem[sp-1].(int) < int(op[2]) + case Pop: + mem = mem[:sp-int(op[2])] + case Push: + mem = append(mem, int(op[2])) + case Return: + ip = mem[fp-2].(int) + ofp := fp + fp = mem[fp-1].(int) + mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...) + continue + case Sub: + mem[sp-2] = mem[sp-1].(int) - mem[sp-2].(int) + mem = mem[:sp-1] + case Subi: + mem[sp-1] = mem[sp-1].(int) - int(op[2]) + } + ip++ + } +} + +func (m *Machine) PushCode(code ...[]int64) (p int) { + p = len(m.code) + m.code = append(m.code, code...) + return p +} + +func (m *Machine) SetIP(ip int) { m.ip = ip } +func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return l } +func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return v } +func (m *Machine) Top() (v any) { + if l := len(m.mem); l > 0 { + v = m.mem[l-1] + } + return v +} + +func (m *Machine) PopExit() { + if l := len(m.code); l > 0 && m.code[l-1][1] == Exit { + m.code = m.code[:l-1] + } +} + +func CodeString(op []int64) string { + switch len(op) { + case 2: + return strop[op[1]] + case 3: + return fmt.Sprintf("%s %d", strop[op[1]], op[2]) + case 4: + return fmt.Sprintf("%s %d %d", strop[op[1]], op[2], op[3]) + } + return "" +} + +// Disassemble returns the code as a readable string. +func Disassemble(code [][]int64) (asm string) { + for _, op := range code { + asm += CodeString(op) + "\n" + /* + switch len(op) { + case 2: + asm += strop[op[1]] + "\n" + case 3: + asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2]) + case 4: + asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3]) + } + */ + } + return asm +} diff --git a/vm/vm_test.go b/vm/vm_test.go new file mode 100644 index 0000000..08e8054 --- /dev/null +++ b/vm/vm_test.go @@ -0,0 +1,155 @@ +package vm + +import ( + "fmt" + "log" + "testing" +) + +func init() { + log.SetFlags(log.Lshortfile) +} + +func TestVM(t *testing.T) { + for _, test := range tests { + test := test + t.Run("", func(t *testing.T) { + m := &Machine{} + for _, v := range test.sym { + m.Push(v) + } + m.PushCode(test.code...) + if err := m.Run(); err != nil { + t.Errorf("run error: %v", err) + } + t.Log(m.mem) + r := fmt.Sprintf("%v", m.mem[test.start:test.end]) + if r != test.mem { + t.Errorf("got %v, want %v", r, test.mem) + } + }) + } +} + +func BenchmarkVM(b *testing.B) { + for _, test := range tests { + test := test + b.Run("", func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + m := &Machine{} + m.PushCode(test.code...) + b.StartTimer() + + if err := m.Run(); err != nil { + b.Errorf("run error: %v", err) + } + } + }) + } +} + +var tests = []struct { + sym []any // initial memory values + code [][]int64 // bytecode to execute + start, end int // + mem string // expected memory content +}{{ // #00 -- A simple addition. + code: [][]int64{ + {0, Push, 1}, + {0, Push, 2}, + {0, Add}, + {0, Exit}, + }, + start: 0, end: 1, mem: "[3]", +}, { // #01 -- Calling a function defined outside the VM. + sym: []any{fmt.Println, "Hello"}, + code: [][]int64{ + {0, Dup, 0}, + {0, CallX, 1}, + {0, Exit}, + }, + start: 1, end: 3, mem: "[6 <nil>]", +}, { // #02 -- Defining and calling a function in VM. + code: [][]int64{ + {0, Jump, 3}, // 0 + {0, Push, 3}, // 1 + {0, Return, 1, 1}, // 2 + {0, Push, 1}, // 3 + {0, Calli, -3}, // 4 + {0, Exit}, // 5 + }, + start: 0, end: 1, mem: "[3]", +}, { // #03 -- Defining and calling a function in VM. + code: [][]int64{ + {0, Jump, 3}, // 0 + {0, Push, 3}, // 1 + {0, Return, 1, 1}, // 2 + {0, Push, 1}, // 3 + {0, Push, 1}, // 4 + {0, Call}, // 5 + {0, Exit}, // 6 + }, + start: 0, end: 1, mem: "[3]", +}, { // #04 -- Defining and calling a function in VM. + code: [][]int64{ + {0, Jump, 5}, // 0 + {0, Push, 3}, // 1 + {0, Fassign, -2}, // 2 + {0, Fdup, -2}, // 3 + {0, Return, 1, 1}, // 4 + {0, Push, 1}, // 5 + {0, Push, 1}, // 6 + {0, Call}, // 7 + {0, Exit}, // 8 + }, + start: 0, end: 1, mem: "[3]", +}, { // #05 -- Fibonacci numbers, hand written. Showcase recursivity. + code: [][]int64{ + {0, Jump, 19}, // 0 + {0, Push, 2}, // 2 [2] + {0, Fdup, -2}, // 1 [2 i] + {0, Lower}, // 3 [true/false] + {0, JumpTrue, 13}, // 4 [], goto 17 + {0, Push, 2}, // 6 [i 2] + {0, Fdup, -2}, // 5 [i] + {0, Sub}, // 7 [(i-2)] + {0, Push, 1}, // 8 + {0, Call}, // 9 [fib(i-2)] + {0, Push, 1}, // 11 [(i-2) i 1] + {0, Fdup, -2}, // 10 [fib(i-2) i] + {0, Sub}, // 12 [(i-2) (i-1)] + {0, Push, 1}, // 13 + {0, Call}, // 14 [fib(i-2) fib(i-1)] + {0, Add}, // 15 [fib(i-2)+fib(i-1)] + {0, Return, 1, 1}, // 16 return i + {0, Fdup, -2}, // 17 [i] + {0, Return, 1, 1}, // 18 return i + {0, Push, 6}, // 19 [1] + {0, Push, 1}, // 20 + {0, Call}, // 21 [fib(*1)] + {0, Exit}, // 22 + }, + start: 0, end: 1, mem: "[8]", +}, { // #06 -- Fibonacci with some immediate instructions. + code: [][]int64{ + {0, Jump, 14}, // 0 + {0, Fdup, -2}, // 1 [i] + {0, Loweri, 2}, // 2 [true/false] + {0, JumpTrue, 9}, // 3 [], goto 12 + {0, Fdup, -2}, // 4 [i] + {0, Subi, 2}, // 5 [(i-2)] + {0, Calli, -5}, // 6 [fib(i-2)] + {0, Fdup, -2}, // 7 [fib(i-2) i] + {0, Subi, 1}, // 8 [(i-2) (i-1)] + {0, Calli, -8}, // 9 [fib(i-2) fib(i-1)], call 1 + {0, Add}, // 10 [fib(i-2)+fib(i-1)] + {0, Return, 1, 1}, // 11 return i + {0, Fdup, -2}, // 12 [i] + {0, Return, 1, 1}, // 13 return i + {0, Push, 6}, // 14 [1] + {0, Calli, -14}, // 15 [fib(*1)], call 1 + {0, Exit}, // 16 + }, + start: 0, end: 1, mem: "[8]", +}} |
