From 37b9da32d3b911091deb254f6cba2a137c471287 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Thu, 12 Oct 2023 10:51:58 +0200 Subject: move to a direct byte code compiler (#8) * chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification in lang package This avoid a cyclic dependency in scanner_test which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also parser now generate function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing --- vm1/README.md | 38 ----------- vm1/vm.go | 196 --------------------------------------------------------- vm1/vm_test.go | 122 ----------------------------------- 3 files changed, 356 deletions(-) delete mode 100644 vm1/README.md delete mode 100644 vm1/vm.go delete mode 100644 vm1/vm_test.go (limited to 'vm1') diff --git a/vm1/README.md b/vm1/README.md deleted file mode 100644 index a06dd34..0000000 --- a/vm1/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# vm1 - -`vm1` is a bytecode based stack machine. - -The purpose of `vm1` is to provide a simple, fast, embeddable and -portable Go execution environment. - -```mermaid -graph LR -s[ ] --> |source| a(scanner) ---> |tokens| b(parser) ---> |AST| c(codegen) ---> |bytecode| d[vm] -subgraph vm1 - d -end -style s height:0px; -``` - -The bytecode consists of a dozen of instructions, each taking 0 or 1 -immediate argument (non-immediate arguments are only passed through the -stack). Only a few operators for a few types are implemented. I expect -to have 1 instruction per operator per numerical type, all with the same -pattern, which would be generated from a template. Estimate is around 20 -operators and 10 numerical types, so around 200 instructions in final. - -Structurally, the vm implements logical and arithmetic operators, -condional jumps for `if`, `for` and `switch` control flow, and function -call, return and frame management. - -the memory state of the vm is a slice of Go interfaces (`[]any`). - -The whole vm is coded in a single function of 80 lines with no -dependencies. The size will grow as we add missing instructions, but the -code complexity will remain the same. - -the vm1 package is totally standalone and could be used for any purpose -outside of parscan and/or gno. diff --git a/vm1/vm.go b/vm1/vm.go deleted file mode 100644 index 01bba83..0000000 --- a/vm1/vm.go +++ /dev/null @@ -1,196 +0,0 @@ -package vm1 - -import ( - "fmt" // for tracing only - "log" // for tracing only - "reflect" // for optional CallX only - "strconv" // for tracing only -) - -const debug = true - -// Byte-code instruction set. -const ( - // instruction effect on stack: values consumed -- values produced - Nop = iota // -- - Add // n1 n2 -- sum ; sum = n1+n2 - Assign // val -- ; mem[$1] = val - Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...) - CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...) - Dup // addr -- value ; value = mem[addr] - Fdup // addr -- value ; value = mem[addr] - Exit // -- ; - Jump // -- ; ip += $1 - JumpTrue // cond -- ; if cond { ip += $1 } - JumpFalse // cond -- ; if cond { ip += $1 } - Lower // n1 n2 -- cond ; cond = n1 < n2 - Loweri // n1 -- cond ; cond = n1 < $1 - Pop // v -- - Push // -- v - Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop - Sub // n1 n2 -- diff ; diff = n1 - n2 - Subi // n1 -- diff ; diff = n1 - $1 -) - -var strop = [...]string{ // for VM tracing. - Nop: "Nop", - Add: "Add", - Assign: "Assign", - Call: "Call", - CallX: "CallX", - Dup: "Dup", - Fdup: "Fdup", - Exit: "Exit", - Jump: "Jump", - JumpTrue: "JumpTrue", - JumpFalse: "JumpFalse", - Lower: "Lower", - Loweri: "Loweri", - Pop: "Pop", - Push: "Push", - Return: "Return", - Sub: "Sub", - Subi: "Subi", -} - -// Machine represents a virtual machine. -type Machine struct { - code [][]int64 // code to execute - mem []any // memory, as a stack - ip, fp int // instruction and frame pointer - ic uint64 // instruction counter, incremented at each instruction executed - // flags uint // to set options such as restrict CallX, etc... -} - -// Run runs a program. -func (m *Machine) Run() (err error) { - code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic - - defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }() - - trace := func() { - if !debug { - return - } - var op2, op3 string - c := code[ip] - if l := len(c); l > 2 { - op2 = strconv.Itoa(int(c[2])) - if l > 3 { - op3 = strconv.Itoa(int(c[3])) - } - } - log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem) - } - - for { - sp = len(mem) // stack pointer - trace() - ic++ - switch op := code[ip]; op[1] { - case Add: - mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int) - mem = mem[:sp-1] - case Assign: - mem[op[2]] = mem[sp-1] - mem = mem[:sp-1] - case Call: - mem = append(mem, ip+1, fp) - fp = sp + 2 - ip += int(op[2]) - continue - case CallX: // Should be made optional. - l := int(op[2]) - in := make([]reflect.Value, l) - for i := range in { - in[l-1-i] = reflect.ValueOf(mem[sp-1-i]) - } - f := reflect.ValueOf(mem[sp-l-1]) - mem = mem[:sp-l-1] - for _, v := range f.Call(in) { - mem = append(mem, v.Interface()) - } - case Dup: - mem = append(mem, mem[int(op[2])]) - case Exit: - return - case Fdup: - mem = append(mem, mem[int(op[2])+fp-1]) - case Jump: - ip += int(op[2]) - continue - case JumpTrue: - cond := mem[sp-1].(bool) - mem = mem[:sp-1] - if cond { - ip += int(op[2]) - continue - } - case JumpFalse: - cond := mem[sp-1].(bool) - mem = mem[:sp-1] - if !cond { - ip += int(op[2]) - continue - } - case Lower: - mem[sp-2] = mem[sp-2].(int) < mem[sp-1].(int) - mem = mem[:sp-1] - case Loweri: - mem[sp-1] = mem[sp-1].(int) < int(op[2]) - case Pop: - mem = mem[:sp-int(op[2])] - case Push: - mem = append(mem, int(op[2])) - case Return: - ip = mem[fp-2].(int) - ofp := fp - fp = mem[fp-1].(int) - mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...) - continue - case Sub: - mem[sp-2] = mem[sp-2].(int) - mem[sp-1].(int) - mem = mem[:sp-1] - case Subi: - mem[sp-1] = mem[sp-1].(int) - int(op[2]) - } - ip++ - } -} - -func (m *Machine) PushCode(code ...[]int64) (p int) { - p = len(m.code) - m.code = append(m.code, code...) - return p -} - -func (m *Machine) SetIP(ip int) { m.ip = ip } -func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return } -func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return } -func (m *Machine) Top() (v any) { - if l := len(m.mem); l > 0 { - v = m.mem[l-1] - } - return -} - -func (m *Machine) PopExit() { - if l := len(m.code); l > 0 && m.code[l-1][1] == Exit { - m.code = m.code[:l-1] - } -} - -// Disassemble returns the code as a readable string. -func Disassemble(code [][]int64) (asm string) { - for _, op := range code { - switch len(op) { - case 2: - asm += strop[op[1]] + "\n" - case 3: - asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2]) - case 4: - asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3]) - } - } - return asm -} diff --git a/vm1/vm_test.go b/vm1/vm_test.go deleted file mode 100644 index 272a321..0000000 --- a/vm1/vm_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package vm1 - -import ( - "fmt" - "testing" -) - -func TestVM(t *testing.T) { - for _, test := range tests { - test := test - t.Run("", func(t *testing.T) { - m := &Machine{} - for _, v := range test.sym { - m.Push(v) - } - m.PushCode(test.code...) - if err := m.Run(); err != nil { - t.Errorf("run error: %v", err) - } - t.Log(m.mem) - r := fmt.Sprintf("%v", m.mem[test.start:test.end]) - if r != test.mem { - t.Errorf("got %v, want %v", r, test.mem) - } - }) - } -} - -func BenchmarkVM(b *testing.B) { - for _, test := range tests { - test := test - b.Run("", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - m := &Machine{} - m.PushCode(test.code...) - b.StartTimer() - - if err := m.Run(); err != nil { - b.Errorf("run error: %v", err) - } - } - }) - } -} - -var tests = []struct { - sym []any // initial memory values - code [][]int64 // bytecode to execute - start, end int // - mem string // expected memory content -}{{ // #00 -- A simple addition. - code: [][]int64{ - {0, Push, 1}, - {0, Push, 2}, - {0, Add}, - {0, Exit}, - }, - start: 0, end: 1, mem: "[3]", -}, { // #01 -- Calling a function defined outside the VM. - sym: []any{fmt.Println, "Hello"}, - code: [][]int64{ - {0, CallX, 1}, - {0, Exit}, - }, - start: 0, end: 2, mem: "[6 ]", -}, { // #02 -- Defining and calling a function in VM. - code: [][]int64{ - {0, Jump, 3}, // 0 - {0, Push, 3}, // 1 - {0, Return, 1, 1}, // 2 - {0, Push, 1}, // 3 - {0, Call, -3}, // 4 - {0, Exit}, // 5 - }, - start: 0, end: 1, mem: "[3]", -}, { // #03 -- Fibonacci numbers, hand written. Showcase recursivity. - code: [][]int64{ - {0, Jump, 17}, // 0 - {0, Fdup, -2}, // 1 [i] - {0, Push, 2}, // 2 [i 2] - {0, Lower}, // 3 [true/false] - {0, JumpTrue, 11}, // 4 [], goto 16 - {0, Fdup, -2}, // 5 [i] - {0, Push, 2}, // 6 [i 2] - {0, Sub}, // 7 [(i-2)] - {0, Call, -7}, // 8 [fib(i-2)] - {0, Fdup, -2}, // 9 [fib(i-2) i] - {0, Push, 1}, // 10 [(i-2) i 1] - {0, Sub}, // 11 [(i-2) (i-1)] - {0, Call, -11}, // 12 [fib(i-2) fib(i-1)] - {0, Add}, // 13 [fib(i-2)+fib(i-1)] - {0, Return, 1, 1}, // 14 return i - {0, Fdup, -2}, // 15 [i] - {0, Return, 1, 1}, // 16 return i - {0, Push, 6}, // 17 [1] - {0, Call, -17}, // 18 [fib(*1)] - {0, Exit}, // 19 - }, - start: 0, end: 1, mem: "[8]", -}, { // #04 -- Fibonacci with some immediate instructions. - code: [][]int64{ - {0, Jump, 14}, // 0 - {0, Fdup, -2}, // 1 [i] - {0, Loweri, 2}, // 2 [true/false] - {0, JumpTrue, 9}, // 3 [], goto 13 - {0, Fdup, -2}, // 4 [i] - {0, Subi, 2}, // 5 [(i-2)] - {0, Call, -5}, // 6 [fib(i-2)] - {0, Fdup, -2}, // 7 [fib(i-2) i] - {0, Subi, 1}, // 8 [(i-2) (i-1)] - {0, Call, -8}, // 9 [fib(i-2) fib(i-1)], call 1 - {0, Add}, // 10 [fib(i-2)+fib(i-1)] - {0, Return, 1, 1}, // 11 return i - {0, Fdup, -2}, // 12 [i] - {0, Return, 1, 1}, // 13 return i - {0, Push, 6}, // 14 [1] - {0, Call, -14}, // 15 [fib(*1)], call 1 - {0, Exit}, // 16 - }, - start: 0, end: 1, mem: "[8]", -}} -- cgit v1.2.3