From 37b9da32d3b911091deb254f6cba2a137c471287 Mon Sep 17 00:00:00 2001
From: Marc Vertes <marc.vertes@tendermint.com>
Date: Thu, 12 Oct 2023 10:51:58 +0200
Subject: move to a direct byte code compiler (#8)

* chore: refactor to keep only the new parser and bytecode vm

* scanner: remove Token.value field

* scanner: remove scanner.kind field

* chore: move language specification in lang package

This avoid a cyclic dependency in scanner_test which can now use
the golang/GoSpec language specification for Go.

* clean code

* scanner: export scanner fields

Also parser now generate function calls, including externals.

* chore: fix lint issues

* parser: handle strings

* wip

* parser: implement support for 'if, else, else if' statements

Resolving labels in the compiler still in progress.

* parser: support if statements, improve compiler

* improve handling of functions

* improve support of local variables

* scanner: trim leading and trailing spaces

* fixes to make fibonacci work

* parser: improve README, fix function parameters parsing
---
 vm1/README.md  |  38 -----------
 vm1/vm.go      | 196 ---------------------------------------------------------
 vm1/vm_test.go | 122 -----------------------------------
 3 files changed, 356 deletions(-)
 delete mode 100644 vm1/README.md
 delete mode 100644 vm1/vm.go
 delete mode 100644 vm1/vm_test.go

(limited to 'vm1')

diff --git a/vm1/README.md b/vm1/README.md
deleted file mode 100644
index a06dd34..0000000
--- a/vm1/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# vm1
-
-`vm1` is a bytecode based stack machine.
-
-The purpose of `vm1` is to provide a simple, fast, embeddable and
-portable Go execution environment.
-
-```mermaid
-graph LR
-s[ ] --> |source| a(scanner)
---> |tokens| b(parser)
---> |AST| c(codegen)
---> |bytecode| d[vm]
-subgraph vm1
-    d
-end
-style s height:0px;
-```
-
-The bytecode consists of a dozen of instructions, each taking 0 or 1
-immediate argument (non-immediate arguments are only passed through the
-stack). Only a few operators for a few types are implemented. I expect
-to have 1 instruction per operator per numerical type, all with the same
-pattern, which would be generated from a template. Estimate is around 20
-operators and 10 numerical types, so around 200 instructions in final.
-
-Structurally, the vm implements logical and arithmetic operators,
-condional jumps for `if`, `for` and `switch` control flow, and function
-call, return and frame management.
-
-the memory state of the vm is a slice of Go interfaces (`[]any`).
-
-The whole vm is coded in a single function of 80 lines with no
-dependencies. The size will grow as we add missing instructions, but the
-code complexity will remain the same.
-
-the vm1 package is totally standalone and could be used for any purpose
-outside of parscan and/or gno.
diff --git a/vm1/vm.go b/vm1/vm.go
deleted file mode 100644
index 01bba83..0000000
--- a/vm1/vm.go
+++ /dev/null
@@ -1,196 +0,0 @@
-package vm1
-
-import (
-	"fmt"     // for tracing only
-	"log"     // for tracing only
-	"reflect" // for optional CallX only
-	"strconv" // for tracing only
-)
-
-const debug = true
-
-// Byte-code instruction set.
-const (
-	// instruction effect on stack: values consumed -- values produced
-	Nop       = iota // --
-	Add              // n1 n2 -- sum ; sum = n1+n2
-	Assign           // val -- ; mem[$1] = val
-	Call             // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
-	CallX            // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
-	Dup              // addr -- value ; value = mem[addr]
-	Fdup             // addr -- value ; value = mem[addr]
-	Exit             // -- ;
-	Jump             // -- ; ip += $1
-	JumpTrue         // cond -- ; if cond { ip += $1 }
-	JumpFalse        // cond -- ; if cond { ip += $1 }
-	Lower            // n1 n2 -- cond ; cond = n1 < n2
-	Loweri           // n1 -- cond ; cond = n1 < $1
-	Pop              // v --
-	Push             // -- v
-	Return           // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
-	Sub              // n1 n2 -- diff ; diff = n1 - n2
-	Subi             // n1 -- diff ; diff = n1 - $1
-)
-
-var strop = [...]string{ // for VM tracing.
-	Nop:       "Nop",
-	Add:       "Add",
-	Assign:    "Assign",
-	Call:      "Call",
-	CallX:     "CallX",
-	Dup:       "Dup",
-	Fdup:      "Fdup",
-	Exit:      "Exit",
-	Jump:      "Jump",
-	JumpTrue:  "JumpTrue",
-	JumpFalse: "JumpFalse",
-	Lower:     "Lower",
-	Loweri:    "Loweri",
-	Pop:       "Pop",
-	Push:      "Push",
-	Return:    "Return",
-	Sub:       "Sub",
-	Subi:      "Subi",
-}
-
-// Machine represents a virtual machine.
-type Machine struct {
-	code   [][]int64 // code to execute
-	mem    []any     // memory, as a stack
-	ip, fp int       // instruction and frame pointer
-	ic     uint64    // instruction counter, incremented at each instruction executed
-	// flags  uint      // to set options such as restrict CallX, etc...
-}
-
-// Run runs a program.
-func (m *Machine) Run() (err error) {
-	code, mem, ip, fp, sp, ic := m.code, m.mem, m.ip, m.fp, 0, m.ic
-
-	defer func() { m.mem, m.ip, m.fp, m.ic = mem, ip, fp, ic }()
-
-	trace := func() {
-		if !debug {
-			return
-		}
-		var op2, op3 string
-		c := code[ip]
-		if l := len(c); l > 2 {
-			op2 = strconv.Itoa(int(c[2]))
-			if l > 3 {
-				op3 = strconv.Itoa(int(c[3]))
-			}
-		}
-		log.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s %-4s] mem:%v\n", ip, sp, fp, strop[c[1]], op2, op3, mem)
-	}
-
-	for {
-		sp = len(mem) // stack pointer
-		trace()
-		ic++
-		switch op := code[ip]; op[1] {
-		case Add:
-			mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int)
-			mem = mem[:sp-1]
-		case Assign:
-			mem[op[2]] = mem[sp-1]
-			mem = mem[:sp-1]
-		case Call:
-			mem = append(mem, ip+1, fp)
-			fp = sp + 2
-			ip += int(op[2])
-			continue
-		case CallX: // Should be made optional.
-			l := int(op[2])
-			in := make([]reflect.Value, l)
-			for i := range in {
-				in[l-1-i] = reflect.ValueOf(mem[sp-1-i])
-			}
-			f := reflect.ValueOf(mem[sp-l-1])
-			mem = mem[:sp-l-1]
-			for _, v := range f.Call(in) {
-				mem = append(mem, v.Interface())
-			}
-		case Dup:
-			mem = append(mem, mem[int(op[2])])
-		case Exit:
-			return
-		case Fdup:
-			mem = append(mem, mem[int(op[2])+fp-1])
-		case Jump:
-			ip += int(op[2])
-			continue
-		case JumpTrue:
-			cond := mem[sp-1].(bool)
-			mem = mem[:sp-1]
-			if cond {
-				ip += int(op[2])
-				continue
-			}
-		case JumpFalse:
-			cond := mem[sp-1].(bool)
-			mem = mem[:sp-1]
-			if !cond {
-				ip += int(op[2])
-				continue
-			}
-		case Lower:
-			mem[sp-2] = mem[sp-2].(int) < mem[sp-1].(int)
-			mem = mem[:sp-1]
-		case Loweri:
-			mem[sp-1] = mem[sp-1].(int) < int(op[2])
-		case Pop:
-			mem = mem[:sp-int(op[2])]
-		case Push:
-			mem = append(mem, int(op[2]))
-		case Return:
-			ip = mem[fp-2].(int)
-			ofp := fp
-			fp = mem[fp-1].(int)
-			mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...)
-			continue
-		case Sub:
-			mem[sp-2] = mem[sp-2].(int) - mem[sp-1].(int)
-			mem = mem[:sp-1]
-		case Subi:
-			mem[sp-1] = mem[sp-1].(int) - int(op[2])
-		}
-		ip++
-	}
-}
-
-func (m *Machine) PushCode(code ...[]int64) (p int) {
-	p = len(m.code)
-	m.code = append(m.code, code...)
-	return p
-}
-
-func (m *Machine) SetIP(ip int)          { m.ip = ip }
-func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return }
-func (m *Machine) Pop() (v any)          { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return }
-func (m *Machine) Top() (v any) {
-	if l := len(m.mem); l > 0 {
-		v = m.mem[l-1]
-	}
-	return
-}
-
-func (m *Machine) PopExit() {
-	if l := len(m.code); l > 0 && m.code[l-1][1] == Exit {
-		m.code = m.code[:l-1]
-	}
-}
-
-// Disassemble returns the code as a readable string.
-func Disassemble(code [][]int64) (asm string) {
-	for _, op := range code {
-		switch len(op) {
-		case 2:
-			asm += strop[op[1]] + "\n"
-		case 3:
-			asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
-		case 4:
-			asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3])
-		}
-	}
-	return asm
-}
diff --git a/vm1/vm_test.go b/vm1/vm_test.go
deleted file mode 100644
index 272a321..0000000
--- a/vm1/vm_test.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package vm1
-
-import (
-	"fmt"
-	"testing"
-)
-
-func TestVM(t *testing.T) {
-	for _, test := range tests {
-		test := test
-		t.Run("", func(t *testing.T) {
-			m := &Machine{}
-			for _, v := range test.sym {
-				m.Push(v)
-			}
-			m.PushCode(test.code...)
-			if err := m.Run(); err != nil {
-				t.Errorf("run error: %v", err)
-			}
-			t.Log(m.mem)
-			r := fmt.Sprintf("%v", m.mem[test.start:test.end])
-			if r != test.mem {
-				t.Errorf("got %v, want %v", r, test.mem)
-			}
-		})
-	}
-}
-
-func BenchmarkVM(b *testing.B) {
-	for _, test := range tests {
-		test := test
-		b.Run("", func(b *testing.B) {
-			for i := 0; i < b.N; i++ {
-				b.StopTimer()
-				m := &Machine{}
-				m.PushCode(test.code...)
-				b.StartTimer()
-
-				if err := m.Run(); err != nil {
-					b.Errorf("run error: %v", err)
-				}
-			}
-		})
-	}
-}
-
-var tests = []struct {
-	sym        []any     // initial memory values
-	code       [][]int64 // bytecode to execute
-	start, end int       //
-	mem        string    // expected memory content
-}{{ // #00 -- A simple addition.
-	code: [][]int64{
-		{0, Push, 1},
-		{0, Push, 2},
-		{0, Add},
-		{0, Exit},
-	},
-	start: 0, end: 1, mem: "[3]",
-}, { // #01 -- Calling a function defined outside the VM.
-	sym: []any{fmt.Println, "Hello"},
-	code: [][]int64{
-		{0, CallX, 1},
-		{0, Exit},
-	},
-	start: 0, end: 2, mem: "[6 <nil>]",
-}, { // #02 -- Defining and calling a function in VM.
-	code: [][]int64{
-		{0, Jump, 3},      // 0
-		{0, Push, 3},      // 1
-		{0, Return, 1, 1}, // 2
-		{0, Push, 1},      // 3
-		{0, Call, -3},     // 4
-		{0, Exit},         // 5
-	},
-	start: 0, end: 1, mem: "[3]",
-}, { // #03 -- Fibonacci numbers, hand written. Showcase recursivity.
-	code: [][]int64{
-		{0, Jump, 17},     // 0
-		{0, Fdup, -2},     // 1  [i]
-		{0, Push, 2},      // 2  [i 2]
-		{0, Lower},        // 3  [true/false]
-		{0, JumpTrue, 11}, // 4  [], goto 16
-		{0, Fdup, -2},     // 5  [i]
-		{0, Push, 2},      // 6  [i 2]
-		{0, Sub},          // 7  [(i-2)]
-		{0, Call, -7},     // 8  [fib(i-2)]
-		{0, Fdup, -2},     // 9  [fib(i-2) i]
-		{0, Push, 1},      // 10 [(i-2) i 1]
-		{0, Sub},          // 11 [(i-2) (i-1)]
-		{0, Call, -11},    // 12 [fib(i-2) fib(i-1)]
-		{0, Add},          // 13 [fib(i-2)+fib(i-1)]
-		{0, Return, 1, 1}, // 14 return i
-		{0, Fdup, -2},     // 15 [i]
-		{0, Return, 1, 1}, // 16 return i
-		{0, Push, 6},      // 17 [1]
-		{0, Call, -17},    // 18 [fib(*1)]
-		{0, Exit},         // 19
-	},
-	start: 0, end: 1, mem: "[8]",
-}, { // #04 -- Fibonacci with some immediate instructions.
-	code: [][]int64{
-		{0, Jump, 14},     // 0
-		{0, Fdup, -2},     // 1  [i]
-		{0, Loweri, 2},    // 2  [true/false]
-		{0, JumpTrue, 9},  // 3  [], goto 13
-		{0, Fdup, -2},     // 4  [i]
-		{0, Subi, 2},      // 5  [(i-2)]
-		{0, Call, -5},     // 6  [fib(i-2)]
-		{0, Fdup, -2},     // 7  [fib(i-2) i]
-		{0, Subi, 1},      // 8  [(i-2) (i-1)]
-		{0, Call, -8},     // 9  [fib(i-2) fib(i-1)], call 1
-		{0, Add},          // 10 [fib(i-2)+fib(i-1)]
-		{0, Return, 1, 1}, // 11 return i
-		{0, Fdup, -2},     // 12 [i]
-		{0, Return, 1, 1}, // 13 return i
-		{0, Push, 6},      // 14 [1]
-		{0, Call, -14},    // 15 [fib(*1)], call 1
-		{0, Exit},         // 16
-	},
-	start: 0, end: 1, mem: "[8]",
-}}
-- 
cgit v1.2.3