codegen: add a bytecode generator (#5)

* codegen: add a bytecode generator * cleaning scanner, parser and vm1.
author: Marc Vertes <marc.vertes@tendermint.com> 2023-08-09 11:47:39 +0200
committer: GitHub <noreply@github.com> 2023-08-09 11:47:39 +0200
commit: 947873b34aabe46dfb9f8d06214736cb11b5a6b2 (patch)
tree: 9fc4728cf39017ee0275d62a7578881cbb3073bb
parent: 355750be61fbf4b90d132a9560e01113f22f4c38 (diff)
16 files changed, 488 insertions, 93 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..385d6a8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.out
+*.test
diff --git a/cmd/gint/main.go b/cmd/gint/main.go
index b36eb5d..85fdba0 100644
--- a/cmd/gint/main.go
+++ b/cmd/gint/main.go
@@ -18,15 +18,20 @@ func main() {
 	if err != nil {
 		log.Fatal(err)
 	}
+	run := run0
 	if len(os.Args) > 1 {
-		if err := run1(string(buf)); err != nil {
-			log.Fatal(err)
-		}
-	} else {
-		if err := run0(string(buf)); err != nil {
-			log.Fatal(err)
+		v := "vm" + os.Args[1]
+		switch v {
+		case "vm0":
+		case "vm1":
+			run = run1
+		default:
+			log.Fatal("invalid argument", os.Args[1])
 		}
 	}
+	if err := run(string(buf)); err != nil {
+		log.Fatal(err)
+	}
 }
 
 func run0(src string) error {
@@ -46,8 +51,8 @@ func run0(src string) error {
 
 func run1(src string) (err error) {
 	m := &vm1.Machine{}
-	c := &codegen.Compiler{Symbols: map[string]int{}}
-	c.AddSym(fmt.Println, "println")
+	c := codegen.New()
+	c.AddSym(fmt.Println, "println", false)
 	n := &parser.Node{}
 	if n.Child, err = golang.GoParser.Parse(src); err != nil {
 		return err
@@ -58,11 +63,12 @@ func run1(src string) (err error) {
 	}
 	c.Emit(n, vm1.Exit)
 	log.Println("data:", c.Data)
-	log.Println("code:", vm1.Disas(c.Code))
+	log.Println("code:", vm1.Disassemble(c.Code))
 	for _, v := range c.Data {
 		m.Push(v)
 	}
 	m.PushCode(c.Code)
+	m.SetIP(c.Entry)
 	m.Run()
 	return
 }
diff --git a/codegen/codegen.go b/codegen/codegen.go
new file mode 100644
index 0000000..048b20f
--- /dev/null
+++ b/codegen/codegen.go
@@ -0,0 +1,226 @@
+package codegen
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/gnolang/parscan/parser"
+	"github.com/gnolang/parscan/vm1"
+)
+
+type symbol struct {
+	index int  // address of symbol in frame
+	local bool // if true address is relative to local frame, otherwise global
+}
+
+type Compiler struct {
+	Code  [][]int64 // produced code, to fill VM with
+	Data  []any     // produced data, will be at the bottom of VM stack
+	Entry int
+
+	symbols map[string]symbol
+}
+
+func New() *Compiler { return &Compiler{symbols: map[string]symbol{}, Entry: -1} }
+
+type nodedata struct {
+	ipstart, ipend, symind, fsp int // CFG and symbol node annotations
+}
+
+func (c *Compiler) CodeGen(node *parser.Node) (err error) {
+	notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ...
+	scope := ""
+	frameNode := []*parser.Node{node}
+	fnote := notes[node]
+
+	node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) {
+		// Node pre-order processing callback.
+		notes[n] = &nodedata{}
+		nd := notes[n]
+
+		switch n.Kind {
+		case parser.FuncDecl:
+			fname := n.Child[0].Content()
+			i := c.Emit(n, vm1.Enter)
+			c.AddSym(i, scope+fname, false)
+			scope = pushScope(scope, fname)
+			frameNode = append(frameNode, n)
+			fnote = notes[n]
+			for j, child := range n.Child[1].Child {
+				vname := child.Content()
+				c.AddSym(-j-2, scope+vname, true)
+				fnote.fsp++
+			}
+
+		case parser.StmtBloc:
+			nd.ipstart = len(c.Code)
+			if a != nil && a.Kind == parser.IfStmt && k == 1 {
+				c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt
+			}
+		}
+		return true
+	}, func(n, a *parser.Node, k int) (ok bool) {
+		// Node post-order processing callback.
+		nd := notes[n]
+
+		switch n.Kind {
+		case parser.AddOp:
+			c.Emit(n, vm1.Add)
+
+		case parser.CallExpr:
+			if c.isExternalSymbol(n.Child[0].Content()) {
+				// External call, using absolute addr in symtable
+				c.Emit(n, vm1.CallX, int64(len(n.Child[1].Child)))
+				break
+			}
+			// Internal call is always relative to instruction pointer.
+			i, ok := c.symInt(n.Child[0].Content())
+			if !ok {
+				err = fmt.Errorf("invalid symbol %s", n.Child[0].Content())
+			}
+			c.Emit(n, vm1.Call, int64(i-len(c.Code)))
+
+		case parser.DefOp:
+			// Define operation, global vars only. TODO: on local frame too
+			l := c.AddSym(nil, n.Child[0].Content(), false)
+			c.Emit(n, vm1.Assign, int64(l))
+
+		case parser.FuncDecl:
+			scope = popScope(scope)
+			fnote = notes[frameNode[len(frameNode)-1]]
+
+		case parser.Ident:
+			ident := n.Content()
+			if len(n.Child) > 0 || a.Kind == parser.FuncDecl {
+				break
+			}
+			if s, _, ok := c.getSym(ident, scope); ok {
+				if s.local {
+					c.Emit(n, vm1.Fdup, int64(s.index))
+				} else if a != nil && a.Kind == parser.AssignOp {
+					c.Emit(n, vm1.Push, int64(s.index))
+				} else if c.isExternalSymbol(ident) {
+					c.Emit(n, vm1.Dup, int64(s.index))
+				}
+			}
+
+		case parser.IfStmt:
+			ifBodyStart := notes[n.Child[1]].ipstart
+			ifBodyEnd := notes[n.Child[1]].ipend
+			c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart)
+			// TODO: handle 'else'
+
+		case parser.NumberLit:
+			// A literal number can be a float or an integer, or a big number
+			switch v := n.Value().(type) {
+			case int64:
+				c.Emit(n, vm1.Push, v)
+			case error:
+				err = v
+				return false
+			default:
+				err = fmt.Errorf("type not supported: %T\n", v)
+				return false
+			}
+
+		case parser.ReturnStmt:
+			c.Emit(n, vm1.Return, int64(len(n.Child)))
+
+		case parser.StmtBloc:
+			nd.ipend = len(c.Code)
+
+		case parser.StringLit:
+			p := len(c.Data)
+			c.Data = append(c.Data, n.Block())
+			c.Emit(n, vm1.Dup, int64(p))
+
+		case parser.InfOp:
+			c.Emit(n, vm1.Lower)
+
+		case parser.SubOp:
+			c.Emit(n, vm1.Sub)
+		}
+
+		// TODO: Fix this temporary hack to compute an entry point
+		if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.FuncDecl {
+			c.Entry = len(c.Code) - 1
+			if c.Code[c.Entry][1] == vm1.Return {
+				c.Entry++
+			}
+		}
+		return true
+	})
+	return
+}
+
+func (c *Compiler) AddSym(v any, name string, local bool) int {
+	l := len(c.Data)
+	if local {
+		l = v.(int)
+	} else {
+		c.Data = append(c.Data, v)
+	}
+	c.symbols[name] = symbol{index: l, local: local}
+	return l
+}
+
+func (c *Compiler) Emit(n *parser.Node, op ...int64) int {
+	op = append([]int64{int64(n.Pos())}, op...)
+	l := len(c.Code)
+	c.Code = append(c.Code, op)
+	return l
+}
+
+func (c *Compiler) isExternalSymbol(name string) bool {
+	s, ok := c.symbols[name]
+	if !ok {
+		return false
+	}
+	_, isInt := c.Data[s.index].(int)
+	return !isInt
+}
+
+func (c *Compiler) symInt(name string) (int, bool) {
+	s, ok := c.symbols[name]
+	if !ok {
+		return 0, false
+	}
+	j, ok := c.Data[s.index].(int)
+	if !ok {
+		return 0, false
+	}
+	return j, true
+}
+
+func pushScope(scope, name string) string {
+	return strings.TrimPrefix(scope+"/"+name+"/", "/")
+}
+
+func popScope(scope string) string {
+	scope = strings.TrimSuffix(scope, "/")
+	j := strings.LastIndex(scope, "/")
+	if j == -1 {
+		return ""
+	}
+	return scope[:j]
+}
+
+// getSym searches for an existing symbol starting from the deepest scope.
+func (c *Compiler) getSym(name, scope string) (sym symbol, sc string, ok bool) {
+	for {
+		if sym, ok = c.symbols[scope+name]; ok {
+			return sym, scope, ok
+		}
+		scope = strings.TrimSuffix(scope, "/")
+		i := strings.LastIndex(scope, "/")
+		if i == -1 {
+			scope = ""
+			break
+		}
+		if scope = scope[:i]; scope == "" {
+			break
+		}
+	}
+	sym, ok = c.symbols[name]
+	return sym, scope, ok
+}
diff --git a/codegen/codegen_test.go b/codegen/codegen_test.go
new file mode 100644
index 0000000..d2f8033
--- /dev/null
+++ b/codegen/codegen_test.go
@@ -0,0 +1,51 @@
+package codegen
+
+import (
+	"fmt"
+	"log"
+	"testing"
+
+	"github.com/gnolang/parscan/lang/golang"
+	"github.com/gnolang/parscan/parser"
+	"github.com/gnolang/parscan/vm1"
+)
+
+func TestCodeGen(t *testing.T) {
+	log.SetFlags(log.Lshortfile)
+	for _, test := range tests {
+		test := test
+		t.Run("", func(t *testing.T) {
+			c := New()
+			c.AddSym(fmt.Println, "println")
+			n := &parser.Node{}
+			var err error
+			if n.Child, err = golang.GoParser.Parse(test.src); err != nil {
+				t.Error(err)
+			}
+			errStr := ""
+			if err = c.CodeGen(n); err != nil {
+				errStr = err.Error()
+			}
+			if errStr != test.err {
+				t.Errorf("got error %#v, want error %#v", errStr, test.err)
+			}
+			t.Log("data:", c.Data)
+			t.Log("code:", vm1.Disassemble(c.Code))
+		})
+	}
+}
+
+var tests = []struct {
+	src, asm, sym, err string
+}{{ // #00
+	src: "1+2",
+	asm: "Push 1\nPush 2\nAdd\n",
+}, { // #01
+	src: `println("Hello")`,
+	asm: "Dup 0\nDup 1\nCallX 1\n",
+}, { // #02
+	src: `a := 2; println(a)`,
+	asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1",
+}, { // #03
+	src: `a := 2; if a < 3 {println(a)}; println("bye")`,
+}}
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 49b12d8..1517689 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -55,16 +55,11 @@ var GoParser = &parser.Parser{
 		"<":      {parser.InfOp, 0, 6},
 		":=":     {parser.DefOp, 0, 7},
 		"=":      {parser.AssignOp, 0, 7},
-		"#call":  {parser.CallExpr, 0, 0},
-		"#id":    {parser.Ident, 0, 0},
-		"#num":   {parser.NumberLit, 0, 0},
 		"if":     {parser.IfStmt, parser.Stmt | parser.ExprSep, 0},
 		"func":   {parser.FuncDecl, parser.Decl | parser.Call, 0},
 		"return": {parser.ReturnStmt, parser.Stmt, 0},
 		"{..}":   {parser.StmtBloc, parser.ExprSep, 0},
-		"{":      {parser.StmtBloc, parser.ExprSep, 0}, // FIXME: redundant with above
 		"(..)":   {parser.ParBloc, parser.Call, 0},
-		"(":      {parser.ParBloc, parser.Call, 0}, // FIXME: redundant with above
 		`".."`:   {parser.StringLit, 0, 0},
 	},
 }
diff --git a/parser/node.go b/parser/node.go
index f81285a..d2f13ef 100644
--- a/parser/node.go
+++ b/parser/node.go
@@ -6,8 +6,6 @@ type Node struct {
 	Child         []*Node // sub-tree nodes
 	scanner.Token         // token at origin of the node
 	Kind                  // Node kind, depends on the language spec
-
-	unary bool // TODO(marc): get rid of it.
 }
 
 // TODO: remove it in favor of Walk2
diff --git a/parser/parse.go b/parser/parse.go
index cfbff27..4f186bd 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,8 +1,6 @@
 package parser
 
-import (
-	"github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/scanner"
 
 const (
 	Stmt = 1 << iota
@@ -31,25 +29,29 @@ func (p *Parser) Parse(src string) (n []*Node, err error) {
 	return p.ParseTokens(tokens)
 }
 
-func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
+func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) {
 	// TODO: error handling.
-	var root *Node // current root node
-	var expr *Node // current expression root node
-	var prev, c *Node
-	var lce *Node
-	for i, t := range token {
+	var root *Node              // current root node
+	var expr *Node              // current expression root node
+	var prev, c *Node           // previous and current nodes
+	var lce *Node               // last complete expression node
+	unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
+
+	for i, t := range tokens {
 		prev = c
 		c = &Node{
 			Token: t,
 			Kind:  p.Spec[t.Name()].Kind,
-			unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator),
 		}
-		if c.Kind == 0 {
+		if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
+			unaryOp[c] = true
+		}
+		if c.Kind == Undefined {
 			switch t.Kind() {
 			case scanner.Number:
-				c.Kind = p.Spec["#num"].Kind
+				c.Kind = NumberLit
 			case scanner.Identifier:
-				c.Kind = p.Spec["#id"].Kind
+				c.Kind = Ident
 			}
 		}
 
@@ -65,20 +67,20 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
 			continue
 		}
 
-		if t.Kind() == scanner.Block {
+		if t.IsBlock() {
 			if expr != nil {
-				if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) {
+				if p.hasProp(c, ExprSep) && p.isExprSep(root) {
 					// A bracket block may end a previous expression.
 					root.Child = append(root.Child, expr)
 					expr = nil
-				} else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) {
+				} else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
 					// Handle (possibly nested) call expressions.
 					if lce == nil || lce != expr { // TODO(marc): not general, fix it.
 						lce = prev
 					}
 					lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
 					lce.Token = scanner.NewToken("Call", c.Pos())
-					lce.Kind = p.Spec["#call"].Kind
+					lce.Kind = CallExpr
 				}
 			}
 			tcont := t.Content()
@@ -91,10 +93,10 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
 		}
 
 		// Process the end of an expression or a statement.
-		if t.Kind() == scanner.Separator {
+		if t.IsSeparator() {
 			if expr != nil && p.hasProp(root, Stmt) {
 				root.Child = append(root.Child, expr)
-				if p.hasBlockProp(expr, ExprSep) {
+				if p.hasProp(expr, ExprSep) {
 					roots = append(roots, root)
 					root = nil
 				}
@@ -129,7 +131,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
 		ep := p.Spec[expr.Content()].Order
 		cp := p.Spec[c.Content()].Order
 		a := expr
-		if c.unary {
+		if unaryOp[c] {
 			cp = 0
 		}
 		if cp != 0 {
@@ -143,7 +145,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
 				c1 := expr
 				for {
 					a = c1
-					if c1.unary {
+					if unaryOp[c1] {
 						c1, c = c, c1
 						a = c1
 						if c == expr {
@@ -155,7 +157,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
 						break
 					}
 					c1 = c1.Child[1]
-					if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order {
+					if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
 						break
 					}
 				}
@@ -194,9 +196,7 @@ func (p *Parser) isExprSep(n *Node) bool          { return p.Spec[n.Content()].F
 func (p *Parser) isExpr(n *Node) bool             { return !p.isStatement(n) && !p.isExprSep(n) }
 func (p *Parser) isSep(n *Node) bool              { return n.Token.Kind() == scanner.Separator }
 func (p *Parser) IsBlock(n *Node) bool            { return n.Token.Kind() == scanner.Block }
-func (p *Parser) hasBlockProp(n *Node, prop uint) bool {
-	return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0
-}
+
 func (p *Parser) precedenceToken(t scanner.Token) int {
 	s := t.Content()
 	if l := t.Start(); l > 0 {
diff --git a/parser/parse_test.go b/parser/parse_test.go
index 3bf6955..838e1c8 100644
--- a/parser/parse_test.go
+++ b/parser/parse_test.go
@@ -57,19 +57,17 @@ var GoParser = &Parser{
 		"<":      {InfOp, 0, 6},
 		":=":     {DefOp, 0, 7},
 		"=":      {AssignOp, 0, 7},
-		"#Call":  {CallExpr, 0, 0},
 		"if":     {IfStmt, Stmt | ExprSep, 0},
 		"func":   {FuncDecl, Decl | Call, 0},
 		"return": {ReturnStmt, Stmt, 0},
 		"{..}":   {StmtBloc, ExprSep, 0},
-		"{":      {StmtBloc, ExprSep, 0},
-		"(":      {ParBloc, Call, 0},
 		"(..)":   {ParBloc, Call, 0},
 	},
 }
 
 func TestParse(t *testing.T) {
 	for _, test := range goTests {
+		test := test
 		t.Run("", func(t *testing.T) {
 			var err error
 			errStr := ""
diff --git a/parser/readme.md b/parser/readme.md
new file mode 100644
index 0000000..19d8778
--- /dev/null
+++ b/parser/readme.md
@@ -0,0 +1,60 @@
+# Parser
+
+A parser takes an array of tokens (produced by the scanner) in input and
+returns a node representing a syntax tree. A node is an object
+containing a kind, the corresponding token and the ordered references to
+descendent nodes.
+
+A goal is to make the parser generic enough so it can generate syntax
+trees for most of existing programming languages (no claim of generality
+yet), provided a small set of generating rules per language, and a small
+set of validating rules (yet to be defined) to detect invalid
+constructs.
+
+The input tokens are particular in the sense that they include classical
+lexical items such as words, separators, numbers, but also strings and
+nested blocks, which are resolved at scanning stage rather than parsing
+stage. See the scanner for more details.
+
+The language specification includes the following:
+
+- a scanner specification, to produce the set of possible tokens.
+- a map of node specification per token name. The node specification
+  defines some parameters influing how the tree is generated.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] binary operator expressions
+- [x] unary operator (prefix) expressions
+- [ ] unary operator (suffix) expressions
+- [x] operator precedence rules
+- [x] parenthesis in expressions
+- [ ] semi-colon automatic insertion rules
+- [x] call expressions
+- [ ] nested calls
+- [x] index expressions
+- [x] single assignments
+- [ ] multi assignments
+- [x] simple `if` statement (no `else`)
+- [ ] full `if` statement (including `else`, `else if`)
+- [x] init expressions in `if` statements
+- [x] statement blocks
+- [ ] comments
+- [ ] for statement
+- [ ] switch statement
+- [ ] select statement
+- [x] return statement
+- [x] function declaration
+- [ ] method declaration 
+- [ ] anonymous functions (closures)
+- [ ] type declaration
+- [ ] var, const, type single declaration
+- [ ] var, const, type multi declaration
+- [ ] type parametric expressions
+- [x] literal numbers (see scanner)
+- [x] literal strings 
+- [ ] composite literal
+- [ ] import statements
+- [ ] go.mod syntax
diff --git a/samples/add b/samples/add
new file mode 100644
index 0000000..a403485
--- /dev/null
+++ b/samples/add
@@ -0,0 +1,2 @@
+func add(a int, b int) int { return a + b }
+add(4, 3)
diff --git a/samples/fib b/samples/fib
index 0ff570c..03545d3 100644
--- a/samples/fib
+++ b/samples/fib
@@ -3,4 +3,4 @@ func fib(i int) int {
 	return fib(i-2) + fib(i-1)
 }
 
-println(fib(20))
+println(fib(5))
diff --git a/scanner/readme.md b/scanner/readme.md
new file mode 100644
index 0000000..b8b31fb
--- /dev/null
+++ b/scanner/readme.md
@@ -0,0 +1,42 @@
+# Scanner
+
+A scanner takes a string in input and returns an array of tokens.
+
+Tokens can be of the following kinds:
+- identifier
+- number
+- operator
+- separator
+- string
+- block
+
+Resolving nested blocks in the scanner is making the parser simple
+and generic, without having to resort to parse tables.
+
+The lexical rules are provided by a language specification at language
+level which includes the following:
+
+- a set of composable properties (1 per bit, on an integer) for each
+  character in the ASCII range (where all separator, operators and
+  reserved keywords must be defined).
+- for each block or string, the specification of starting and ending
+  delimiter.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] numbers starting with a digit
+- [ ] numbers starting otherwise
+- [x] unescaped strings (including multiline)
+- [x] escaped string (including multiline)
+- [x] separators (in UTF-8 range)
+- [ ] single line string (\n not allowed)
+- [x] identifiers (in UTF-8 range)
+- [x] operators, concatenated or not
+- [x] single character block/string delimiters
+- [x] arbitrarly nested blocks and strings
+- [ ] multiple characters block/string delimiters
+- [ ] blocks delimited by identifiers/operators/separators
+- [ ] blocks with delimiter inclusion/exclusion rules
+- [ ] blocks delimited by indentation level
diff --git a/scanner/scan.go b/scanner/scan.go
index 89d660e..066fc2a 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -46,14 +46,17 @@ type Token struct {
 	value   any
 }
 
-func (t *Token) Kind() Kind      { return t.kind }
-func (t *Token) Content() string { return t.content }
-func (t *Token) Start() int      { return t.start }
-func (t *Token) End() int        { return t.end }
-func (t *Token) Pos() int        { return t.pos }
-func (t *Token) Block() string   { return t.content[t.start : len(t.content)-t.end] }
-func (t *Token) Prefix() string  { return t.content[:t.start] }
-func (t *Token) Value() any      { return t.value }
+func (t *Token) Kind() Kind        { return t.kind }
+func (t *Token) Content() string   { return t.content }
+func (t *Token) Start() int        { return t.start }
+func (t *Token) End() int          { return t.end }
+func (t *Token) Pos() int          { return t.pos }
+func (t *Token) Block() string     { return t.content[t.start : len(t.content)-t.end] }
+func (t *Token) Prefix() string    { return t.content[:t.start] }
+func (t *Token) Value() any        { return t.value }
+func (t *Token) IsBlock() bool     { return t.kind == Block }
+func (t *Token) IsOperator() bool  { return t.kind == Operator }
+func (t *Token) IsSeparator() bool { return t.kind == Separator }
 
 func (t *Token) Name() string {
 	name := t.content
diff --git a/scanner/scan_test.go b/scanner/scan_test.go
index 6a54d8e..6be60a4 100644
--- a/scanner/scan_test.go
+++ b/scanner/scan_test.go
@@ -76,6 +76,7 @@ def"`, "[]", "1:1: block not terminated"},
 	}
 
 	for _, test := range tests {
+		test := test
 		t.Run("", func(t *testing.T) {
 			errStr := ""
 			token, err := GoScanner.Scan(test.src)
diff --git a/vm1/vm.go b/vm1/vm.go
index 152fba4..54e5223 100644
--- a/vm1/vm.go
+++ b/vm1/vm.go
@@ -9,45 +9,47 @@ import (
 // Byte-code instruction set.
 const (
 	// instruction effect on stack: values consumed -- values produced
-	Nop      = iota // --
-	Add             // n1 n2 -- sum ; sum = n1+n2
-	Assign          // addr val -- ; mem[addr] = val
-	Call            // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
-	CallX           // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
-	Dup             // addr -- value ; value = mem[addr]
-	Fdup            // addr -- value ; value = mem[addr]
-	Enter           // -- ; enter frame: push(fp), fp = sp
-	Exit            // -- ;
-	Jump            // -- ; ip += $1
-	JumpTrue        // cond -- ; if cond { ip += $1 }
-	Lower           // n1 n2 -- cond ; cond = n1 < n2
-	Loweri          // n1 -- cond ; cond = n1 < $1
-	Pop             // v --
-	Push            // -- v
-	Return          // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
-	Sub             // n1 n2 -- diff ; diff = n1 - n2
-	Subi            // n1 -- diff ; diff = n1 - $1
+	Nop       = iota // --
+	Add              // n1 n2 -- sum ; sum = n1+n2
+	Assign           // val -- ; mem[$1] = val
+	Call             // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+	CallX            // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
+	Dup              // addr -- value ; value = mem[addr]
+	Fdup             // addr -- value ; value = mem[addr]
+	Enter            // -- ; enter frame: push(fp), fp = sp
+	Exit             // -- ;
+	Jump             // -- ; ip += $1
+	JumpTrue         // cond -- ; if cond { ip += $1 }
+	JumpFalse        // cond -- ; if cond { ip += $1 }
+	Lower            // n1 n2 -- cond ; cond = n1 < n2
+	Loweri           // n1 -- cond ; cond = n1 < $1
+	Pop              // v --
+	Push             // -- v
+	Return           // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
+	Sub              // n1 n2 -- diff ; diff = n1 - n2
+	Subi             // n1 -- diff ; diff = n1 - $1
 )
 
 var strop = [...]string{ // for VM tracing.
-	Nop:      "Nop",
-	Add:      "Add",
-	Assign:   "Assign",
-	Call:     "Call",
-	CallX:    "CallX",
-	Dup:      "Dup",
-	Fdup:     "Fdup",
-	Enter:    "Enter",
-	Exit:     "Exit",
-	Jump:     "Jump",
-	JumpTrue: "JumpTrue",
-	Lower:    "Lower",
-	Loweri:   "Loweri",
-	Pop:      "Pop",
-	Push:     "Push",
-	Return:   "Return",
-	Sub:      "Sub",
-	Subi:     "Subi",
+	Nop:       "Nop",
+	Add:       "Add",
+	Assign:    "Assign",
+	Call:      "Call",
+	CallX:     "CallX",
+	Dup:       "Dup",
+	Fdup:      "Fdup",
+	Enter:     "Enter",
+	Exit:      "Exit",
+	Jump:      "Jump",
+	JumpTrue:  "JumpTrue",
+	JumpFalse: "JumpFalse",
+	Lower:     "Lower",
+	Loweri:    "Loweri",
+	Pop:       "Pop",
+	Push:      "Push",
+	Return:    "Return",
+	Sub:       "Sub",
+	Subi:      "Subi",
 }
 
 // Machine represents a virtual machine.
@@ -69,7 +71,7 @@ func (m *Machine) Run() {
 		if len(code[ip]) > 2 {
 			op2 = strconv.Itoa(int(code[ip][2]))
 		}
-		fmt.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-8s %-4s] mem:%v\n", ip, sp, fp, strop[code[ip][1]], op2, mem)
+		fmt.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s] mem:%v\n", ip, sp, fp, strop[code[ip][1]], op2, mem)
 	}
 	_ = trace
 
@@ -81,7 +83,7 @@ func (m *Machine) Run() {
 			mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int)
 			mem = mem[:sp-1]
 		case Assign:
-			mem[mem[sp-2].(int)] = mem[sp-1]
+			mem[op[2]] = mem[sp-1]
 			mem = mem[:sp-1]
 		case Call:
 			mem = append(mem, ip+1)
@@ -117,6 +119,13 @@ func (m *Machine) Run() {
 				ip += int(op[2])
 				continue
 			}
+		case JumpFalse:
+			cond := mem[sp-1].(bool)
+			mem = mem[:sp-1]
+			if !cond {
+				ip += int(op[2])
+				continue
+			}
 		case Lower:
 			mem[sp-2] = mem[sp-2].(int) < mem[sp-1].(int)
 			mem = mem[:sp-1]
@@ -147,12 +156,13 @@ func (m *Machine) PushCode(code [][]int64) (p int) {
 	m.code = append(m.code, code...)
 	return
 }
+
 func (m *Machine) SetIP(ip int)       { m.ip = ip }
 func (m *Machine) Push(v any) (l int) { l = len(m.mem); m.mem = append(m.mem, v); return }
 func (m *Machine) Pop() (v any)       { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return }
 
-// Disas returns the code as a readable string.
-func Disas(code [][]int64) (asm string) {
+// Disassemble returns the code as a readable string.
+func Disassemble(code [][]int64) (asm string) {
 	for _, op := range code {
 		if len(op) > 2 {
 			asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
diff --git a/vm1/vm_test.go b/vm1/vm_test.go
index 1268bba..6a00808 100644
--- a/vm1/vm_test.go
+++ b/vm1/vm_test.go
@@ -25,6 +25,7 @@ func TestVM(t *testing.T) {
 
 func BenchmarkVM(b *testing.B) {
 	for _, test := range tests {
+		test := test
 		b.Run("", func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
 				b.StopTimer()
author	Marc Vertes <marc.vertes@tendermint.com>	2023-08-09 11:47:39 +0200
committer	GitHub <noreply@github.com>	2023-08-09 11:47:39 +0200
commit	947873b34aabe46dfb9f8d06214736cb11b5a6b2 (patch)
tree	9fc4728cf39017ee0275d62a7578881cbb3073bb
parent	355750be61fbf4b90d132a9560e01113f22f4c38 (diff)