summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-08-09 11:47:39 +0200
committerGitHub <noreply@github.com>2023-08-09 11:47:39 +0200
commit947873b34aabe46dfb9f8d06214736cb11b5a6b2 (patch)
tree9fc4728cf39017ee0275d62a7578881cbb3073bb
parent355750be61fbf4b90d132a9560e01113f22f4c38 (diff)
codegen: add a bytecode generator (#5)
* codegen: add a bytecode generator * cleaning scanner, parser and vm1.
-rw-r--r--.gitignore2
-rw-r--r--cmd/gint/main.go24
-rw-r--r--codegen/codegen.go226
-rw-r--r--codegen/codegen_test.go51
-rw-r--r--lang/golang/go.go5
-rw-r--r--parser/node.go2
-rw-r--r--parser/parse.go50
-rw-r--r--parser/parse_test.go4
-rw-r--r--parser/readme.md60
-rw-r--r--samples/add2
-rw-r--r--samples/fib2
-rw-r--r--scanner/readme.md42
-rw-r--r--scanner/scan.go19
-rw-r--r--scanner/scan_test.go1
-rw-r--r--vm1/vm.go90
-rw-r--r--vm1/vm_test.go1
16 files changed, 488 insertions, 93 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..385d6a8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.out
+*.test
diff --git a/cmd/gint/main.go b/cmd/gint/main.go
index b36eb5d..85fdba0 100644
--- a/cmd/gint/main.go
+++ b/cmd/gint/main.go
@@ -18,15 +18,20 @@ func main() {
if err != nil {
log.Fatal(err)
}
+ run := run0
if len(os.Args) > 1 {
- if err := run1(string(buf)); err != nil {
- log.Fatal(err)
- }
- } else {
- if err := run0(string(buf)); err != nil {
- log.Fatal(err)
+ v := "vm" + os.Args[1]
+ switch v {
+ case "vm0":
+ case "vm1":
+ run = run1
+ default:
+ log.Fatal("invalid argument", os.Args[1])
}
}
+ if err := run(string(buf)); err != nil {
+ log.Fatal(err)
+ }
}
func run0(src string) error {
@@ -46,8 +51,8 @@ func run0(src string) error {
func run1(src string) (err error) {
m := &vm1.Machine{}
- c := &codegen.Compiler{Symbols: map[string]int{}}
- c.AddSym(fmt.Println, "println")
+ c := codegen.New()
+ c.AddSym(fmt.Println, "println", false)
n := &parser.Node{}
if n.Child, err = golang.GoParser.Parse(src); err != nil {
return err
@@ -58,11 +63,12 @@ func run1(src string) (err error) {
}
c.Emit(n, vm1.Exit)
log.Println("data:", c.Data)
- log.Println("code:", vm1.Disas(c.Code))
+ log.Println("code:", vm1.Disassemble(c.Code))
for _, v := range c.Data {
m.Push(v)
}
m.PushCode(c.Code)
+ m.SetIP(c.Entry)
m.Run()
return
}
diff --git a/codegen/codegen.go b/codegen/codegen.go
new file mode 100644
index 0000000..048b20f
--- /dev/null
+++ b/codegen/codegen.go
@@ -0,0 +1,226 @@
+package codegen
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/gnolang/parscan/parser"
+ "github.com/gnolang/parscan/vm1"
+)
+
+type symbol struct {
+ index int // address of symbol in frame
+ local bool // if true address is relative to local frame, otherwise global
+}
+
+type Compiler struct {
+ Code [][]int64 // produced code, to fill VM with
+ Data []any // produced data, will be at the bottom of VM stack
+ Entry int
+
+ symbols map[string]symbol
+}
+
+func New() *Compiler { return &Compiler{symbols: map[string]symbol{}, Entry: -1} }
+
+type nodedata struct {
+ ipstart, ipend, symind, fsp int // CFG and symbol node annotations
+}
+
+func (c *Compiler) CodeGen(node *parser.Node) (err error) {
+ notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ...
+ scope := ""
+ frameNode := []*parser.Node{node}
+ fnote := notes[node]
+
+ node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) {
+ // Node pre-order processing callback.
+ notes[n] = &nodedata{}
+ nd := notes[n]
+
+ switch n.Kind {
+ case parser.FuncDecl:
+ fname := n.Child[0].Content()
+ i := c.Emit(n, vm1.Enter)
+ c.AddSym(i, scope+fname, false)
+ scope = pushScope(scope, fname)
+ frameNode = append(frameNode, n)
+ fnote = notes[n]
+ for j, child := range n.Child[1].Child {
+ vname := child.Content()
+ c.AddSym(-j-2, scope+vname, true)
+ fnote.fsp++
+ }
+
+ case parser.StmtBloc:
+ nd.ipstart = len(c.Code)
+ if a != nil && a.Kind == parser.IfStmt && k == 1 {
+ c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt
+ }
+ }
+ return true
+ }, func(n, a *parser.Node, k int) (ok bool) {
+ // Node post-order processing callback.
+ nd := notes[n]
+
+ switch n.Kind {
+ case parser.AddOp:
+ c.Emit(n, vm1.Add)
+
+ case parser.CallExpr:
+ if c.isExternalSymbol(n.Child[0].Content()) {
+ // External call, using absolute addr in symtable
+ c.Emit(n, vm1.CallX, int64(len(n.Child[1].Child)))
+ break
+ }
+ // Internal call is always relative to instruction pointer.
+ i, ok := c.symInt(n.Child[0].Content())
+ if !ok {
+ err = fmt.Errorf("invalid symbol %s", n.Child[0].Content())
+ }
+ c.Emit(n, vm1.Call, int64(i-len(c.Code)))
+
+ case parser.DefOp:
+ // Define operation, global vars only. TODO: on local frame too
+ l := c.AddSym(nil, n.Child[0].Content(), false)
+ c.Emit(n, vm1.Assign, int64(l))
+
+ case parser.FuncDecl:
+ scope = popScope(scope)
+ fnote = notes[frameNode[len(frameNode)-1]]
+
+ case parser.Ident:
+ ident := n.Content()
+ if len(n.Child) > 0 || a.Kind == parser.FuncDecl {
+ break
+ }
+ if s, _, ok := c.getSym(ident, scope); ok {
+ if s.local {
+ c.Emit(n, vm1.Fdup, int64(s.index))
+ } else if a != nil && a.Kind == parser.AssignOp {
+ c.Emit(n, vm1.Push, int64(s.index))
+ } else if c.isExternalSymbol(ident) {
+ c.Emit(n, vm1.Dup, int64(s.index))
+ }
+ }
+
+ case parser.IfStmt:
+ ifBodyStart := notes[n.Child[1]].ipstart
+ ifBodyEnd := notes[n.Child[1]].ipend
+ c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart)
+ // TODO: handle 'else'
+
+ case parser.NumberLit:
+ // A literal number can be a float or an integer, or a big number
+ switch v := n.Value().(type) {
+ case int64:
+ c.Emit(n, vm1.Push, v)
+ case error:
+ err = v
+ return false
+ default:
+ err = fmt.Errorf("type not supported: %T\n", v)
+ return false
+ }
+
+ case parser.ReturnStmt:
+ c.Emit(n, vm1.Return, int64(len(n.Child)))
+
+ case parser.StmtBloc:
+ nd.ipend = len(c.Code)
+
+ case parser.StringLit:
+ p := len(c.Data)
+ c.Data = append(c.Data, n.Block())
+ c.Emit(n, vm1.Dup, int64(p))
+
+ case parser.InfOp:
+ c.Emit(n, vm1.Lower)
+
+ case parser.SubOp:
+ c.Emit(n, vm1.Sub)
+ }
+
+ // TODO: Fix this temporary hack to compute an entry point
+ if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.FuncDecl {
+ c.Entry = len(c.Code) - 1
+ if c.Code[c.Entry][1] == vm1.Return {
+ c.Entry++
+ }
+ }
+ return true
+ })
+ return
+}
+
+func (c *Compiler) AddSym(v any, name string, local bool) int {
+ l := len(c.Data)
+ if local {
+ l = v.(int)
+ } else {
+ c.Data = append(c.Data, v)
+ }
+ c.symbols[name] = symbol{index: l, local: local}
+ return l
+}
+
+func (c *Compiler) Emit(n *parser.Node, op ...int64) int {
+ op = append([]int64{int64(n.Pos())}, op...)
+ l := len(c.Code)
+ c.Code = append(c.Code, op)
+ return l
+}
+
+func (c *Compiler) isExternalSymbol(name string) bool {
+ s, ok := c.symbols[name]
+ if !ok {
+ return false
+ }
+ _, isInt := c.Data[s.index].(int)
+ return !isInt
+}
+
+func (c *Compiler) symInt(name string) (int, bool) {
+ s, ok := c.symbols[name]
+ if !ok {
+ return 0, false
+ }
+ j, ok := c.Data[s.index].(int)
+ if !ok {
+ return 0, false
+ }
+ return j, true
+}
+
+func pushScope(scope, name string) string {
+ return strings.TrimPrefix(scope+"/"+name+"/", "/")
+}
+
+func popScope(scope string) string {
+ scope = strings.TrimSuffix(scope, "/")
+ j := strings.LastIndex(scope, "/")
+ if j == -1 {
+ return ""
+ }
+ return scope[:j]
+}
+
+// getSym searches for an existing symbol starting from the deepest scope.
+func (c *Compiler) getSym(name, scope string) (sym symbol, sc string, ok bool) {
+ for {
+ if sym, ok = c.symbols[scope+name]; ok {
+ return sym, scope, ok
+ }
+ scope = strings.TrimSuffix(scope, "/")
+ i := strings.LastIndex(scope, "/")
+ if i == -1 {
+ scope = ""
+ break
+ }
+ if scope = scope[:i]; scope == "" {
+ break
+ }
+ }
+ sym, ok = c.symbols[name]
+ return sym, scope, ok
+}
diff --git a/codegen/codegen_test.go b/codegen/codegen_test.go
new file mode 100644
index 0000000..d2f8033
--- /dev/null
+++ b/codegen/codegen_test.go
@@ -0,0 +1,51 @@
+package codegen
+
+import (
+ "fmt"
+ "log"
+ "testing"
+
+ "github.com/gnolang/parscan/lang/golang"
+ "github.com/gnolang/parscan/parser"
+ "github.com/gnolang/parscan/vm1"
+)
+
+func TestCodeGen(t *testing.T) {
+ log.SetFlags(log.Lshortfile)
+ for _, test := range tests {
+ test := test
+ t.Run("", func(t *testing.T) {
+ c := New()
+ c.AddSym(fmt.Println, "println")
+ n := &parser.Node{}
+ var err error
+ if n.Child, err = golang.GoParser.Parse(test.src); err != nil {
+ t.Error(err)
+ }
+ errStr := ""
+ if err = c.CodeGen(n); err != nil {
+ errStr = err.Error()
+ }
+ if errStr != test.err {
+ t.Errorf("got error %#v, want error %#v", errStr, test.err)
+ }
+ t.Log("data:", c.Data)
+ t.Log("code:", vm1.Disassemble(c.Code))
+ })
+ }
+}
+
+var tests = []struct {
+ src, asm, sym, err string
+}{{ // #00
+ src: "1+2",
+ asm: "Push 1\nPush 2\nAdd\n",
+}, { // #01
+ src: `println("Hello")`,
+ asm: "Dup 0\nDup 1\nCallX 1\n",
+}, { // #02
+ src: `a := 2; println(a)`,
+ asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1",
+}, { // #03
+ src: `a := 2; if a < 3 {println(a)}; println("bye")`,
+}}
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 49b12d8..1517689 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -55,16 +55,11 @@ var GoParser = &parser.Parser{
"<": {parser.InfOp, 0, 6},
":=": {parser.DefOp, 0, 7},
"=": {parser.AssignOp, 0, 7},
- "#call": {parser.CallExpr, 0, 0},
- "#id": {parser.Ident, 0, 0},
- "#num": {parser.NumberLit, 0, 0},
"if": {parser.IfStmt, parser.Stmt | parser.ExprSep, 0},
"func": {parser.FuncDecl, parser.Decl | parser.Call, 0},
"return": {parser.ReturnStmt, parser.Stmt, 0},
"{..}": {parser.StmtBloc, parser.ExprSep, 0},
- "{": {parser.StmtBloc, parser.ExprSep, 0}, // FIXME: redundant with above
"(..)": {parser.ParBloc, parser.Call, 0},
- "(": {parser.ParBloc, parser.Call, 0}, // FIXME: redundant with above
`".."`: {parser.StringLit, 0, 0},
},
}
diff --git a/parser/node.go b/parser/node.go
index f81285a..d2f13ef 100644
--- a/parser/node.go
+++ b/parser/node.go
@@ -6,8 +6,6 @@ type Node struct {
Child []*Node // sub-tree nodes
scanner.Token // token at origin of the node
Kind // Node kind, depends on the language spec
-
- unary bool // TODO(marc): get rid of it.
}
// TODO: remove it in favor of Walk2
diff --git a/parser/parse.go b/parser/parse.go
index cfbff27..4f186bd 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,8 +1,6 @@
package parser
-import (
- "github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/scanner"
const (
Stmt = 1 << iota
@@ -31,25 +29,29 @@ func (p *Parser) Parse(src string) (n []*Node, err error) {
return p.ParseTokens(tokens)
}
-func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
+func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) {
// TODO: error handling.
- var root *Node // current root node
- var expr *Node // current expression root node
- var prev, c *Node
- var lce *Node
- for i, t := range token {
+ var root *Node // current root node
+ var expr *Node // current expression root node
+ var prev, c *Node // previous and current nodes
+ var lce *Node // last complete expression node
+ unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
+
+ for i, t := range tokens {
prev = c
c = &Node{
Token: t,
Kind: p.Spec[t.Name()].Kind,
- unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator),
}
- if c.Kind == 0 {
+ if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
+ unaryOp[c] = true
+ }
+ if c.Kind == Undefined {
switch t.Kind() {
case scanner.Number:
- c.Kind = p.Spec["#num"].Kind
+ c.Kind = NumberLit
case scanner.Identifier:
- c.Kind = p.Spec["#id"].Kind
+ c.Kind = Ident
}
}
@@ -65,20 +67,20 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
continue
}
- if t.Kind() == scanner.Block {
+ if t.IsBlock() {
if expr != nil {
- if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) {
+ if p.hasProp(c, ExprSep) && p.isExprSep(root) {
// A bracket block may end a previous expression.
root.Child = append(root.Child, expr)
expr = nil
- } else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) {
+ } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
// Handle (possibly nested) call expressions.
if lce == nil || lce != expr { // TODO(marc): not general, fix it.
lce = prev
}
lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
lce.Token = scanner.NewToken("Call", c.Pos())
- lce.Kind = p.Spec["#call"].Kind
+ lce.Kind = CallExpr
}
}
tcont := t.Content()
@@ -91,10 +93,10 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
}
// Process the end of an expression or a statement.
- if t.Kind() == scanner.Separator {
+ if t.IsSeparator() {
if expr != nil && p.hasProp(root, Stmt) {
root.Child = append(root.Child, expr)
- if p.hasBlockProp(expr, ExprSep) {
+ if p.hasProp(expr, ExprSep) {
roots = append(roots, root)
root = nil
}
@@ -129,7 +131,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
ep := p.Spec[expr.Content()].Order
cp := p.Spec[c.Content()].Order
a := expr
- if c.unary {
+ if unaryOp[c] {
cp = 0
}
if cp != 0 {
@@ -143,7 +145,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
c1 := expr
for {
a = c1
- if c1.unary {
+ if unaryOp[c1] {
c1, c = c, c1
a = c1
if c == expr {
@@ -155,7 +157,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
break
}
c1 = c1.Child[1]
- if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order {
+ if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
break
}
}
@@ -194,9 +196,7 @@ func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].F
func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) }
func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator }
func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block }
-func (p *Parser) hasBlockProp(n *Node, prop uint) bool {
- return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0
-}
+
func (p *Parser) precedenceToken(t scanner.Token) int {
s := t.Content()
if l := t.Start(); l > 0 {
diff --git a/parser/parse_test.go b/parser/parse_test.go
index 3bf6955..838e1c8 100644
--- a/parser/parse_test.go
+++ b/parser/parse_test.go
@@ -57,19 +57,17 @@ var GoParser = &Parser{
"<": {InfOp, 0, 6},
":=": {DefOp, 0, 7},
"=": {AssignOp, 0, 7},
- "#Call": {CallExpr, 0, 0},
"if": {IfStmt, Stmt | ExprSep, 0},
"func": {FuncDecl, Decl | Call, 0},
"return": {ReturnStmt, Stmt, 0},
"{..}": {StmtBloc, ExprSep, 0},
- "{": {StmtBloc, ExprSep, 0},
- "(": {ParBloc, Call, 0},
"(..)": {ParBloc, Call, 0},
},
}
func TestParse(t *testing.T) {
for _, test := range goTests {
+ test := test
t.Run("", func(t *testing.T) {
var err error
errStr := ""
diff --git a/parser/readme.md b/parser/readme.md
new file mode 100644
index 0000000..19d8778
--- /dev/null
+++ b/parser/readme.md
@@ -0,0 +1,60 @@
+# Parser
+
+A parser takes an array of tokens (produced by the scanner) in input and
+returns a node representing a syntax tree. A node is an object
+containing a kind, the corresponding token and the ordered references to
+descendent nodes.
+
+A goal is to make the parser generic enough so it can generate syntax
+trees for most of existing programming languages (no claim of generality
+yet), provided a small set of generating rules per language, and a small
+set of validating rules (yet to be defined) to detect invalid
+constructs.
+
+The input tokens are particular in the sense that they include classical
+lexical items such as words, separators, numbers, but also strings and
+nested blocks, which are resolved at scanning stage rather than parsing
+stage. See the scanner for more details.
+
+The language specification includes the following:
+
+- a scanner specification, to produce the set of possible tokens.
+- a map of node specification per token name. The node specification
+ defines some parameters influing how the tree is generated.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] binary operator expressions
+- [x] unary operator (prefix) expressions
+- [ ] unary operator (suffix) expressions
+- [x] operator precedence rules
+- [x] parenthesis in expressions
+- [ ] semi-colon automatic insertion rules
+- [x] call expressions
+- [ ] nested calls
+- [x] index expressions
+- [x] single assignments
+- [ ] multi assignments
+- [x] simple `if` statement (no `else`)
+- [ ] full `if` statement (including `else`, `else if`)
+- [x] init expressions in `if` statements
+- [x] statement blocks
+- [ ] comments
+- [ ] for statement
+- [ ] switch statement
+- [ ] select statement
+- [x] return statement
+- [x] function declaration
+- [ ] method declaration
+- [ ] anonymous functions (closures)
+- [ ] type declaration
+- [ ] var, const, type single declaration
+- [ ] var, const, type multi declaration
+- [ ] type parametric expressions
+- [x] literal numbers (see scanner)
+- [x] literal strings
+- [ ] composite literal
+- [ ] import statements
+- [ ] go.mod syntax
diff --git a/samples/add b/samples/add
new file mode 100644
index 0000000..a403485
--- /dev/null
+++ b/samples/add
@@ -0,0 +1,2 @@
+func add(a int, b int) int { return a + b }
+add(4, 3)
diff --git a/samples/fib b/samples/fib
index 0ff570c..03545d3 100644
--- a/samples/fib
+++ b/samples/fib
@@ -3,4 +3,4 @@ func fib(i int) int {
return fib(i-2) + fib(i-1)
}
-println(fib(20))
+println(fib(5))
diff --git a/scanner/readme.md b/scanner/readme.md
new file mode 100644
index 0000000..b8b31fb
--- /dev/null
+++ b/scanner/readme.md
@@ -0,0 +1,42 @@
+# Scanner
+
+A scanner takes a string in input and returns an array of tokens.
+
+Tokens can be of the following kinds:
+- identifier
+- number
+- operator
+- separator
+- string
+- block
+
+Resolving nested blocks in the scanner is making the parser simple
+and generic, without having to resort to parse tables.
+
+The lexical rules are provided by a language specification at language
+level which includes the following:
+
+- a set of composable properties (1 per bit, on an integer) for each
+ character in the ASCII range (where all separator, operators and
+ reserved keywords must be defined).
+- for each block or string, the specification of starting and ending
+ delimiter.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] numbers starting with a digit
+- [ ] numbers starting otherwise
+- [x] unescaped strings (including multiline)
+- [x] escaped string (including multiline)
+- [x] separators (in UTF-8 range)
+- [ ] single line string (\n not allowed)
+- [x] identifiers (in UTF-8 range)
+- [x] operators, concatenated or not
+- [x] single character block/string delimiters
+- [x] arbitrarly nested blocks and strings
+- [ ] multiple characters block/string delimiters
+- [ ] blocks delimited by identifiers/operators/separators
+- [ ] blocks with delimiter inclusion/exclusion rules
+- [ ] blocks delimited by indentation level
diff --git a/scanner/scan.go b/scanner/scan.go
index 89d660e..066fc2a 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -46,14 +46,17 @@ type Token struct {
value any
}
-func (t *Token) Kind() Kind { return t.kind }
-func (t *Token) Content() string { return t.content }
-func (t *Token) Start() int { return t.start }
-func (t *Token) End() int { return t.end }
-func (t *Token) Pos() int { return t.pos }
-func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] }
-func (t *Token) Prefix() string { return t.content[:t.start] }
-func (t *Token) Value() any { return t.value }
+func (t *Token) Kind() Kind { return t.kind }
+func (t *Token) Content() string { return t.content }
+func (t *Token) Start() int { return t.start }
+func (t *Token) End() int { return t.end }
+func (t *Token) Pos() int { return t.pos }
+func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] }
+func (t *Token) Prefix() string { return t.content[:t.start] }
+func (t *Token) Value() any { return t.value }
+func (t *Token) IsBlock() bool { return t.kind == Block }
+func (t *Token) IsOperator() bool { return t.kind == Operator }
+func (t *Token) IsSeparator() bool { return t.kind == Separator }
func (t *Token) Name() string {
name := t.content
diff --git a/scanner/scan_test.go b/scanner/scan_test.go
index 6a54d8e..6be60a4 100644
--- a/scanner/scan_test.go
+++ b/scanner/scan_test.go
@@ -76,6 +76,7 @@ def"`, "[]", "1:1: block not terminated"},
}
for _, test := range tests {
+ test := test
t.Run("", func(t *testing.T) {
errStr := ""
token, err := GoScanner.Scan(test.src)
diff --git a/vm1/vm.go b/vm1/vm.go
index 152fba4..54e5223 100644
--- a/vm1/vm.go
+++ b/vm1/vm.go
@@ -9,45 +9,47 @@ import (
// Byte-code instruction set.
const (
// instruction effect on stack: values consumed -- values produced
- Nop = iota // --
- Add // n1 n2 -- sum ; sum = n1+n2
- Assign // addr val -- ; mem[addr] = val
- Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
- CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
- Dup // addr -- value ; value = mem[addr]
- Fdup // addr -- value ; value = mem[addr]
- Enter // -- ; enter frame: push(fp), fp = sp
- Exit // -- ;
- Jump // -- ; ip += $1
- JumpTrue // cond -- ; if cond { ip += $1 }
- Lower // n1 n2 -- cond ; cond = n1 < n2
- Loweri // n1 -- cond ; cond = n1 < $1
- Pop // v --
- Push // -- v
- Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
- Sub // n1 n2 -- diff ; diff = n1 - n2
- Subi // n1 -- diff ; diff = n1 - $1
+ Nop = iota // --
+ Add // n1 n2 -- sum ; sum = n1+n2
+ Assign // val -- ; mem[$1] = val
+ Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+ CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
+ Dup // addr -- value ; value = mem[addr]
+ Fdup // addr -- value ; value = mem[addr]
+ Enter // -- ; enter frame: push(fp), fp = sp
+ Exit // -- ;
+ Jump // -- ; ip += $1
+ JumpTrue // cond -- ; if cond { ip += $1 }
+ JumpFalse // cond -- ; if cond { ip += $1 }
+ Lower // n1 n2 -- cond ; cond = n1 < n2
+ Loweri // n1 -- cond ; cond = n1 < $1
+ Pop // v --
+ Push // -- v
+ Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop
+ Sub // n1 n2 -- diff ; diff = n1 - n2
+ Subi // n1 -- diff ; diff = n1 - $1
)
var strop = [...]string{ // for VM tracing.
- Nop: "Nop",
- Add: "Add",
- Assign: "Assign",
- Call: "Call",
- CallX: "CallX",
- Dup: "Dup",
- Fdup: "Fdup",
- Enter: "Enter",
- Exit: "Exit",
- Jump: "Jump",
- JumpTrue: "JumpTrue",
- Lower: "Lower",
- Loweri: "Loweri",
- Pop: "Pop",
- Push: "Push",
- Return: "Return",
- Sub: "Sub",
- Subi: "Subi",
+ Nop: "Nop",
+ Add: "Add",
+ Assign: "Assign",
+ Call: "Call",
+ CallX: "CallX",
+ Dup: "Dup",
+ Fdup: "Fdup",
+ Enter: "Enter",
+ Exit: "Exit",
+ Jump: "Jump",
+ JumpTrue: "JumpTrue",
+ JumpFalse: "JumpFalse",
+ Lower: "Lower",
+ Loweri: "Loweri",
+ Pop: "Pop",
+ Push: "Push",
+ Return: "Return",
+ Sub: "Sub",
+ Subi: "Subi",
}
// Machine represents a virtual machine.
@@ -69,7 +71,7 @@ func (m *Machine) Run() {
if len(code[ip]) > 2 {
op2 = strconv.Itoa(int(code[ip][2]))
}
- fmt.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-8s %-4s] mem:%v\n", ip, sp, fp, strop[code[ip][1]], op2, mem)
+ fmt.Printf("ip:%-4d sp:%-4d fp:%-4d op:[%-9s %-4s] mem:%v\n", ip, sp, fp, strop[code[ip][1]], op2, mem)
}
_ = trace
@@ -81,7 +83,7 @@ func (m *Machine) Run() {
mem[sp-2] = mem[sp-2].(int) + mem[sp-1].(int)
mem = mem[:sp-1]
case Assign:
- mem[mem[sp-2].(int)] = mem[sp-1]
+ mem[op[2]] = mem[sp-1]
mem = mem[:sp-1]
case Call:
mem = append(mem, ip+1)
@@ -117,6 +119,13 @@ func (m *Machine) Run() {
ip += int(op[2])
continue
}
+ case JumpFalse:
+ cond := mem[sp-1].(bool)
+ mem = mem[:sp-1]
+ if !cond {
+ ip += int(op[2])
+ continue
+ }
case Lower:
mem[sp-2] = mem[sp-2].(int) < mem[sp-1].(int)
mem = mem[:sp-1]
@@ -147,12 +156,13 @@ func (m *Machine) PushCode(code [][]int64) (p int) {
m.code = append(m.code, code...)
return
}
+
func (m *Machine) SetIP(ip int) { m.ip = ip }
func (m *Machine) Push(v any) (l int) { l = len(m.mem); m.mem = append(m.mem, v); return }
func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return }
-// Disas returns the code as a readable string.
-func Disas(code [][]int64) (asm string) {
+// Disassemble returns the code as a readable string.
+func Disassemble(code [][]int64) (asm string) {
for _, op := range code {
if len(op) > 2 {
asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
diff --git a/vm1/vm_test.go b/vm1/vm_test.go
index 1268bba..6a00808 100644
--- a/vm1/vm_test.go
+++ b/vm1/vm_test.go
@@ -25,6 +25,7 @@ func TestVM(t *testing.T) {
func BenchmarkVM(b *testing.B) {
for _, test := range tests {
+ test := test
b.Run("", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()