-rw-r--r--  .gitignore                                                       2
-rw-r--r--  _samples/add (renamed from testdata/add)                         0
-rw-r--r--  _samples/fib (renamed from testdata/fib)                         0
-rw-r--r--  _samples/p00 (renamed from testdata/p00)                         0
-rw-r--r--  _samples/p01 (renamed from testdata/p01)                         0
-rw-r--r--  _samples/p02 (renamed from testdata/p02)                         0
-rw-r--r--  _samples/p03 (renamed from testdata/p03)                         0
-rw-r--r--  _samples/p04 (renamed from testdata/p04)                         0
-rw-r--r--  _samples/p05 (renamed from testdata/p05)                         0
-rw-r--r--  _samples/p06 (renamed from testdata/p06)                         0
-rw-r--r--  _samples/p07 (renamed from testdata/p07)                         0
-rw-r--r--  codegen/compiler.go                                            242
-rw-r--r--  codegen/compiler_test.go                                        58
-rw-r--r--  codegen/expression.go                                           41
-rw-r--r--  codegen/interpreter_test.go                                     48
-rw-r--r--  lang/golang/go.go                                              183
-rw-r--r--  lang/spec.go                                                    43
-rw-r--r--  lang/token.go                                                  110
-rw-r--r--  main.go                                                         23
-rw-r--r--  parser/README.md                                               157
-rw-r--r--  parser/compiler.go                                             250
-rw-r--r--  parser/dot.go                                                   89
-rw-r--r--  parser/interpreter.go (renamed from codegen/interpreter.go)     35
-rw-r--r--  parser/interpreter_test.go                                      66
-rw-r--r--  parser/kind.go                                                  56
-rw-r--r--  parser/node.go                                                  50
-rw-r--r--  parser/parse.go                                                519
-rw-r--r--  parser/parse_test.go                                           257
-rw-r--r--  parser/symbol.go                                                67
-rw-r--r--  parser/type.go                                                 117
-rw-r--r--  scanner/scan.go                                                240
-rw-r--r--  scanner/scan_test.go                                           101
-rw-r--r--  vm/README.md (renamed from vm1/README.md)                        8
-rw-r--r--  vm/vm.go (renamed from vm1/vm.go)                               69
-rw-r--r--  vm/vm_test.go (renamed from vm1/vm_test.go)                     83
-rw-r--r--  vm0/README.md                                                   27
-rw-r--r--  vm0/func.go                                                     75
-rw-r--r--  vm0/vm.go                                                      157
-rw-r--r--  vm0/vm_test.go                                                  26
39 files changed, 1424 insertions, 1775 deletions
diff --git a/.gitignore b/.gitignore
index 9529633..e2bf6fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
*.out
*.test
-gint
+parscan
diff --git a/testdata/add b/_samples/add
index a403485..a403485 100644
--- a/testdata/add
+++ b/_samples/add
diff --git a/testdata/fib b/_samples/fib
index 654c5c0..654c5c0 100644
--- a/testdata/fib
+++ b/_samples/fib
diff --git a/testdata/p00 b/_samples/p00
index 19f5084..19f5084 100644
--- a/testdata/p00
+++ b/_samples/p00
diff --git a/testdata/p01 b/_samples/p01
index baafaa9..baafaa9 100644
--- a/testdata/p01
+++ b/_samples/p01
diff --git a/testdata/p02 b/_samples/p02
index 1aeb4a9..1aeb4a9 100644
--- a/testdata/p02
+++ b/_samples/p02
diff --git a/testdata/p03 b/_samples/p03
index 39a3cda..39a3cda 100644
--- a/testdata/p03
+++ b/_samples/p03
diff --git a/testdata/p04 b/_samples/p04
index f2a85c8..f2a85c8 100644
--- a/testdata/p04
+++ b/_samples/p04
diff --git a/testdata/p05 b/_samples/p05
index 51c2c9b..51c2c9b 100644
--- a/testdata/p05
+++ b/_samples/p05
diff --git a/testdata/p06 b/_samples/p06
index 88029cc..88029cc 100644
--- a/testdata/p06
+++ b/_samples/p06
diff --git a/testdata/p07 b/_samples/p07
index 2fa7ee0..2fa7ee0 100644
--- a/testdata/p07
+++ b/_samples/p07
diff --git a/codegen/compiler.go b/codegen/compiler.go
deleted file mode 100644
index 02b67cb..0000000
--- a/codegen/compiler.go
+++ /dev/null
@@ -1,242 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "log"
- "strings"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-type symbol struct {
- index int // address of symbol in frame
- local bool // if true address is relative to local frame, otherwise global
- node *parser.Node // symbol definition in AST
-}
-
-type Compiler struct {
- Code [][]int64 // produced code, to fill VM with
- Data []any // produced data, will be at the bottom of VM stack
- Entry int // offset in Code to start execution from (skip function defintions)
-
- symbols map[string]*symbol
-}
-
-func NewCompiler() *Compiler { return &Compiler{symbols: map[string]*symbol{}, Entry: -1} }
-
-type nodedata struct {
- ipstart, ipend, fsp int // CFG and symbol node annotations
-}
-
-type extNode struct {
- *Compiler
- *parser.Node
- anc *parser.Node // node ancestor
- rank int // node rank in ancestor's children
-}
-
-func (c *Compiler) CodeGen(node *parser.Node) (err error) {
- notes := map[*parser.Node]*nodedata{} // AST node annotations for CFG, symbols, ...
- scope := ""
- frameNode := []*parser.Node{node}
- fnote := notes[node]
-
- node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) {
- // Node pre-order processing callback.
- notes[n] = &nodedata{}
- nd := notes[n]
-
- switch n.Kind {
- case parser.DeclFunc:
- fname := n.Child[0].Content()
- c.addSym(len(c.Code), scope+fname, false, n)
- scope = pushScope(scope, fname)
- frameNode = append(frameNode, n)
- fnote = notes[n]
- for j, child := range n.Child[1].Child {
- vname := child.Content()
- c.addSym(-j-2, scope+vname, true, child)
- fnote.fsp++
- }
-
- case parser.BlockStmt:
- nd.ipstart = len(c.Code)
- if a != nil && a.Kind == parser.StmtIf && k == 1 {
- c.Emit(n, vm1.JumpFalse, 0) // location to be updated in post IfStmt
- }
- }
- return true
- }, func(n, a *parser.Node, k int) (ok bool) {
- // Node post-order processing callback.
- nd := notes[n]
- x := extNode{c, n, a, k}
-
- switch n.Kind {
- case parser.OpAdd:
- c.Emit(n, vm1.Add)
-
- case parser.ExprCall:
- err = postCallExpr(x)
-
- case parser.OpDefine:
- // Define operation, global vars only. TODO: on local frame too
- l := c.addSym(nil, n.Child[0].Content(), false, n)
- c.Emit(n, vm1.Assign, int64(l))
-
- case parser.DeclFunc:
- fun := frameNode[len(frameNode)-1]
- if len(fun.Child) == 3 { // no return values
- if c.Code[len(c.Code)-1][1] != vm1.Return {
- c.Emit(n, vm1.Return, 0, int64(len(fun.Child[1].Child)))
- }
- }
- scope = popScope(scope)
- fnote = notes[fun]
-
- case parser.Ident:
- ident := n.Content()
- if len(n.Child) > 0 || a.Kind == parser.DeclFunc {
- break
- }
- if s, _, ok := c.getSym(ident, scope); ok {
- if s.local {
- c.Emit(n, vm1.Fdup, int64(s.index))
- } else if a != nil && a.Kind == parser.OpAssign {
- c.Emit(n, vm1.Push, int64(s.index))
- } else if _, ok := c.Data[s.index].(int); !ok {
- c.Emit(n, vm1.Dup, int64(s.index))
- }
- }
-
- case parser.StmtIf:
- ifBodyStart := notes[n.Child[1]].ipstart
- ifBodyEnd := notes[n.Child[1]].ipend
- c.Code[ifBodyStart][2] = int64(ifBodyEnd - ifBodyStart)
- // TODO: handle 'else'
-
- case parser.LiteralNumber:
- // A literal number can be a float or an integer, or a big number
- switch v := n.Value().(type) {
- case int64:
- c.Emit(n, vm1.Push, v)
- case error:
- err = v
- default:
- err = fmt.Errorf("type not supported: %T\n", v)
- }
-
- case parser.StmtReturn:
- fun := frameNode[len(frameNode)-1]
- nret := 0
- if len(fun.Child) > 3 {
- if ret := fun.Child[2]; ret.Kind == parser.BlockParen {
- nret = len(ret.Child)
- } else {
- nret = 1
- }
- }
- c.Emit(n, vm1.Return, int64(nret), int64(len(fun.Child[1].Child)))
-
- case parser.BlockStmt:
- nd.ipend = len(c.Code)
-
- case parser.LiteralString:
- p := len(c.Data)
- c.Data = append(c.Data, n.Block())
- c.Emit(n, vm1.Dup, int64(p))
-
- case parser.OpInferior:
- c.Emit(n, vm1.Lower)
-
- case parser.OpSubtract:
- c.Emit(n, vm1.Sub)
- }
-
- if err != nil {
- return false
- }
-
- // TODO: Fix this temporary hack to compute an entry point
- if c.Entry < 0 && len(scope) == 0 && n.Kind != parser.DeclFunc {
- c.Entry = len(c.Code) - 1
- if c.Entry >= 0 && len(c.Code) > c.Entry && c.Code[c.Entry][1] == vm1.Return {
- c.Entry++
- }
- }
- return true
- })
-
- if s, _, ok := c.getSym("main", ""); ok {
- if i, ok := c.codeIndex(s); ok {
- // Internal call is always relative to instruction pointer.
- c.Emit(nil, vm1.Call, int64(i-len(c.Code)))
- c.Entry = len(c.Code) - 1
- }
- log.Println(vm1.Disassemble(c.Code))
- }
-
- return
-}
-
-func (c *Compiler) AddSym(v any, name string) int { return c.addSym(v, name, false, nil) }
-
-func (c *Compiler) addSym(v any, name string, local bool, n *parser.Node) int {
- l := len(c.Data)
- if local {
- l = v.(int)
- } else {
- c.Data = append(c.Data, v)
- }
- c.symbols[name] = &symbol{index: l, local: local, node: n}
- return l
-}
-
-func (c *Compiler) Emit(n *parser.Node, op ...int64) int {
- var pos int64
- if n != nil {
- pos = int64(n.Pos())
- }
- op = append([]int64{pos}, op...)
- l := len(c.Code)
- c.Code = append(c.Code, op)
- return l
-}
-
-func (c *Compiler) codeIndex(s *symbol) (i int, ok bool) {
- i, ok = c.Data[s.index].(int)
- return
-}
-
-func pushScope(scope, name string) string {
- return strings.TrimPrefix(scope+"/"+name+"/", "/")
-}
-
-func popScope(scope string) string {
- scope = strings.TrimSuffix(scope, "/")
- j := strings.LastIndex(scope, "/")
- if j == -1 {
- return ""
- }
- return scope[:j]
-}
-
-// getSym searches for an existing symbol starting from the deepest scope.
-func (c *Compiler) getSym(name, scope string) (sym *symbol, sc string, ok bool) {
- for {
- if sym, ok = c.symbols[scope+name]; ok {
- return sym, scope, ok
- }
- scope = strings.TrimSuffix(scope, "/")
- i := strings.LastIndex(scope, "/")
- if i == -1 {
- scope = ""
- break
- }
- if scope = scope[:i]; scope == "" {
- break
- }
- }
- sym, ok = c.symbols[name]
- return sym, scope, ok
-}
diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go
deleted file mode 100644
index 5c7e9d3..0000000
--- a/codegen/compiler_test.go
+++ /dev/null
@@ -1,58 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "log"
- "testing"
-
- "github.com/gnolang/parscan/lang/golang"
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-func TestCodeGen(t *testing.T) {
- log.SetFlags(log.Lshortfile)
- for _, test := range codeGenTests {
- test := test
- t.Run("", func(t *testing.T) {
- c := NewCompiler()
- c.AddSym(fmt.Println, "println")
- n := &parser.Node{}
- var err error
- if n.Child, err = golang.GoParser.Parse(test.src, n); err != nil {
- t.Error(err)
- }
- errStr := ""
- if err = c.CodeGen(n); err != nil {
- errStr = err.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- t.Log("data:", c.Data)
- t.Log("code:", vm1.Disassemble(c.Code))
- if s := vm1.Disassemble(c.Code); s != test.asm {
- t.Errorf("got %#v, want %#v", s, test.asm)
- }
- })
- }
-}
-
-var codeGenTests = []struct {
- src, asm, sym, err string
-}{{ // #00
- src: "1+2",
- asm: "Push 1\nPush 2\nAdd\n",
-}, { // #01
- src: `println("Hello")`,
- asm: "Dup 0\nDup 1\nCallX 1\nPop 2\n",
-}, { // #02
- src: `a := 2; println(a)`,
- asm: "Push 2\nAssign 1\nDup 0\nDup 1\nCallX 1\nPop 2\n",
-}, { // #03
- src: `a := 2; if a < 3 {println(a)}; println("bye")`,
- asm: "Push 2\nAssign 1\nDup 1\nPush 3\nLower\nJumpFalse 5\nDup 0\nDup 1\nCallX 1\nPop 2\nDup 0\nDup 2\nCallX 1\nPop 2\n",
-}, { // #04
- src: "func add(a int, b int) int { return a + b }",
- asm: "Fdup -2\nFdup -3\nAdd\nReturn 1 2\n",
-}}
diff --git a/codegen/expression.go b/codegen/expression.go
deleted file mode 100644
index f3eb38a..0000000
--- a/codegen/expression.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "reflect"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
-)
-
-func postCallExpr(x extNode) error {
- switch x.Child[0].Kind {
- case parser.Ident:
- var numOut int
- s, _, ok := x.getSym(x.Child[0].Content(), "")
- if !ok {
- return fmt.Errorf("invalid symbol %s", x.Child[0].Content())
- }
- if i, ok := x.codeIndex(s); ok {
- // Internal call is always relative to instruction pointer.
- x.Emit(x.Node, vm1.Call, int64(i-len(x.Code)))
- } else {
- // External call, using absolute addr in symtable.
- x.Emit(x.Node, vm1.CallX, int64(len(x.Child[1].Child)))
- numOut = reflect.TypeOf(x.Data[s.index]).NumOut()
- }
- if !usedRet(x.anc) {
- x.Emit(x.Node, vm1.Pop, int64(numOut))
- }
- }
- return nil
-}
-
-func usedRet(n *parser.Node) bool {
- switch n.Kind {
- case parser.Undefined, parser.BlockStmt:
- return false
- default:
- return true
- }
-}
diff --git a/codegen/interpreter_test.go b/codegen/interpreter_test.go
deleted file mode 100644
index 2563677..0000000
--- a/codegen/interpreter_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package codegen
-
-import (
- "fmt"
- "testing"
-
- "github.com/gnolang/parscan/lang/golang"
-)
-
-func TestEval(t *testing.T) {
- for _, test := range evalTests {
- test := test
- t.Run("", func(t *testing.T) {
- interp := NewInterpreter(golang.GoParser)
- errStr := ""
- r, e := interp.Eval(test.src)
- if e != nil {
- errStr = e.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- res := fmt.Sprintf("%v", r)
- if res != test.res {
- t.Errorf("got %#v, want %#v", res, test.res)
- }
- })
- }
-}
-
-var evalTests = []struct {
- name, src, res, err string
-}{{ // #00
- src: "1 + 2",
- res: "3",
-}, { // #01
- src: "a := 2; a = a + 3",
- res: "5",
-}, { // #02
- src: "func f(a int) int { return a + 1 }; f(5)",
- res: "6",
-}, { // #03
- src: "func f(a int) (b int) { b = a + 1; return b }; f(5)",
- res: "6",
-}, { // #04
- src: "func f(a int) (b int) { b = a + 1; return }; f(5)",
- res: "6",
-}}
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 47ca6db..7f66594 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -1,37 +1,35 @@
package golang
-import (
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/lang"
-var GoScanner = &scanner.Scanner{
- CharProp: [scanner.ASCIILen]uint{
- '\t': scanner.CharSep,
- '\n': scanner.CharLineSep,
- ' ': scanner.CharSep,
- '!': scanner.CharOp,
- '"': scanner.CharStr,
- '%': scanner.CharOp,
- '&': scanner.CharOp,
- '\'': scanner.CharStr,
- '(': scanner.CharBlock,
- '*': scanner.CharOp,
- '+': scanner.CharOp,
- ',': scanner.CharGroupSep,
- '-': scanner.CharOp,
- '.': scanner.CharOp,
- '/': scanner.CharOp,
- ':': scanner.CharOp,
- ';': scanner.CharGroupSep,
- '<': scanner.CharOp,
- '=': scanner.CharOp,
- '>': scanner.CharOp,
- '[': scanner.CharBlock,
- '^': scanner.CharOp,
- '{': scanner.CharBlock,
- '|': scanner.CharOp,
- '~': scanner.CharOp,
+var GoSpec = &lang.Spec{
+ CharProp: [lang.ASCIILen]uint{
+ '\t': lang.CharSep,
+ '\n': lang.CharLineSep,
+ ' ': lang.CharSep,
+ '!': lang.CharOp,
+ '"': lang.CharStr,
+ '%': lang.CharOp,
+ '&': lang.CharOp,
+ '\'': lang.CharStr,
+ '(': lang.CharBlock,
+ '*': lang.CharOp,
+ '+': lang.CharOp,
+ ',': lang.CharGroupSep,
+ '-': lang.CharOp,
+ '`': lang.CharStr,
+ '.': lang.CharOp,
+ '/': lang.CharOp,
+ ':': lang.CharOp,
+ ';': lang.CharGroupSep,
+ '<': lang.CharOp,
+ '=': lang.CharOp,
+ '>': lang.CharOp,
+ '[': lang.CharBlock,
+ '^': lang.CharOp,
+ '{': lang.CharBlock,
+ '|': lang.CharOp,
+ '~': lang.CharOp,
},
End: map[string]string{
"(": ")",
@@ -44,61 +42,76 @@ var GoScanner = &scanner.Scanner{
"//": "\n",
},
BlockProp: map[string]uint{
- "(": scanner.CharBlock,
- "{": scanner.CharBlock,
- "[": scanner.CharBlock,
- `"`: scanner.CharStr | scanner.StrEsc | scanner.StrNonl,
- "`": scanner.CharStr,
- "'": scanner.CharStr | scanner.StrEsc,
- "/*": scanner.CharStr,
- "//": scanner.CharStr | scanner.ExcludeEnd | scanner.EosValidEnd,
+ "(": lang.CharBlock,
+ "{": lang.CharBlock,
+ "[": lang.CharBlock,
+ `"`: lang.CharStr | lang.StrEsc | lang.StrNonl,
+ "`": lang.CharStr,
+ "'": lang.CharStr | lang.StrEsc,
+ "/*": lang.CharStr,
+ "//": lang.CharStr | lang.ExcludeEnd | lang.EosValidEnd,
},
- SkipSemi: map[string]bool{
- "++": true,
- "--": true,
- "case": true,
- "chan": true,
- "const": true,
- "default": true,
- "defer": true,
- "else": true,
- "for": true,
- "func": true,
- "go": true,
- "goto": true,
- "if": true,
- "import": true,
- "interface": true,
- "map": true,
- "package": true,
- "range": true,
- "select": true,
- "struct": true,
- "switch": true,
- "type": true,
- "var": true,
- },
-}
+ TokenProps: map[string]lang.TokenProp{
+ // Block tokens (can be nested)
+ "{..}": {TokenId: lang.BraceBlock},
+ "[..]": {TokenId: lang.BracketBlock},
+ "(..)": {TokenId: lang.ParenBlock},
+
+ // String tokens (not nested)
+ "//..": {TokenId: lang.Comment},
+ "/*..": {TokenId: lang.Comment},
+ `".."`: {TokenId: lang.String},
+ "`..`": {TokenId: lang.String},
-var GoParser = &parser.Parser{
- Scanner: GoScanner,
- Spec: map[string]parser.NodeSpec{
- ".": {Kind: parser.OpDot, Flags: parser.Call, Order: 3},
- "*": {Kind: parser.OpMultiply, Order: 4},
- "+": {Kind: parser.OpAdd, Order: 5},
- "-": {Kind: parser.OpSubtract, Order: 5},
- "<": {Kind: parser.OpInferior, Order: 6},
- ":=": {Kind: parser.OpDefine, Order: 7},
- "=": {Kind: parser.OpAssign, Order: 7},
- "if": {Kind: parser.StmtIf, Flags: parser.Stmt | parser.ExprSep},
- "func": {Kind: parser.DeclFunc, Flags: parser.Decl | parser.Call},
- "return": {Kind: parser.StmtReturn, Flags: parser.Stmt},
- "{..}": {Kind: parser.BlockStmt, Flags: parser.ExprSep},
- "(..)": {Kind: parser.BlockParen, Flags: parser.Call},
- `".."`: {Kind: parser.LiteralString},
- "//..": {Kind: parser.Comment},
- "/*..": {Kind: parser.Comment},
+ // Separators
+ ",": {TokenId: lang.Comma},
+ ";": {TokenId: lang.Semicolon},
+ ".": {TokenId: lang.Period},
+ ":": {TokenId: lang.Colon},
+
+ // Operators
+ "&": {TokenId: lang.And},
+ "*": {TokenId: lang.Mul},
+ "+": {TokenId: lang.Add},
+ "-": {TokenId: lang.Sub},
+ "=": {TokenId: lang.Assign},
+ "<": {TokenId: lang.Less},
+ ">": {TokenId: lang.Greater},
+ "^": {TokenId: lang.Xor},
+ "~": {TokenId: lang.Tilde},
+ ":=": {TokenId: lang.Define},
+ "==": {TokenId: lang.Equal},
+ "<=": {TokenId: lang.LessEqual},
+ ">=": {TokenId: lang.GreaterEqual},
+ "->": {TokenId: lang.Arrow},
+ "++": {TokenId: lang.Inc, SkipSemi: true},
+ "--": {TokenId: lang.Dec, SkipSemi: true},
+
+ // Reserved keywords
+ "break": {TokenId: lang.Break},
+ "case": {TokenId: lang.Case, SkipSemi: true},
+ "chan": {TokenId: lang.Chan, SkipSemi: true},
+ "const": {TokenId: lang.Const, SkipSemi: true},
+ "continue": {TokenId: lang.Continue},
+ "default": {TokenId: lang.Default, SkipSemi: true},
+ "defer": {TokenId: lang.Defer, SkipSemi: true},
+ "else": {TokenId: lang.Else, SkipSemi: true},
+ "fallthrough": {TokenId: lang.Fallthrough},
+ "for": {TokenId: lang.For, SkipSemi: true},
+ "func": {TokenId: lang.Func, SkipSemi: true},
+ "go": {TokenId: lang.Go, SkipSemi: true},
+ "goto": {TokenId: lang.Goto, SkipSemi: true},
+ "if": {TokenId: lang.If, SkipSemi: true},
+ "import": {TokenId: lang.Import, SkipSemi: true},
+ "interface": {TokenId: lang.Interface, SkipSemi: true},
+ "map": {TokenId: lang.Map, SkipSemi: true},
+ "package": {TokenId: lang.Package, SkipSemi: true},
+ "range": {TokenId: lang.Range, SkipSemi: true},
+ "return": {TokenId: lang.Return},
+ "select": {TokenId: lang.Select, SkipSemi: true},
+ "struct": {TokenId: lang.Struct, SkipSemi: true},
+ "switch": {TokenId: lang.Switch, SkipSemi: true},
+ "type": {TokenId: lang.Type, SkipSemi: true},
+ "var": {TokenId: lang.Var, SkipSemi: true},
},
}
-
-func init() { GoParser.Init() }
diff --git a/lang/spec.go b/lang/spec.go
new file mode 100644
index 0000000..c5b50a4
--- /dev/null
+++ b/lang/spec.go
@@ -0,0 +1,43 @@
+package lang
+
+const (
+ CharIllegal = 1 << iota
+ CharOp
+ CharNum
+ CharAlpha
+ CharSep
+ CharLineSep
+ CharGroupSep
+ CharStr
+ CharBlock
+ StrEsc
+ StrNonl
+ ExcludeEnd // exclude end delimiter from content
+ EosValidEnd // end of input string terminates block or string token
+)
+
+const ASCIILen = 1 << 7 // 128
+
+type TokenProp struct {
+ TokenId
+ SkipSemi bool // skip automatic semicolon insertion at newline
+}
+
+type Spec struct {
+ CharProp [ASCIILen]uint // special character properties
+ End map[string]string // end delimiters, indexed by start
+ BlockProp map[string]uint // block properties
+ TokenProps map[string]TokenProp // token properties
+ DotNum bool // true if a number can start with '.'
+ IdAscii bool // true if an identifier may contain ASCII characters only
+ Num_ bool // true if a number can contain _ character
+}
+
+// HasInit indicates whether a statement may contain a simple init statement
+var HasInit = map[TokenId]bool{
+ Case: true,
+ For: true,
+ If: true,
+ Select: true,
+ Switch: true,
+}
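
A minimal sketch of how a Spec is intended to be consumed (hypothetical usage; it assumes the scanner.NewScanner constructor and the Scan(src, endSemi) method referenced elsewhere in this change):

    package main

    import (
    	"fmt"

    	"github.com/gnolang/parscan/lang/golang"
    	"github.com/gnolang/parscan/scanner"
    )

    func main() {
    	// Build a scanner from the Go spec defined in lang/golang.
    	s := scanner.NewScanner(golang.GoSpec)
    	// Scan one statement; the boolean asks for a terminating semicolon,
    	// mirroring how the parser invokes Scan later in this change.
    	toks, err := s.Scan("a := 1 + 2", true)
    	if err != nil {
    		fmt.Println("scan error:", err)
    		return
    	}
    	for _, t := range toks {
    		fmt.Println(t.Id, t.Str)
    	}
    }
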
diff --git a/lang/token.go b/lang/token.go
new file mode 100644
index 0000000..3a980a4
--- /dev/null
+++ b/lang/token.go
@@ -0,0 +1,110 @@
+package lang
+
+type TokenId int
+
+const (
+ Illegal = iota
+ Comment
+ Ident
+ Int
+ Float
+ Imag
+ Char
+ String
+
+ // Operators
+ Add
+ Sub
+ Mul
+ Quo
+ Rem
+ And
+ Or
+ Xor
+ Shl // <<
+ Shr // >>
+ AndNot // &^
+
+ AddAssign
+ SubAssign
+ MulAssign
+ QuoAssign
+ RemAssign
+ AndAssign
+ OrAssign
+ XorAssign
+ ShlAssign
+ ShrAssign
+ AndNotAssign
+
+ Land
+ Lor
+ Arrow
+ Inc
+ Dec
+ Equal
+ Less
+ Greater
+ Assign
+ Not
+ Plus // unary +
+ Minus // unary -
+ Address // unary &
+ Deref // unary *
+ NotEqual
+ LessEqual
+ GreaterEqual
+ Define
+ Ellipsis
+ Period
+ Tilde
+
+ // Separators
+ Comma
+ Semicolon
+ Colon
+
+ // Block tokens
+ ParenBlock // (..)
+ BracketBlock // [..]
+ BraceBlock // {..}
+
+ // Reserved keywords
+ Break
+ Case
+ Chan
+ Const
+ Continue
+ Default
+ Defer
+ Else
+ Fallthrough
+ For
+ Func
+ Go
+ Goto
+ If
+ Import
+ Interface
+ Map
+ Package
+ Range
+ Return
+ Select
+ Struct
+ Switch
+ Type
+ Var
+
+ // Internal tokens (no corresponding keyword)
+ Call
+ CallX
+ Label
+ JumpFalse
+ Enter // entering a function context
+ Exit // exiting a function context
+)
+
+func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var }
+func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde }
+func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock }
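
The classification helpers above rely only on the declaration order of the constants; a small hypothetical test (not part of this change) illustrating their use:

    package lang_test

    import (
    	"testing"

    	"github.com/gnolang/parscan/lang"
    )

    // TestTokenClasses is an illustrative check of the range-based predicates.
    func TestTokenClasses(t *testing.T) {
    	if !lang.TokenId(lang.Func).IsKeyword() {
    		t.Error("Func should be classified as a keyword")
    	}
    	if !lang.TokenId(lang.Add).IsOperator() {
    		t.Error("Add should be classified as an operator")
    	}
    	if !lang.TokenId(lang.ParenBlock).IsBlock() {
    		t.Error("ParenBlock should be classified as a block token")
    	}
    }
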
diff --git a/main.go b/main.go
index 4ae57da..6994038 100644
--- a/main.go
+++ b/main.go
@@ -9,10 +9,9 @@ import (
"log"
"os"
- "github.com/gnolang/parscan/codegen"
"github.com/gnolang/parscan/lang/golang"
+ "github.com/gnolang/parscan/parser"
"github.com/gnolang/parscan/scanner"
- "github.com/gnolang/parscan/vm0"
)
type Interpreter interface {
@@ -61,32 +60,20 @@ func repl(interp Interpreter, in io.Reader) (err error) {
}
func run(arg []string) (err error) {
- var i int
-
rflag := flag.NewFlagSet("run", flag.ContinueOnError)
- rflag.IntVar(&i, "i", 1, "set interpreter version for execution, possible values: 0, 1")
rflag.Usage = func() {
fmt.Println("Usage: parscan run [options] [path] [args]")
- fmt.Println("Options:")
- rflag.PrintDefaults()
+ //fmt.Println("Options:")
+ //rflag.PrintDefaults()
}
if err = rflag.Parse(arg); err != nil {
return err
}
args := rflag.Args()
- var interp Interpreter
- switch i {
- case 0:
- interp = vm0.New(golang.GoParser)
- case 1:
- interp = codegen.NewInterpreter(golang.GoParser)
- interp.(*codegen.Interpreter).AddSym(fmt.Println, "println")
- default:
- return fmt.Errorf("invalid interpreter version: %v", i)
- }
+ interp := parser.NewInterpreter(scanner.NewScanner(golang.GoSpec))
+ interp.AddSym("println", fmt.Println)
- log.Println("args:", args)
in := os.Stdin
if len(args) > 0 {
if in, err = os.Open(arg[0]); err != nil {
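
The new run path wires scanner, parser/compiler and VM together; a condensed usage sketch based on the calls visible above and in the interpreter tests (the printed result follows the corresponding test case):

    package main

    import (
    	"fmt"

    	"github.com/gnolang/parscan/lang/golang"
    	"github.com/gnolang/parscan/parser"
    	"github.com/gnolang/parscan/scanner"
    )

    func main() {
    	interp := parser.NewInterpreter(scanner.NewScanner(golang.GoSpec))
    	interp.AddSym("println", fmt.Println) // expose a runtime symbol to interpreted code

    	res, err := interp.Eval("func f(a int) int { return a + 2 }; f(3)")
    	if err != nil {
    		fmt.Println("eval error:", err)
    		return
    	}
    	fmt.Println(res) // 5, per the corresponding interpreter test
    }
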
diff --git a/parser/README.md b/parser/README.md
index b8bc0d4..d46608c 100644
--- a/parser/README.md
+++ b/parser/README.md
@@ -1,71 +1,112 @@
# Parser
-A parser takes an array of tokens (produced by the scanner) in input and
-returns a node representing a syntax tree. A node is an object
-containing a kind, the corresponding token and the ordered references to
-descendent nodes.
+See if we can build an almost single-pass compiler for Go doing
+syntax-directed translation, without any complex data structure (no
+syntax tree), only lists of tokens.
-```mermaid
-graph LR
-s[ ] --> |source| a(scanner)
---> |tokens| b(parser)
---> |AST| c[ ]
-subgraph parser
- b
-end
-style s height:0px;
-style c height:0px;
-```
+The goal is to have the shortest and simplest path from source to
+bytecode.
-A goal is to make the parser generic enough so it can generate syntax
-trees for most of existing programming languages (no claim of generality
-yet), provided a small set of generating rules per language, and a small
-set of validating rules (yet to be defined) to detect invalid
-constructs.
+## Design
-The input tokens are particular in the sense that they include classical
-lexical items such as words, separators, numbers, but also strings and
-nested blocks, which are resolved at scanning stage rather than parsing
-stage. See the scanner for more details.
+The input of the parser is a list of tokens produced by the scanner.
+Multiple tokens are processed at once. The minimal set needed to get
+a meaningful result (not an error or nil) is a complete statement.
-The language specification includes the following:
+The output of the parser is also a list of tokens, to be consumed by
+the compiler to produce bytecode. The output token set is identical
+to the bytecode instruction set except that:
-- a scanner specification, to produce the set of possible tokens.
-- a map of node specification per token name. The node specification
- defines some parameters influing how the tree is generated.
+- code locations may be provided as labels instead of numerical
+ values,
+- memory locations for constants and variables may be provided as
+ symbol names instead of numerical values.
-## Development status
+## Status
-A successful test must be provided to check the status.
+Go language support:
-- [x] binary operator expressions
-- [x] unary operator (prefix) expressions
-- [ ] unary operator (suffix) expressions
-- [x] operator precedence rules
-- [x] parenthesis in expressions
-- [x] call expressions
-- [ ] nested calls
-- [x] index expressions
-- [x] single assignments
-- [ ] multi assignments
-- [x] simple `if` statement (no `else`)
-- [ ] full `if` statement (including `else`, `else if`)
-- [x] init expressions in `if` statements
-- [x] statement blocks
-- [x] comments
+- [x] named functions
+- [ ] anonymous functions (closures)
+- [ ] methods
+- [x] internal function calls
+- [x] external function calls (calling runtime symbols in interpreter)
+- [ ] export to runtime
+- [ ] builtin calls (new, make, copy, delete, len, cap, ...)
+- [ ] out of order declarations
+- [ ] arbitrary precision constants
+- [x] basic types
+- [ ] complete numeric types
+- [x] function types
+- [ ] variadic functions
+- [ ] pointers
+- [ ] structures
+- [ ] embedded structures
+- [ ] recursive structures
+- [ ] interfaces
+- [ ] arrays, slices
+- [ ] maps
+- [ ] deterministic maps
+- [ ] channel types
+- [ ] channel operations
+- [x] var defined by assign :=
+- [x] var assign =
+- [ ] var declaration
+- [ ] type declaration
+- [x] func declaration
+- [ ] const declaration
+- [ ] iota expression
+- [ ] defer statement
+- [ ] recover statement
+- [ ] go statement
+- [x] if statement (including else and else if)
- [ ] for statement
- [ ] switch statement
+- [ ] break statement
+- [ ] continue statement
+- [ ] fallthrough statement
+- [ ] goto statement
- [ ] select statement
-- [x] return statement
-- [x] function declaration
-- [ ] method declaration
-- [ ] anonymous functions (closures)
-- [ ] type declaration
-- [ ] var, const, type single declaration
-- [ ] var, const, type multi declaration
-- [ ] type parametric expressions
-- [x] literal numbers (see scanner)
-- [x] literal strings
-- [ ] composite literal
-- [ ] import statements
-- [ ] go.mod syntax
+- [x] binary operators
+- [ ] unary operators
+- [ ] logical operators && and ||
+- [ ] assign operators
+- [ ] operator precedence
+- [x] parenthesis expressions
+- [x] call expressions
+- [ ] index expressions
+- [ ] selector expressions
+- [ ] type conversions
+- [ ] type assertions
+- [ ] parametric types (generic)
+- [ ] type parametric functions (generic)
+- [ ] type constraints (generic)
+- [ ] type checking
+- [ ] comment pragmas
+- [ ] packages import
+- [ ] modules
+
+Other items:
+
+- [x] REPL
+- [x] multiline statements in REPL
+- [ ] completion, history in REPL
+- [x] eval strings
+- [x] eval files (including stdin, ...)
+- [x] debug traces for scanner, parser, compiler, bytecode vm
+- [x] simple interpreter tests to exercise from source to execution
+- [ ] compile time error detection and diagnosis
+- [ ] stack dump
+- [ ] symbol tables, data tables, code bound to source lines
+- [ ] interactive debugger: breakpoints, continue, introspection, ...
+- [ ] machine level debugger
+- [ ] source level debugger
+- [ ] replay debugger, backward instruction execution
+- [ ] vm monitor: live memory / code display during run
+- [ ] stdlib wrappers a la yaegi
+- [ ] system and environment sandboxing
+- [ ] build constraints (arch, sys, etc)
+- [ ] test command (running go test / benchmark / example files)
+- [ ] skipping / including test files
+- [ ] test coverage
+- [ ] fuzzy tests for scanner, vm, ...
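
To see the two stages described above, a hypothetical probe can print the parser's intermediate token list (labels and symbols still by name) and then the generated bytecode, using the Compiler introduced below (API as defined in this change):

    package main

    import (
    	"fmt"

    	"github.com/gnolang/parscan/lang/golang"
    	"github.com/gnolang/parscan/parser"
    	"github.com/gnolang/parscan/scanner"
    )

    func main() {
    	c := parser.NewCompiler(scanner.NewScanner(golang.GoSpec))

    	// Stage 1: source to the intermediate token list.
    	toks, err := c.Parse("a := 1 + 2; a")
    	if err != nil {
    		fmt.Println(err)
    		return
    	}
    	fmt.Println(toks)

    	// Stage 2: token list to bytecode; jump targets and data indices become numeric.
    	if err := c.Codegen(toks); err != nil {
    		fmt.Println(err)
    		return
    	}
    	c.PrintCode()
    	c.PrintData()
    }
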
diff --git a/parser/compiler.go b/parser/compiler.go
new file mode 100644
index 0000000..5b0ed81
--- /dev/null
+++ b/parser/compiler.go
@@ -0,0 +1,250 @@
+package parser
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+ "strconv"
+
+ "github.com/gnolang/parscan/lang"
+ "github.com/gnolang/parscan/scanner"
+ "github.com/gnolang/parscan/vm"
+)
+
+type Compiler struct {
+ *Parser
+ Code [][]int64 // produced code, to fill VM with
+ Data []any // produced data, will be at the bottom of VM stack
+ Entry int // offset in Code to start execution from (skip function definitions)
+
+ strings map[string]int // locations of strings in Data
+}
+
+func NewCompiler(scanner *scanner.Scanner) *Compiler {
+ return &Compiler{
+ Parser: &Parser{Scanner: scanner, symbols: initUniverse(), labelCount: map[string]int{}},
+ Entry: -1,
+ strings: map[string]int{},
+ }
+}
+
+func (c *Compiler) Emit(op ...int64) int {
+ op = append([]int64{}, op...)
+ l := len(c.Code)
+ c.Code = append(c.Code, op)
+ return l
+}
+
+func (c *Compiler) AddSym(name string, value any) int {
+ p := len(c.Data)
+ c.Data = append(c.Data, value)
+ c.Parser.AddSym(p, name, value)
+ return p
+}
+
+func (c *Compiler) Codegen(tokens Tokens) (err error) {
+ fixList := Tokens{}
+ function := []string{""}
+
+ log.Println("Codegen tokens:", tokens)
+
+ for i, t := range tokens {
+ scope := function[len(function)-1]
+ switch t.Id {
+ case lang.Int:
+ n, err := strconv.Atoi(t.Str)
+ if err != nil {
+ return err
+ }
+ c.Emit(int64(t.Pos), vm.Push, int64(n))
+
+ case lang.String:
+ s := t.Block()
+ i, ok := c.strings[s]
+ if !ok {
+ i = len(c.Data)
+ c.Data = append(c.Data, s)
+ c.strings[s] = i
+ }
+ c.Emit(int64(t.Pos), vm.Dup, int64(i))
+
+ case lang.Add:
+ c.Emit(int64(t.Pos), vm.Add)
+
+ case lang.Sub:
+ c.Emit(int64(t.Pos), vm.Sub)
+
+ case lang.Less:
+ c.Emit(int64(t.Pos), vm.Lower)
+
+ case lang.Call:
+ c.Emit(int64(t.Pos), vm.Call)
+
+ case lang.CallX:
+ c.Emit(int64(t.Pos), vm.CallX, int64(t.Beg))
+
+ case lang.Define:
+ // TODO: support assignment to local, composite objects
+ st := tokens[i-1]
+ l := len(c.Data)
+ c.Data = append(c.Data, nil)
+ c.addSym(l, st.Str, nil, symVar, nil, false)
+ c.Emit(int64(st.Pos), vm.Assign, int64(l))
+
+ case lang.Enter:
+ // TODO: merge with label ?
+ function = append(function, t.Str[6:])
+
+ case lang.Exit:
+ function = function[:len(function)-1]
+
+ case lang.Assign:
+ st := tokens[i-1]
+ s, _, ok := c.getSym(st.Str, scope)
+ if !ok {
+ return fmt.Errorf("symbol not found: %s", st.Str)
+ }
+ if s.local {
+ c.Emit(int64(st.Pos), vm.Fassign, int64(s.index))
+ } else {
+ c.Emit(int64(st.Pos), vm.Assign, int64(s.index))
+ }
+
+ case lang.Equal:
+ c.Emit(int64(t.Pos), vm.Equal)
+
+ case lang.Ident:
+ if i < len(tokens)-1 {
+ switch t1 := tokens[i+1]; t1.Id {
+ case lang.Define, lang.Assign, lang.Colon:
+ continue
+ }
+ }
+ s, _, ok := c.getSym(t.Str, scope)
+ if !ok {
+ return fmt.Errorf("symbol not found: %s", t.Str)
+ }
+ if s.local {
+ c.Emit(int64(t.Pos), vm.Fdup, int64(s.index))
+ } else {
+ c.Emit(int64(t.Pos), vm.Dup, int64(s.index))
+ }
+
+ case lang.Label:
+ // If the label is a function, the symbol already exists
+ s, _, ok := c.getSym(t.Str, scope)
+ lc := len(c.Code)
+ if ok {
+ s.value = lc
+ if s.kind == symFunc {
+ // The label is a function entry point; register its code address in Data.
+ s.index = len(c.Data)
+ c.Data = append(c.Data, lc)
+ } else {
+ c.Data[s.index] = lc
+ }
+ } else {
+ ld := len(c.Data)
+ c.Data = append(c.Data, lc)
+ c.addSym(ld, t.Str, lc, symLabel, nil, false)
+ }
+
+ case lang.JumpFalse:
+ label := t.Str[10:]
+ i := 0
+ if s, _, ok := c.getSym(label, scope); !ok {
+ // t.Beg contains the position in code which needs to be fixed.
+ t.Beg = len(c.Code)
+ fixList = append(fixList, t)
+ } else {
+ i = s.value.(int)
+ }
+ c.Emit(int64(t.Pos), vm.JumpFalse, int64(i))
+
+ case lang.Goto:
+ label := t.Str[5:]
+ i := 0
+ if s, _, ok := c.getSym(label, scope); !ok {
+ t.Beg = len(c.Code)
+ fixList = append(fixList, t)
+ } else {
+ i = s.value.(int)
+ }
+ c.Emit(int64(t.Pos), vm.Jump, int64(i))
+
+ case lang.Return:
+ c.Emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End))
+
+ default:
+ return fmt.Errorf("Codegen: unsupported token %v", t)
+ }
+ }
+
+ // Finally we fix unresolved labels for jump destinations.
+ for _, t := range fixList {
+ var label string
+ switch t.Id {
+ case lang.Goto:
+ label = t.Str[5:]
+ case lang.JumpFalse:
+ label = t.Str[10:]
+ }
+ s, _, ok := c.getSym(label, "")
+ if !ok {
+ return fmt.Errorf("label not found: %v", label)
+ }
+ c.Code[t.Beg][2] = int64(s.value.(int) - t.Beg)
+
+ }
+ return err
+}
+
+func (c *Compiler) PrintCode() {
+ labels := map[int]string{}
+ for name, sym := range c.symbols {
+ if sym.kind == symLabel || sym.kind == symFunc {
+ labels[sym.value.(int)] = name
+ }
+ }
+ fmt.Println("# Code:")
+ for i, l := range c.Code {
+ if label, ok := labels[i]; ok {
+ fmt.Println(label + ":")
+ }
+ extra := ""
+ switch l[1] {
+ case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.Calli:
+ if d, ok := labels[i+(int)(l[2])]; ok {
+ extra = "// " + d
+ }
+ }
+ fmt.Printf("%4d %-14v %v\n", i, vm.CodeString(l), extra)
+ }
+}
+
+type entry struct {
+ name string
+ *symbol
+}
+
+func (c *Compiler) PrintData() {
+ dict := map[int]entry{}
+ for name, sym := range c.symbols {
+ if !sym.used || sym.local || sym.kind == symLabel {
+ continue
+ }
+ dict[sym.index] = entry{name, sym}
+ }
+ fmt.Println("# Data:")
+ for i, d := range c.Data {
+ fmt.Printf("%4d %T %v %v\n", i, d, d, dict[i])
+ }
+}
+
+func (c *Compiler) NumIn(i int) (int, bool) {
+ t := reflect.TypeOf(c.Data[i])
+ if t.Kind() == reflect.Func {
+ return t.NumIn(), t.IsVariadic()
+ }
+ return -1, false
+}
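
Each row of Code is a flat []int64: the source position first, then the opcode and its operands. A short hypothetical fragment showing the layout through Emit (position 7 and operand 42 are arbitrary values chosen for illustration):

    package main

    import (
    	"fmt"

    	"github.com/gnolang/parscan/lang/golang"
    	"github.com/gnolang/parscan/parser"
    	"github.com/gnolang/parscan/scanner"
    	"github.com/gnolang/parscan/vm"
    )

    func main() {
    	c := parser.NewCompiler(scanner.NewScanner(golang.GoSpec))
    	// Emit a "Push 42" instruction recorded at source position 7.
    	ip := c.Emit(7, vm.Push, 42)
    	fmt.Println(ip, c.Code[ip]) // instruction index, then the raw [pos opcode operand] row
    }
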
diff --git a/parser/dot.go b/parser/dot.go
deleted file mode 100644
index 11d5014..0000000
--- a/parser/dot.go
+++ /dev/null
@@ -1,89 +0,0 @@
-package parser
-
-import (
- "bytes"
- "fmt"
- "io"
- "log"
- "os/exec"
- "strings"
-)
-
-func (*Parser) Adot(nodes []*Node, c string) {
- if c == "" {
- return
- }
- n := &Node{Child: nodes}
- n.Dot(c, "")
-}
-
-func (n *Node) Dot(c, s string) {
- dw, cmd := dotWriter(c)
- n.astDot(dw, s)
- if cmd == nil {
- return
- }
- if err := cmd.Wait(); err != nil {
- log.Fatal(err)
- }
-}
-
-func (n *Node) Sdot(s string) string {
- var buf bytes.Buffer
- n.astDot(&buf, s)
- return buf.String()
-}
-
-// TODO: rewrite it using Walk2
-func (n *Node) astDot(out io.Writer, label string) {
- fmt.Fprintf(out, "digraph ast { ")
- if label != "" {
- fmt.Fprintf(out, "labelloc=\"t\"; label=\"%s\";", label)
- }
- anc := map[*Node]*Node{}
- index := map[*Node]int{}
- count := 0
- n.Walk(func(nod *Node) bool {
- index[nod] = count
- count++
-
- for _, c := range nod.Child {
- anc[c] = nod
- }
- name := ""
- if nod.Token != nil {
- name = strings.ReplaceAll(nod.Name(), `"`, `\"`)
- }
- fmt.Fprintf(out, "%d [label=\"%s\"]; ", index[nod], name)
- if anc[nod] != nil {
- fmt.Fprintf(out, "%d -> %d; ", index[anc[nod]], index[nod])
- }
- return true
- }, nil)
- fmt.Fprintf(out, "}")
- if c, ok := out.(io.Closer); ok {
- c.Close()
- }
-}
-
-type nopCloser struct {
- io.Writer
-}
-
-func (nopCloser) Close() error { return nil }
-
-func dotWriter(dotCmd string) (io.WriteCloser, *exec.Cmd) {
- if dotCmd == "" {
- return nopCloser{io.Discard}, nil
- }
- fields := strings.Fields(dotCmd)
- cmd := exec.Command(fields[0], fields[1:]...)
- dotin, err := cmd.StdinPipe()
- if err != nil {
- log.Fatal(err)
- }
- if err = cmd.Start(); err != nil {
- log.Fatal(err)
- }
- return dotin, cmd
-}
diff --git a/codegen/interpreter.go b/parser/interpreter.go
index 18cc5f8..72fc667 100644
--- a/codegen/interpreter.go
+++ b/parser/interpreter.go
@@ -1,32 +1,22 @@
-package codegen
+package parser
import (
- "os"
-
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/vm1"
+ "github.com/gnolang/parscan/scanner"
+ "github.com/gnolang/parscan/vm"
)
const debug = true
type Interpreter struct {
- *parser.Parser
*Compiler
- *vm1.Machine
+ *vm.Machine
}
-func NewInterpreter(p *parser.Parser) *Interpreter {
- return &Interpreter{p, NewCompiler(), &vm1.Machine{}}
+func NewInterpreter(s *scanner.Scanner) *Interpreter {
+ return &Interpreter{NewCompiler(s), &vm.Machine{}}
}
func (i *Interpreter) Eval(src string) (res any, err error) {
- n := &parser.Node{}
- if n.Child, err = i.Parse(src, n); err != nil {
- return res, err
- }
- if debug {
- n.Dot(os.Getenv("DOT"), "")
- }
codeOffset := len(i.Code)
dataOffset := 0
if codeOffset > 0 {
@@ -34,13 +24,22 @@ func (i *Interpreter) Eval(src string) (res any, err error) {
dataOffset = len(i.Data)
}
i.PopExit() // Remove last exit from previous run (re-entrance).
- if err = i.CodeGen(n); err != nil {
+
+ t, err := i.Parse(src)
+ if err != nil {
+ return res, err
+ }
+ if err = i.Codegen(t); err != nil {
return res, err
}
i.Push(i.Data[dataOffset:]...)
i.PushCode(i.Code[codeOffset:]...)
- i.PushCode([]int64{0, vm1.Exit})
+ i.PushCode([]int64{0, vm.Exit})
i.SetIP(max(codeOffset, i.Entry))
+ if debug {
+ i.PrintData()
+ i.PrintCode()
+ }
err = i.Run()
return i.Top(), err
}
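
Eval pops the previous Exit instruction and appends to the existing code and data segments, so successive calls share state; this is what the REPL relies on. A hedged sketch, assuming globals defined in one call remain visible in the next:

    package main

    import (
    	"fmt"

    	"github.com/gnolang/parscan/lang/golang"
    	"github.com/gnolang/parscan/parser"
    	"github.com/gnolang/parscan/scanner"
    )

    func main() {
    	interp := parser.NewInterpreter(scanner.NewScanner(golang.GoSpec))

    	// First call defines a global; the second reuses it (REPL-style state).
    	if _, err := interp.Eval("a := 1 + 2"); err != nil {
    		fmt.Println(err)
    		return
    	}
    	res, err := interp.Eval("a + 4")
    	if err != nil {
    		fmt.Println(err)
    		return
    	}
    	fmt.Println(res) // 7, assuming globals persist across Eval calls
    }
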
diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go
new file mode 100644
index 0000000..ee927a5
--- /dev/null
+++ b/parser/interpreter_test.go
@@ -0,0 +1,66 @@
+package parser_test
+
+import (
+ "fmt"
+ "log"
+ "testing"
+
+ "github.com/gnolang/parscan/lang/golang"
+ "github.com/gnolang/parscan/parser"
+ "github.com/gnolang/parscan/scanner"
+)
+
+var GoScanner *scanner.Scanner
+
+func init() {
+ log.SetFlags(log.Lshortfile)
+ GoScanner = scanner.NewScanner(golang.GoSpec)
+}
+
+func TestEval(t *testing.T) {
+ for _, test := range goTests {
+ test := test
+ t.Run("", func(t *testing.T) {
+ interp := parser.NewInterpreter(GoScanner)
+ errStr := ""
+ r, e := interp.Eval(test.src)
+ t.Log(r, e)
+ if e != nil {
+ errStr = e.Error()
+ }
+ if errStr != test.err {
+ t.Errorf("got error %#v, want error %#v", errStr, test.err)
+ }
+ if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res {
+ t.Errorf("got %#v, want %#v", res, test.res)
+ }
+ })
+ }
+}
+
+var goTests = []struct {
+ src, res, err string
+}{
+ 0: {src: "", res: "<nil>"},
+ 1: {src: "1+2", res: "3"},
+ 2: {src: "1+", err: "block not terminated"},
+ 3: {src: "a := 1 + 2; b := 0; a + 1", res: "4"},
+ 4: {src: "1+(2+3)", res: "6"},
+ 5: {src: "(1+2)+3", res: "6"},
+ 6: {src: "(6+(1+2)+3)+5", res: "17"},
+ 7: {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"},
+ 8: {src: "a := 2; a = 3; a", res: "3"},
+ 9: {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"},
+ 10: {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"},
+ 11: {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"},
+ 12: {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"},
+ 13: {src: "func f() int {return 2}; a := f(); a", res: "2"},
+ 14: {src: "func f() int {return 2}; f()", res: "2"},
+ 15: {src: "func f(a int) int {return a+2}; f(3)", res: "5"},
+ 16: {src: "func f(a int) int {if a < 4 {a = 5}; return a }; f(3)", res: "5"},
+ 17: {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"},
+ 18: {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"},
+ 19: {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"},
+ 20: {src: "func f(a int, b int, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"},
+ 21: {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"},
+}
diff --git a/parser/kind.go b/parser/kind.go
deleted file mode 100644
index d004471..0000000
--- a/parser/kind.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package parser
-
-import "fmt"
-
-// kind defines the AST node kind. Its name is the concatenation
-// of a category (Block, Decl, Expr, Op, Stmt) and an instance name.
-type Kind int
-
-const (
- Undefined = Kind(iota)
- BlockParen
- BlockStmt
- Comment
- DeclFunc
- ExprCall
- Ident
- LiteralNumber
- LiteralString
- OpAdd
- OpAssign
- OpDefine
- OpDot
- OpInferior
- OpMultiply
- OpSubtract
- StmtIf
- StmtReturn
-)
-
-var kindString = [...]string{
- Undefined: "Undefined",
- BlockParen: "BlockParen",
- BlockStmt: "BlockStmt",
- Comment: "Comment",
- DeclFunc: "DeclFunc",
- ExprCall: "ExprCall",
- Ident: "Ident",
- LiteralString: "LiteralString",
- LiteralNumber: "LiteralNumber",
- OpAdd: "OpAdd",
- OpAssign: "OpAssign",
- OpDefine: "OpDefine",
- OpDot: "OpDot",
- OpInferior: "OpInferior",
- OpMultiply: "OpMultiply",
- OpSubtract: "OpSubtract",
- StmtIf: "StmtIf",
- StmtReturn: "StmtReturn",
-}
-
-func (k Kind) String() string {
- if int(k) < 0 || int(k) > len(kindString) {
- return fmt.Sprintf("unknown kind %d", k)
- }
- return kindString[k]
-}
diff --git a/parser/node.go b/parser/node.go
deleted file mode 100644
index b6e34cd..0000000
--- a/parser/node.go
+++ /dev/null
@@ -1,50 +0,0 @@
-package parser
-
-import "github.com/gnolang/parscan/scanner"
-
-type Node struct {
- Child []*Node // sub-tree nodes
- *scanner.Token // token at origin of the node
- Kind // Node kind, depends on the language spec
-}
-
-// TODO: remove it in favor of Walk2
-func (n *Node) Walk(in, out func(*Node) bool) (stop bool) {
- if in != nil && !in(n) {
- return true
- }
- for _, child := range n.Child {
- if child.Walk(in, out) {
- return
- }
- }
- if out != nil {
- stop = !out(n)
- }
- return
-}
-
-// Idem to walk, but also propagates the ancestor of visited node and child index.
-func (n *Node) Walk2(a *Node, i int, in, out func(*Node, *Node, int) bool) (stop bool) {
- if in != nil && !in(n, a, i) {
- return true
- }
- for j, child := range n.Child {
- if child.Walk2(n, j, in, out) {
- return
- }
- }
- if out != nil {
- stop = !out(n, a, i)
- }
- return
-}
-
-func (n *Node) RemoveChild(i int) {
- n.Child = append(n.Child[:i], n.Child[i+1:]...)
-}
-
-func (n *Node) InsertChild(node *Node, i int) {
- n.Child = append(n.Child[:i+1], n.Child[i:]...)
- n.Child[i] = node
-}
diff --git a/parser/parse.go b/parser/parse.go
index ca89467..5f0b643 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,244 +1,373 @@
package parser
import (
+ "fmt"
+ "log"
+ "strconv"
+ "strings"
+
+ "github.com/gnolang/parscan/lang"
"github.com/gnolang/parscan/scanner"
)
-const (
- Stmt = 1 << iota
- ExprSep
- Call
- Index
- Decl
- MultiOp
-)
+type Parser struct {
+ *scanner.Scanner
-type NodeSpec struct {
- Kind // AST node kind
- Flags uint // composable properties used for AST generation
- Order int // operator precedence order
+ symbols map[string]*symbol
+ function *symbol
+ scope string
+ fname string
+ labelCount map[string]int
}
-type Parser struct {
- *scanner.Scanner
- Spec map[string]NodeSpec
+func (p *Parser) Scan(s string, endSemi bool) (Tokens, error) {
+ return p.Scanner.Scan(s, endSemi)
}
-func (p *Parser) Parse(src string, ctx *Node) (nodes []*Node, err error) {
- tokens, err := p.Scan(src)
- if err != nil {
- return
+type Tokens []scanner.Token
+
+func (toks Tokens) String() (s string) {
+ for _, t := range toks {
+ s += fmt.Sprintf("%#v ", t.Str)
}
- return p.ParseTokens(tokens, ctx)
+ return s
}
-func (p *Parser) ParseTokens(tokens []*scanner.Token, ctx *Node) (nodes []*Node, err error) {
- // TODO: error handling.
- var root *Node // current root node
- var expr *Node // current expression root node
- var prev, c *Node // previous and current nodes
- var lce *Node // last complete expression node
- unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
- prevToken := map[*Node]*scanner.Token{}
-
- for i, t := range tokens {
- prev = c
- c = &Node{
- Token: t,
- Kind: p.Spec[t.Name()].Kind,
+func (toks Tokens) Index(id lang.TokenId) int {
+ for i, t := range toks {
+ if t.Id == id {
+ return i
}
- if i > 0 {
- prevToken[c] = tokens[i-1]
+ }
+ return -1
+}
+
+func (toks Tokens) LastIndex(id lang.TokenId) int {
+ for i := len(toks) - 1; i >= 0; i-- {
+ if toks[i].Id == id {
+ return i
}
- if c.Kind == Comment {
- continue
+ }
+ return -1
+}
+
+func (toks Tokens) Split(id lang.TokenId) (result []Tokens) {
+ for {
+ i := toks.Index(id)
+ if i < 0 {
+ return append(result, toks)
}
- if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
- unaryOp[c] = true
+ result = append(result, toks[:i])
+ toks = toks[i+1:]
+ }
+}
+
+func (p *Parser) Parse(src string) (out Tokens, err error) {
+ log.Printf("Parse src: %#v\n", src)
+ in, err := p.Scan(src, true)
+ if err != nil {
+ return out, err
+ }
+ log.Println("Parse in:", in)
+ for len(in) > 0 {
+ endstmt := in.Index(lang.Semicolon)
+ if endstmt == -1 {
+ return out, scanner.ErrBlock
}
- if c.Kind == Undefined {
- switch t.Kind() {
- case scanner.Number:
- c.Kind = LiteralNumber
- case scanner.Identifier:
- c.Kind = Ident
+
+ // Skip over simple init statements for some tokens (if, for, ...)
+ if lang.HasInit[in[0].Id] {
+ log.Println("may have init")
+ for in[endstmt-1].Id != lang.BraceBlock {
+ e2 := in[endstmt+1:].Index(lang.Semicolon)
+ if e2 == -1 {
+ return out, scanner.ErrBlock
+ }
+ endstmt += 1 + e2
}
}
+ o, err := p.ParseStmt(in[:endstmt])
+ if err != nil {
+ return out, err
+ }
+ out = append(out, o...)
+ in = in[endstmt+1:]
+ }
+ return out, err
+}
- if root == nil {
- if p.isSep(c) {
- continue
- }
- lce = nil
- root = c
- if p.isExpr(c) {
- expr = c
- }
+func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) {
+ log.Println("ParseStmt in:", in)
+ if len(in) == 0 {
+ return nil, nil
+ }
+ switch t := in[0]; t.Id {
+ case lang.Func:
+ return p.ParseFunc(in)
+ case lang.If:
+ return p.ParseIf(in)
+ case lang.Return:
+ return p.ParseReturn(in)
+ default:
+ return p.ParseExpr(in)
+ }
+}
+
+func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) {
+ // TODO: handle anonymous functions (no function name)
+ // TODO: handle receiver (methods)
+ // TODO: handle parametric types (generics)
+ // TODO: handle variadic parameters
+ fname := in[1].Str
+ ofname := p.fname
+ p.fname = fname
+ ofunc := p.function
+ s, _, ok := p.getSym(fname, p.scope)
+ if !ok {
+ s = &symbol{}
+ p.symbols[p.scope+fname] = s
+ }
+ p.pushScope(fname)
+ defer func() {
+ p.fname = ofname // TODO remove if favor of function.
+ p.function = ofunc
+ p.popScope()
+ }()
+
+ out = Tokens{
+ {Id: lang.Enter, Str: "enter " + p.scope},
+ {Id: lang.Goto, Str: "goto " + fname + "_end"}, // Skip function definition.
+ {Id: lang.Label, Pos: in[0].Pos, Str: fname},
+ }
+
+ bi := in.Index(lang.BraceBlock)
+ if bi < 0 {
+ return out, fmt.Errorf("no function body")
+ }
+ typ, err := p.ParseType(in[:bi])
+ if err != nil {
+ return out, err
+ }
+ s.kind = symFunc
+ s.Type = typ
+ p.function = s
+
+ log.Println("body:", in[len(in)-1].Block())
+ toks, err := p.Parse(in[len(in)-1].Block())
+ if err != nil {
+ return out, err
+ }
+ out = append(out, toks...)
+ out = append(out,
+ scanner.Token{Id: lang.Label, Str: fname + "_end"},
+ scanner.Token{Id: lang.Exit},
+ )
+ log.Println("symbols", p.symbols)
+ return out, err
+}
+
+func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) {
+ prefix := p.fname + "_if" + strconv.Itoa(p.labelCount[p.scope+p.fname])
+ p.labelCount[p.scope+p.fname]++
+ sc := 0
+ ssc := strconv.Itoa(sc)
+ // We start from the end of the statement and examine tokens backward so
+ // that destination labels are already computed when the jumps are emitted.
+ i := len(in) - 1
+ for i > 0 {
+ if in[i].Id != lang.BraceBlock {
+ return nil, fmt.Errorf("expected '{', got %v", in[i])
+ }
+ pre, err := p.Parse(in[i].Block())
+ if err != nil {
+ return nil, err
+ }
+ //pre := append(Tokens{{Id: lang.Label, Str: prefix + "_b" + ssc}}, blockout...)
+ if sc > 0 {
+ pre = append(pre, scanner.Token{Id: lang.Goto, Str: "goto " + prefix + "_e0"})
+ }
+ pre = append(pre, scanner.Token{Id: lang.Label, Str: prefix + "_e" + ssc})
+ out = append(pre, out...)
+
+ i--
+ ifp := in[:i].LastIndex(lang.If)
+
+ if in[i].Id == lang.Else { // Step over final 'else'.
+ i--
+ sc++
+ ssc = strconv.Itoa(sc)
continue
}
- if t.IsBlock() {
- if expr != nil {
- if p.hasProp(c, ExprSep) && p.isExprSep(root) {
- // A bracket block may end a previous expression.
- root.Child = append(root.Child, expr)
- expr = nil
- } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
- // Handle (possibly nested) call expressions.
- if lce == nil || lce != expr { // TODO(marc): not general, fix it.
- lce = prev
- }
- lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
- lce.Token = scanner.NewToken("Call", c.Pos())
- lce.Kind = ExprCall
- }
- }
- tcont := t.Content()
- s := tcont[t.Start() : len(tcont)-t.End()]
- n2, err := p.Parse(s, c)
- if err != nil {
+ pre = Tokens{}
+ var init, cond Tokens
+ initcond := in[ifp+1 : i+1]
+ if ii := initcond.Index(lang.Semicolon); ii < 0 {
+ cond = initcond
+ } else {
+ init = initcond[:ii]
+ cond = initcond[ii+1:]
+ }
+ if len(init) > 0 {
+ if init, err = p.ParseStmt(init); err != nil {
return nil, err
}
- c.Child = append(c.Child, n2...)
- }
-
- // Process the end of an expression or a statement.
- if t.IsSeparator() {
- if t.Content() == "," && ctx.Kind != BlockParen {
- // ignore comma separator in field lists
- } else if expr != nil && p.hasProp(root, Stmt) {
- root.Child = append(root.Child, expr)
- if p.hasProp(expr, ExprSep) {
- nodes = append(nodes, root)
- root = nil
- }
- expr = nil
- } else {
- if expr != nil {
- root = expr
- }
- nodes = append(nodes, root)
- expr = nil
- root = nil
- }
- continue
+ pre = append(pre, init...)
}
+ if cond, err = p.ParseExpr(cond); err != nil {
+ return nil, err
+ }
+ pre = append(pre, cond...)
+ pre = append(pre, scanner.Token{Id: lang.JumpFalse, Str: "JumpFalse " + prefix + "_e" + ssc})
+ out = append(pre, out...)
+ i = ifp
+ if i > 1 && in[i].Id == lang.If && in[i-1].Id == lang.Else { // Step over 'else if'.
+ i -= 2
+ }
+ sc++
+ ssc = strconv.Itoa(sc)
+ }
+ log.Println("prefix:", prefix)
+ log.Println("if tokens:", out)
+ return out, err
+}
- // We assume from now that current node is part of an expression subtree.
- if expr == nil {
- if p.isStatement(root) {
- expr = c
- continue
+func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) {
+ if len(in) > 1 {
+ if out, err = p.ParseExpr(in[1:]); err != nil {
+ return out, err
+ }
+ }
+
+ // TODO: the function symbol should be already present in the parser context.
+ // otherwise no way to handle anonymous func.
+ s := p.function
+ in[0].Beg = s.Type.NumOut()
+ in[0].End = s.Type.NumIn()
+ log.Println("ParseReturn:", p.fname, in[0])
+ out = append(out, in[0])
+ return out, err
+}
+
+func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) {
+ log.Println("ParseExpr in:", in)
+ var ops Tokens
+ var vl int
+ //
+ // Process tokens from last to first; the goal is to reorder the tokens into
+ // stack machine processing order, so they can be directly interpreted.
+ //
+ for i := len(in) - 1; i >= 0; i-- {
+ t := in[i]
+ // temporary assumptions: binary operators, returning 1 value
+ switch t.Id {
+ case lang.Ident, lang.Int, lang.String:
+ out = append(out, t)
+ vl++
+ case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Less:
+ // TODO: handle operator precedence to swap operators / operands if necessary
+ if vl < 2 {
+ ops = append(ops, t)
+ break
}
- expr = root
- }
-
- // Update the expression subtree according to binary operator precedence rules.
- // - operators are binary infix by default.
- // - if an operator follows another, then it's unary prefix.
- // - if an expression starts by an operator, then it's unary prefix.
- // - non operator nodes have a default precedence of 0.
- // TODO: handle postfix unary (i.e. ++) and ternary (i.e. ?:)
- //
- ep := p.Spec[expr.Content()].Order
- cp := p.Spec[c.Content()].Order
- a := expr
- if unaryOp[c] {
- cp = 0
- }
- if cp != 0 {
- if cp > ep {
- // Perform an operator permutation at expr root as required by precedence.
- // TODO(marc): maybe it can be generalized in below else branch.
- expr, c = c, expr
- a = expr // Temporary ancestor: its children may have to be permuted.
- } else {
- // Findout if an operator permutation is necessary in subtree.
- c1 := expr
- for {
- a = c1
- if unaryOp[c1] {
- c1, c = c, c1
- a = c1
- if c == expr {
- expr = a
- }
- break
- }
- if len(c1.Child) < 2 {
- break
- }
- c1 = c1.Child[1]
- if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
+ case lang.ParenBlock:
+ // If the previous token is an arithmetic, logic or assign operator then
+ // this parenthesis block is an enclosed expr, otherwise a call expr.
+ if i == 0 || in[i-1].Id.IsOperator() {
+ out = append(out, t)
+ vl++
+ break
+ }
+ // The call expression can be a function call, a conversion,
+ // or a type assertion (including for type switch).
+ // func call: push args and func address then call
+ out = append(out, t)
+ vl++
+ if t2 := in[i-1]; t2.Id == lang.Ident {
+ if s, sc, ok := p.getSym(t2.Str, p.scope); ok {
+ log.Println("callExpr:", t2.Str, p.scope, s, ok, sc)
+ if s.kind == symValue {
+ // Store the number of input parameters in the token Beg field.
+ ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)})
break
}
}
- // No permutation occured. Append current to last visited ancestor.
- if len(a.Child) > 1 {
- a.Child = a.Child[:1]
- c.Child = append(c.Child, c1)
- }
- }
- } else if ep != 0 {
- for len(a.Child) > 1 {
- a = a.Child[1]
}
+ ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos})
}
- a.Child = append(a.Child, c)
- if p.hasProp(a, Call) {
- lce = a
- }
- }
- if root != nil && p.isStatement(root) {
- if expr != nil {
- root.Child = append(root.Child, expr)
+ if ol := len(ops); ol > 0 && vl > ol {
+ op := ops[ol-1]
+ ops = ops[:ol-1]
+ out = append(out, op)
+ vl--
}
- } else if expr != nil {
- root = expr
}
- if root != nil {
- // /*
- if p.hasProp(root, MultiOp) {
- for {
- if !p.fixMultiOp(root, prevToken) {
- break
- }
+ out = append(out, ops...) // TODO: verify that ops should be added in this order.
+
+ log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops)
+ // The tokens are now properly ordered; process nested blocks.
+ for i := len(out) - 1; i >= 0; i-- {
+ t := out[i]
+ var toks Tokens
+ switch t.Id {
+ case lang.ParenBlock, lang.BracketBlock:
+ if toks, err = p.ParseExprStr(t.Block()); err != nil {
+ return out, err
}
+ default:
+ continue
}
- // */
- nodes = append(nodes, root)
+
+ // replace block token by its parsed result.
+ log.Println("toks:", toks)
+ out2 := append(Tokens{}, out[:i]...)
+ out2 = append(out2, toks...)
+ out = append(out2, out[i+1:]...)
}
- return nodes, err
+ log.Println("Final out:", out)
+ return out, err
}
-func (p *Parser) fixMultiOp(root *Node, prevToken map[*Node]*scanner.Token) bool {
- for i, c := range root.Child {
- for j, cc := range c.Child {
- if pt := prevToken[cc]; pt != nil && pt.Content() == "," {
- c.RemoveChild(j)
- root.InsertChild(cc, i)
- return true
- }
+func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) {
+ if tokens, err = p.Scan(s, false); err != nil {
+ return
+ }
+ var result Tokens
+ for _, sub := range tokens.Split(lang.Comma) {
+ toks, err := p.ParseExpr(sub)
+ if err != nil {
+ return result, err
}
+ result = append(toks, result...)
}
- return false
+ return result, err
}
-func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 }
-func (p *Parser) isStatement(n *Node) bool { return p.Spec[n.Content()].Flags&Stmt != 0 }
-func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].Flags&ExprSep != 0 }
-func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) }
-func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator }
-func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block }
-
-func (p *Parser) precedenceToken(t *scanner.Token) int {
- s := t.Content()
- if l := t.Start(); l > 0 {
- s = s[:l]
+func (p *Parser) numItems(s string, sep lang.TokenId) int {
+ tokens, err := p.Scan(s, false)
+ if err != nil {
+ return -1
}
- return p.Spec[s].Order
+ r := 0
+ for _, t := range tokens.Split(sep) {
+ if len(t) == 0 {
+ continue
+ }
+ r++
+ }
+ return r
+}
+
+func (p *Parser) pushScope(name string) {
+ p.scope = strings.TrimPrefix(p.scope+"/"+name+"/", "/")
}
-func (p *Parser) canCallToken(t *scanner.Token) bool {
- return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0
+func (p *Parser) popScope() {
+ p.scope = strings.TrimSuffix(p.scope, "/")
+ j := strings.LastIndex(p.scope, "/")
+ if j == -1 {
+ p.scope = ""
+ return
+ }
+ p.scope = p.scope[:j]
}
diff --git a/parser/parse_test.go b/parser/parse_test.go
deleted file mode 100644
index 4559acf..0000000
--- a/parser/parse_test.go
+++ /dev/null
@@ -1,257 +0,0 @@
-package parser
-
-import (
- "log"
- "os"
- "testing"
-
- "github.com/gnolang/parscan/scanner"
-)
-
-var GoScanner = &scanner.Scanner{
- CharProp: [scanner.ASCIILen]uint{
- '\t': scanner.CharSep,
- '\n': scanner.CharLineSep,
- ' ': scanner.CharSep,
- '!': scanner.CharOp,
- '"': scanner.CharStr,
- '%': scanner.CharOp,
- '&': scanner.CharOp,
- '\'': scanner.CharStr,
- '(': scanner.CharBlock,
- '*': scanner.CharOp,
- '+': scanner.CharOp,
- ',': scanner.CharGroupSep,
- '-': scanner.CharOp,
- '.': scanner.CharOp,
- '/': scanner.CharOp,
- ':': scanner.CharOp,
- ';': scanner.CharGroupSep,
- '<': scanner.CharOp,
- '=': scanner.CharOp,
- '>': scanner.CharOp,
- '[': scanner.CharBlock,
- '^': scanner.CharOp,
- '{': scanner.CharBlock,
- '|': scanner.CharOp,
- '~': scanner.CharOp,
- },
- End: map[string]string{
- "(": ")",
- "{": "}",
- "[": "]",
- "/*": "*/",
- `"`: `"`,
- "'": "'",
- "`": "`",
- "//": "\n",
- },
- BlockProp: map[string]uint{
- "(": scanner.CharBlock,
- "{": scanner.CharBlock,
- "[": scanner.CharBlock,
- `"`: scanner.CharStr | scanner.StrEsc | scanner.StrNonl,
- "`": scanner.CharStr,
- "'": scanner.CharStr | scanner.StrEsc,
- "/*": scanner.CharStr,
- "//": scanner.CharStr | scanner.ExcludeEnd | scanner.EosValidEnd,
- },
- SkipSemi: map[string]bool{
- "++": true,
- "--": true,
- "case": true,
- "chan": true,
- "const": true,
- "default": true,
- "defer": true,
- "else": true,
- "for": true,
- "func": true,
- "go": true,
- "goto": true,
- "if": true,
- "import": true,
- "interface": true,
- "map": true,
- "package": true,
- "range": true,
- "select": true,
- "struct": true,
- "switch": true,
- "type": true,
- "var": true,
- },
-}
-
-var GoParser = &Parser{
- Scanner: GoScanner,
- Spec: map[string]NodeSpec{
- ".": {Kind: OpDot, Flags: Call, Order: 3},
- "*": {Kind: OpMultiply, Order: 4},
- "+": {Kind: OpAdd, Order: 5},
- "-": {Kind: OpSubtract, Order: 5},
- "<": {Kind: OpInferior, Order: 6},
- ":=": {Kind: OpDefine, Flags: MultiOp, Order: 7},
- "=": {Kind: OpAssign, Flags: MultiOp, Order: 7},
- "if": {Kind: StmtIf, Flags: Stmt | ExprSep},
- "func": {Kind: DeclFunc, Flags: Decl | Call},
- "return": {Kind: StmtReturn, Flags: Stmt},
- "{..}": {Kind: BlockStmt, Flags: ExprSep},
- "(..)": {Kind: BlockParen, Flags: Call},
- "//..": {Kind: Comment},
- "/*..": {Kind: Comment},
- },
-}
-
-func init() {
- GoParser.Init()
- log.SetFlags(log.Lshortfile)
-}
-
-func TestParse(t *testing.T) {
- for _, test := range goTests {
- test := test
- t.Run("", func(t *testing.T) {
- var err error
- errStr := ""
- n := &Node{}
- if n.Child, err = GoParser.Parse(test.src, n); err != nil {
- errStr = err.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- if dot := n.Sdot(""); dot != test.dot {
- t.Errorf("got %#v, want %#v", dot, test.dot)
- }
- t.Log(test.src)
- t.Log(n.Sdot(""))
- if dotCmd := os.Getenv("DOT"); dotCmd != "" {
- n.Dot(dotCmd, "")
- }
- })
- }
-}
-
-var goTests = []struct {
- src, dot, err string
- skip bool
-}{{ // #00
- src: "",
- dot: `digraph ast { 0 [label=""]; }`,
-}, { // #01
- src: "12",
- dot: `digraph ast { 0 [label=""]; 1 [label="12"]; 0 -> 1; }`,
-}, { // #02
- src: "1 + 2",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; }`,
-}, { // #03
- src: "1 + 2 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="3"]; 3 -> 5; }`,
-}, { // #04
- src: "1 * 2 + 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="1"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="3"]; 1 -> 5; }`,
-}, { // #05
- src: "a := 2 + 5",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="5"]; 3 -> 5; }`,
-}, { // #06
- src: "a := 1 * 2 + 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="3"]; 3 -> 7; }`,
-}, { // #07
- src: "a := 1 + 2 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="*"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="3"]; 5 -> 7; }`,
-}, { // #08
- src: "a := 1 + 2 * 3 + 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="*"]; 5 -> 6; 7 [label="2"]; 6 -> 7; 8 [label="3"]; 6 -> 8; 9 [label="4"]; 5 -> 9; }`,
-}, { // #09
- src: "a := 1 + 2 + 3 * 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="*"]; 5 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`,
-}, { // #10
- src: "a := 1 * 2 + 3 * 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="*"]; 3 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`,
-}, { // #11
- src: "a := (1 + 2) * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="(..)"]; 3 -> 4; 5 [label="+"]; 4 -> 5; 6 [label="1"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="3"]; 3 -> 8; }`,
-}, { // #12
- src: "a := 2 * -3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="-"]; 3 -> 5; 6 [label="3"]; 5 -> 6; }`,
-}, { // #13
- src: "-5 + 4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="4"]; 1 -> 4; }`,
-}, { // #14
- src: "-5 + -4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="-"]; 1 -> 4; 5 [label="4"]; 4 -> 5; }`,
-}, { // #15
- src: "a := -5 + -4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="-"]; 3 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 3 -> 6; 7 [label="4"]; 6 -> 7; }`,
-}, { // #16
- src: "*a := 5 * -3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 4 -> 6; 7 [label="3"]; 6 -> 7; }`,
-}, { // #17
- src: "*a := -5 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="-"]; 4 -> 5; 6 [label="5"]; 5 -> 6; 7 [label="3"]; 4 -> 7; }`,
-}, { // #18
- src: "1+2\n3-4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; 4 [label="-"]; 0 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="4"]; 4 -> 6; }`,
-}, { // #19
- src: "i = i+1",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="i"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="i"]; 3 -> 4; 5 [label="1"]; 3 -> 5; }`,
-}, { // #20
- src: "a[12] = 5",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="5"]; 1 -> 5; }`,
-}, { // #21
- src: "a[12][0] = 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="[..]"]; 2 -> 5; 6 [label="0"]; 5 -> 6; 7 [label="3"]; 1 -> 7; }`,
-}, { // #22
- src: "a.b = 34",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="."]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="b"]; 2 -> 4; 5 [label="34"]; 1 -> 5; }`,
-}, { // #23
- src: "if i < 2 { return j }",
- dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label="<"]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="{..}"]; 1 -> 5; 6 [label="return"]; 5 -> 6; 7 [label="j"]; 6 -> 7; }`,
-}, { // #24
- src: "if i:=1; i < 2 { return j }",
- dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label=":="]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="1"]; 2 -> 4; 5 [label="<"]; 1 -> 5; 6 [label="i"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="{..}"]; 1 -> 8; 9 [label="return"]; 8 -> 9; 10 [label="j"]; 9 -> 10; }`,
-}, { // #25
- src: "f(i) + f(j)",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="f"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="i"]; 4 -> 5; 6 [label="Call"]; 1 -> 6; 7 [label="f"]; 6 -> 7; 8 [label="(..)"]; 6 -> 8; 9 [label="j"]; 8 -> 9; }`,
-}, { // #26
- src: "a := 1 // This is a comment",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="1"]; 1 -> 3; }`,
- /*
- }, { // #27
- src: "a, b, c = 1, f(2), 3",
- //src: "f(i) + f(j)(4)", // not ok
- }, { // #26
- src: "if i < 2 {return i}; return f(i-2) + f(i-1)",
- }, { // #27
- src: "for i < 2 { println(i) }",
- }, { // #28
- src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }",
- }, { // #29
- src: "a := []int{3, 4}",
- }, { // #30
- //src: "a := struct{int}",
- src: "a, b = c, d",
- }, { // #31
- //src: "a := [2]int{3, 4}",
- src: `fmt.Println("Hello")`,
- //src: "(1 + 2) * (3 - 4)",
- //src: "1 + (1 + 2)",
- }, { // #32
- //src: `a(3)(4)`,
- //src: `3 + 2 * a(3) + 5`,
- //src: `3 + 2 * a(3)(4) + (5)`,
- //src: `(a(3))(4)`,
- src: `a(3)(4)`,
- dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`,
- //src: `println("Hello")`,
- //src: `a.b.c + 3`,
- }, { // #33
- src: `func f(a int, b int) {return a + b}; f(1+2)`,
- }, { // #34
- src: `if a == 1 {
- println(2)
- }
- println("bye")`,
- */
-}}
diff --git a/parser/symbol.go b/parser/symbol.go
new file mode 100644
index 0000000..9975c24
--- /dev/null
+++ b/parser/symbol.go
@@ -0,0 +1,67 @@
+package parser
+
+import (
+ "reflect"
+ "strings"
+)
+
+type symKind int
+
+const (
+ symValue symKind = iota // a Go value defined in the runtime
+ symType // a Go type
+	symLabel                // a label indicating a position in the VM code
+ symConst // a Go constant
+ symVar // a Go variable, located in the VM memory
+ symFunc // a Go function, located in the VM code
+)
+
+type symbol struct {
+ kind symKind
+ index int // address of symbol in frame
+ value any
+ Type reflect.Type
+ local bool // if true address is relative to local frame, otherwise global
+ used bool
+}
+
+func (p *Parser) AddSym(i int, name string, v any) { p.addSym(i, name, v, symValue, nil, false) }
+
+func (p *Parser) addSym(i int, name string, v any, k symKind, t reflect.Type, local bool) {
+ p.symbols[name] = &symbol{kind: k, index: i, local: local, value: v, Type: t, used: true}
+}
+
+// getSym searches for an existing symbol starting from the deepest scope.
+func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) {
+ for {
+ if sym, ok = p.symbols[scope+name]; ok {
+ return sym, scope, ok
+ }
+ scope = strings.TrimSuffix(scope, "/")
+ i := strings.LastIndex(scope, "/")
+ if i == -1 {
+ scope = ""
+ break
+ }
+ if scope = scope[:i]; scope == "" {
+ break
+ }
+ }
+ sym, ok = p.symbols[name]
+ return sym, scope, ok
+}
+
+func initUniverse() map[string]*symbol {
+ return map[string]*symbol{
+ "any": {kind: symType, Type: reflect.TypeOf((*any)(nil)).Elem()},
+ "bool": {kind: symType, Type: reflect.TypeOf((*bool)(nil)).Elem()},
+ "error": {kind: symType, Type: reflect.TypeOf((*error)(nil)).Elem()},
+ "int": {kind: symType, Type: reflect.TypeOf((*int)(nil)).Elem()},
+ "string": {kind: symType, Type: reflect.TypeOf((*string)(nil)).Elem()},
+
+ "nil": {},
+ "iota": {value: 0},
+ "true": {value: true},
+ "false": {value: false},
+ }
+}
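The `reflect.TypeOf((*T)(nil)).Elem()` pattern used in `initUniverse` above is the usual way to get the `reflect.Type` of an interface type such as `error` or `any`, since `reflect.TypeOf` on an interface value reports the dynamic type of whatever it holds. A standalone illustration (not part of this change):

```go
package main

import (
	"errors"
	"fmt"
	"reflect"
)

func main() {
	// Going through a typed nil pointer and Elem() yields the interface type itself.
	errType := reflect.TypeOf((*error)(nil)).Elem()
	fmt.Println(errType.Kind(), errType) // interface error

	// TypeOf on a non-nil interface value reports the concrete type stored in it.
	var err error = errors.New("boom")
	fmt.Println(reflect.TypeOf(err)) // *errors.errorString
}
```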
diff --git a/parser/type.go b/parser/type.go
new file mode 100644
index 0000000..77ccc6e
--- /dev/null
+++ b/parser/type.go
@@ -0,0 +1,117 @@
+package parser
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+
+ "github.com/gnolang/parscan/lang"
+)
+
+// ParseType parses a list of tokens defining a type expression and returns
+// the corresponding runtime type or an error.
+func (p *Parser) ParseType(in Tokens) (typ reflect.Type, err error) {
+ log.Println("ParseType", in)
+ switch in[0].Id {
+ case lang.Func:
+ // Get argument and return token positions depending on function pattern:
+ // method with receiver, named function or anonymous closure.
+ // TODO: handle variadics
+ var out Tokens
+ var indexArgs int
+ switch l, in1 := len(in), in[1]; {
+ case l >= 4 && in1.Id == lang.ParenBlock && in[2].Id == lang.Ident:
+ indexArgs, out = 3, in[4:]
+ case l >= 3 && in1.Id == lang.Ident:
+ indexArgs, out = 2, in[3:]
+ case l >= 2 && in1.Id == lang.ParenBlock:
+ indexArgs, out = 1, in[2:]
+ default:
+ return nil, fmt.Errorf("invalid func signature")
+ }
+
+ // We can now parse function input and output parameter types.
+	// Input parameters are always enclosed in parentheses.
+ iargs, err := p.Scan(in[indexArgs].Block(), false)
+ if err != nil {
+ return nil, err
+ }
+ arg, err := p.parseParamTypes(iargs, true)
+ if err != nil {
+ return nil, err
+ }
+	// Output parameters may be empty, and may or may not be enclosed in parentheses.
+ if len(out) == 1 && out[0].Id == lang.ParenBlock {
+ if out, err = p.Scan(out[0].Block(), false); err != nil {
+ return nil, err
+ }
+ }
+ ret, err := p.parseParamTypes(out, false)
+ if err != nil {
+ return nil, err
+ }
+ return reflect.FuncOf(arg, ret, false), nil
+
+ case lang.Ident:
+ // TODO: selector expression (pkg.type)
+ s, _, ok := p.getSym(in[0].Str, p.scope)
+ if !ok || s.kind != symType {
+ return nil, fmt.Errorf("invalid type %s", in[0].Str)
+ }
+ return s.Type, nil
+ }
+ return typ, err
+}
+
+// parseParamTypes parses a list of comma-separated typed parameters and returns the
+// corresponding runtime types. Parameter names may be omitted, and a named parameter may
+// omit its type, in which case it takes the type of the next parameter to its right.
+func (p *Parser) parseParamTypes(in Tokens, arg bool) (types []reflect.Type, err error) {
+	// Parse from right to left, to allow multiple comma-separated parameters to share the same type.
+ list := in.Split(lang.Comma)
+ for i := len(list) - 1; i >= 0; i-- {
+ t := list[i]
+ if len(t) == 0 {
+ continue
+ }
+ param := ""
+ if p.hasFirstParam(t) {
+ param = t[0].Str
+ t = t[1:]
+ if len(t) == 0 {
+ if len(types) == 0 {
+				return nil, fmt.Errorf("missing type for parameter %s", param)
+ }
+			// Type was omitted, apply the previous one from the right.
+ types = append([]reflect.Type{types[0]}, types...)
+ if arg {
+ p.addSym(-i-2, p.scope+param, nil, symVar, types[0], true)
+ } else {
+ p.addSym(i, p.scope+param, nil, symVar, types[0], true)
+ }
+ continue
+ }
+ }
+ typ, err := p.ParseType(t)
+ if err != nil {
+ return nil, err
+ }
+ if param != "" {
+ if arg {
+ p.addSym(-i-2, p.scope+param, nil, symVar, typ, true)
+ } else {
+ p.addSym(i, p.scope+param, nil, symVar, typ, true)
+ }
+ }
+ types = append([]reflect.Type{typ}, types...)
+ }
+ return types, err
+}
+
+// hasFirstParam returns true if the first token of a list is a parameter name.
+func (p *Parser) hasFirstParam(in Tokens) bool {
+ if in[0].Id != lang.Ident {
+ return false
+ }
+ s, _, ok := p.getSym(in[0].Str, p.scope)
+ return !ok || s.kind != symType
+}
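For reference, the runtime type that `ParseType` is expected to build for a signature like `func(a, b int) int` (where `a` inherits `b`'s type, as described in `parseParamTypes`) corresponds to this direct `reflect` construction; a sketch, not code from this change:

```go
package main

import (
	"fmt"
	"reflect"
)

func main() {
	intT := reflect.TypeOf(0)
	// Two int parameters (a inherits b's type), one int result, not variadic.
	fn := reflect.FuncOf([]reflect.Type{intT, intT}, []reflect.Type{intT}, false)
	fmt.Println(fn) // func(int, int) int
}
```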
diff --git a/scanner/scan.go b/scanner/scan.go
index e0a32ca..99d6768 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -4,124 +4,55 @@ import (
"errors"
"fmt"
"regexp"
- "strconv"
"strings"
-)
-
-// Kind is the token type kind.
-type Kind uint
-const (
- Undefined Kind = iota
- Identifier
- Number
- Operator
- Separator
- String
- Block
- Custom
+ "github.com/gnolang/parscan/lang"
)
-const (
- CharOp = 1 << iota
- CharNum
- CharAlpha
- CharSep
- CharLineSep
- CharGroupSep
- CharStr
- CharBlock
- StrEsc
- StrNonl
- ExcludeEnd // exclude end delimiter from content
- EosValidEnd // end of input string terminates block or string token
+var (
+ ErrBlock = errors.New("block not terminated")
+ ErrIllegal = errors.New("illegal token")
)
-var ErrBlock = errors.New("block not terminated")
-
// Token defines a scanner token.
type Token struct {
- pos int
- kind Kind
- content string
- start int
- end int
- value any
-}
-
-type Tokens []*Token
-
-func (tks Tokens) String() (s string) {
- for _, t := range tks {
- s += fmt.Sprintf("%#v ", t.content)
- }
- return
+	Id  lang.TokenId // token identifier
+ Pos int // position in source
+ Str string // string in source
+ Beg int // length of begin delimiter (block, string)
+ End int // length of end delimiter (block, string)
}
-func (t *Token) Kind() Kind { return t.kind }
-func (t *Token) Content() string { return t.content }
-func (t *Token) Start() int { return t.start }
-func (t *Token) End() int { return t.end }
-func (t *Token) Pos() int { return t.pos }
-func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] }
-func (t *Token) Prefix() string { return t.content[:t.start] }
-func (t *Token) Value() any { return t.value }
-func (t *Token) IsBlock() bool { return t.kind == Block }
-func (t *Token) IsOperator() bool { return t.kind == Operator }
-func (t *Token) IsSeparator() bool { return t.kind == Separator }
+func (t *Token) Block() string { return t.Str[t.Beg : len(t.Str)-t.End] }
+func (t *Token) Prefix() string { return t.Str[:t.Beg] }
func (t *Token) Name() string {
- name := t.content
- if t.start > 1 {
- return name[:t.start] + ".."
+ name := t.Str
+ if t.Beg > 1 {
+ return name[:t.Beg] + ".."
}
- if t.start > 0 {
- return name[:t.start] + ".." + name[len(name)-t.end:]
+ if t.Beg > 0 {
+ return name[:t.Beg] + ".." + name[len(name)-t.End:]
}
return name
}
-func NewToken(content string, pos int) *Token {
- return &Token{pos, Custom, content, 0, 0, nil}
-}
-
-const ASCIILen = 1 << 7 // 128
-
// Scanner contains the scanner rules for a language.
type Scanner struct {
- CharProp [ASCIILen]uint // special Character properties
- End map[string]string // end delimiters, indexed by start
- BlockProp map[string]uint // block properties
- SkipSemi map[string]bool // words skipping automatic semicolon insertion after newline
- DotNum bool // true if a number can start with '.'
- IdAscii bool // true if an identifier can be in ASCII only
- Num_ bool // true if a number can contain _ character
+ *lang.Spec
sdre *regexp.Regexp // string delimiters regular expression
}
-func (sc *Scanner) HasProp(r rune, p uint) bool {
- if r >= ASCIILen {
- return false
- }
- return sc.CharProp[r]&p != 0
-}
+func NewScanner(spec *lang.Spec) *Scanner {
+ sc := &Scanner{Spec: spec}
-func (sc *Scanner) isOp(r rune) bool { return sc.HasProp(r, CharOp) }
-func (sc *Scanner) isSep(r rune) bool { return sc.HasProp(r, CharSep) }
-func (sc *Scanner) isLineSep(r rune) bool { return sc.HasProp(r, CharLineSep) }
-func (sc *Scanner) isGroupSep(r rune) bool { return sc.HasProp(r, CharGroupSep) }
-func (sc *Scanner) isStr(r rune) bool { return sc.HasProp(r, CharStr) }
-func (sc *Scanner) isBlock(r rune) bool { return sc.HasProp(r, CharBlock) }
-func (sc *Scanner) isDir(r rune) bool {
- return !sc.HasProp(r, CharOp|CharSep|CharLineSep|CharGroupSep|CharStr|CharBlock)
-}
+	// TODO: mark unset ASCII chars other than alphanumerics as illegal.
-func (sc *Scanner) Init() {
// Build a regular expression to match all string start delimiters at once.
re := "("
for s, p := range sc.BlockProp {
- if p&CharStr == 0 {
+ if p&lang.CharStr == 0 {
continue
}
// TODO: sort keys in decreasing length order.
@@ -132,46 +63,69 @@ func (sc *Scanner) Init() {
}
re = strings.TrimSuffix(re, "|") + ")$"
sc.sdre = regexp.MustCompile(re)
+
+ return sc
+}
+
+func (sc *Scanner) HasProp(r rune, p uint) bool {
+ if r >= lang.ASCIILen {
+ return false
+ }
+ return sc.CharProp[r]&p != 0
+}
+
+func (sc *Scanner) isOp(r rune) bool { return sc.HasProp(r, lang.CharOp) }
+func (sc *Scanner) isSep(r rune) bool { return sc.HasProp(r, lang.CharSep) }
+func (sc *Scanner) isLineSep(r rune) bool { return sc.HasProp(r, lang.CharLineSep) }
+func (sc *Scanner) isGroupSep(r rune) bool { return sc.HasProp(r, lang.CharGroupSep) }
+func (sc *Scanner) isStr(r rune) bool { return sc.HasProp(r, lang.CharStr) }
+func (sc *Scanner) isBlock(r rune) bool { return sc.HasProp(r, lang.CharBlock) }
+func (sc *Scanner) isDir(r rune) bool {
+ return !sc.HasProp(r, lang.CharOp|lang.CharSep|lang.CharLineSep|lang.CharGroupSep|lang.CharStr|lang.CharBlock)
}
func isNum(r rune) bool { return '0' <= r && r <= '9' }
-func (sc *Scanner) Scan(src string) (tokens Tokens, err error) {
+func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
offset := 0
- s := src
+ s := strings.TrimSpace(src)
for len(s) > 0 {
t, err := sc.Next(s)
if err != nil {
- return nil, fmt.Errorf("%s: %w", loc(src, offset+t.pos), err)
+ return nil, fmt.Errorf("%s: %w", loc(src, offset+t.Pos), err)
}
- if t.kind == Undefined {
+ if t.Id == lang.Illegal && t.Str == "" {
break
}
skip := false
- if len(tokens) > 0 && len(sc.SkipSemi) > 0 && t.kind == Separator && t.content == " " {
+ if len(tokens) > 0 && t.Str == "\n" {
// Check for automatic semi-colon insertion after newline.
last := tokens[len(tokens)-1]
- if last.kind == Identifier && sc.SkipSemi[last.content] ||
- last.kind == Operator && !sc.SkipSemi[last.content] {
+ if last.Id.IsKeyword() && sc.TokenProps[last.Str].SkipSemi ||
+ last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi {
skip = true
} else {
- t.content = ";"
+ t.Id = lang.Semicolon
+ t.Str = ";"
}
}
- nr := t.pos + len(t.content)
+ nr := t.Pos + len(t.Str)
s = s[nr:]
- t.pos += offset
+ t.Pos += offset
offset += nr
if !skip {
tokens = append(tokens, t)
}
}
- // Insertion of semi-colon at the end of the token stream.
- if len(tokens) > 0 && len(sc.SkipSemi) > 0 {
+ // Optional insertion of semi-colon at the end of the token stream.
+ if semiEOF && len(tokens) > 0 {
last := tokens[len(tokens)-1]
- if !(last.kind == Identifier && sc.SkipSemi[last.content] ||
- last.kind == Operator && !sc.SkipSemi[last.content]) {
- tokens = append(tokens, &Token{kind: Separator, content: ";"})
+ if last.Str == ";" {
+ return tokens, nil
+ }
+ if !(last.Id == lang.Ident && sc.TokenProps[last.Str].SkipSemi ||
+ last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi) {
+ tokens = append(tokens, Token{Id: lang.Semicolon, Str: ";"})
}
}
return tokens, nil
@@ -188,7 +142,7 @@ func loc(s string, p int) string {
}
// Next returns the next token in string.
-func (sc *Scanner) Next(src string) (tok *Token, err error) {
+func (sc *Scanner) Next(src string) (tok Token, err error) {
p := 0
// Skip initial separators.
@@ -204,56 +158,65 @@ func (sc *Scanner) Next(src string) (tok *Token, err error) {
for i, r := range src {
switch {
case sc.isSep(r):
- return &Token{}, nil
+ return Token{}, nil
case sc.isGroupSep(r):
// TODO: handle group separators.
- return &Token{kind: Separator, pos: p + i, content: string(r)}, nil
+ return Token{Id: sc.TokenProps[string(r)].TokenId, Pos: p + i, Str: string(r)}, nil
case sc.isLineSep(r):
- return &Token{kind: Separator, pos: p + i, content: " "}, nil
+ return Token{Pos: p + i, Str: "\n"}, nil
case sc.isStr(r):
s, ok := sc.getStr(src[i:], 1)
if !ok {
err = ErrBlock
}
- return &Token{kind: String, pos: p + i, content: s, start: 1, end: 1}, err
+ return Token{Id: lang.String, Pos: p + i, Str: s, Beg: 1, End: 1}, err
case sc.isBlock(r):
b, ok := sc.getBlock(src[i:], 1)
if !ok {
err = ErrBlock
}
- return &Token{kind: Block, pos: p + i, content: b, start: 1, end: 1}, err
+ tok := Token{Pos: p + i, Str: b, Beg: 1, End: 1}
+ tok.Id = sc.TokenProps[tok.Name()].TokenId
+ return tok, err
case sc.isOp(r):
op, isOp := sc.getOp(src[i:])
if isOp {
- return &Token{kind: Operator, pos: p + i, content: op}, nil
+ id := sc.TokenProps[op].TokenId
+ if id == lang.Illegal {
+ err = fmt.Errorf("%w: %s", ErrIllegal, op)
+ }
+ return Token{Id: id, Pos: p + i, Str: op}, err
}
flag := sc.BlockProp[op]
- if flag&CharStr != 0 {
+ if flag&lang.CharStr != 0 {
s, ok := sc.getStr(src[i:], len(op))
if !ok {
err = ErrBlock
}
- return &Token{kind: String, pos: p + i, content: s, start: len(op), end: len(op)}, err
+ return Token{Id: lang.Comment, Pos: p + i, Str: s, Beg: len(op), End: len(op)}, err
}
case isNum(r):
- c, v := sc.getNum(src[i:])
- return &Token{kind: Number, pos: p + i, content: c, value: v}, nil
+ return Token{Id: lang.Int, Pos: p + i, Str: sc.getNum(src[i:])}, nil
default:
id, isId := sc.getId(src[i:])
if isId {
- return &Token{kind: Identifier, pos: p + i, content: id}, nil
+ ident := sc.TokenProps[id].TokenId
+ if ident == lang.Illegal {
+ ident = lang.Ident
+ }
+ return Token{Id: ident, Pos: p + i, Str: id}, nil
}
flag := sc.BlockProp[id]
- if flag&CharBlock != 0 {
+ if flag&lang.CharBlock != 0 {
s, ok := sc.getBlock(src[i:], len(id))
if !ok {
err = ErrBlock
}
- return &Token{kind: Block, pos: p + i, content: s, start: len(id), end: len(id)}, err
+ return Token{Pos: p + i, Str: s, Beg: len(id), End: len(id)}, err
}
}
}
- return &Token{}, nil
+ return Token{}, nil
}
func (sc *Scanner) getId(src string) (s string, isId bool) {
@@ -287,7 +250,7 @@ func (sc *Scanner) getOp(src string) (s string, isOp bool) {
return s, true
}
-func (sc *Scanner) getNum(src string) (s string, v any) {
+func (sc *Scanner) getNum(src string) (s string) {
// TODO: handle hexa, binary, octal, float and eng notations.
for i, r := range src {
if !isNum(r) {
@@ -295,31 +258,22 @@ func (sc *Scanner) getNum(src string) (s string, v any) {
}
s = src[:i+1]
}
- var err error
- if strings.ContainsRune(s, '.') {
- v, err = strconv.ParseFloat(s, 64)
- } else {
- v, err = strconv.ParseInt(s, 0, 64)
- }
- if err != nil {
- v = err
- }
- return s, v
+ return s
}
func (sc *Scanner) getStr(src string, nstart int) (s string, ok bool) {
start := src[:nstart]
end := sc.End[start]
prop := sc.BlockProp[start]
- canEscape := prop&StrEsc != 0
- nonl := prop&StrNonl != 0
- excludeEnd := prop&ExcludeEnd != 0
+ canEscape := prop&lang.StrEsc != 0
+ nonl := prop&lang.StrNonl != 0
+ excludeEnd := prop&lang.ExcludeEnd != 0
var esc bool
for i, r := range src[nstart:] {
s = src[:nstart+i+1]
if r == '\n' && nonl {
- return
+ return s, ok
}
if strings.HasSuffix(s, end) && !esc {
if excludeEnd {
@@ -329,7 +283,7 @@ func (sc *Scanner) getStr(src string, nstart int) (s string, ok bool) {
}
esc = canEscape && r == '\\' && !esc
}
- ok = prop&EosValidEnd != 0
+ ok = prop&lang.EosValidEnd != 0
return s, ok
}
@@ -359,22 +313,12 @@ func (sc *Scanner) getBlock(src string, nstart int) (s string, ok bool) {
skip = nstart + i + len(str) - l1
}
if n == 0 {
- if prop&ExcludeEnd != 0 {
+ if prop&lang.ExcludeEnd != 0 {
s = s[:len(s)-len(end)]
}
return s, true
}
}
- ok = prop&EosValidEnd != 0
+ ok = prop&lang.EosValidEnd != 0
return s, ok
}
-
-// Index returns the index of the first instance with content s in tokens, or -1 if not found.
-func Index(tokens []*Token, s string) int {
- for i, t := range tokens {
- if t.content == s {
- return i
- }
- }
- return -1
-}
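The automatic semicolon insertion above mirrors Go's rule: a newline terminates the statement unless the last token is a keyword or operator flagged to suppress it, and with `semiEOF` set a final `;` is appended at end of input. A hedged sketch of how a caller might observe this, constructing the scanner the same way the tests below do:

```go
package main

import (
	"fmt"

	"github.com/gnolang/parscan/lang/golang"
	"github.com/gnolang/parscan/scanner"
)

func main() {
	sc := scanner.NewScanner(golang.GoSpec)
	// Newlines after ordinary operands become ";", and semiEOF appends a final ";".
	tokens, err := sc.Scan("a := 1\nreturn a", true)
	if err != nil {
		panic(err)
	}
	for _, t := range tokens {
		fmt.Printf("%q ", t.Str)
	}
	// Expected, per the rules above: "a" ":=" "1" ";" "return" "a" ";"
}
```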
diff --git a/scanner/scan_test.go b/scanner/scan_test.go
index bd0a13c..496e4e4 100644
--- a/scanner/scan_test.go
+++ b/scanner/scan_test.go
@@ -1,107 +1,47 @@
-package scanner
+package scanner_test
import (
+ "fmt"
"log"
"testing"
+
+ "github.com/gnolang/parscan/lang/golang"
+ "github.com/gnolang/parscan/scanner"
)
-var GoScanner = &Scanner{
- CharProp: [ASCIILen]uint{
- '\t': CharSep,
- '\n': CharLineSep,
- ' ': CharSep,
- '!': CharOp,
- '"': CharStr | StrEsc | StrNonl,
- '%': CharOp,
- '&': CharOp,
- '\'': CharStr | StrEsc,
- '(': CharBlock,
- '*': CharOp,
- '+': CharOp,
- ',': CharGroupSep,
- '-': CharOp,
- '`': CharStr,
- '.': CharOp,
- '/': CharOp,
- ':': CharOp,
- ';': CharGroupSep,
- '<': CharOp,
- '=': CharOp,
- '>': CharOp,
- '[': CharBlock,
- '^': CharOp,
- '{': CharBlock,
- '|': CharOp,
- '~': CharOp,
- },
- End: map[string]string{
- "(": ")",
- "{": "}",
- "[": "]",
- "/*": "*/",
- `"`: `"`,
- "'": "'",
- "`": "`",
- "//": "\n",
- },
- BlockProp: map[string]uint{
- "(": CharBlock,
- "{": CharBlock,
- "[": CharBlock,
- `"`: CharStr | StrEsc | StrNonl,
- "`": CharStr,
- "'": CharStr | StrEsc,
- "/*": CharStr,
- "//": CharStr | ExcludeEnd | EosValidEnd,
- },
- SkipSemi: map[string]bool{
- "++": true,
- "--": true,
- "case": true,
- "chan": true,
- "const": true,
- "default": true,
- "defer": true,
- "else": true,
- "for": true,
- "func": true,
- "go": true,
- "goto": true,
- "if": true,
- "import": true,
- "interface": true,
- "map": true,
- "package": true,
- "range": true,
- "select": true,
- "struct": true,
- "switch": true,
- "type": true,
- "var": true,
- },
+var GoScanner *scanner.Scanner
+
+func init() {
+ log.SetFlags(log.Lshortfile)
+ GoScanner = scanner.NewScanner(golang.GoSpec)
}
func TestScan(t *testing.T) {
- log.SetFlags(log.Lshortfile)
- GoScanner.Init()
for _, test := range tests {
test := test
t.Run("", func(t *testing.T) {
errStr := ""
- tokens, err := GoScanner.Scan(test.src)
+ tokens, err := GoScanner.Scan(test.src, true)
if err != nil {
errStr = err.Error()
}
if errStr != test.err {
t.Errorf("got error %v, want error %#v", errStr, test.err)
}
- if result := tokens.String(); result != test.tok {
+ if result := tokStr(tokens); result != test.tok {
t.Errorf("got %v, want %v", result, test.tok)
}
})
}
}
+func tokStr(tokens []scanner.Token) (s string) {
+ for _, t := range tokens {
+ s += fmt.Sprintf("%#v ", t.Str)
+ }
+ return
+}
+
var tests = []struct {
src, tok, err string
}{{ // #00
@@ -192,4 +132,7 @@ def"`,
}, { // #28
src: "f(3).\nfield",
tok: `"f" "(3)" "." "field" ";" `,
+}, { // #29
+ src: "\n\n\tif i < 1 {return 0}",
+ tok: `"if" "i" "<" "1" "{return 0}" ";" `,
}}
diff --git a/vm1/README.md b/vm/README.md
index a06dd34..92b1ac8 100644
--- a/vm1/README.md
+++ b/vm/README.md
@@ -1,8 +1,8 @@
-# vm1
+# vm
-`vm1` is a bytecode based stack machine.
+`vm` is a bytecode based stack machine.
-The purpose of `vm1` is to provide a simple, fast, embeddable and
+The purpose of `vm` is to provide a simple, fast, embeddable and
portable Go execution environment.
```mermaid
@@ -11,7 +11,7 @@ s[ ] --> |source| a(scanner)
--> |tokens| b(parser)
--> |AST| c(codegen)
--> |bytecode| d[vm]
-subgraph vm1
+subgraph vm
d
end
style s height:0px;
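To make the renamed package's surface concrete, here is a minimal usage sketch based only on the API visible in this diff; how a `Machine` is normally constructed is not shown here, so the zero value below is an assumption, and the instruction encoding follows the vm tests (`{pos, opcode, operand...}`):

```go
package main

import (
	"fmt"

	"github.com/gnolang/parscan/vm"
)

func main() {
	m := &vm.Machine{} // assumption: a zero-value Machine is usable
	m.PushCode(
		[]int64{0, vm.Push, 2}, // -- 2
		[]int64{0, vm.Push, 3}, // 2 -- 2 3
		[]int64{0, vm.Add},     // 2 3 -- 5
		[]int64{0, vm.Exit},
	)
	m.SetIP(0)
	if err := m.Run(); err != nil {
		panic(err)
	}
	fmt.Println(m.Top()) // expected: 5
}
```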
diff --git a/vm1/vm.go b/vm/vm.go
index 01bba83..4057fc1 100644
--- a/vm1/vm.go
+++ b/vm/vm.go
@@ -1,4 +1,4 @@
-package vm1
+package vm
import (
"fmt" // for tracing only
@@ -15,10 +15,13 @@ const (
Nop = iota // --
Add // n1 n2 -- sum ; sum = n1+n2
Assign // val -- ; mem[$1] = val
+	Fassign   // val -- ; mem[fp+$1] = val ; like Assign, but relative to the frame pointer
Call // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = prog[f](a1, ...)
+	Calli     // [a1 .. ai] -- [r1 .. rj] ; like Call, but the callee offset is the immediate $1
CallX // f [a1 .. ai] -- [r1 .. rj] ; r1, ... = mem[f](a1, ...)
Dup // addr -- value ; value = mem[addr]
Fdup // addr -- value ; value = mem[addr]
+ Equal // n1 n2 -- cond ; cond = n1 == n2
Exit // -- ;
Jump // -- ; ip += $1
JumpTrue // cond -- ; if cond { ip += $1 }
@@ -37,10 +40,13 @@ var strop = [...]string{ // for VM tracing.
Add: "Add",
Assign: "Assign",
Call: "Call",
+ Calli: "Calli",
CallX: "CallX",
Dup: "Dup",
- Fdup: "Fdup",
+ Equal: "Equal",
Exit: "Exit",
+ Fassign: "Fassign",
+ Fdup: "Fdup",
Jump: "Jump",
JumpTrue: "JumpTrue",
JumpFalse: "JumpFalse",
@@ -94,7 +100,16 @@ func (m *Machine) Run() (err error) {
case Assign:
mem[op[2]] = mem[sp-1]
mem = mem[:sp-1]
+ case Fassign:
+ mem[fp+int(op[2])-1] = mem[sp-1]
+ mem = mem[:sp-1]
case Call:
+ nip := mem[sp-1].(int)
+ mem = append(mem[:sp-1], ip+1, fp)
+ ip = nip
+ fp = sp + 1
+ continue
+ case Calli:
mem = append(mem, ip+1, fp)
fp = sp + 2
ip += int(op[2])
@@ -103,17 +118,20 @@ func (m *Machine) Run() (err error) {
l := int(op[2])
in := make([]reflect.Value, l)
for i := range in {
- in[l-1-i] = reflect.ValueOf(mem[sp-1-i])
+ in[i] = reflect.ValueOf(mem[sp-2-i])
}
- f := reflect.ValueOf(mem[sp-l-1])
+ f := reflect.ValueOf(mem[sp-1])
mem = mem[:sp-l-1]
for _, v := range f.Call(in) {
mem = append(mem, v.Interface())
}
case Dup:
mem = append(mem, mem[int(op[2])])
+ case Equal:
+ mem[sp-2] = mem[sp-2].(int) == mem[sp-1].(int)
+ mem = mem[:sp-1]
case Exit:
- return
+ return err
case Fdup:
mem = append(mem, mem[int(op[2])+fp-1])
case Jump:
@@ -134,7 +152,7 @@ func (m *Machine) Run() (err error) {
continue
}
case Lower:
- mem[sp-2] = mem[sp-2].(int) < mem[sp-1].(int)
+ mem[sp-2] = mem[sp-1].(int) < mem[sp-2].(int)
mem = mem[:sp-1]
case Loweri:
mem[sp-1] = mem[sp-1].(int) < int(op[2])
@@ -149,7 +167,7 @@ func (m *Machine) Run() (err error) {
mem = append(mem[:ofp-int(op[2])-int(op[3])-1], mem[sp-int(op[2]):]...)
continue
case Sub:
- mem[sp-2] = mem[sp-2].(int) - mem[sp-1].(int)
+ mem[sp-2] = mem[sp-1].(int) - mem[sp-2].(int)
mem = mem[:sp-1]
case Subi:
mem[sp-1] = mem[sp-1].(int) - int(op[2])
@@ -165,13 +183,13 @@ func (m *Machine) PushCode(code ...[]int64) (p int) {
}
func (m *Machine) SetIP(ip int) { m.ip = ip }
-func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return }
-func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return }
+func (m *Machine) Push(v ...any) (l int) { l = len(m.mem); m.mem = append(m.mem, v...); return l }
+func (m *Machine) Pop() (v any) { l := len(m.mem) - 1; v = m.mem[l]; m.mem = m.mem[:l]; return v }
func (m *Machine) Top() (v any) {
if l := len(m.mem); l > 0 {
v = m.mem[l-1]
}
- return
+ return v
}
func (m *Machine) PopExit() {
@@ -180,17 +198,32 @@ func (m *Machine) PopExit() {
}
}
+func CodeString(op []int64) string {
+ switch len(op) {
+ case 2:
+ return strop[op[1]]
+ case 3:
+ return fmt.Sprintf("%s %d", strop[op[1]], op[2])
+ case 4:
+ return fmt.Sprintf("%s %d %d", strop[op[1]], op[2], op[3])
+ }
+ return ""
+}
+
// Disassemble returns the code as a readable string.
func Disassemble(code [][]int64) (asm string) {
for _, op := range code {
- switch len(op) {
- case 2:
- asm += strop[op[1]] + "\n"
- case 3:
- asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
- case 4:
- asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3])
- }
+ asm += CodeString(op) + "\n"
+ /*
+ switch len(op) {
+ case 2:
+ asm += strop[op[1]] + "\n"
+ case 3:
+ asm += fmt.Sprintf("%s %d\n", strop[op[1]], op[2])
+ case 4:
+ asm += fmt.Sprintf("%s %d %d\n", strop[op[1]], op[2], op[3])
+ }
+ */
}
return asm
}
diff --git a/vm1/vm_test.go b/vm/vm_test.go
index 272a321..08e8054 100644
--- a/vm1/vm_test.go
+++ b/vm/vm_test.go
@@ -1,10 +1,15 @@
-package vm1
+package vm
import (
"fmt"
+ "log"
"testing"
)
+func init() {
+ log.SetFlags(log.Lshortfile)
+}
+
func TestVM(t *testing.T) {
for _, test := range tests {
test := test
@@ -60,62 +65,90 @@ var tests = []struct {
}, { // #01 -- Calling a function defined outside the VM.
sym: []any{fmt.Println, "Hello"},
code: [][]int64{
+ {0, Dup, 0},
{0, CallX, 1},
{0, Exit},
},
- start: 0, end: 2, mem: "[6 <nil>]",
+ start: 1, end: 3, mem: "[6 <nil>]",
}, { // #02 -- Defining and calling a function in VM.
code: [][]int64{
{0, Jump, 3}, // 0
{0, Push, 3}, // 1
{0, Return, 1, 1}, // 2
{0, Push, 1}, // 3
- {0, Call, -3}, // 4
+ {0, Calli, -3}, // 4
{0, Exit}, // 5
},
start: 0, end: 1, mem: "[3]",
-}, { // #03 -- Fibonacci numbers, hand written. Showcase recursivity.
+}, { // #03 -- Defining and calling a function in VM.
code: [][]int64{
- {0, Jump, 17}, // 0
- {0, Fdup, -2}, // 1 [i]
- {0, Push, 2}, // 2 [i 2]
+ {0, Jump, 3}, // 0
+ {0, Push, 3}, // 1
+ {0, Return, 1, 1}, // 2
+ {0, Push, 1}, // 3
+ {0, Push, 1}, // 4
+ {0, Call}, // 5
+ {0, Exit}, // 6
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #04 -- Defining and calling a function in VM.
+ code: [][]int64{
+ {0, Jump, 5}, // 0
+ {0, Push, 3}, // 1
+ {0, Fassign, -2}, // 2
+ {0, Fdup, -2}, // 3
+ {0, Return, 1, 1}, // 4
+ {0, Push, 1}, // 5
+ {0, Push, 1}, // 6
+ {0, Call}, // 7
+ {0, Exit}, // 8
+ },
+ start: 0, end: 1, mem: "[3]",
+}, { // #05 -- Fibonacci numbers, hand-written. Showcases recursion.
+ code: [][]int64{
+ {0, Jump, 19}, // 0
+	{0, Push, 2},       // 1 [2]
+	{0, Fdup, -2},      // 2 [2 i]
{0, Lower}, // 3 [true/false]
- {0, JumpTrue, 11}, // 4 [], goto 16
- {0, Fdup, -2}, // 5 [i]
+ {0, JumpTrue, 13}, // 4 [], goto 17
{0, Push, 2}, // 6 [i 2]
+ {0, Fdup, -2}, // 5 [i]
{0, Sub}, // 7 [(i-2)]
- {0, Call, -7}, // 8 [fib(i-2)]
- {0, Fdup, -2}, // 9 [fib(i-2) i]
- {0, Push, 1}, // 10 [(i-2) i 1]
- {0, Sub}, // 11 [(i-2) (i-1)]
- {0, Call, -11}, // 12 [fib(i-2) fib(i-1)]
- {0, Add}, // 13 [fib(i-2)+fib(i-1)]
- {0, Return, 1, 1}, // 14 return i
- {0, Fdup, -2}, // 15 [i]
+ {0, Push, 1}, // 8
+ {0, Call}, // 9 [fib(i-2)]
+	{0, Push, 1},       // 10 [fib(i-2) 1]
+	{0, Fdup, -2},      // 11 [fib(i-2) 1 i]
+ {0, Sub}, // 12 [(i-2) (i-1)]
+ {0, Push, 1}, // 13
+ {0, Call}, // 14 [fib(i-2) fib(i-1)]
+ {0, Add}, // 15 [fib(i-2)+fib(i-1)]
{0, Return, 1, 1}, // 16 return i
- {0, Push, 6}, // 17 [1]
- {0, Call, -17}, // 18 [fib(*1)]
- {0, Exit}, // 19
+ {0, Fdup, -2}, // 17 [i]
+ {0, Return, 1, 1}, // 18 return i
+	{0, Push, 6},       // 19 [6]
+ {0, Push, 1}, // 20
+ {0, Call}, // 21 [fib(*1)]
+ {0, Exit}, // 22
},
start: 0, end: 1, mem: "[8]",
-}, { // #04 -- Fibonacci with some immediate instructions.
+}, { // #06 -- Fibonacci with some immediate instructions.
code: [][]int64{
{0, Jump, 14}, // 0
{0, Fdup, -2}, // 1 [i]
{0, Loweri, 2}, // 2 [true/false]
- {0, JumpTrue, 9}, // 3 [], goto 13
+ {0, JumpTrue, 9}, // 3 [], goto 12
{0, Fdup, -2}, // 4 [i]
{0, Subi, 2}, // 5 [(i-2)]
- {0, Call, -5}, // 6 [fib(i-2)]
+ {0, Calli, -5}, // 6 [fib(i-2)]
{0, Fdup, -2}, // 7 [fib(i-2) i]
{0, Subi, 1}, // 8 [(i-2) (i-1)]
- {0, Call, -8}, // 9 [fib(i-2) fib(i-1)], call 1
+ {0, Calli, -8}, // 9 [fib(i-2) fib(i-1)], call 1
{0, Add}, // 10 [fib(i-2)+fib(i-1)]
{0, Return, 1, 1}, // 11 return i
{0, Fdup, -2}, // 12 [i]
{0, Return, 1, 1}, // 13 return i
{0, Push, 6}, // 14 [1]
- {0, Call, -14}, // 15 [fib(*1)], call 1
+ {0, Calli, -14}, // 15 [fib(*1)], call 1
{0, Exit}, // 16
},
start: 0, end: 1, mem: "[8]",
diff --git a/vm0/README.md b/vm0/README.md
deleted file mode 100644
index fc89429..0000000
--- a/vm0/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# vm0
-
-vm0 is a virtual machine executing directly the syntax tree.
-
-```mermaid
-graph LR
-s[ ] --> |source| a(scanner)
---> |tokens| b(parser)
---> |AST| c(vm)
-subgraph vm0
- c
-end
-style s height:0px;
-```
-
-The execution is performed by walking the AST and evaluating each
-visited node.
-
-
-## Motivation
-
-- have a reference execution model for each defined language
-- usable for compilation time evaluation
-- to modelize similar VMs (i.e. gnovm)
-- to validate and compare with other VMs (once it is itself validated)
-- could serve as a basis for AST based symbolic execution (to be
- investigated)
diff --git a/vm0/func.go b/vm0/func.go
deleted file mode 100644
index ee503bc..0000000
--- a/vm0/func.go
+++ /dev/null
@@ -1,75 +0,0 @@
-package vm0
-
-import (
- "reflect"
- "strings"
-
- "github.com/gnolang/parscan/parser"
-)
-
-var types = map[string]reflect.Type{
- "int": reflect.TypeOf(0),
- "string": reflect.TypeOf(""),
-}
-
-func (i *Interp) callFunc(n *parser.Node) {
- fp := i.fp
- l := len(i.stack)
- nargs := i.stack[l-1].(int)
- args := make([]reflect.Value, nargs)
- for j := range args {
- args[nargs-j-1] = reflect.ValueOf(i.stack[l-2-j])
- }
- f := reflect.ValueOf(i.stack[l-2-nargs])
- out := f.Call(args)
- i.fp = fp
- i.stack = i.stack[:l-2-nargs]
- for _, v := range out {
- i.push(v.Interface())
- }
-}
-
-func (i *Interp) declareFunc(r *parser.Node, scope string) {
- fname := r.Child[0].Content()
-
- // Add symbols for input and output function arguments.
- inArgs := r.Child[1].Child
- fscope := strings.TrimPrefix(scope+"/"+fname+"/", "/")
- in := make([]reflect.Type, len(inArgs))
- for j, c := range inArgs {
- i.sym[fscope+c.Content()] = j
- in[j] = types[c.Child[0].Content()]
- }
- var out []reflect.Type
- if len(r.Child) > 3 { // function has return values
- if i.IsBlock(r.Child[2]) {
- outArgs := r.Child[2].Child
- out = make([]reflect.Type, len(outArgs))
- for j, c := range outArgs {
- out[j] = types[c.Content()]
- }
- } else {
- out = []reflect.Type{types[r.Child[2].Content()]}
- }
- }
- funT := reflect.FuncOf(in, out, false)
-
- // Generate a wrapper function which will run function body AST.
- f := reflect.MakeFunc(funT, func(args []reflect.Value) (res []reflect.Value) {
- i.fp = len(i.stack) // fp will be restored by caller (callFunc).
- for _, arg := range args {
- i.push(arg.Interface())
- }
- if err := i.Run(r.Child[len(r.Child)-1], fscope); err != nil {
- panic(err)
- }
- b := len(i.stack) - len(out)
- for j := range out {
- res = append(res, reflect.ValueOf(i.stack[b+j]))
- }
- return res
- })
-
- // Add a symbol for newly created func.
- i.sym[scope+fname] = i.push(f.Interface()) - i.fp
-}
diff --git a/vm0/vm.go b/vm0/vm.go
deleted file mode 100644
index 62344ed..0000000
--- a/vm0/vm.go
+++ /dev/null
@@ -1,157 +0,0 @@
-package vm0
-
-import (
- "fmt"
- "os"
- "strings"
-
- "github.com/gnolang/parscan/parser"
-)
-
-const debug = true
-
-type Interp struct {
- *parser.Parser
- stack []any // stack memory space
- fp int // frame pointer: index of current frame in stack
- sym map[string]int // symbol table, maps scoped identifiers to offsets relative to fp
-}
-
-func New(p *parser.Parser) (i *Interp) {
- i = &Interp{Parser: p, stack: []any{}, sym: map[string]int{}}
- i.sym["println"] = i.push(fmt.Println)
- return i
-}
-
-func (i *Interp) Eval(src string) (res any, err error) {
- n := &parser.Node{}
- if n.Child, err = i.Parse(src, n); err != nil {
- return
- }
- if debug {
- n.Dot(os.Getenv("DOT"), "")
- }
- for _, nod := range n.Child {
- if err = i.Run(nod, ""); err != nil {
- break
- }
- }
- return
-}
-
-// Run implements a stack based virtual machine which directly walks the AST.
-func (i *Interp) Run(node *parser.Node, scope string) (err error) {
- stop := false
-
- node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) {
- // Node pre-order processing.
- switch n.Kind {
- case parser.BlockStmt:
- if a != nil && a.Kind == parser.StmtIf {
- // Control-flow in 'if' sub-tree
- if k == 1 {
- // 'if' first body branch, evaluated when condition is true.
- if len(a.Child) > 2 {
- return i.peek().(bool) // keep condition on stack for else branch
- }
- return i.pop().(bool)
- }
- // 'else' body branch, evaluated when condition is false.
- return !i.pop().(bool)
- }
- case parser.DeclFunc:
- i.declareFunc(n, scope)
- return false
- }
- return true
- }, func(n, a *parser.Node, k int) (ok bool) {
- // Node post-order processing.
- if stop {
- return false
- }
- l := len(i.stack)
- switch n.Kind {
- case parser.LiteralNumber:
- switch v := n.Value().(type) {
- case int64:
- i.push(int(v))
- case error:
- err = v
- return false
- default:
- err = fmt.Errorf("type not supported: %T\n", v)
- return false
- }
- case parser.LiteralString:
- i.push(n.Block())
- case parser.OpInferior:
- i.stack[l-2] = i.stack[l-2].(int) < i.stack[l-1].(int)
- i.stack = i.stack[:l-1]
- case parser.OpAdd:
- i.stack[l-2] = i.stack[l-2].(int) + i.stack[l-1].(int)
- i.stack = i.stack[:l-1]
- case parser.OpSubtract:
- i.stack[l-2] = i.stack[l-2].(int) - i.stack[l-1].(int)
- i.stack = i.stack[:l-1]
- case parser.OpMultiply:
- i.stack[l-2] = i.stack[l-2].(int) * i.stack[l-1].(int)
- i.stack = i.stack[:l-1]
- case parser.OpAssign, parser.OpDefine:
- i.stack[i.stack[l-2].(int)] = i.stack[l-1]
- i.stack = i.stack[:l-2]
- case parser.StmtReturn:
- stop = true
- return false
- case parser.ExprCall:
- i.push(len(n.Child[1].Child)) // number of arguments to call
- i.callFunc(n)
- case parser.Ident:
- name := n.Content()
- v, sc, ok := i.getSym(name, scope)
- fp := i.fp
- if sc == "" {
- fp = 0
- }
- if ok {
- if a.Content() == ":=" {
- i.push(fp + v) // reference for assign (absolute index)
- break
- }
- i.push(i.stack[fp+v]) // value
- break
- }
- if a.Content() != ":=" {
- fmt.Println("error: undefined:", name, "scope:", scope)
- }
- v = i.push(any(nil)) - i.fp // v is the address of new value, relative to frame pointer
- i.sym[scope+name] = v // bind scoped name to address in symbol table
- i.push(v)
- }
- return true
- })
- return nil
-}
-
-// getSym searches for an existing symbol starting from the deepest scope.
-func (i *Interp) getSym(name, scope string) (index int, sc string, ok bool) {
- for {
- if index, ok = i.sym[scope+name]; ok {
- return index, scope, ok
- }
- scope = strings.TrimSuffix(scope, "/")
- j := strings.LastIndex(scope, "/")
- if j == -1 {
- scope = ""
- break
- }
- if scope = scope[:j]; scope == "" {
- break
- }
- }
- index, ok = i.sym[name]
- return index, scope, ok
-}
-
-func (i *Interp) peek() any { return i.stack[len(i.stack)-1] }
-func (i *Interp) push(v any) (l int) { l = len(i.stack); i.stack = append(i.stack, v); return }
-func (i *Interp) pop() (v any) { l := len(i.stack) - 1; v = i.stack[l]; i.stack = i.stack[:l]; return }
diff --git a/vm0/vm_test.go b/vm0/vm_test.go
deleted file mode 100644
index a4a24e6..0000000
--- a/vm0/vm_test.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package vm0
-
-import (
- "os"
- "testing"
-
- "github.com/gnolang/parscan/lang/golang"
-)
-
-func TestEval(t *testing.T) {
- i := New(golang.GoParser)
- t.Logf("%#v\n", i.Parser)
- //i.Eval("println(2*5)")
- //n, _ := i.Parse("println(2*5)")
- //n, _ := i.Parse(`a := 2 + 5`)
- src := `a := 2`
- nodes, err := i.Parse(src, nil)
- if err != nil {
- t.Errorf("error %v", err)
- }
- i.Adot(nodes, os.Getenv("DOT"))
- for _, n := range nodes {
- err := i.Run(n, "")
- t.Log(err)
- }
-}