summaryrefslogtreecommitdiff
path: root/parser
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-10-12 10:51:58 +0200
committerGitHub <noreply@github.com>2023-10-12 10:51:58 +0200
commit37b9da32d3b911091deb254f6cba2a137c471287 (patch)
treeb4451de0fa0473a937a77d39fd1f8a4f87c8f60d /parser
parenta21b9b12ad865a19ff687645082f9093c4101039 (diff)
move to a direct byte code compiler (#8)
* chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification in lang package This avoid a cyclic dependency in scanner_test which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also parser now generate function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing
Diffstat (limited to 'parser')
-rw-r--r--parser/README.md157
-rw-r--r--parser/compiler.go250
-rw-r--r--parser/dot.go89
-rw-r--r--parser/interpreter.go52
-rw-r--r--parser/interpreter_test.go66
-rw-r--r--parser/kind.go56
-rw-r--r--parser/node.go50
-rw-r--r--parser/parse.go519
-rw-r--r--parser/parse_test.go257
-rw-r--r--parser/symbol.go67
-rw-r--r--parser/type.go117
11 files changed, 975 insertions, 705 deletions
diff --git a/parser/README.md b/parser/README.md
index b8bc0d4..d46608c 100644
--- a/parser/README.md
+++ b/parser/README.md
@@ -1,71 +1,112 @@
# Parser
-A parser takes an array of tokens (produced by the scanner) in input and
-returns a node representing a syntax tree. A node is an object
-containing a kind, the corresponding token and the ordered references to
-descendent nodes.
+See if we can build an almost single pass compiler for Go doing syntax
+directed translation, without any complex data structure (no syntax
+tree), only lists of tokens.
-```mermaid
-graph LR
-s[ ] --> |source| a(scanner)
---> |tokens| b(parser)
---> |AST| c[ ]
-subgraph parser
- b
-end
-style s height:0px;
-style c height:0px;
-```
+The goal is to have the shortest and simplest path from source to
+bytecode.
-A goal is to make the parser generic enough so it can generate syntax
-trees for most of existing programming languages (no claim of generality
-yet), provided a small set of generating rules per language, and a small
-set of validating rules (yet to be defined) to detect invalid
-constructs.
+## Design
-The input tokens are particular in the sense that they include classical
-lexical items such as words, separators, numbers, but also strings and
-nested blocks, which are resolved at scanning stage rather than parsing
-stage. See the scanner for more details.
+The input of parser is a list of tokens produced by the scanner.
+Multiple tokens are processed at once. The minimal set to get
+meaningful results (not an error or nil) is a complete statement.
-The language specification includes the following:
+The output of parser is also a list of tokens, to be consumed by
+the compiler to produce bytecode. The output tokens set is identical
+to the bytecode instructions set except that:
-- a scanner specification, to produce the set of possible tokens.
-- a map of node specification per token name. The node specification
- defines some parameters influing how the tree is generated.
+- code locations may be provided as labels instead of numerical
+ values,
+- memory locations for constants and variables may be provided as
+ symbol names instead of numerical values.
-## Development status
+## Status
-A successful test must be provided to check the status.
+Go language support:
-- [x] binary operator expressions
-- [x] unary operator (prefix) expressions
-- [ ] unary operator (suffix) expressions
-- [x] operator precedence rules
-- [x] parenthesis in expressions
-- [x] call expressions
-- [ ] nested calls
-- [x] index expressions
-- [x] single assignments
-- [ ] multi assignments
-- [x] simple `if` statement (no `else`)
-- [ ] full `if` statement (including `else`, `else if`)
-- [x] init expressions in `if` statements
-- [x] statement blocks
-- [x] comments
+- [x] named functions
+- [ ] anonymous functions (closures)
+- [ ] methods
+- [x] internal function calls
+- [x] external function calls (calling runtime symbols in interpreter)
+- [ ] export to runtime
+- [ ] builtin calls (new, make, copy, delete, len, cap, ...)
+- [ ] out of order declarations
+- [ ] arbitrary precision constants
+- [x] basic types
+- [ ] complete numeric types
+- [x] function types
+- [ ] variadic functions
+- [ ] pointers
+- [ ] structures
+- [ ] embedded structures
+- [ ] recursive structures
+- [ ] interfaces
+- [ ] arrays, slices
+- [ ] maps
+- [ ] deterministic maps
+- [ ] channel types
+- [ ] channel operations
+- [x] var defined by assign :=
+- [x] var assign =
+- [ ] var declaration
+- [ ] type declaration
+- [x] func declaration
+- [ ] const declaration
+- [ ] iota expression
+- [ ] defer statement
+- [ ] recover statement
+- [ ] go statement
+- [x] if statement (including else and else if)
- [ ] for statement
- [ ] switch statement
+- [ ] break statement
+- [ ] continue statement
+- [ ] fallthrough statement
+- [ ] goto statement
- [ ] select statement
-- [x] return statement
-- [x] function declaration
-- [ ] method declaration
-- [ ] anonymous functions (closures)
-- [ ] type declaration
-- [ ] var, const, type single declaration
-- [ ] var, const, type multi declaration
-- [ ] type parametric expressions
-- [x] literal numbers (see scanner)
-- [x] literal strings
-- [ ] composite literal
-- [ ] import statements
-- [ ] go.mod syntax
+- [x] binary operators
+- [ ] unary operators
+- [ ] logical operators && and ||
+- [ ] assign operators
+- [ ] operator precedence
+- [x] parenthesis expressions
+- [x] call expressions
+- [ ] index expressions
+- [ ] selector expressions
+- [ ] type conversions
+- [ ] type assertions
+- [ ] parametric types (generic)
+- [ ] type parametric functions (generic)
+- [ ] type constraints (generic)
+- [ ] type checking
+- [ ] comment pragmas
+- [ ] packages import
+- [ ] modules
+
+Other items:
+
+- [x] REPL
+- [x] multiline statements in REPL
+- [ ] completion, history in REPL
+- [x] eval strings
+- [x] eval files (including stdin, ...)
+- [x] debug traces for scanner, parser, compiler, bytecode vm
+- [x] simple interpreter tests to exercise from source to execution
+- [ ] compile time error detection and diagnosis
+- [ ] stack dump
+- [ ] symbol tables, data tables, code bound to source lines
+- [ ] interactive debugger: breaks, continue, introspection, ...
+- [ ] machine level debugger
+- [ ] source level debugger
+- [ ] replay debugger, backward instruction execution
+- [ ] vm monitor: live memory / code display during run
+- [ ] stdlib wrappers a la yaegi
+- [ ] system and environment sandboxing
+- [ ] build constraints (arch, sys, etc)
+- [ ] test command (running go test / benchmark / example files)
+- [ ] skipping / including test files
+- [ ] test coverage
+- [ ] fuzzy tests for scanner, vm, ...
diff --git a/parser/compiler.go b/parser/compiler.go
new file mode 100644
index 0000000..5b0ed81
--- /dev/null
+++ b/parser/compiler.go
@@ -0,0 +1,250 @@
+package parser
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+ "strconv"
+
+ "github.com/gnolang/parscan/lang"
+ "github.com/gnolang/parscan/scanner"
+ "github.com/gnolang/parscan/vm"
+)
+
+type Compiler struct {
+ *Parser
+ Code [][]int64 // produced code, to fill VM with
+ Data []any // produced data, will be at the bottom of VM stack
+ Entry int // offset in Code to start execution from (skip function defintions)
+
+ strings map[string]int // locations of strings in Data
+}
+
+func NewCompiler(scanner *scanner.Scanner) *Compiler {
+ return &Compiler{
+ Parser: &Parser{Scanner: scanner, symbols: initUniverse(), labelCount: map[string]int{}},
+ Entry: -1,
+ strings: map[string]int{},
+ }
+}
+
+func (c *Compiler) Emit(op ...int64) int {
+ op = append([]int64{}, op...)
+ l := len(c.Code)
+ c.Code = append(c.Code, op)
+ return l
+}
+
+func (c *Compiler) AddSym(name string, value any) int {
+ p := len(c.Data)
+ c.Data = append(c.Data, value)
+ c.Parser.AddSym(p, name, value)
+ return p
+}
+
+func (c *Compiler) Codegen(tokens Tokens) (err error) {
+ fixList := Tokens{}
+ function := []string{""}
+
+ log.Println("Codegen tokens:", tokens)
+
+ for i, t := range tokens {
+ scope := function[len(function)-1]
+ switch t.Id {
+ case lang.Int:
+ n, err := strconv.Atoi(t.Str)
+ if err != nil {
+ return err
+ }
+ c.Emit(int64(t.Pos), vm.Push, int64(n))
+
+ case lang.String:
+ s := t.Block()
+ i, ok := c.strings[s]
+ if !ok {
+ i = len(c.Data)
+ c.Data = append(c.Data, s)
+ c.strings[s] = i
+ }
+ c.Emit(int64(t.Pos), vm.Dup, int64(i))
+
+ case lang.Add:
+ c.Emit(int64(t.Pos), vm.Add)
+
+ case lang.Sub:
+ c.Emit(int64(t.Pos), vm.Sub)
+
+ case lang.Less:
+ c.Emit(int64(t.Pos), vm.Lower)
+
+ case lang.Call:
+ c.Emit(int64(t.Pos), vm.Call)
+
+ case lang.CallX:
+ c.Emit(int64(t.Pos), vm.CallX, int64(t.Beg))
+
+ case lang.Define:
+ // TODO: support assignment to local, composite objects
+ st := tokens[i-1]
+ l := len(c.Data)
+ c.Data = append(c.Data, nil)
+ c.addSym(l, st.Str, nil, symVar, nil, false)
+ c.Emit(int64(st.Pos), vm.Assign, int64(l))
+
+ case lang.Enter:
+ // TODO: merge with label ?
+ function = append(function, t.Str[6:])
+
+ case lang.Exit:
+ function = function[:len(function)-1]
+
+ case lang.Assign:
+ st := tokens[i-1]
+ s, _, ok := c.getSym(st.Str, scope)
+ if !ok {
+ return fmt.Errorf("symbol not found: %s", st.Str)
+ }
+ if s.local {
+ c.Emit(int64(st.Pos), vm.Fassign, int64(s.index))
+ } else {
+ c.Emit(int64(st.Pos), vm.Assign, int64(s.index))
+ }
+
+ case lang.Equal:
+ c.Emit(int64(t.Pos), vm.Equal)
+
+ case lang.Ident:
+ if i < len(tokens)-1 {
+ switch t1 := tokens[i+1]; t1.Id {
+ case lang.Define, lang.Assign, lang.Colon:
+ continue
+ }
+ }
+ s, _, ok := c.getSym(t.Str, scope)
+ if !ok {
+ return fmt.Errorf("symbol not found: %s", t.Str)
+ }
+ if s.local {
+ c.Emit(int64(t.Pos), vm.Fdup, int64(s.index))
+ } else {
+ c.Emit(int64(t.Pos), vm.Dup, int64(s.index))
+ }
+
+ case lang.Label:
+ // If the label is a function, the symbol already exists
+ s, _, ok := c.getSym(t.Str, scope)
+ lc := len(c.Code)
+ if ok {
+ s.value = lc
+ if s.kind == symFunc {
+ // label is a function entry point, register its code address in data.
+ s.index = len(c.Data)
+ c.Data = append(c.Data, lc)
+ } else {
+ c.Data[s.index] = lc
+ }
+ } else {
+ ld := len(c.Data)
+ c.Data = append(c.Data, lc)
+ c.addSym(ld, t.Str, lc, symLabel, nil, false)
+ }
+
+ case lang.JumpFalse:
+ label := t.Str[10:]
+ i := 0
+ if s, _, ok := c.getSym(label, scope); !ok {
+ // t.Beg contains the position in code which needs to be fixed.
+ t.Beg = len(c.Code)
+ fixList = append(fixList, t)
+ } else {
+ i = s.value.(int)
+ }
+ c.Emit(int64(t.Pos), vm.JumpFalse, int64(i))
+
+ case lang.Goto:
+ label := t.Str[5:]
+ i := 0
+ if s, _, ok := c.getSym(label, scope); !ok {
+ t.Beg = len(c.Code)
+ fixList = append(fixList, t)
+ } else {
+ i = s.value.(int)
+ }
+ c.Emit(int64(t.Pos), vm.Jump, int64(i))
+
+ case lang.Return:
+ c.Emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End))
+
+ default:
+ return fmt.Errorf("Codegen: unsupported token %v", t)
+ }
+ }
+
+ // Finally we fix unresolved labels for jump destinations.
+ for _, t := range fixList {
+ var label string
+ switch t.Id {
+ case lang.Goto:
+ label = t.Str[5:]
+ case lang.JumpFalse:
+ label = t.Str[10:]
+ }
+ s, _, ok := c.getSym(label, "")
+ if !ok {
+ return fmt.Errorf("label not found: %v", label)
+ }
+ c.Code[t.Beg][2] = int64(s.value.(int) - t.Beg)
+
+ }
+ return err
+}
+
+func (c *Compiler) PrintCode() {
+ labels := map[int]string{}
+ for name, sym := range c.symbols {
+ if sym.kind == symLabel || sym.kind == symFunc {
+ labels[sym.value.(int)] = name
+ }
+ }
+ fmt.Println("# Code:")
+ for i, l := range c.Code {
+ if label, ok := labels[i]; ok {
+ fmt.Println(label + ":")
+ }
+ extra := ""
+ switch l[1] {
+ case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.Calli:
+ if d, ok := labels[i+(int)(l[2])]; ok {
+ extra = "// " + d
+ }
+ }
+ fmt.Printf("%4d %-14v %v\n", i, vm.CodeString(l), extra)
+ }
+}
+
+type entry struct {
+ name string
+ *symbol
+}
+
+func (c *Compiler) PrintData() {
+ dict := map[int]entry{}
+ for name, sym := range c.symbols {
+ if !sym.used || sym.local || sym.kind == symLabel {
+ continue
+ }
+ dict[sym.index] = entry{name, sym}
+ }
+ fmt.Println("# Data:")
+ for i, d := range c.Data {
+ fmt.Printf("%4d %T %v %v\n", i, d, d, dict[i])
+ }
+}
+
+func (c *Compiler) NumIn(i int) (int, bool) {
+ t := reflect.TypeOf(c.Data[i])
+ if t.Kind() == reflect.Func {
+ return t.NumIn(), t.IsVariadic()
+ }
+ return -1, false
+}
diff --git a/parser/dot.go b/parser/dot.go
deleted file mode 100644
index 11d5014..0000000
--- a/parser/dot.go
+++ /dev/null
@@ -1,89 +0,0 @@
-package parser
-
-import (
- "bytes"
- "fmt"
- "io"
- "log"
- "os/exec"
- "strings"
-)
-
-func (*Parser) Adot(nodes []*Node, c string) {
- if c == "" {
- return
- }
- n := &Node{Child: nodes}
- n.Dot(c, "")
-}
-
-func (n *Node) Dot(c, s string) {
- dw, cmd := dotWriter(c)
- n.astDot(dw, s)
- if cmd == nil {
- return
- }
- if err := cmd.Wait(); err != nil {
- log.Fatal(err)
- }
-}
-
-func (n *Node) Sdot(s string) string {
- var buf bytes.Buffer
- n.astDot(&buf, s)
- return buf.String()
-}
-
-// TODO: rewrite it using Walk2
-func (n *Node) astDot(out io.Writer, label string) {
- fmt.Fprintf(out, "digraph ast { ")
- if label != "" {
- fmt.Fprintf(out, "labelloc=\"t\"; label=\"%s\";", label)
- }
- anc := map[*Node]*Node{}
- index := map[*Node]int{}
- count := 0
- n.Walk(func(nod *Node) bool {
- index[nod] = count
- count++
-
- for _, c := range nod.Child {
- anc[c] = nod
- }
- name := ""
- if nod.Token != nil {
- name = strings.ReplaceAll(nod.Name(), `"`, `\"`)
- }
- fmt.Fprintf(out, "%d [label=\"%s\"]; ", index[nod], name)
- if anc[nod] != nil {
- fmt.Fprintf(out, "%d -> %d; ", index[anc[nod]], index[nod])
- }
- return true
- }, nil)
- fmt.Fprintf(out, "}")
- if c, ok := out.(io.Closer); ok {
- c.Close()
- }
-}
-
-type nopCloser struct {
- io.Writer
-}
-
-func (nopCloser) Close() error { return nil }
-
-func dotWriter(dotCmd string) (io.WriteCloser, *exec.Cmd) {
- if dotCmd == "" {
- return nopCloser{io.Discard}, nil
- }
- fields := strings.Fields(dotCmd)
- cmd := exec.Command(fields[0], fields[1:]...)
- dotin, err := cmd.StdinPipe()
- if err != nil {
- log.Fatal(err)
- }
- if err = cmd.Start(); err != nil {
- log.Fatal(err)
- }
- return dotin, cmd
-}
diff --git a/parser/interpreter.go b/parser/interpreter.go
new file mode 100644
index 0000000..72fc667
--- /dev/null
+++ b/parser/interpreter.go
@@ -0,0 +1,52 @@
+package parser
+
+import (
+ "github.com/gnolang/parscan/scanner"
+ "github.com/gnolang/parscan/vm"
+)
+
+const debug = true
+
+type Interpreter struct {
+ *Compiler
+ *vm.Machine
+}
+
+func NewInterpreter(s *scanner.Scanner) *Interpreter {
+ return &Interpreter{NewCompiler(s), &vm.Machine{}}
+}
+
+func (i *Interpreter) Eval(src string) (res any, err error) {
+ codeOffset := len(i.Code)
+ dataOffset := 0
+ if codeOffset > 0 {
+ // All data must be copied to the VM the first time only (re-entrance).
+ dataOffset = len(i.Data)
+ }
+ i.PopExit() // Remove last exit from previous run (re-entrance).
+
+ t, err := i.Parse(src)
+ if err != nil {
+ return res, err
+ }
+ if err = i.Codegen(t); err != nil {
+ return res, err
+ }
+ i.Push(i.Data[dataOffset:]...)
+ i.PushCode(i.Code[codeOffset:]...)
+ i.PushCode([]int64{0, vm.Exit})
+ i.SetIP(max(codeOffset, i.Entry))
+ if debug {
+ i.PrintData()
+ i.PrintCode()
+ }
+ err = i.Run()
+ return i.Top(), err
+}
+
+func max(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}
diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go
new file mode 100644
index 0000000..ee927a5
--- /dev/null
+++ b/parser/interpreter_test.go
@@ -0,0 +1,66 @@
+package parser_test
+
+import (
+ "fmt"
+ "log"
+ "testing"
+
+ "github.com/gnolang/parscan/lang/golang"
+ "github.com/gnolang/parscan/parser"
+ "github.com/gnolang/parscan/scanner"
+)
+
+var GoScanner *scanner.Scanner
+
+func init() {
+ log.SetFlags(log.Lshortfile)
+ GoScanner = scanner.NewScanner(golang.GoSpec)
+}
+
+func TestEval(t *testing.T) {
+ for _, test := range goTests {
+ test := test
+ t.Run("", func(t *testing.T) {
+ interp := parser.NewInterpreter(GoScanner)
+ errStr := ""
+ r, e := interp.Eval(test.src)
+ t.Log(r, e)
+ if e != nil {
+ errStr = e.Error()
+ }
+ if errStr != test.err {
+ t.Errorf("got error %#v, want error %#v", errStr, test.err)
+ }
+ if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res {
+ t.Errorf("got %#v, want %#v", res, test.res)
+ }
+ })
+ }
+}
+
+var goTests = []struct {
+ src, res, err string
+}{
+ 0: {src: "", res: "<nil>"},
+ 1: {src: "1+2", res: "3"},
+ 2: {src: "1+", err: "block not terminated"},
+ 3: {src: "a := 1 + 2; b := 0; a + 1", res: "4"},
+ 4: {src: "1+(2+3)", res: "6"},
+ 5: {src: "(1+2)+3", res: "6"},
+ 6: {src: "(6+(1+2)+3)+5", res: "17"},
+ 7: {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"},
+ 8: {src: "a := 2; a = 3; a", res: "3"},
+ 9: {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"},
+ 10: {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"},
+ 11: {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"},
+ 12: {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"},
+ 13: {src: "func f() int {return 2}; a := f(); a", res: "2"},
+ 14: {src: "func f() int {return 2}; f()", res: "2"},
+ 15: {src: "func f(a int) int {return a+2}; f(3)", res: "5"},
+ 16: {src: "func f(a int) int {if a < 4 {a = 5}; return a }; f(3)", res: "5"},
+ 17: {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"},
+ 18: {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"},
+ 19: {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"},
+ 20: {src: "func f(a int, b int, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"},
+ 21: {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"},
+}
diff --git a/parser/kind.go b/parser/kind.go
deleted file mode 100644
index d004471..0000000
--- a/parser/kind.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package parser
-
-import "fmt"
-
-// kind defines the AST node kind. Its name is the concatenation
-// of a category (Block, Decl, Expr, Op, Stmt) and an instance name.
-type Kind int
-
-const (
- Undefined = Kind(iota)
- BlockParen
- BlockStmt
- Comment
- DeclFunc
- ExprCall
- Ident
- LiteralNumber
- LiteralString
- OpAdd
- OpAssign
- OpDefine
- OpDot
- OpInferior
- OpMultiply
- OpSubtract
- StmtIf
- StmtReturn
-)
-
-var kindString = [...]string{
- Undefined: "Undefined",
- BlockParen: "BlockParen",
- BlockStmt: "BlockStmt",
- Comment: "Comment",
- DeclFunc: "DeclFunc",
- ExprCall: "ExprCall",
- Ident: "Ident",
- LiteralString: "LiteralString",
- LiteralNumber: "LiteralNumber",
- OpAdd: "OpAdd",
- OpAssign: "OpAssign",
- OpDefine: "OpDefine",
- OpDot: "OpDot",
- OpInferior: "OpInferior",
- OpMultiply: "OpMultiply",
- OpSubtract: "OpSubtract",
- StmtIf: "StmtIf",
- StmtReturn: "StmtReturn",
-}
-
-func (k Kind) String() string {
- if int(k) < 0 || int(k) > len(kindString) {
- return fmt.Sprintf("unknown kind %d", k)
- }
- return kindString[k]
-}
diff --git a/parser/node.go b/parser/node.go
deleted file mode 100644
index b6e34cd..0000000
--- a/parser/node.go
+++ /dev/null
@@ -1,50 +0,0 @@
-package parser
-
-import "github.com/gnolang/parscan/scanner"
-
-type Node struct {
- Child []*Node // sub-tree nodes
- *scanner.Token // token at origin of the node
- Kind // Node kind, depends on the language spec
-}
-
-// TODO: remove it in favor of Walk2
-func (n *Node) Walk(in, out func(*Node) bool) (stop bool) {
- if in != nil && !in(n) {
- return true
- }
- for _, child := range n.Child {
- if child.Walk(in, out) {
- return
- }
- }
- if out != nil {
- stop = !out(n)
- }
- return
-}
-
-// Idem to walk, but also propagates the ancestor of visited node and child index.
-func (n *Node) Walk2(a *Node, i int, in, out func(*Node, *Node, int) bool) (stop bool) {
- if in != nil && !in(n, a, i) {
- return true
- }
- for j, child := range n.Child {
- if child.Walk2(n, j, in, out) {
- return
- }
- }
- if out != nil {
- stop = !out(n, a, i)
- }
- return
-}
-
-func (n *Node) RemoveChild(i int) {
- n.Child = append(n.Child[:i], n.Child[i+1:]...)
-}
-
-func (n *Node) InsertChild(node *Node, i int) {
- n.Child = append(n.Child[:i+1], n.Child[i:]...)
- n.Child[i] = node
-}
diff --git a/parser/parse.go b/parser/parse.go
index ca89467..5f0b643 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,244 +1,373 @@
package parser
import (
+ "fmt"
+ "log"
+ "strconv"
+ "strings"
+
+ "github.com/gnolang/parscan/lang"
"github.com/gnolang/parscan/scanner"
)
-const (
- Stmt = 1 << iota
- ExprSep
- Call
- Index
- Decl
- MultiOp
-)
+type Parser struct {
+ *scanner.Scanner
-type NodeSpec struct {
- Kind // AST node kind
- Flags uint // composable properties used for AST generation
- Order int // operator precedence order
+ symbols map[string]*symbol
+ function *symbol
+ scope string
+ fname string
+ labelCount map[string]int
}
-type Parser struct {
- *scanner.Scanner
- Spec map[string]NodeSpec
+func (p *Parser) Scan(s string, endSemi bool) (Tokens, error) {
+ return p.Scanner.Scan(s, endSemi)
}
-func (p *Parser) Parse(src string, ctx *Node) (nodes []*Node, err error) {
- tokens, err := p.Scan(src)
- if err != nil {
- return
+type Tokens []scanner.Token
+
+func (toks Tokens) String() (s string) {
+ for _, t := range toks {
+ s += fmt.Sprintf("%#v ", t.Str)
}
- return p.ParseTokens(tokens, ctx)
+ return s
}
-func (p *Parser) ParseTokens(tokens []*scanner.Token, ctx *Node) (nodes []*Node, err error) {
- // TODO: error handling.
- var root *Node // current root node
- var expr *Node // current expression root node
- var prev, c *Node // previous and current nodes
- var lce *Node // last complete expression node
- unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
- prevToken := map[*Node]*scanner.Token{}
-
- for i, t := range tokens {
- prev = c
- c = &Node{
- Token: t,
- Kind: p.Spec[t.Name()].Kind,
+func (toks Tokens) Index(id lang.TokenId) int {
+ for i, t := range toks {
+ if t.Id == id {
+ return i
}
- if i > 0 {
- prevToken[c] = tokens[i-1]
+ }
+ return -1
+}
+
+func (toks Tokens) LastIndex(id lang.TokenId) int {
+ for i := len(toks) - 1; i >= 0; i-- {
+ if toks[i].Id == id {
+ return i
}
- if c.Kind == Comment {
- continue
+ }
+ return -1
+}
+
+func (toks Tokens) Split(id lang.TokenId) (result []Tokens) {
+ for {
+ i := toks.Index(id)
+ if i < 0 {
+ return append(result, toks)
}
- if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
- unaryOp[c] = true
+ result = append(result, toks[:i])
+ toks = toks[i+1:]
+ }
+}
+
+func (p *Parser) Parse(src string) (out Tokens, err error) {
+ log.Printf("Parse src: %#v\n", src)
+ in, err := p.Scan(src, true)
+ if err != nil {
+ return out, err
+ }
+ log.Println("Parse in:", in)
+ for len(in) > 0 {
+ endstmt := in.Index(lang.Semicolon)
+ if endstmt == -1 {
+ return out, scanner.ErrBlock
}
- if c.Kind == Undefined {
- switch t.Kind() {
- case scanner.Number:
- c.Kind = LiteralNumber
- case scanner.Identifier:
- c.Kind = Ident
+
+ // Skip over simple init statements for some tokens (if, for, ...)
+ if lang.HasInit[in[0].Id] {
+ log.Println("may have init")
+ for in[endstmt-1].Id != lang.BraceBlock {
+ e2 := in[endstmt+1:].Index(lang.Semicolon)
+ if e2 == -1 {
+ return out, scanner.ErrBlock
+ }
+ endstmt += 1 + e2
}
}
+ o, err := p.ParseStmt(in[:endstmt])
+ if err != nil {
+ return out, err
+ }
+ out = append(out, o...)
+ in = in[endstmt+1:]
+ }
+ return out, err
+}
- if root == nil {
- if p.isSep(c) {
- continue
- }
- lce = nil
- root = c
- if p.isExpr(c) {
- expr = c
- }
+func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) {
+ log.Println("ParseStmt in:", in)
+ if len(in) == 0 {
+ return nil, nil
+ }
+ switch t := in[0]; t.Id {
+ case lang.Func:
+ return p.ParseFunc(in)
+ case lang.If:
+ return p.ParseIf(in)
+ case lang.Return:
+ return p.ParseReturn(in)
+ default:
+ return p.ParseExpr(in)
+ }
+}
+
+func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) {
+ // TODO: handle anonymous functions (no function name)
+ // TODO: handle receiver (methods)
+ // TODO: handle parametric types (generics)
+ // TODO: handle variadic parameters
+ fname := in[1].Str
+ ofname := p.fname
+ p.fname = fname
+ ofunc := p.function
+ s, _, ok := p.getSym(fname, p.scope)
+ if !ok {
+ s = &symbol{}
+ p.symbols[p.scope+fname] = s
+ }
+ p.pushScope(fname)
+ defer func() {
+ p.fname = ofname // TODO remove if favor of function.
+ p.function = ofunc
+ p.popScope()
+ }()
+
+ out = Tokens{
+ {Id: lang.Enter, Str: "enter " + p.scope},
+ {Id: lang.Goto, Str: "goto " + fname + "_end"}, // Skip function definition.
+ {Id: lang.Label, Pos: in[0].Pos, Str: fname},
+ }
+
+ bi := in.Index(lang.BraceBlock)
+ if bi < 0 {
+ return out, fmt.Errorf("no function body")
+ }
+ typ, err := p.ParseType(in[:bi])
+ if err != nil {
+ return out, err
+ }
+ s.kind = symFunc
+ s.Type = typ
+ p.function = s
+
+ log.Println("body:", in[len(in)-1].Block())
+ toks, err := p.Parse(in[len(in)-1].Block())
+ if err != nil {
+ return out, err
+ }
+ out = append(out, toks...)
+ out = append(out,
+ scanner.Token{Id: lang.Label, Str: fname + "_end"},
+ scanner.Token{Id: lang.Exit},
+ )
+ log.Println("symbols", p.symbols)
+ return out, err
+}
+
+func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) {
+ prefix := p.fname + "_if" + strconv.Itoa(p.labelCount[p.scope+p.fname])
+ p.labelCount[p.scope+p.fname]++
+ sc := 0
+ ssc := strconv.Itoa(sc)
+ // We start from the end of the statement and examine tokens backward to
+ // get the destination labels already computed when jumps are set.
+ i := len(in) - 1
+ for i > 0 {
+ if in[i].Id != lang.BraceBlock {
+ return nil, fmt.Errorf("expected '{', got %v", in[i])
+ }
+ pre, err := p.Parse(in[i].Block())
+ if err != nil {
+ return nil, err
+ }
+ //pre := append(Tokens{{Id: lang.Label, Str: prefix + "_b" + ssc}}, blockout...)
+ if sc > 0 {
+ pre = append(pre, scanner.Token{Id: lang.Goto, Str: "goto " + prefix + "_e0"})
+ }
+ pre = append(pre, scanner.Token{Id: lang.Label, Str: prefix + "_e" + ssc})
+ out = append(pre, out...)
+
+ i--
+ ifp := in[:i].LastIndex(lang.If)
+
+ if in[i].Id == lang.Else { // Step over final 'else'.
+ i--
+ sc++
+ ssc = strconv.Itoa(sc)
continue
}
- if t.IsBlock() {
- if expr != nil {
- if p.hasProp(c, ExprSep) && p.isExprSep(root) {
- // A bracket block may end a previous expression.
- root.Child = append(root.Child, expr)
- expr = nil
- } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
- // Handle (possibly nested) call expressions.
- if lce == nil || lce != expr { // TODO(marc): not general, fix it.
- lce = prev
- }
- lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
- lce.Token = scanner.NewToken("Call", c.Pos())
- lce.Kind = ExprCall
- }
- }
- tcont := t.Content()
- s := tcont[t.Start() : len(tcont)-t.End()]
- n2, err := p.Parse(s, c)
- if err != nil {
+ pre = Tokens{}
+ var init, cond Tokens
+ initcond := in[ifp+1 : i+1]
+ if ii := initcond.Index(lang.Semicolon); ii < 0 {
+ cond = initcond
+ } else {
+ init = initcond[:ii]
+ cond = initcond[ii+1:]
+ }
+ if len(init) > 0 {
+ if init, err = p.ParseStmt(init); err != nil {
return nil, err
}
- c.Child = append(c.Child, n2...)
- }
-
- // Process the end of an expression or a statement.
- if t.IsSeparator() {
- if t.Content() == "," && ctx.Kind != BlockParen {
- // ignore comma separator in field lists
- } else if expr != nil && p.hasProp(root, Stmt) {
- root.Child = append(root.Child, expr)
- if p.hasProp(expr, ExprSep) {
- nodes = append(nodes, root)
- root = nil
- }
- expr = nil
- } else {
- if expr != nil {
- root = expr
- }
- nodes = append(nodes, root)
- expr = nil
- root = nil
- }
- continue
+ pre = append(pre, init...)
}
+ if cond, err = p.ParseExpr(cond); err != nil {
+ return nil, err
+ }
+ pre = append(pre, cond...)
+ pre = append(pre, scanner.Token{Id: lang.JumpFalse, Str: "JumpFalse " + prefix + "_e" + ssc})
+ out = append(pre, out...)
+ i = ifp
+ if i > 1 && in[i].Id == lang.If && in[i-1].Id == lang.Else { // Step over 'else if'.
+ i -= 2
+ }
+ sc++
+ ssc = strconv.Itoa(sc)
+ }
+ log.Println("prefix:", prefix)
+ log.Println("if tokens:", out)
+ return out, err
+}
- // We assume from now that current node is part of an expression subtree.
- if expr == nil {
- if p.isStatement(root) {
- expr = c
- continue
+func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) {
+ if len(in) > 1 {
+ if out, err = p.ParseExpr(in[1:]); err != nil {
+ return out, err
+ }
+ }
+
+ // TODO: the function symbol should be already present in the parser context.
+ // otherwise no way to handle anonymous func.
+ s := p.function
+ in[0].Beg = s.Type.NumOut()
+ in[0].End = s.Type.NumIn()
+ log.Println("ParseReturn:", p.fname, in[0])
+ out = append(out, in[0])
+ return out, err
+}
+
+func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) {
+ log.Println("ParseExpr in:", in)
+ var ops Tokens
+ var vl int
+ //
+ // Process tokens from last to first, the goal is to reorder the tokens in
+ // a stack machine processing order, so it can be directly interpreted.
+ //
+ for i := len(in) - 1; i >= 0; i-- {
+ t := in[i]
+ // temporary assumptions: binary operators, returning 1 value
+ switch t.Id {
+ case lang.Ident, lang.Int, lang.String:
+ out = append(out, t)
+ vl++
+ case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Less:
+ // TODO: handle operator precedence to swap operators / operands if necessary
+ if vl < 2 {
+ ops = append(ops, t)
+ break
}
- expr = root
- }
-
- // Update the expression subtree according to binary operator precedence rules.
- // - operators are binary infix by default.
- // - if an operator follows another, then it's unary prefix.
- // - if an expression starts by an operator, then it's unary prefix.
- // - non operator nodes have a default precedence of 0.
- // TODO: handle postfix unary (i.e. ++) and ternary (i.e. ?:)
- //
- ep := p.Spec[expr.Content()].Order
- cp := p.Spec[c.Content()].Order
- a := expr
- if unaryOp[c] {
- cp = 0
- }
- if cp != 0 {
- if cp > ep {
- // Perform an operator permutation at expr root as required by precedence.
- // TODO(marc): maybe it can be generalized in below else branch.
- expr, c = c, expr
- a = expr // Temporary ancestor: its children may have to be permuted.
- } else {
- // Findout if an operator permutation is necessary in subtree.
- c1 := expr
- for {
- a = c1
- if unaryOp[c1] {
- c1, c = c, c1
- a = c1
- if c == expr {
- expr = a
- }
- break
- }
- if len(c1.Child) < 2 {
- break
- }
- c1 = c1.Child[1]
- if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
+ case lang.ParenBlock:
+ // If the previous token is an arithmetic, logic or assign operator then
+ // this parenthesis block is an enclosed expr, otherwise a call expr.
+ if i == 0 || in[i-1].Id.IsOperator() {
+ out = append(out, t)
+ vl++
+ break
+ }
+ // The call expression can be a function call, a conversion,
+ // a type assersion (including for type switch)
+ // func call: push args and func address then call
+ out = append(out, t)
+ vl++
+ if t2 := in[i-1]; t2.Id == lang.Ident {
+ if s, sc, ok := p.getSym(t2.Str, p.scope); ok {
+ log.Println("callExpr:", t2.Str, p.scope, s, ok, sc)
+ if s.kind == symValue {
+ // Store the number of input parameters in the token Beg field.
+ ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)})
break
}
}
- // No permutation occured. Append current to last visited ancestor.
- if len(a.Child) > 1 {
- a.Child = a.Child[:1]
- c.Child = append(c.Child, c1)
- }
- }
- } else if ep != 0 {
- for len(a.Child) > 1 {
- a = a.Child[1]
}
+ ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos})
}
- a.Child = append(a.Child, c)
- if p.hasProp(a, Call) {
- lce = a
- }
- }
- if root != nil && p.isStatement(root) {
- if expr != nil {
- root.Child = append(root.Child, expr)
+ if ol := len(ops); ol > 0 && vl > ol {
+ op := ops[ol-1]
+ ops = ops[:ol-1]
+ out = append(out, op)
+ vl--
}
- } else if expr != nil {
- root = expr
}
- if root != nil {
- // /*
- if p.hasProp(root, MultiOp) {
- for {
- if !p.fixMultiOp(root, prevToken) {
- break
- }
+ out = append(out, ops...) // TODO: verify that ops should be added in this order.
+
+ log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops)
+ // The tokens are now properly ordered, process nested blocks.
+ for i := len(out) - 1; i >= 0; i-- {
+ t := out[i]
+ var toks Tokens
+ switch t.Id {
+ case lang.ParenBlock, lang.BracketBlock:
+ if toks, err = p.ParseExprStr(t.Block()); err != nil {
+ return out, err
}
+ default:
+ continue
}
- // */
- nodes = append(nodes, root)
+
+ // replace block token by its parsed result.
+ log.Println("toks:", toks)
+ out2 := append(Tokens{}, out[:i]...)
+ out2 = append(out2, toks...)
+ out = append(out2, out[i+1:]...)
}
- return nodes, err
+ log.Println("Final out:", out)
+ return out, err
}
-func (p *Parser) fixMultiOp(root *Node, prevToken map[*Node]*scanner.Token) bool {
- for i, c := range root.Child {
- for j, cc := range c.Child {
- if pt := prevToken[cc]; pt != nil && pt.Content() == "," {
- c.RemoveChild(j)
- root.InsertChild(cc, i)
- return true
- }
+func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) {
+ if tokens, err = p.Scan(s, false); err != nil {
+ return
+ }
+ var result Tokens
+ for _, sub := range tokens.Split(lang.Comma) {
+ toks, err := p.ParseExpr(sub)
+ if err != nil {
+ return result, err
}
+ result = append(toks, result...)
}
- return false
+ return result, err
}
-func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 }
-func (p *Parser) isStatement(n *Node) bool { return p.Spec[n.Content()].Flags&Stmt != 0 }
-func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].Flags&ExprSep != 0 }
-func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) }
-func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator }
-func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block }
-
-func (p *Parser) precedenceToken(t *scanner.Token) int {
- s := t.Content()
- if l := t.Start(); l > 0 {
- s = s[:l]
+func (p *Parser) numItems(s string, sep lang.TokenId) int {
+ tokens, err := p.Scan(s, false)
+ if err != nil {
+ return -1
}
- return p.Spec[s].Order
+ r := 0
+ for _, t := range tokens.Split(sep) {
+ if len(t) == 0 {
+ continue
+ }
+ r++
+ }
+ return r
+}
+
+func (p *Parser) pushScope(name string) {
+ p.scope = strings.TrimPrefix(p.scope+"/"+name+"/", "/")
}
-func (p *Parser) canCallToken(t *scanner.Token) bool {
- return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0
+func (p *Parser) popScope() {
+ p.scope = strings.TrimSuffix(p.scope, "/")
+ j := strings.LastIndex(p.scope, "/")
+ if j == -1 {
+ p.scope = ""
+ return
+ }
+ p.scope = p.scope[:j]
}
diff --git a/parser/parse_test.go b/parser/parse_test.go
deleted file mode 100644
index 4559acf..0000000
--- a/parser/parse_test.go
+++ /dev/null
@@ -1,257 +0,0 @@
-package parser
-
-import (
- "log"
- "os"
- "testing"
-
- "github.com/gnolang/parscan/scanner"
-)
-
-var GoScanner = &scanner.Scanner{
- CharProp: [scanner.ASCIILen]uint{
- '\t': scanner.CharSep,
- '\n': scanner.CharLineSep,
- ' ': scanner.CharSep,
- '!': scanner.CharOp,
- '"': scanner.CharStr,
- '%': scanner.CharOp,
- '&': scanner.CharOp,
- '\'': scanner.CharStr,
- '(': scanner.CharBlock,
- '*': scanner.CharOp,
- '+': scanner.CharOp,
- ',': scanner.CharGroupSep,
- '-': scanner.CharOp,
- '.': scanner.CharOp,
- '/': scanner.CharOp,
- ':': scanner.CharOp,
- ';': scanner.CharGroupSep,
- '<': scanner.CharOp,
- '=': scanner.CharOp,
- '>': scanner.CharOp,
- '[': scanner.CharBlock,
- '^': scanner.CharOp,
- '{': scanner.CharBlock,
- '|': scanner.CharOp,
- '~': scanner.CharOp,
- },
- End: map[string]string{
- "(": ")",
- "{": "}",
- "[": "]",
- "/*": "*/",
- `"`: `"`,
- "'": "'",
- "`": "`",
- "//": "\n",
- },
- BlockProp: map[string]uint{
- "(": scanner.CharBlock,
- "{": scanner.CharBlock,
- "[": scanner.CharBlock,
- `"`: scanner.CharStr | scanner.StrEsc | scanner.StrNonl,
- "`": scanner.CharStr,
- "'": scanner.CharStr | scanner.StrEsc,
- "/*": scanner.CharStr,
- "//": scanner.CharStr | scanner.ExcludeEnd | scanner.EosValidEnd,
- },
- SkipSemi: map[string]bool{
- "++": true,
- "--": true,
- "case": true,
- "chan": true,
- "const": true,
- "default": true,
- "defer": true,
- "else": true,
- "for": true,
- "func": true,
- "go": true,
- "goto": true,
- "if": true,
- "import": true,
- "interface": true,
- "map": true,
- "package": true,
- "range": true,
- "select": true,
- "struct": true,
- "switch": true,
- "type": true,
- "var": true,
- },
-}
-
-var GoParser = &Parser{
- Scanner: GoScanner,
- Spec: map[string]NodeSpec{
- ".": {Kind: OpDot, Flags: Call, Order: 3},
- "*": {Kind: OpMultiply, Order: 4},
- "+": {Kind: OpAdd, Order: 5},
- "-": {Kind: OpSubtract, Order: 5},
- "<": {Kind: OpInferior, Order: 6},
- ":=": {Kind: OpDefine, Flags: MultiOp, Order: 7},
- "=": {Kind: OpAssign, Flags: MultiOp, Order: 7},
- "if": {Kind: StmtIf, Flags: Stmt | ExprSep},
- "func": {Kind: DeclFunc, Flags: Decl | Call},
- "return": {Kind: StmtReturn, Flags: Stmt},
- "{..}": {Kind: BlockStmt, Flags: ExprSep},
- "(..)": {Kind: BlockParen, Flags: Call},
- "//..": {Kind: Comment},
- "/*..": {Kind: Comment},
- },
-}
-
-func init() {
- GoParser.Init()
- log.SetFlags(log.Lshortfile)
-}
-
-func TestParse(t *testing.T) {
- for _, test := range goTests {
- test := test
- t.Run("", func(t *testing.T) {
- var err error
- errStr := ""
- n := &Node{}
- if n.Child, err = GoParser.Parse(test.src, n); err != nil {
- errStr = err.Error()
- }
- if errStr != test.err {
- t.Errorf("got error %#v, want error %#v", errStr, test.err)
- }
- if dot := n.Sdot(""); dot != test.dot {
- t.Errorf("got %#v, want %#v", dot, test.dot)
- }
- t.Log(test.src)
- t.Log(n.Sdot(""))
- if dotCmd := os.Getenv("DOT"); dotCmd != "" {
- n.Dot(dotCmd, "")
- }
- })
- }
-}
-
-var goTests = []struct {
- src, dot, err string
- skip bool
-}{{ // #00
- src: "",
- dot: `digraph ast { 0 [label=""]; }`,
-}, { // #01
- src: "12",
- dot: `digraph ast { 0 [label=""]; 1 [label="12"]; 0 -> 1; }`,
-}, { // #02
- src: "1 + 2",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; }`,
-}, { // #03
- src: "1 + 2 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="3"]; 3 -> 5; }`,
-}, { // #04
- src: "1 * 2 + 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="1"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="3"]; 1 -> 5; }`,
-}, { // #05
- src: "a := 2 + 5",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="5"]; 3 -> 5; }`,
-}, { // #06
- src: "a := 1 * 2 + 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="3"]; 3 -> 7; }`,
-}, { // #07
- src: "a := 1 + 2 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="*"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="3"]; 5 -> 7; }`,
-}, { // #08
- src: "a := 1 + 2 * 3 + 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="*"]; 5 -> 6; 7 [label="2"]; 6 -> 7; 8 [label="3"]; 6 -> 8; 9 [label="4"]; 5 -> 9; }`,
-}, { // #09
- src: "a := 1 + 2 + 3 * 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="*"]; 5 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`,
-}, { // #10
- src: "a := 1 * 2 + 3 * 4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="*"]; 3 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`,
-}, { // #11
- src: "a := (1 + 2) * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="(..)"]; 3 -> 4; 5 [label="+"]; 4 -> 5; 6 [label="1"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="3"]; 3 -> 8; }`,
-}, { // #12
- src: "a := 2 * -3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="-"]; 3 -> 5; 6 [label="3"]; 5 -> 6; }`,
-}, { // #13
- src: "-5 + 4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="4"]; 1 -> 4; }`,
-}, { // #14
- src: "-5 + -4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="-"]; 1 -> 4; 5 [label="4"]; 4 -> 5; }`,
-}, { // #15
- src: "a := -5 + -4",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="-"]; 3 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 3 -> 6; 7 [label="4"]; 6 -> 7; }`,
-}, { // #16
- src: "*a := 5 * -3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 4 -> 6; 7 [label="3"]; 6 -> 7; }`,
-}, { // #17
- src: "*a := -5 * 3",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="-"]; 4 -> 5; 6 [label="5"]; 5 -> 6; 7 [label="3"]; 4 -> 7; }`,
-}, { // #18
- src: "1+2\n3-4",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; 4 [label="-"]; 0 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="4"]; 4 -> 6; }`,
-}, { // #19
- src: "i = i+1",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="i"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="i"]; 3 -> 4; 5 [label="1"]; 3 -> 5; }`,
-}, { // #20
- src: "a[12] = 5",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="5"]; 1 -> 5; }`,
-}, { // #21
- src: "a[12][0] = 3",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="[..]"]; 2 -> 5; 6 [label="0"]; 5 -> 6; 7 [label="3"]; 1 -> 7; }`,
-}, { // #22
- src: "a.b = 34",
- dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="."]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="b"]; 2 -> 4; 5 [label="34"]; 1 -> 5; }`,
-}, { // #23
- src: "if i < 2 { return j }",
- dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label="<"]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="{..}"]; 1 -> 5; 6 [label="return"]; 5 -> 6; 7 [label="j"]; 6 -> 7; }`,
-}, { // #24
- src: "if i:=1; i < 2 { return j }",
- dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label=":="]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="1"]; 2 -> 4; 5 [label="<"]; 1 -> 5; 6 [label="i"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="{..}"]; 1 -> 8; 9 [label="return"]; 8 -> 9; 10 [label="j"]; 9 -> 10; }`,
-}, { // #25
- src: "f(i) + f(j)",
- dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="f"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="i"]; 4 -> 5; 6 [label="Call"]; 1 -> 6; 7 [label="f"]; 6 -> 7; 8 [label="(..)"]; 6 -> 8; 9 [label="j"]; 8 -> 9; }`,
-}, { // #26
- src: "a := 1 // This is a comment",
- dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="1"]; 1 -> 3; }`,
- /*
- }, { // #27
- src: "a, b, c = 1, f(2), 3",
- //src: "f(i) + f(j)(4)", // not ok
- }, { // #26
- src: "if i < 2 {return i}; return f(i-2) + f(i-1)",
- }, { // #27
- src: "for i < 2 { println(i) }",
- }, { // #28
- src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }",
- }, { // #29
- src: "a := []int{3, 4}",
- }, { // #30
- //src: "a := struct{int}",
- src: "a, b = c, d",
- }, { // #31
- //src: "a := [2]int{3, 4}",
- src: `fmt.Println("Hello")`,
- //src: "(1 + 2) * (3 - 4)",
- //src: "1 + (1 + 2)",
- }, { // #32
- //src: `a(3)(4)`,
- //src: `3 + 2 * a(3) + 5`,
- //src: `3 + 2 * a(3)(4) + (5)`,
- //src: `(a(3))(4)`,
- src: `a(3)(4)`,
- dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`,
- //src: `println("Hello")`,
- //src: `a.b.c + 3`,
- }, { // #33
- src: `func f(a int, b int) {return a + b}; f(1+2)`,
- }, { // #34
- src: `if a == 1 {
- println(2)
- }
- println("bye")`,
- */
-}}
diff --git a/parser/symbol.go b/parser/symbol.go
new file mode 100644
index 0000000..9975c24
--- /dev/null
+++ b/parser/symbol.go
@@ -0,0 +1,67 @@
+package parser
+
+import (
+ "reflect"
+ "strings"
+)
+
+type symKind int
+
+const (
+ symValue symKind = iota // a Go value defined in the runtime
+ symType // a Go type
+	symLabel                // a label indicating a position in the VM code
+ symConst // a Go constant
+ symVar // a Go variable, located in the VM memory
+ symFunc // a Go function, located in the VM code
+)
+
+type symbol struct {
+ kind symKind
+ index int // address of symbol in frame
+ value any
+ Type reflect.Type
+ local bool // if true address is relative to local frame, otherwise global
+ used bool
+}
+
+func (p *Parser) AddSym(i int, name string, v any) { p.addSym(i, name, v, symValue, nil, false) }
+
+func (p *Parser) addSym(i int, name string, v any, k symKind, t reflect.Type, local bool) {
+ p.symbols[name] = &symbol{kind: k, index: i, local: local, value: v, Type: t, used: true}
+}
+
+// getSym searches for an existing symbol starting from the deepest scope.
+func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) {
+ for {
+ if sym, ok = p.symbols[scope+name]; ok {
+ return sym, scope, ok
+ }
+ scope = strings.TrimSuffix(scope, "/")
+ i := strings.LastIndex(scope, "/")
+ if i == -1 {
+ scope = ""
+ break
+ }
+ if scope = scope[:i]; scope == "" {
+ break
+ }
+ }
+ sym, ok = p.symbols[name]
+ return sym, scope, ok
+}
+
+func initUniverse() map[string]*symbol {
+ return map[string]*symbol{
+ "any": {kind: symType, Type: reflect.TypeOf((*any)(nil)).Elem()},
+ "bool": {kind: symType, Type: reflect.TypeOf((*bool)(nil)).Elem()},
+ "error": {kind: symType, Type: reflect.TypeOf((*error)(nil)).Elem()},
+ "int": {kind: symType, Type: reflect.TypeOf((*int)(nil)).Elem()},
+ "string": {kind: symType, Type: reflect.TypeOf((*string)(nil)).Elem()},
+
+ "nil": {},
+ "iota": {value: 0},
+ "true": {value: true},
+ "false": {value: false},
+ }
+}
diff --git a/parser/type.go b/parser/type.go
new file mode 100644
index 0000000..77ccc6e
--- /dev/null
+++ b/parser/type.go
@@ -0,0 +1,117 @@
+package parser
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+
+ "github.com/gnolang/parscan/lang"
+)
+
+// ParseType parses a list of tokens defining a type expression and returns
+// the corresponding runtime type or an error.
+func (p *Parser) ParseType(in Tokens) (typ reflect.Type, err error) {
+ log.Println("ParseType", in)
+ switch in[0].Id {
+ case lang.Func:
+ // Get argument and return token positions depending on function pattern:
+ // method with receiver, named function or anonymous closure.
+ // TODO: handle variadics
+ var out Tokens
+ var indexArgs int
+ switch l, in1 := len(in), in[1]; {
+ case l >= 4 && in1.Id == lang.ParenBlock && in[2].Id == lang.Ident:
+ indexArgs, out = 3, in[4:]
+ case l >= 3 && in1.Id == lang.Ident:
+ indexArgs, out = 2, in[3:]
+ case l >= 2 && in1.Id == lang.ParenBlock:
+ indexArgs, out = 1, in[2:]
+ default:
+ return nil, fmt.Errorf("invalid func signature")
+ }
+
+ // We can now parse function input and output parameter types.
+ // Input parameters are always enclosed by parenthesis.
+ iargs, err := p.Scan(in[indexArgs].Block(), false)
+ if err != nil {
+ return nil, err
+ }
+ arg, err := p.parseParamTypes(iargs, true)
+ if err != nil {
+ return nil, err
+ }
+ // Output parameters may be empty, or enclosed or not by parenthesis.
+ if len(out) == 1 && out[0].Id == lang.ParenBlock {
+ if out, err = p.Scan(out[0].Block(), false); err != nil {
+ return nil, err
+ }
+ }
+ ret, err := p.parseParamTypes(out, false)
+ if err != nil {
+ return nil, err
+ }
+ return reflect.FuncOf(arg, ret, false), nil
+
+ case lang.Ident:
+ // TODO: selector expression (pkg.type)
+ s, _, ok := p.getSym(in[0].Str, p.scope)
+ if !ok || s.kind != symType {
+ return nil, fmt.Errorf("invalid type %s", in[0].Str)
+ }
+ return s.Type, nil
+ }
+ return typ, err
+}
+
+// parseParamTypes parses a list of comma separated typed parameters and returns a list of
+// runtime types. Implicit parameter names and types are supported.
+func (p *Parser) parseParamTypes(in Tokens, arg bool) (types []reflect.Type, err error) {
+ // Parse from right to left, to allow multiple comma separated parameters of the same type.
+ list := in.Split(lang.Comma)
+ for i := len(list) - 1; i >= 0; i-- {
+ t := list[i]
+ if len(t) == 0 {
+ continue
+ }
+ param := ""
+ if p.hasFirstParam(t) {
+ param = t[0].Str
+ t = t[1:]
+ if len(t) == 0 {
+ if len(types) == 0 {
+ return nil, fmt.Errorf("Invalid type %v", t[0])
+ }
+			// Type was omitted, apply the previous one from the right.
+ types = append([]reflect.Type{types[0]}, types...)
+ if arg {
+ p.addSym(-i-2, p.scope+param, nil, symVar, types[0], true)
+ } else {
+ p.addSym(i, p.scope+param, nil, symVar, types[0], true)
+ }
+ continue
+ }
+ }
+ typ, err := p.ParseType(t)
+ if err != nil {
+ return nil, err
+ }
+ if param != "" {
+ if arg {
+ p.addSym(-i-2, p.scope+param, nil, symVar, typ, true)
+ } else {
+ p.addSym(i, p.scope+param, nil, symVar, typ, true)
+ }
+ }
+ types = append([]reflect.Type{typ}, types...)
+ }
+ return types, err
+}
+
+// hasFirstParam returns true if the first token of a list is a parameter name.
+func (p *Parser) hasFirstParam(in Tokens) bool {
+ if in[0].Id != lang.Ident {
+ return false
+ }
+ s, _, ok := p.getSym(in[0].Str, p.scope)
+ return !ok || s.kind != symType
+}