Diffstat (limited to 'parser')
-rw-r--r--  parser/README.md           | 157
-rw-r--r--  parser/compiler.go         | 250
-rw-r--r--  parser/dot.go              |  89
-rw-r--r--  parser/interpreter.go      |  52
-rw-r--r--  parser/interpreter_test.go |  66
-rw-r--r--  parser/kind.go             |  56
-rw-r--r--  parser/node.go             |  50
-rw-r--r--  parser/parse.go            | 519
-rw-r--r--  parser/parse_test.go       | 257
-rw-r--r--  parser/symbol.go           |  67
-rw-r--r--  parser/type.go             | 117
11 files changed, 975 insertions, 705 deletions
diff --git a/parser/README.md b/parser/README.md
index b8bc0d4..d46608c 100644
--- a/parser/README.md
+++ b/parser/README.md
@@ -1,71 +1,112 @@
 # Parser
-A parser takes an array of tokens (produced by the scanner) in input and
-returns a node representing a syntax tree. A node is an object
-containing a kind, the corresponding token and the ordered references to
-descendent nodes.
+See if we can build an almost single pass compiler for Go doing syntax
+directed translation, without any complex data structure (no syntax
+tree), only lists of tokens.
-```mermaid
-graph LR
-s[ ] --> |source| a(scanner)
---> |tokens| b(parser)
---> |AST| c[ ]
-subgraph parser
-  b
-end
-style s height:0px;
-style c height:0px;
-```
+The goal is to have the shortest and simplest path from source to
+bytecode.
-A goal is to make the parser generic enough so it can generate syntax
-trees for most of existing programming languages (no claim of generality
-yet), provided a small set of generating rules per language, and a small
-set of validating rules (yet to be defined) to detect invalid
-constructs.
+## Design
-The input tokens are particular in the sense that they include classical
-lexical items such as words, separators, numbers, but also strings and
-nested blocks, which are resolved at scanning stage rather than parsing
-stage. See the scanner for more details.
+The input of the parser is a list of tokens produced by the scanner.
+Multiple tokens are processed at once. The minimal set to get
+meaningful results (not an error or nil) is a complete statement.
-The language specification includes the following:
+The output of the parser is also a list of tokens, to be consumed by
+the compiler to produce bytecode. The output token set is identical
+to the bytecode instruction set except that:
-- a scanner specification, to produce the set of possible tokens.
-- a map of node specification per token name. The node specification
-  defines some parameters influing how the tree is generated.
+- code locations may be provided as labels instead of numerical
+  values,
+- memory locations for constants and variables may be provided as
+  symbol names instead of numerical values.
-## Development status
+## Status
-A successful test must be provided to check the status.
+Go language support:
-- [x] binary operator expressions
-- [x] unary operator (prefix) expressions
-- [ ] unary operator (suffix) expressions
-- [x] operator precedence rules
-- [x] parenthesis in expressions
-- [x] call expressions
-- [ ] nested calls
-- [x] index expressions
-- [x] single assignments
-- [ ] multi assignments
-- [x] simple `if` statement (no `else`)
-- [ ] full `if` statement (including `else`, `else if`)
-- [x] init expressions in `if` statements
-- [x] statement blocks
-- [x] comments
+- [x] named functions
+- [ ] anonymous functions (closures)
+- [ ] methods
+- [x] internal function calls
+- [x] external function calls (calling runtime symbols in interpreter)
+- [ ] export to runtime
+- [ ] builtin calls (new, make, copy, delete, len, cap, ...)
+- [ ] out of order declarations
+- [ ] arbitrary precision constants
+- [x] basic types
+- [ ] complete numeric types
+- [x] function types
+- [ ] variadic functions
+- [ ] pointers
+- [ ] structures
+- [ ] embedded structures
+- [ ] recursive structures
+- [ ] interfaces
+- [ ] arrays, slices
+- [ ] maps
+- [ ] deterministic maps
+- [ ] channel types
+- [ ] channel operations
+- [x] var defined by assign :=
+- [x] var assign =
+- [ ] var declaration
+- [ ] type declaration
+- [x] func declaration
+- [ ] const declaration
+- [ ] iota expression
+- [ ] defer statement
+- [ ] recover statement
+- [ ] go statement
+- [x] if statement (including else and else if)
 - [ ] for statement
 - [ ] switch statement
+- [ ] break statement
+- [ ] continue statement
+- [ ] fallthrough statement
+- [ ] goto statement
 - [ ] select statement
-- [x] return statement
-- [x] function declaration
-- [ ] method declaration
-- [ ] anonymous functions (closures)
-- [ ] type declaration
-- [ ] var, const, type single declaration
-- [ ] var, const, type multi declaration
-- [ ] type parametric expressions
-- [x] literal numbers (see scanner)
-- [x] literal strings
-- [ ] composite literal
-- [ ] import statements
-- [ ] go.mod syntax
+- [x] binary operators
+- [ ] unary operators
+- [ ] logical operators && and ||
+- [ ] assign operators
+- [ ] operator precedence
+- [x] parenthesis expressions
+- [x] call expressions
+- [ ] index expressions
+- [ ] selector expressions
+- [ ] type conversions
+- [ ] type assertions
+- [ ] parametric types (generic)
+- [ ] type parametric functions (generic)
+- [ ] type constraints (generic)
+- [ ] type checking
+- [ ] comment pragmas
+- [ ] packages import
+- [ ] modules
+
+Other items:
+
+- [x] REPL
+- [x] multiline statements in REPL
+- [ ] completion, history in REPL
+- [x] eval strings
+- [x] eval files (including stdin, ...)
+- [x] debug traces for scanner, parser, compiler, bytecode vm
+- [x] simple interpreter tests to exercise from source to execution
+- [ ] compile time error detection and diagnosis
+- [ ] stack dump
+- [ ] symbol tables, data tables, code bound to source lines
+- [ ] interactive debugger: breaks, continue, introspection, ...
+- [ ] machine level debugger
+- [ ] source level debugger
+- [ ] replay debugger, backward instruction execution
+- [ ] vm monitor: live memory / code display during run
+- [ ] stdlib wrappers a la yaegi
+- [ ] system and environment sandboxing
+- [ ] build constraints (arch, sys, etc)
+- [ ] test command (running go test / benchmark / example files)
+- [ ] skipping / including test files
+- [ ] test coverage
+- [ ] fuzz tests for scanner, vm, ...
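To make the design above concrete, here is a minimal, self-contained sketch of the same idea: an infix token list is reordered into stack machine (postfix) order, which a tiny value-stack VM can then execute directly. This is not the parscan API; the token representation, the precedence table and the shunting-yard pass are invented for the example (parscan's ParseExpr walks the tokens right to left instead).

```go
// Illustrative only: shows tokens being reordered into stack machine order,
// then executed by a trivial value-stack VM. Not the parscan implementation.
package main

import "fmt"

var prec = map[string]int{"+": 1, "*": 2} // higher binds tighter

// toPostfix reorders an infix token list into postfix (stack machine) order.
func toPostfix(tokens []string) []string {
	var out, ops []string
	for _, t := range tokens {
		p, isOp := prec[t]
		if !isOp {
			out = append(out, t) // operand: emit directly
			continue
		}
		// Pop operators of higher or equal precedence before pushing t.
		for len(ops) > 0 && prec[ops[len(ops)-1]] >= p {
			out = append(out, ops[len(ops)-1])
			ops = ops[:len(ops)-1]
		}
		ops = append(ops, t)
	}
	for i := len(ops) - 1; i >= 0; i-- {
		out = append(out, ops[i])
	}
	return out
}

// eval executes the postfix token list on a value stack, like a tiny VM.
func eval(post []string) int {
	var stack []int
	for _, t := range post {
		switch t {
		case "+", "*":
			a, b := stack[len(stack)-2], stack[len(stack)-1]
			stack = stack[:len(stack)-2]
			if t == "+" {
				stack = append(stack, a+b)
			} else {
				stack = append(stack, a*b)
			}
		default:
			var n int
			fmt.Sscan(t, &n)
			stack = append(stack, n)
		}
	}
	return stack[0]
}

func main() {
	post := toPostfix([]string{"1", "+", "2", "*", "3"})
	fmt.Println(post)       // [1 2 3 * +]
	fmt.Println(eval(post)) // 7
}
```

The point of the reordering is that no tree is ever built: once the tokens are in this order, code generation is a single pass that emits one instruction per token.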
diff --git a/parser/compiler.go b/parser/compiler.go new file mode 100644 index 0000000..5b0ed81 --- /dev/null +++ b/parser/compiler.go @@ -0,0 +1,250 @@ +package parser + +import ( + "fmt" + "log" + "reflect" + "strconv" + + "github.com/gnolang/parscan/lang" + "github.com/gnolang/parscan/scanner" + "github.com/gnolang/parscan/vm" +) + +type Compiler struct { + *Parser + Code [][]int64 // produced code, to fill VM with + Data []any // produced data, will be at the bottom of VM stack + Entry int // offset in Code to start execution from (skip function defintions) + + strings map[string]int // locations of strings in Data +} + +func NewCompiler(scanner *scanner.Scanner) *Compiler { + return &Compiler{ + Parser: &Parser{Scanner: scanner, symbols: initUniverse(), labelCount: map[string]int{}}, + Entry: -1, + strings: map[string]int{}, + } +} + +func (c *Compiler) Emit(op ...int64) int { + op = append([]int64{}, op...) + l := len(c.Code) + c.Code = append(c.Code, op) + return l +} + +func (c *Compiler) AddSym(name string, value any) int { + p := len(c.Data) + c.Data = append(c.Data, value) + c.Parser.AddSym(p, name, value) + return p +} + +func (c *Compiler) Codegen(tokens Tokens) (err error) { + fixList := Tokens{} + function := []string{""} + + log.Println("Codegen tokens:", tokens) + + for i, t := range tokens { + scope := function[len(function)-1] + switch t.Id { + case lang.Int: + n, err := strconv.Atoi(t.Str) + if err != nil { + return err + } + c.Emit(int64(t.Pos), vm.Push, int64(n)) + + case lang.String: + s := t.Block() + i, ok := c.strings[s] + if !ok { + i = len(c.Data) + c.Data = append(c.Data, s) + c.strings[s] = i + } + c.Emit(int64(t.Pos), vm.Dup, int64(i)) + + case lang.Add: + c.Emit(int64(t.Pos), vm.Add) + + case lang.Sub: + c.Emit(int64(t.Pos), vm.Sub) + + case lang.Less: + c.Emit(int64(t.Pos), vm.Lower) + + case lang.Call: + c.Emit(int64(t.Pos), vm.Call) + + case lang.CallX: + c.Emit(int64(t.Pos), vm.CallX, int64(t.Beg)) + + case lang.Define: + // TODO: support assignment to local, composite objects + st := tokens[i-1] + l := len(c.Data) + c.Data = append(c.Data, nil) + c.addSym(l, st.Str, nil, symVar, nil, false) + c.Emit(int64(st.Pos), vm.Assign, int64(l)) + + case lang.Enter: + // TODO: merge with label ? + function = append(function, t.Str[6:]) + + case lang.Exit: + function = function[:len(function)-1] + + case lang.Assign: + st := tokens[i-1] + s, _, ok := c.getSym(st.Str, scope) + if !ok { + return fmt.Errorf("symbol not found: %s", st.Str) + } + if s.local { + c.Emit(int64(st.Pos), vm.Fassign, int64(s.index)) + } else { + c.Emit(int64(st.Pos), vm.Assign, int64(s.index)) + } + + case lang.Equal: + c.Emit(int64(t.Pos), vm.Equal) + + case lang.Ident: + if i < len(tokens)-1 { + switch t1 := tokens[i+1]; t1.Id { + case lang.Define, lang.Assign, lang.Colon: + continue + } + } + s, _, ok := c.getSym(t.Str, scope) + if !ok { + return fmt.Errorf("symbol not found: %s", t.Str) + } + if s.local { + c.Emit(int64(t.Pos), vm.Fdup, int64(s.index)) + } else { + c.Emit(int64(t.Pos), vm.Dup, int64(s.index)) + } + + case lang.Label: + // If the label is a function, the symbol already exists + s, _, ok := c.getSym(t.Str, scope) + lc := len(c.Code) + if ok { + s.value = lc + if s.kind == symFunc { + // label is a function entry point, register its code address in data. 
+ s.index = len(c.Data) + c.Data = append(c.Data, lc) + } else { + c.Data[s.index] = lc + } + } else { + ld := len(c.Data) + c.Data = append(c.Data, lc) + c.addSym(ld, t.Str, lc, symLabel, nil, false) + } + + case lang.JumpFalse: + label := t.Str[10:] + i := 0 + if s, _, ok := c.getSym(label, scope); !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) + } + c.Emit(int64(t.Pos), vm.JumpFalse, int64(i)) + + case lang.Goto: + label := t.Str[5:] + i := 0 + if s, _, ok := c.getSym(label, scope); !ok { + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = s.value.(int) + } + c.Emit(int64(t.Pos), vm.Jump, int64(i)) + + case lang.Return: + c.Emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End)) + + default: + return fmt.Errorf("Codegen: unsupported token %v", t) + } + } + + // Finally we fix unresolved labels for jump destinations. + for _, t := range fixList { + var label string + switch t.Id { + case lang.Goto: + label = t.Str[5:] + case lang.JumpFalse: + label = t.Str[10:] + } + s, _, ok := c.getSym(label, "") + if !ok { + return fmt.Errorf("label not found: %v", label) + } + c.Code[t.Beg][2] = int64(s.value.(int) - t.Beg) + + } + return err +} + +func (c *Compiler) PrintCode() { + labels := map[int]string{} + for name, sym := range c.symbols { + if sym.kind == symLabel || sym.kind == symFunc { + labels[sym.value.(int)] = name + } + } + fmt.Println("# Code:") + for i, l := range c.Code { + if label, ok := labels[i]; ok { + fmt.Println(label + ":") + } + extra := "" + switch l[1] { + case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.Calli: + if d, ok := labels[i+(int)(l[2])]; ok { + extra = "// " + d + } + } + fmt.Printf("%4d %-14v %v\n", i, vm.CodeString(l), extra) + } +} + +type entry struct { + name string + *symbol +} + +func (c *Compiler) PrintData() { + dict := map[int]entry{} + for name, sym := range c.symbols { + if !sym.used || sym.local || sym.kind == symLabel { + continue + } + dict[sym.index] = entry{name, sym} + } + fmt.Println("# Data:") + for i, d := range c.Data { + fmt.Printf("%4d %T %v %v\n", i, d, d, dict[i]) + } +} + +func (c *Compiler) NumIn(i int) (int, bool) { + t := reflect.TypeOf(c.Data[i]) + if t.Kind() == reflect.Func { + return t.NumIn(), t.IsVariadic() + } + return -1, false +} diff --git a/parser/dot.go b/parser/dot.go deleted file mode 100644 index 11d5014..0000000 --- a/parser/dot.go +++ /dev/null @@ -1,89 +0,0 @@ -package parser - -import ( - "bytes" - "fmt" - "io" - "log" - "os/exec" - "strings" -) - -func (*Parser) Adot(nodes []*Node, c string) { - if c == "" { - return - } - n := &Node{Child: nodes} - n.Dot(c, "") -} - -func (n *Node) Dot(c, s string) { - dw, cmd := dotWriter(c) - n.astDot(dw, s) - if cmd == nil { - return - } - if err := cmd.Wait(); err != nil { - log.Fatal(err) - } -} - -func (n *Node) Sdot(s string) string { - var buf bytes.Buffer - n.astDot(&buf, s) - return buf.String() -} - -// TODO: rewrite it using Walk2 -func (n *Node) astDot(out io.Writer, label string) { - fmt.Fprintf(out, "digraph ast { ") - if label != "" { - fmt.Fprintf(out, "labelloc=\"t\"; label=\"%s\";", label) - } - anc := map[*Node]*Node{} - index := map[*Node]int{} - count := 0 - n.Walk(func(nod *Node) bool { - index[nod] = count - count++ - - for _, c := range nod.Child { - anc[c] = nod - } - name := "" - if nod.Token != nil { - name = strings.ReplaceAll(nod.Name(), `"`, `\"`) - } - fmt.Fprintf(out, "%d [label=\"%s\"]; ", index[nod], name) - if anc[nod] != 
nil { - fmt.Fprintf(out, "%d -> %d; ", index[anc[nod]], index[nod]) - } - return true - }, nil) - fmt.Fprintf(out, "}") - if c, ok := out.(io.Closer); ok { - c.Close() - } -} - -type nopCloser struct { - io.Writer -} - -func (nopCloser) Close() error { return nil } - -func dotWriter(dotCmd string) (io.WriteCloser, *exec.Cmd) { - if dotCmd == "" { - return nopCloser{io.Discard}, nil - } - fields := strings.Fields(dotCmd) - cmd := exec.Command(fields[0], fields[1:]...) - dotin, err := cmd.StdinPipe() - if err != nil { - log.Fatal(err) - } - if err = cmd.Start(); err != nil { - log.Fatal(err) - } - return dotin, cmd -} diff --git a/parser/interpreter.go b/parser/interpreter.go new file mode 100644 index 0000000..72fc667 --- /dev/null +++ b/parser/interpreter.go @@ -0,0 +1,52 @@ +package parser + +import ( + "github.com/gnolang/parscan/scanner" + "github.com/gnolang/parscan/vm" +) + +const debug = true + +type Interpreter struct { + *Compiler + *vm.Machine +} + +func NewInterpreter(s *scanner.Scanner) *Interpreter { + return &Interpreter{NewCompiler(s), &vm.Machine{}} +} + +func (i *Interpreter) Eval(src string) (res any, err error) { + codeOffset := len(i.Code) + dataOffset := 0 + if codeOffset > 0 { + // All data must be copied to the VM the first time only (re-entrance). + dataOffset = len(i.Data) + } + i.PopExit() // Remove last exit from previous run (re-entrance). + + t, err := i.Parse(src) + if err != nil { + return res, err + } + if err = i.Codegen(t); err != nil { + return res, err + } + i.Push(i.Data[dataOffset:]...) + i.PushCode(i.Code[codeOffset:]...) + i.PushCode([]int64{0, vm.Exit}) + i.SetIP(max(codeOffset, i.Entry)) + if debug { + i.PrintData() + i.PrintCode() + } + err = i.Run() + return i.Top(), err +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go new file mode 100644 index 0000000..ee927a5 --- /dev/null +++ b/parser/interpreter_test.go @@ -0,0 +1,66 @@ +package parser_test + +import ( + "fmt" + "log" + "testing" + + "github.com/gnolang/parscan/lang/golang" + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/scanner" +) + +var GoScanner *scanner.Scanner + +func init() { + log.SetFlags(log.Lshortfile) + GoScanner = scanner.NewScanner(golang.GoSpec) +} + +func TestEval(t *testing.T) { + for _, test := range goTests { + test := test + t.Run("", func(t *testing.T) { + interp := parser.NewInterpreter(GoScanner) + errStr := "" + r, e := interp.Eval(test.src) + t.Log(r, e) + if e != nil { + errStr = e.Error() + } + if errStr != test.err { + t.Errorf("got error %#v, want error %#v", errStr, test.err) + } + if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res { + t.Errorf("got %#v, want %#v", res, test.res) + } + }) + } +} + +var goTests = []struct { + src, res, err string +}{ + 0: {src: "", res: "<nil>"}, + 1: {src: "1+2", res: "3"}, + 2: {src: "1+", err: "block not terminated"}, + 3: {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, + 4: {src: "1+(2+3)", res: "6"}, + 5: {src: "(1+2)+3", res: "6"}, + 6: {src: "(6+(1+2)+3)+5", res: "17"}, + 7: {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, + 8: {src: "a := 2; a = 3; a", res: "3"}, + 9: {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"}, + 10: {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"}, + 11: {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"}, + 12: {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: 
"1"}, + 13: {src: "func f() int {return 2}; a := f(); a", res: "2"}, + 14: {src: "func f() int {return 2}; f()", res: "2"}, + 15: {src: "func f(a int) int {return a+2}; f(3)", res: "5"}, + 16: {src: "func f(a int) int {if a < 4 {a = 5}; return a }; f(3)", res: "5"}, + 17: {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"}, + 18: {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"}, + 19: {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"}, + 20: {src: "func f(a int, b int, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, + 21: {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, +} diff --git a/parser/kind.go b/parser/kind.go deleted file mode 100644 index d004471..0000000 --- a/parser/kind.go +++ /dev/null @@ -1,56 +0,0 @@ -package parser - -import "fmt" - -// kind defines the AST node kind. Its name is the concatenation -// of a category (Block, Decl, Expr, Op, Stmt) and an instance name. -type Kind int - -const ( - Undefined = Kind(iota) - BlockParen - BlockStmt - Comment - DeclFunc - ExprCall - Ident - LiteralNumber - LiteralString - OpAdd - OpAssign - OpDefine - OpDot - OpInferior - OpMultiply - OpSubtract - StmtIf - StmtReturn -) - -var kindString = [...]string{ - Undefined: "Undefined", - BlockParen: "BlockParen", - BlockStmt: "BlockStmt", - Comment: "Comment", - DeclFunc: "DeclFunc", - ExprCall: "ExprCall", - Ident: "Ident", - LiteralString: "LiteralString", - LiteralNumber: "LiteralNumber", - OpAdd: "OpAdd", - OpAssign: "OpAssign", - OpDefine: "OpDefine", - OpDot: "OpDot", - OpInferior: "OpInferior", - OpMultiply: "OpMultiply", - OpSubtract: "OpSubtract", - StmtIf: "StmtIf", - StmtReturn: "StmtReturn", -} - -func (k Kind) String() string { - if int(k) < 0 || int(k) > len(kindString) { - return fmt.Sprintf("unknown kind %d", k) - } - return kindString[k] -} diff --git a/parser/node.go b/parser/node.go deleted file mode 100644 index b6e34cd..0000000 --- a/parser/node.go +++ /dev/null @@ -1,50 +0,0 @@ -package parser - -import "github.com/gnolang/parscan/scanner" - -type Node struct { - Child []*Node // sub-tree nodes - *scanner.Token // token at origin of the node - Kind // Node kind, depends on the language spec -} - -// TODO: remove it in favor of Walk2 -func (n *Node) Walk(in, out func(*Node) bool) (stop bool) { - if in != nil && !in(n) { - return true - } - for _, child := range n.Child { - if child.Walk(in, out) { - return - } - } - if out != nil { - stop = !out(n) - } - return -} - -// Idem to walk, but also propagates the ancestor of visited node and child index. -func (n *Node) Walk2(a *Node, i int, in, out func(*Node, *Node, int) bool) (stop bool) { - if in != nil && !in(n, a, i) { - return true - } - for j, child := range n.Child { - if child.Walk2(n, j, in, out) { - return - } - } - if out != nil { - stop = !out(n, a, i) - } - return -} - -func (n *Node) RemoveChild(i int) { - n.Child = append(n.Child[:i], n.Child[i+1:]...) -} - -func (n *Node) InsertChild(node *Node, i int) { - n.Child = append(n.Child[:i+1], n.Child[i:]...) 
- n.Child[i] = node -} diff --git a/parser/parse.go b/parser/parse.go index ca89467..5f0b643 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -1,244 +1,373 @@ package parser import ( + "fmt" + "log" + "strconv" + "strings" + + "github.com/gnolang/parscan/lang" "github.com/gnolang/parscan/scanner" ) -const ( - Stmt = 1 << iota - ExprSep - Call - Index - Decl - MultiOp -) +type Parser struct { + *scanner.Scanner -type NodeSpec struct { - Kind // AST node kind - Flags uint // composable properties used for AST generation - Order int // operator precedence order + symbols map[string]*symbol + function *symbol + scope string + fname string + labelCount map[string]int } -type Parser struct { - *scanner.Scanner - Spec map[string]NodeSpec +func (p *Parser) Scan(s string, endSemi bool) (Tokens, error) { + return p.Scanner.Scan(s, endSemi) } -func (p *Parser) Parse(src string, ctx *Node) (nodes []*Node, err error) { - tokens, err := p.Scan(src) - if err != nil { - return +type Tokens []scanner.Token + +func (toks Tokens) String() (s string) { + for _, t := range toks { + s += fmt.Sprintf("%#v ", t.Str) } - return p.ParseTokens(tokens, ctx) + return s } -func (p *Parser) ParseTokens(tokens []*scanner.Token, ctx *Node) (nodes []*Node, err error) { - // TODO: error handling. - var root *Node // current root node - var expr *Node // current expression root node - var prev, c *Node // previous and current nodes - var lce *Node // last complete expression node - unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator. - prevToken := map[*Node]*scanner.Token{} - - for i, t := range tokens { - prev = c - c = &Node{ - Token: t, - Kind: p.Spec[t.Name()].Kind, +func (toks Tokens) Index(id lang.TokenId) int { + for i, t := range toks { + if t.Id == id { + return i } - if i > 0 { - prevToken[c] = tokens[i-1] + } + return -1 +} + +func (toks Tokens) LastIndex(id lang.TokenId) int { + for i := len(toks) - 1; i >= 0; i-- { + if toks[i].Id == id { + return i } - if c.Kind == Comment { - continue + } + return -1 +} + +func (toks Tokens) Split(id lang.TokenId) (result []Tokens) { + for { + i := toks.Index(id) + if i < 0 { + return append(result, toks) } - if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) { - unaryOp[c] = true + result = append(result, toks[:i]) + toks = toks[i+1:] + } +} + +func (p *Parser) Parse(src string) (out Tokens, err error) { + log.Printf("Parse src: %#v\n", src) + in, err := p.Scan(src, true) + if err != nil { + return out, err + } + log.Println("Parse in:", in) + for len(in) > 0 { + endstmt := in.Index(lang.Semicolon) + if endstmt == -1 { + return out, scanner.ErrBlock } - if c.Kind == Undefined { - switch t.Kind() { - case scanner.Number: - c.Kind = LiteralNumber - case scanner.Identifier: - c.Kind = Ident + + // Skip over simple init statements for some tokens (if, for, ...) + if lang.HasInit[in[0].Id] { + log.Println("may have init") + for in[endstmt-1].Id != lang.BraceBlock { + e2 := in[endstmt+1:].Index(lang.Semicolon) + if e2 == -1 { + return out, scanner.ErrBlock + } + endstmt += 1 + e2 } } + o, err := p.ParseStmt(in[:endstmt]) + if err != nil { + return out, err + } + out = append(out, o...) 
+ in = in[endstmt+1:] + } + return out, err +} - if root == nil { - if p.isSep(c) { - continue - } - lce = nil - root = c - if p.isExpr(c) { - expr = c - } +func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) { + log.Println("ParseStmt in:", in) + if len(in) == 0 { + return nil, nil + } + switch t := in[0]; t.Id { + case lang.Func: + return p.ParseFunc(in) + case lang.If: + return p.ParseIf(in) + case lang.Return: + return p.ParseReturn(in) + default: + return p.ParseExpr(in) + } +} + +func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) { + // TODO: handle anonymous functions (no function name) + // TODO: handle receiver (methods) + // TODO: handle parametric types (generics) + // TODO: handle variadic parameters + fname := in[1].Str + ofname := p.fname + p.fname = fname + ofunc := p.function + s, _, ok := p.getSym(fname, p.scope) + if !ok { + s = &symbol{} + p.symbols[p.scope+fname] = s + } + p.pushScope(fname) + defer func() { + p.fname = ofname // TODO remove if favor of function. + p.function = ofunc + p.popScope() + }() + + out = Tokens{ + {Id: lang.Enter, Str: "enter " + p.scope}, + {Id: lang.Goto, Str: "goto " + fname + "_end"}, // Skip function definition. + {Id: lang.Label, Pos: in[0].Pos, Str: fname}, + } + + bi := in.Index(lang.BraceBlock) + if bi < 0 { + return out, fmt.Errorf("no function body") + } + typ, err := p.ParseType(in[:bi]) + if err != nil { + return out, err + } + s.kind = symFunc + s.Type = typ + p.function = s + + log.Println("body:", in[len(in)-1].Block()) + toks, err := p.Parse(in[len(in)-1].Block()) + if err != nil { + return out, err + } + out = append(out, toks...) + out = append(out, + scanner.Token{Id: lang.Label, Str: fname + "_end"}, + scanner.Token{Id: lang.Exit}, + ) + log.Println("symbols", p.symbols) + return out, err +} + +func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) { + prefix := p.fname + "_if" + strconv.Itoa(p.labelCount[p.scope+p.fname]) + p.labelCount[p.scope+p.fname]++ + sc := 0 + ssc := strconv.Itoa(sc) + // We start from the end of the statement and examine tokens backward to + // get the destination labels already computed when jumps are set. + i := len(in) - 1 + for i > 0 { + if in[i].Id != lang.BraceBlock { + return nil, fmt.Errorf("expected '{', got %v", in[i]) + } + pre, err := p.Parse(in[i].Block()) + if err != nil { + return nil, err + } + //pre := append(Tokens{{Id: lang.Label, Str: prefix + "_b" + ssc}}, blockout...) + if sc > 0 { + pre = append(pre, scanner.Token{Id: lang.Goto, Str: "goto " + prefix + "_e0"}) + } + pre = append(pre, scanner.Token{Id: lang.Label, Str: prefix + "_e" + ssc}) + out = append(pre, out...) + + i-- + ifp := in[:i].LastIndex(lang.If) + + if in[i].Id == lang.Else { // Step over final 'else'. + i-- + sc++ + ssc = strconv.Itoa(sc) continue } - if t.IsBlock() { - if expr != nil { - if p.hasProp(c, ExprSep) && p.isExprSep(root) { - // A bracket block may end a previous expression. - root.Child = append(root.Child, expr) - expr = nil - } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) { - // Handle (possibly nested) call expressions. - if lce == nil || lce != expr { // TODO(marc): not general, fix it. 
- lce = prev - } - lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}} - lce.Token = scanner.NewToken("Call", c.Pos()) - lce.Kind = ExprCall - } - } - tcont := t.Content() - s := tcont[t.Start() : len(tcont)-t.End()] - n2, err := p.Parse(s, c) - if err != nil { + pre = Tokens{} + var init, cond Tokens + initcond := in[ifp+1 : i+1] + if ii := initcond.Index(lang.Semicolon); ii < 0 { + cond = initcond + } else { + init = initcond[:ii] + cond = initcond[ii+1:] + } + if len(init) > 0 { + if init, err = p.ParseStmt(init); err != nil { return nil, err } - c.Child = append(c.Child, n2...) - } - - // Process the end of an expression or a statement. - if t.IsSeparator() { - if t.Content() == "," && ctx.Kind != BlockParen { - // ignore comma separator in field lists - } else if expr != nil && p.hasProp(root, Stmt) { - root.Child = append(root.Child, expr) - if p.hasProp(expr, ExprSep) { - nodes = append(nodes, root) - root = nil - } - expr = nil - } else { - if expr != nil { - root = expr - } - nodes = append(nodes, root) - expr = nil - root = nil - } - continue + pre = append(pre, init...) } + if cond, err = p.ParseExpr(cond); err != nil { + return nil, err + } + pre = append(pre, cond...) + pre = append(pre, scanner.Token{Id: lang.JumpFalse, Str: "JumpFalse " + prefix + "_e" + ssc}) + out = append(pre, out...) + i = ifp + if i > 1 && in[i].Id == lang.If && in[i-1].Id == lang.Else { // Step over 'else if'. + i -= 2 + } + sc++ + ssc = strconv.Itoa(sc) + } + log.Println("prefix:", prefix) + log.Println("if tokens:", out) + return out, err +} - // We assume from now that current node is part of an expression subtree. - if expr == nil { - if p.isStatement(root) { - expr = c - continue +func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) { + if len(in) > 1 { + if out, err = p.ParseExpr(in[1:]); err != nil { + return out, err + } + } + + // TODO: the function symbol should be already present in the parser context. + // otherwise no way to handle anonymous func. + s := p.function + in[0].Beg = s.Type.NumOut() + in[0].End = s.Type.NumIn() + log.Println("ParseReturn:", p.fname, in[0]) + out = append(out, in[0]) + return out, err +} + +func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { + log.Println("ParseExpr in:", in) + var ops Tokens + var vl int + // + // Process tokens from last to first, the goal is to reorder the tokens in + // a stack machine processing order, so it can be directly interpreted. + // + for i := len(in) - 1; i >= 0; i-- { + t := in[i] + // temporary assumptions: binary operators, returning 1 value + switch t.Id { + case lang.Ident, lang.Int, lang.String: + out = append(out, t) + vl++ + case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Less: + // TODO: handle operator precedence to swap operators / operands if necessary + if vl < 2 { + ops = append(ops, t) + break } - expr = root - } - - // Update the expression subtree according to binary operator precedence rules. - // - operators are binary infix by default. - // - if an operator follows another, then it's unary prefix. - // - if an expression starts by an operator, then it's unary prefix. - // - non operator nodes have a default precedence of 0. - // TODO: handle postfix unary (i.e. ++) and ternary (i.e. ?:) - // - ep := p.Spec[expr.Content()].Order - cp := p.Spec[c.Content()].Order - a := expr - if unaryOp[c] { - cp = 0 - } - if cp != 0 { - if cp > ep { - // Perform an operator permutation at expr root as required by precedence. 
- // TODO(marc): maybe it can be generalized in below else branch. - expr, c = c, expr - a = expr // Temporary ancestor: its children may have to be permuted. - } else { - // Findout if an operator permutation is necessary in subtree. - c1 := expr - for { - a = c1 - if unaryOp[c1] { - c1, c = c, c1 - a = c1 - if c == expr { - expr = a - } - break - } - if len(c1.Child) < 2 { - break - } - c1 = c1.Child[1] - if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order { + case lang.ParenBlock: + // If the previous token is an arithmetic, logic or assign operator then + // this parenthesis block is an enclosed expr, otherwise a call expr. + if i == 0 || in[i-1].Id.IsOperator() { + out = append(out, t) + vl++ + break + } + // The call expression can be a function call, a conversion, + // a type assersion (including for type switch) + // func call: push args and func address then call + out = append(out, t) + vl++ + if t2 := in[i-1]; t2.Id == lang.Ident { + if s, sc, ok := p.getSym(t2.Str, p.scope); ok { + log.Println("callExpr:", t2.Str, p.scope, s, ok, sc) + if s.kind == symValue { + // Store the number of input parameters in the token Beg field. + ops = append(ops, scanner.Token{Str: "callX", Id: lang.CallX, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) break } } - // No permutation occured. Append current to last visited ancestor. - if len(a.Child) > 1 { - a.Child = a.Child[:1] - c.Child = append(c.Child, c1) - } - } - } else if ep != 0 { - for len(a.Child) > 1 { - a = a.Child[1] } + ops = append(ops, scanner.Token{Str: "call", Id: lang.Call, Pos: t.Pos}) } - a.Child = append(a.Child, c) - if p.hasProp(a, Call) { - lce = a - } - } - if root != nil && p.isStatement(root) { - if expr != nil { - root.Child = append(root.Child, expr) + if ol := len(ops); ol > 0 && vl > ol { + op := ops[ol-1] + ops = ops[:ol-1] + out = append(out, op) + vl-- } - } else if expr != nil { - root = expr } - if root != nil { - // /* - if p.hasProp(root, MultiOp) { - for { - if !p.fixMultiOp(root, prevToken) { - break - } + out = append(out, ops...) // TODO: verify that ops should be added in this order. + + log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) + // The tokens are now properly ordered, process nested blocks. + for i := len(out) - 1; i >= 0; i-- { + t := out[i] + var toks Tokens + switch t.Id { + case lang.ParenBlock, lang.BracketBlock: + if toks, err = p.ParseExprStr(t.Block()); err != nil { + return out, err } + default: + continue } - // */ - nodes = append(nodes, root) + + // replace block token by its parsed result. + log.Println("toks:", toks) + out2 := append(Tokens{}, out[:i]...) + out2 = append(out2, toks...) + out = append(out2, out[i+1:]...) } - return nodes, err + log.Println("Final out:", out) + return out, err } -func (p *Parser) fixMultiOp(root *Node, prevToken map[*Node]*scanner.Token) bool { - for i, c := range root.Child { - for j, cc := range c.Child { - if pt := prevToken[cc]; pt != nil && pt.Content() == "," { - c.RemoveChild(j) - root.InsertChild(cc, i) - return true - } +func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { + if tokens, err = p.Scan(s, false); err != nil { + return + } + var result Tokens + for _, sub := range tokens.Split(lang.Comma) { + toks, err := p.ParseExpr(sub) + if err != nil { + return result, err } + result = append(toks, result...) 
} - return false + return result, err } -func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 } -func (p *Parser) isStatement(n *Node) bool { return p.Spec[n.Content()].Flags&Stmt != 0 } -func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].Flags&ExprSep != 0 } -func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) } -func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator } -func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block } - -func (p *Parser) precedenceToken(t *scanner.Token) int { - s := t.Content() - if l := t.Start(); l > 0 { - s = s[:l] +func (p *Parser) numItems(s string, sep lang.TokenId) int { + tokens, err := p.Scan(s, false) + if err != nil { + return -1 } - return p.Spec[s].Order + r := 0 + for _, t := range tokens.Split(sep) { + if len(t) == 0 { + continue + } + r++ + } + return r +} + +func (p *Parser) pushScope(name string) { + p.scope = strings.TrimPrefix(p.scope+"/"+name+"/", "/") } -func (p *Parser) canCallToken(t *scanner.Token) bool { - return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0 +func (p *Parser) popScope() { + p.scope = strings.TrimSuffix(p.scope, "/") + j := strings.LastIndex(p.scope, "/") + if j == -1 { + p.scope = "" + return + } + p.scope = p.scope[:j] } diff --git a/parser/parse_test.go b/parser/parse_test.go deleted file mode 100644 index 4559acf..0000000 --- a/parser/parse_test.go +++ /dev/null @@ -1,257 +0,0 @@ -package parser - -import ( - "log" - "os" - "testing" - - "github.com/gnolang/parscan/scanner" -) - -var GoScanner = &scanner.Scanner{ - CharProp: [scanner.ASCIILen]uint{ - '\t': scanner.CharSep, - '\n': scanner.CharLineSep, - ' ': scanner.CharSep, - '!': scanner.CharOp, - '"': scanner.CharStr, - '%': scanner.CharOp, - '&': scanner.CharOp, - '\'': scanner.CharStr, - '(': scanner.CharBlock, - '*': scanner.CharOp, - '+': scanner.CharOp, - ',': scanner.CharGroupSep, - '-': scanner.CharOp, - '.': scanner.CharOp, - '/': scanner.CharOp, - ':': scanner.CharOp, - ';': scanner.CharGroupSep, - '<': scanner.CharOp, - '=': scanner.CharOp, - '>': scanner.CharOp, - '[': scanner.CharBlock, - '^': scanner.CharOp, - '{': scanner.CharBlock, - '|': scanner.CharOp, - '~': scanner.CharOp, - }, - End: map[string]string{ - "(": ")", - "{": "}", - "[": "]", - "/*": "*/", - `"`: `"`, - "'": "'", - "`": "`", - "//": "\n", - }, - BlockProp: map[string]uint{ - "(": scanner.CharBlock, - "{": scanner.CharBlock, - "[": scanner.CharBlock, - `"`: scanner.CharStr | scanner.StrEsc | scanner.StrNonl, - "`": scanner.CharStr, - "'": scanner.CharStr | scanner.StrEsc, - "/*": scanner.CharStr, - "//": scanner.CharStr | scanner.ExcludeEnd | scanner.EosValidEnd, - }, - SkipSemi: map[string]bool{ - "++": true, - "--": true, - "case": true, - "chan": true, - "const": true, - "default": true, - "defer": true, - "else": true, - "for": true, - "func": true, - "go": true, - "goto": true, - "if": true, - "import": true, - "interface": true, - "map": true, - "package": true, - "range": true, - "select": true, - "struct": true, - "switch": true, - "type": true, - "var": true, - }, -} - -var GoParser = &Parser{ - Scanner: GoScanner, - Spec: map[string]NodeSpec{ - ".": {Kind: OpDot, Flags: Call, Order: 3}, - "*": {Kind: OpMultiply, Order: 4}, - "+": {Kind: OpAdd, Order: 5}, - "-": {Kind: OpSubtract, Order: 5}, - "<": {Kind: OpInferior, Order: 6}, - ":=": {Kind: OpDefine, Flags: MultiOp, Order: 7}, - "=": {Kind: OpAssign, Flags: MultiOp, 
Order: 7}, - "if": {Kind: StmtIf, Flags: Stmt | ExprSep}, - "func": {Kind: DeclFunc, Flags: Decl | Call}, - "return": {Kind: StmtReturn, Flags: Stmt}, - "{..}": {Kind: BlockStmt, Flags: ExprSep}, - "(..)": {Kind: BlockParen, Flags: Call}, - "//..": {Kind: Comment}, - "/*..": {Kind: Comment}, - }, -} - -func init() { - GoParser.Init() - log.SetFlags(log.Lshortfile) -} - -func TestParse(t *testing.T) { - for _, test := range goTests { - test := test - t.Run("", func(t *testing.T) { - var err error - errStr := "" - n := &Node{} - if n.Child, err = GoParser.Parse(test.src, n); err != nil { - errStr = err.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - if dot := n.Sdot(""); dot != test.dot { - t.Errorf("got %#v, want %#v", dot, test.dot) - } - t.Log(test.src) - t.Log(n.Sdot("")) - if dotCmd := os.Getenv("DOT"); dotCmd != "" { - n.Dot(dotCmd, "") - } - }) - } -} - -var goTests = []struct { - src, dot, err string - skip bool -}{{ // #00 - src: "", - dot: `digraph ast { 0 [label=""]; }`, -}, { // #01 - src: "12", - dot: `digraph ast { 0 [label=""]; 1 [label="12"]; 0 -> 1; }`, -}, { // #02 - src: "1 + 2", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; }`, -}, { // #03 - src: "1 + 2 * 3", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="3"]; 3 -> 5; }`, -}, { // #04 - src: "1 * 2 + 3", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="1"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="3"]; 1 -> 5; }`, -}, { // #05 - src: "a := 2 + 5", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="5"]; 3 -> 5; }`, -}, { // #06 - src: "a := 1 * 2 + 3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="3"]; 3 -> 7; }`, -}, { // #07 - src: "a := 1 + 2 * 3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="*"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="3"]; 5 -> 7; }`, -}, { // #08 - src: "a := 1 + 2 * 3 + 4", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="*"]; 5 -> 6; 7 [label="2"]; 6 -> 7; 8 [label="3"]; 6 -> 8; 9 [label="4"]; 5 -> 9; }`, -}, { // #09 - src: "a := 1 + 2 + 3 * 4", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="*"]; 5 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`, -}, { // #10 - src: "a := 1 * 2 + 3 * 4", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="*"]; 3 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`, -}, { // #11 - src: "a := (1 + 2) * 3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="(..)"]; 3 -> 4; 5 [label="+"]; 4 -> 5; 6 [label="1"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="3"]; 3 -> 8; }`, -}, { // #12 - src: "a := 2 * -3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 
-> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="-"]; 3 -> 5; 6 [label="3"]; 5 -> 6; }`, -}, { // #13 - src: "-5 + 4", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="4"]; 1 -> 4; }`, -}, { // #14 - src: "-5 + -4", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="-"]; 1 -> 4; 5 [label="4"]; 4 -> 5; }`, -}, { // #15 - src: "a := -5 + -4", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="-"]; 3 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 3 -> 6; 7 [label="4"]; 6 -> 7; }`, -}, { // #16 - src: "*a := 5 * -3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 4 -> 6; 7 [label="3"]; 6 -> 7; }`, -}, { // #17 - src: "*a := -5 * 3", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="-"]; 4 -> 5; 6 [label="5"]; 5 -> 6; 7 [label="3"]; 4 -> 7; }`, -}, { // #18 - src: "1+2\n3-4", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; 4 [label="-"]; 0 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="4"]; 4 -> 6; }`, -}, { // #19 - src: "i = i+1", - dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="i"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="i"]; 3 -> 4; 5 [label="1"]; 3 -> 5; }`, -}, { // #20 - src: "a[12] = 5", - dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="5"]; 1 -> 5; }`, -}, { // #21 - src: "a[12][0] = 3", - dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="[..]"]; 2 -> 5; 6 [label="0"]; 5 -> 6; 7 [label="3"]; 1 -> 7; }`, -}, { // #22 - src: "a.b = 34", - dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="."]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="b"]; 2 -> 4; 5 [label="34"]; 1 -> 5; }`, -}, { // #23 - src: "if i < 2 { return j }", - dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label="<"]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="{..}"]; 1 -> 5; 6 [label="return"]; 5 -> 6; 7 [label="j"]; 6 -> 7; }`, -}, { // #24 - src: "if i:=1; i < 2 { return j }", - dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label=":="]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="1"]; 2 -> 4; 5 [label="<"]; 1 -> 5; 6 [label="i"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="{..}"]; 1 -> 8; 9 [label="return"]; 8 -> 9; 10 [label="j"]; 9 -> 10; }`, -}, { // #25 - src: "f(i) + f(j)", - dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="f"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="i"]; 4 -> 5; 6 [label="Call"]; 1 -> 6; 7 [label="f"]; 6 -> 7; 8 [label="(..)"]; 6 -> 8; 9 [label="j"]; 8 -> 9; }`, -}, { // #26 - src: "a := 1 // This is a comment", - dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="1"]; 1 -> 3; }`, - /* - }, { // #27 - src: "a, b, c = 1, f(2), 3", - //src: "f(i) + f(j)(4)", // not ok - }, { // #26 - src: "if i < 2 {return i}; return f(i-2) + f(i-1)", - }, { // #27 - src: "for i < 2 { println(i) }", - }, { // #28 - src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }", - }, { 
// #29 - src: "a := []int{3, 4}", - }, { // #30 - //src: "a := struct{int}", - src: "a, b = c, d", - }, { // #31 - //src: "a := [2]int{3, 4}", - src: `fmt.Println("Hello")`, - //src: "(1 + 2) * (3 - 4)", - //src: "1 + (1 + 2)", - }, { // #32 - //src: `a(3)(4)`, - //src: `3 + 2 * a(3) + 5`, - //src: `3 + 2 * a(3)(4) + (5)`, - //src: `(a(3))(4)`, - src: `a(3)(4)`, - dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`, - //src: `println("Hello")`, - //src: `a.b.c + 3`, - }, { // #33 - src: `func f(a int, b int) {return a + b}; f(1+2)`, - }, { // #34 - src: `if a == 1 { - println(2) - } - println("bye")`, - */ -}} diff --git a/parser/symbol.go b/parser/symbol.go new file mode 100644 index 0000000..9975c24 --- /dev/null +++ b/parser/symbol.go @@ -0,0 +1,67 @@ +package parser + +import ( + "reflect" + "strings" +) + +type symKind int + +const ( + symValue symKind = iota // a Go value defined in the runtime + symType // a Go type + symLabel // a label indication a position in the VM code + symConst // a Go constant + symVar // a Go variable, located in the VM memory + symFunc // a Go function, located in the VM code +) + +type symbol struct { + kind symKind + index int // address of symbol in frame + value any + Type reflect.Type + local bool // if true address is relative to local frame, otherwise global + used bool +} + +func (p *Parser) AddSym(i int, name string, v any) { p.addSym(i, name, v, symValue, nil, false) } + +func (p *Parser) addSym(i int, name string, v any, k symKind, t reflect.Type, local bool) { + p.symbols[name] = &symbol{kind: k, index: i, local: local, value: v, Type: t, used: true} +} + +// getSym searches for an existing symbol starting from the deepest scope. +func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) { + for { + if sym, ok = p.symbols[scope+name]; ok { + return sym, scope, ok + } + scope = strings.TrimSuffix(scope, "/") + i := strings.LastIndex(scope, "/") + if i == -1 { + scope = "" + break + } + if scope = scope[:i]; scope == "" { + break + } + } + sym, ok = p.symbols[name] + return sym, scope, ok +} + +func initUniverse() map[string]*symbol { + return map[string]*symbol{ + "any": {kind: symType, Type: reflect.TypeOf((*any)(nil)).Elem()}, + "bool": {kind: symType, Type: reflect.TypeOf((*bool)(nil)).Elem()}, + "error": {kind: symType, Type: reflect.TypeOf((*error)(nil)).Elem()}, + "int": {kind: symType, Type: reflect.TypeOf((*int)(nil)).Elem()}, + "string": {kind: symType, Type: reflect.TypeOf((*string)(nil)).Elem()}, + + "nil": {}, + "iota": {value: 0}, + "true": {value: true}, + "false": {value: false}, + } +} diff --git a/parser/type.go b/parser/type.go new file mode 100644 index 0000000..77ccc6e --- /dev/null +++ b/parser/type.go @@ -0,0 +1,117 @@ +package parser + +import ( + "fmt" + "log" + "reflect" + + "github.com/gnolang/parscan/lang" +) + +// ParseType parses a list of tokens defining a type expresssion and returns +// the corresponding runtime type or an error. +func (p *Parser) ParseType(in Tokens) (typ reflect.Type, err error) { + log.Println("ParseType", in) + switch in[0].Id { + case lang.Func: + // Get argument and return token positions depending on function pattern: + // method with receiver, named function or anonymous closure. 
+ // TODO: handle variadics + var out Tokens + var indexArgs int + switch l, in1 := len(in), in[1]; { + case l >= 4 && in1.Id == lang.ParenBlock && in[2].Id == lang.Ident: + indexArgs, out = 3, in[4:] + case l >= 3 && in1.Id == lang.Ident: + indexArgs, out = 2, in[3:] + case l >= 2 && in1.Id == lang.ParenBlock: + indexArgs, out = 1, in[2:] + default: + return nil, fmt.Errorf("invalid func signature") + } + + // We can now parse function input and output parameter types. + // Input parameters are always enclosed by parenthesis. + iargs, err := p.Scan(in[indexArgs].Block(), false) + if err != nil { + return nil, err + } + arg, err := p.parseParamTypes(iargs, true) + if err != nil { + return nil, err + } + // Output parameters may be empty, or enclosed or not by parenthesis. + if len(out) == 1 && out[0].Id == lang.ParenBlock { + if out, err = p.Scan(out[0].Block(), false); err != nil { + return nil, err + } + } + ret, err := p.parseParamTypes(out, false) + if err != nil { + return nil, err + } + return reflect.FuncOf(arg, ret, false), nil + + case lang.Ident: + // TODO: selector expression (pkg.type) + s, _, ok := p.getSym(in[0].Str, p.scope) + if !ok || s.kind != symType { + return nil, fmt.Errorf("invalid type %s", in[0].Str) + } + return s.Type, nil + } + return typ, err +} + +// parseParamTypes parses a list of comma separated typed parameters and returns a list of +// runtime types. Implicit parameter names and types are supported. +func (p *Parser) parseParamTypes(in Tokens, arg bool) (types []reflect.Type, err error) { + // Parse from right to left, to allow multiple comma separated parameters of the same type. + list := in.Split(lang.Comma) + for i := len(list) - 1; i >= 0; i-- { + t := list[i] + if len(t) == 0 { + continue + } + param := "" + if p.hasFirstParam(t) { + param = t[0].Str + t = t[1:] + if len(t) == 0 { + if len(types) == 0 { + return nil, fmt.Errorf("Invalid type %v", t[0]) + } + // Type was ommitted, apply the previous one from the right. + types = append([]reflect.Type{types[0]}, types...) + if arg { + p.addSym(-i-2, p.scope+param, nil, symVar, types[0], true) + } else { + p.addSym(i, p.scope+param, nil, symVar, types[0], true) + } + continue + } + } + typ, err := p.ParseType(t) + if err != nil { + return nil, err + } + if param != "" { + if arg { + p.addSym(-i-2, p.scope+param, nil, symVar, typ, true) + } else { + p.addSym(i, p.scope+param, nil, symVar, typ, true) + } + } + types = append([]reflect.Type{typ}, types...) + } + return types, err +} + +// hasFirstParam returns true if the first token of a list is a parameter name. +func (p *Parser) hasFirstParam(in Tokens) bool { + if in[0].Id != lang.Ident { + return false + } + s, _, ok := p.getSym(in[0].Str, p.scope) + return !ok || s.kind != symType +} |
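The ParseIf/Codegen pair above lowers control flow to labels plus Goto and JumpFalse pseudo-tokens, and Codegen keeps a fixList so jumps to labels that have not been emitted yet can be patched once the label's code address is known. Below is a minimal backpatching sketch under invented opcodes and an absolute-address encoding; it only illustrates the fixup idea, not the real vm package or its relative offsets.

```go
// A toy backpatching pass: forward jumps are emitted with a placeholder
// target and patched once the destination label's address is known.
package main

import "fmt"

const (
	opPush = iota
	opJumpFalse
	opJump
	opNop
)

type fixup struct {
	at    int    // index of the instruction to patch
	label string // destination label
}

func main() {
	var code [][2]int          // [opcode, operand] pairs
	labels := map[string]int{} // label -> code address
	var fixups []fixup

	emit := func(op, arg int) int {
		code = append(code, [2]int{op, arg})
		return len(code) - 1
	}
	jump := func(op int, label string) {
		if addr, ok := labels[label]; ok {
			emit(op, addr) // backward jump: target already known
			return
		}
		fixups = append(fixups, fixup{at: emit(op, -1), label: label})
	}

	// "if <cond> { <body> }" lowered as: cond; JumpFalse end; body; end:
	emit(opPush, 0)            // pretend condition value
	jump(opJumpFalse, "if_e0") // forward reference, target unknown yet
	emit(opNop, 0)             // pretend body
	labels["if_e0"] = len(code)

	// Resolve all forward references now that every label is defined.
	for _, f := range fixups {
		code[f.at][1] = labels[f.label]
	}
	fmt.Println(code) // [[0 0] [1 3] [3 0]]
}
```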
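symbol.go together with the pushScope/popScope helpers in parse.go keeps every symbol in one flat map keyed by a slash-separated scope prefix, and getSym walks outward from the innermost scope before falling back to the global table. The sketch below shows that lookup strategy with a reduced symbol type; the exact key format and trimming logic used by parscan may differ.

```go
// Scope-prefixed symbol lookup: keys look like "main/f/x", and lookup
// trims one path segment at a time until it reaches the global scope.
package main

import (
	"fmt"
	"strings"
)

type table map[string]int

// get searches the given scope, then every enclosing scope, then globals.
func (t table) get(name, scope string) (int, bool) {
	for scope != "" {
		if v, ok := t[scope+name]; ok {
			return v, true
		}
		scope = strings.TrimSuffix(scope, "/")
		i := strings.LastIndex(scope, "/")
		if i == -1 {
			break
		}
		scope = scope[:i+1]
	}
	v, ok := t[name]
	return v, ok
}

func main() {
	t := table{
		"x":        1, // global x
		"main/x":   2, // x inside func main
		"main/f/y": 3, // y inside f, nested in main
	}
	fmt.Println(t.get("y", "main/f/")) // 3 true: found in the innermost scope
	fmt.Println(t.get("x", "main/f/")) // 2 true: falls back to enclosing main
	fmt.Println(t.get("z", "main/f/")) // 0 false: not defined anywhere
}
```

The flat map keeps lookup cheap and avoids a tree of scope objects, which fits the overall goal of using only lists and maps rather than nested structures.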
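parseParamTypes in type.go scans parameter lists from right to left so that a group such as `a, b, c int` can share one explicit type. The sketch below shows only that propagation rule on simplified (name, type) pairs; it is an assumption-level illustration, not the reflect-based implementation above.

```go
// Right-to-left type propagation for grouped parameters: a parameter that
// omits its type takes the nearest explicit type found to its right.
package main

import "fmt"

// param is a (name, optional type) pair, e.g. {"a", ""} or {"c", "int"}.
type param struct{ name, typ string }

func resolve(params []param) ([]string, error) {
	types := make([]string, len(params))
	last := ""
	for i := len(params) - 1; i >= 0; i-- {
		if params[i].typ != "" {
			last = params[i].typ
		}
		if last == "" {
			return nil, fmt.Errorf("missing type for parameter %s", params[i].name)
		}
		types[i] = last
	}
	return types, nil
}

func main() {
	// func f(a, b, c int, s string) would scan to:
	types, err := resolve([]param{{"a", ""}, {"b", ""}, {"c", "int"}, {"s", "string"}})
	fmt.Println(types, err) // [int int int string] <nil>
}
```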
