From 6dd78f44adf6fb032d0ecd9db813651b9524fcac Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Wed, 6 Sep 2023 08:49:19 +0200 Subject: chore: refactor some APIs The scanner returns a slice of pointers to tokens instead of a slice of tokens. The parser now passes the initial node context. --- codegen/compiler.go | 1 - codegen/compiler_test.go | 2 +- codegen/interpreter.go | 2 +- codegen/interpreter_test.go | 7 +++-- parser/dot.go | 5 ++- parser/node.go | 15 +++++++-- parser/parse.go | 51 ++++++++++++++++++++++++------- parser/parse_test.go | 74 +++++++++++++++++++++++---------------------- scanner/scan.go | 35 ++++++++++----------- scanner/scan_test.go | 5 ++- vm0/vm.go | 2 +- vm0/vm_test.go | 2 +- 12 files changed, 124 insertions(+), 77 deletions(-) diff --git a/codegen/compiler.go b/codegen/compiler.go index 30fc070..02b67cb 100644 --- a/codegen/compiler.go +++ b/codegen/compiler.go @@ -167,7 +167,6 @@ func (c *Compiler) CodeGen(node *parser.Node) (err error) { return true }) - log.Println("main:", c.symbols["main"]) if s, _, ok := c.getSym("main", ""); ok { if i, ok := c.codeIndex(s); ok { // Internal call is always relative to instruction pointer. 
diff --git a/codegen/compiler_test.go b/codegen/compiler_test.go index fbe8bb2..5c7e9d3 100644 --- a/codegen/compiler_test.go +++ b/codegen/compiler_test.go @@ -19,7 +19,7 @@ func TestCodeGen(t *testing.T) { c.AddSym(fmt.Println, "println") n := &parser.Node{} var err error - if n.Child, err = golang.GoParser.Parse(test.src); err != nil { + if n.Child, err = golang.GoParser.Parse(test.src, n); err != nil { t.Error(err) } errStr := "" diff --git a/codegen/interpreter.go b/codegen/interpreter.go index a3268d9..18cc5f8 100644 --- a/codegen/interpreter.go +++ b/codegen/interpreter.go @@ -21,7 +21,7 @@ func NewInterpreter(p *parser.Parser) *Interpreter { func (i *Interpreter) Eval(src string) (res any, err error) { n := &parser.Node{} - if n.Child, err = i.Parse(src); err != nil { + if n.Child, err = i.Parse(src, n); err != nil { return res, err } if debug { diff --git a/codegen/interpreter_test.go b/codegen/interpreter_test.go index 1d8a6b5..2563677 100644 --- a/codegen/interpreter_test.go +++ b/codegen/interpreter_test.go @@ -2,7 +2,6 @@ package codegen import ( "fmt" - "log" "testing" "github.com/gnolang/parscan/lang/golang" @@ -25,14 +24,13 @@ func TestEval(t *testing.T) { if res != test.res { t.Errorf("got %#v, want %#v", res, test.res) } - log.Println(r, e) }) } } var evalTests = []struct { name, src, res, err string -}{{ /* #00 */ +}{{ // #00 src: "1 + 2", res: "3", }, { // #01 @@ -42,6 +40,9 @@ var evalTests = []struct { src: "func f(a int) int { return a + 1 }; f(5)", res: "6", }, { // #03 + src: "func f(a int) (b int) { b = a + 1; return b }; f(5)", + res: "6", +}, { // #04 src: "func f(a int) (b int) { b = a + 1; return }; f(5)", res: "6", }} diff --git a/parser/dot.go b/parser/dot.go index f486cd5..11d5014 100644 --- a/parser/dot.go +++ b/parser/dot.go @@ -50,7 +50,10 @@ func (n *Node) astDot(out io.Writer, label string) { for _, c := range nod.Child { anc[c] = nod } - name := strings.ReplaceAll(nod.Name(), `"`, `\"`) + name := "" + if nod.Token != nil { + 
name = strings.ReplaceAll(nod.Name(), `"`, `\"`) + } fmt.Fprintf(out, "%d [label=\"%s\"]; ", index[nod], name) if anc[nod] != nil { fmt.Fprintf(out, "%d -> %d; ", index[anc[nod]], index[nod]) diff --git a/parser/node.go b/parser/node.go index d2f13ef..b6e34cd 100644 --- a/parser/node.go +++ b/parser/node.go @@ -3,9 +3,9 @@ package parser import "github.com/gnolang/parscan/scanner" type Node struct { - Child []*Node // sub-tree nodes - scanner.Token // token at origin of the node - Kind // Node kind, depends on the language spec + Child []*Node // sub-tree nodes + *scanner.Token // token at origin of the node + Kind // Node kind, depends on the language spec } // TODO: remove it in favor of Walk2 @@ -39,3 +39,12 @@ func (n *Node) Walk2(a *Node, i int, in, out func(*Node, *Node, int) bool) (stop } return } + +func (n *Node) RemoveChild(i int) { + n.Child = append(n.Child[:i], n.Child[i+1:]...) +} + +func (n *Node) InsertChild(node *Node, i int) { + n.Child = append(n.Child[:i+1], n.Child[i:]...) + n.Child[i] = node +} diff --git a/parser/parse.go b/parser/parse.go index 08fbdbd..ca89467 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -10,6 +10,7 @@ const ( Call Index Decl + MultiOp ) type NodeSpec struct { @@ -23,21 +24,22 @@ type Parser struct { Spec map[string]NodeSpec } -func (p *Parser) Parse(src string) (n []*Node, err error) { +func (p *Parser) Parse(src string, ctx *Node) (nodes []*Node, err error) { tokens, err := p.Scan(src) if err != nil { return } - return p.ParseTokens(tokens) + return p.ParseTokens(tokens, ctx) } -func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) { +func (p *Parser) ParseTokens(tokens []*scanner.Token, ctx *Node) (nodes []*Node, err error) { // TODO: error handling. 
var root *Node // current root node var expr *Node // current expression root node var prev, c *Node // previous and current nodes var lce *Node // last complete expression node unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator. + prevToken := map[*Node]*scanner.Token{} for i, t := range tokens { prev = c @@ -45,6 +47,9 @@ func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) Token: t, Kind: p.Spec[t.Name()].Kind, } + if i > 0 { + prevToken[c] = tokens[i-1] + } if c.Kind == Comment { continue } @@ -90,7 +95,7 @@ func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) } tcont := t.Content() s := tcont[t.Start() : len(tcont)-t.End()] - n2, err := p.Parse(s) + n2, err := p.Parse(s, c) if err != nil { return nil, err } @@ -99,10 +104,12 @@ func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) // Process the end of an expression or a statement. if t.IsSeparator() { - if expr != nil && p.hasProp(root, Stmt) { + if t.Content() == "," && ctx.Kind != BlockParen { + // ignore comma separator in field lists + } else if expr != nil && p.hasProp(root, Stmt) { root.Child = append(root.Child, expr) if p.hasProp(expr, ExprSep) { - roots = append(roots, root) + nodes = append(nodes, root) root = nil } expr = nil @@ -110,7 +117,7 @@ func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) if expr != nil { root = expr } - roots = append(roots, root) + nodes = append(nodes, root) expr = nil root = nil } @@ -190,9 +197,31 @@ func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) root = expr } if root != nil { - roots = append(roots, root) + // /* + if p.hasProp(root, MultiOp) { + for { + if !p.fixMultiOp(root, prevToken) { + break + } + } + } + // */ + nodes = append(nodes, root) + } + return nodes, err +} + +func (p *Parser) fixMultiOp(root *Node, prevToken map[*Node]*scanner.Token) bool { + for i, c := range root.Child { + for j, cc 
:= range c.Child { + if pt := prevToken[cc]; pt != nil && pt.Content() == "," { + c.RemoveChild(j) + root.InsertChild(cc, i) + return true + } + } } - return roots, err + return false } func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 } @@ -202,7 +231,7 @@ func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator } func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block } -func (p *Parser) precedenceToken(t scanner.Token) int { +func (p *Parser) precedenceToken(t *scanner.Token) int { s := t.Content() if l := t.Start(); l > 0 { s = s[:l] @@ -210,6 +239,6 @@ func (p *Parser) precedenceToken(t scanner.Token) int { return p.Spec[s].Order } -func (p *Parser) canCallToken(t scanner.Token) bool { +func (p *Parser) canCallToken(t *scanner.Token) bool { return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0 } diff --git a/parser/parse_test.go b/parser/parse_test.go index 0a41d09..ffc0f53 100644 --- a/parser/parse_test.go +++ b/parser/parse_test.go @@ -66,8 +66,8 @@ var GoParser = &Parser{ "+": {Kind: OpAdd, Order: 5}, "-": {Kind: OpSubtract, Order: 5}, "<": {Kind: OpInferior, Order: 6}, - ":=": {Kind: OpDefine, Order: 7}, - "=": {Kind: OpAssign, Order: 7}, + ":=": {Kind: OpDefine, Flags: MultiOp, Order: 7}, + "=": {Kind: OpAssign, Flags: MultiOp, Order: 7}, "if": {Kind: StmtIf, Flags: Stmt | ExprSep}, "func": {Kind: DeclFunc, Flags: Decl | Call}, "return": {Kind: StmtReturn, Flags: Stmt}, @@ -90,7 +90,7 @@ func TestParse(t *testing.T) { var err error errStr := "" n := &Node{} - if n.Child, err = GoParser.Parse(test.src); err != nil { + if n.Child, err = GoParser.Parse(test.src, n); err != nil { errStr = err.Error() } if errStr != test.err { @@ -192,39 +192,41 @@ var goTests = []struct { }, { // #26 src: "a := 1 // This is a comment", dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 
[label="a"]; 1 -> 2; 3 [label="1"]; 1 -> 3; }`, - //src: "f(i) + f(j)(4)", // not ok /* - }, { // #26 - src: "if i < 2 {return i}; return f(i-2) + f(i-1)", - }, { // #27 - src: "for i < 2 { println(i) }", - }, { // #28 - src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }", - }, { // #29 - src: "a := []int{3, 4}", - }, { // #30 - //src: "a := struct{int}", - src: "a, b = c, d", - }, { // #31 - //src: "a := [2]int{3, 4}", - src: `fmt.Println("Hello")`, - //src: "(1 + 2) * (3 - 4)", - //src: "1 + (1 + 2)", - }, { // #32 - //src: `a(3)(4)`, - //src: `3 + 2 * a(3) + 5`, - //src: `3 + 2 * a(3)(4) + (5)`, - //src: `(a(3))(4)`, - src: `a(3)(4)`, - dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`, - //src: `println("Hello")`, - //src: `a.b.c + 3`, - }, { // #33 - src: `func f(a int, b int) {return a + b}; f(1+2)`, - }, { // #34 - src: `if a == 1 { - println(2) - } - println("bye")`, + }, { // #27 + src: "a, b, c = 1, f(2), 3", + //src: "f(i) + f(j)(4)", // not ok + }, { // #26 + src: "if i < 2 {return i}; return f(i-2) + f(i-1)", + }, { // #27 + src: "for i < 2 { println(i) }", + }, { // #28 + src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }", + }, { // #29 + src: "a := []int{3, 4}", + }, { // #30 + //src: "a := struct{int}", + src: "a, b = c, d", + }, { // #31 + //src: "a := [2]int{3, 4}", + src: `fmt.Println("Hello")`, + //src: "(1 + 2) * (3 - 4)", + //src: "1 + (1 + 2)", + }, { // #32 + //src: `a(3)(4)`, + //src: `3 + 2 * a(3) + 5`, + //src: `3 + 2 * a(3)(4) + (5)`, + //src: `(a(3))(4)`, + src: `a(3)(4)`, + dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`, + //src: `println("Hello")`, + //src: `a.b.c + 3`, 
+ }, { // #33 + src: `func f(a int, b int) {return a + b}; f(1+2)`, + }, { // #34 + src: `if a == 1 { + println(2) + } + println("bye")`, */ }} diff --git a/scanner/scan.go b/scanner/scan.go index 1cc36a7..27b6669 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -72,8 +72,8 @@ func (t *Token) Name() string { return name } -func NewToken(content string, pos int) Token { - return Token{pos, Custom, content, 0, 0, nil} +func NewToken(content string, pos int) *Token { + return &Token{pos, Custom, content, 0, 0, nil} } const ASCIILen = 1 << 7 // 128 @@ -126,7 +126,7 @@ func (sc *Scanner) Init() { func IsNum(r rune) bool { return '0' <= r && r <= '9' } -func (sc *Scanner) Scan(src string) (tokens []Token, err error) { +func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { offset := 0 s := src for len(s) > 0 { @@ -157,7 +157,7 @@ func loc(s string, p int) string { } // Next returns the next token in string. -func (sc *Scanner) Next(src string) (tok Token, err error) { +func (sc *Scanner) Next(src string) (tok *Token, err error) { p := 0 // Skip initial separators. @@ -173,28 +173,28 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { for i, r := range src { switch { case sc.IsSep(r): - return Token{}, nil + return &Token{}, nil case sc.IsGroupSep(r): // TODO: handle group separators. 
- return Token{kind: Separator, pos: p + i, content: string(r)}, nil + return &Token{kind: Separator, pos: p + i, content: string(r)}, nil case sc.IsLineSep(r): - return Token{kind: Separator, pos: p + i, content: " "}, nil + return &Token{kind: Separator, pos: p + i, content: " "}, nil case sc.IsStr(r): s, ok := sc.getStr(src[i:], 1) if !ok { err = ErrBlock } - return Token{kind: String, pos: p + i, content: s, start: 1, end: 1}, err + return &Token{kind: String, pos: p + i, content: s, start: 1, end: 1}, err case sc.IsBlock(r): b, ok := sc.getBlock(src[i:], 1) if !ok { err = ErrBlock } - return Token{kind: Block, pos: p + i, content: b, start: 1, end: 1}, err + return &Token{kind: Block, pos: p + i, content: b, start: 1, end: 1}, err case sc.IsOp(r): op, isOp := sc.getOp(src[i:]) if isOp { - return Token{kind: Operator, pos: p + i, content: op}, nil + return &Token{kind: Operator, pos: p + i, content: op}, nil } flag := sc.BlockProp[op] if flag&CharStr != 0 { @@ -202,15 +202,15 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { if !ok { err = ErrBlock } - return Token{kind: String, pos: p + i, content: s, start: len(op), end: len(op)}, err + return &Token{kind: String, pos: p + i, content: s, start: len(op), end: len(op)}, err } case IsNum(r): c, v := sc.getNum(src[i:]) - return Token{kind: Number, pos: p + i, content: c, value: v}, nil + return &Token{kind: Number, pos: p + i, content: c, value: v}, nil default: id, isId := sc.getId(src[i:]) if isId { - return Token{kind: Identifier, pos: p + i, content: id}, nil + return &Token{kind: Identifier, pos: p + i, content: id}, nil } flag := sc.BlockProp[id] if flag&CharBlock != 0 { @@ -218,11 +218,11 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { if !ok { err = ErrBlock } - return Token{kind: Block, pos: p + i, content: s, start: len(id), end: len(id)}, err + return &Token{kind: Block, pos: p + i, content: s, start: len(id), end: len(id)}, err } } } - return Token{}, nil + return 
&Token{}, nil } func (sc *Scanner) getId(src string) (s string, isId bool) { @@ -320,11 +320,12 @@ func (sc *Scanner) getBlock(src string, nstart int) (s string, ok bool) { } else if strings.HasSuffix(s, start) { n++ } else if m := sc.sdre.FindStringSubmatch(s); len(m) > 1 { - str, ok := sc.getStr(src[nstart+i:], len(m[1])) + l1 := len(m[1]) + str, ok := sc.getStr(src[nstart+i+1-l1:], l1) if !ok { return s, false } - skip = nstart + i + len(str) - 1 + skip = nstart + i + len(str) - l1 } if n == 0 { if prop&ExcludeEnd != 0 { diff --git a/scanner/scan_test.go b/scanner/scan_test.go index bdd3176..41ee62e 100644 --- a/scanner/scan_test.go +++ b/scanner/scan_test.go @@ -79,7 +79,7 @@ func TestScan(t *testing.T) { } } -func tokStr(tokens []Token) (s string) { +func tokStr(tokens []*Token) (s string) { for _, t := range tokens { s += fmt.Sprintf("%#v ", t.content) } @@ -155,4 +155,7 @@ def"`, }, { // #21 src: `println("in f")`, tok: `"println" "(\"in f\")" `, +}, { // #22 + src: "a, b = 1, 2", + tok: `"a" "," "b" "=" "1" "," "2" `, }} diff --git a/vm0/vm.go b/vm0/vm.go index d9c9a8d..62344ed 100644 --- a/vm0/vm.go +++ b/vm0/vm.go @@ -25,7 +25,7 @@ func New(p *parser.Parser) (i *Interp) { func (i *Interp) Eval(src string) (res any, err error) { n := &parser.Node{} - if n.Child, err = i.Parse(src); err != nil { + if n.Child, err = i.Parse(src, n); err != nil { return } if debug { diff --git a/vm0/vm_test.go b/vm0/vm_test.go index dc0829b..a4a24e6 100644 --- a/vm0/vm_test.go +++ b/vm0/vm_test.go @@ -14,7 +14,7 @@ func TestEval(t *testing.T) { //n, _ := i.Parse("println(2*5)") //n, _ := i.Parse(`a := 2 + 5`) src := `a := 2` - nodes, err := i.Parse(src) + nodes, err := i.Parse(src, nil) if err != nil { t.Errorf("error %v", err) } -- cgit v1.2.3