diff options
Diffstat (limited to 'parser/parse.go')
| -rw-r--r-- | parser/parse.go | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/parser/parse.go b/parser/parse.go new file mode 100644 index 0000000..cfbff27 --- /dev/null +++ b/parser/parse.go @@ -0,0 +1,210 @@ +package parser + +import ( + "github.com/gnolang/parscan/scanner" +) + +const ( + Stmt = 1 << iota + ExprSep + Call + Index + Decl +) + +type NodeSpec struct { + Kind // AST node kind + Flags uint // composable properties used for AST generation + Order int // operator precedence order +} + +type Parser struct { + *scanner.Scanner + Spec map[string]NodeSpec +} + +func (p *Parser) Parse(src string) (n []*Node, err error) { + tokens, err := p.Scan(src) + if err != nil { + return + } + return p.ParseTokens(tokens) +} + +func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) { + // TODO: error handling. + var root *Node // current root node + var expr *Node // current expression root node + var prev, c *Node + var lce *Node + for i, t := range token { + prev = c + c = &Node{ + Token: t, + Kind: p.Spec[t.Name()].Kind, + unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator), + } + if c.Kind == 0 { + switch t.Kind() { + case scanner.Number: + c.Kind = p.Spec["#num"].Kind + case scanner.Identifier: + c.Kind = p.Spec["#id"].Kind + } + } + + if root == nil { + if p.isSep(c) { + continue + } + lce = nil + root = c + if p.isExpr(c) { + expr = c + } + continue + } + + if t.Kind() == scanner.Block { + if expr != nil { + if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) { + // A bracket block may end a previous expression. + root.Child = append(root.Child, expr) + expr = nil + } else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) { + // Handle (possibly nested) call expressions. + if lce == nil || lce != expr { // TODO(marc): not general, fix it. + lce = prev + } + lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}} + lce.Token = scanner.NewToken("Call", c.Pos()) + lce.Kind = p.Spec["#call"].Kind + } + } + tcont := t.Content() + s := tcont[t.Start() : len(tcont)-t.End()] + n2, err := p.Parse(s) + if err != nil { + return nil, err + } + c.Child = append(c.Child, n2...) + } + + // Process the end of an expression or a statement. + if t.Kind() == scanner.Separator { + if expr != nil && p.hasProp(root, Stmt) { + root.Child = append(root.Child, expr) + if p.hasBlockProp(expr, ExprSep) { + roots = append(roots, root) + root = nil + } + expr = nil + } else { + if expr != nil { + root = expr + } + roots = append(roots, root) + expr = nil + root = nil + } + continue + } + + // We assume from now that current node is part of an expression subtree. + if expr == nil { + if p.isStatement(root) { + expr = c + continue + } + expr = root + } + + // Update the expression subtree according to binary operator precedence rules. + // - operators are binary infix by default. + // - if an operator follows another, then it's unary prefix. + // - if an expression starts by an operator, then it's unary prefix. + // - non operator nodes have a default precedence of 0. + // TODO: handle postfix unary (i.e. ++) and ternary (i.e. ?:) + // + ep := p.Spec[expr.Content()].Order + cp := p.Spec[c.Content()].Order + a := expr + if c.unary { + cp = 0 + } + if cp != 0 { + if cp > ep { + // Perform an operator permutation at expr root as required by precedence. + // TODO(marc): maybe it can be generalized in below else branch. + expr, c = c, expr + a = expr // Temporary ancestor: its children may have to be permuted. + } else { + // Findout if an operator permutation is necessary in subtree. + c1 := expr + for { + a = c1 + if c1.unary { + c1, c = c, c1 + a = c1 + if c == expr { + expr = a + } + break + } + if len(c1.Child) < 2 { + break + } + c1 = c1.Child[1] + if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order { + break + } + } + // No permutation occured. Append current to last visited ancestor. + if len(a.Child) > 1 { + a.Child = a.Child[:1] + c.Child = append(c.Child, c1) + } + } + } else if ep != 0 { + for len(a.Child) > 1 { + a = a.Child[1] + } + } + a.Child = append(a.Child, c) + if p.hasProp(a, Call) { + lce = a + } + } + if root != nil && p.isStatement(root) { + if expr != nil { + root.Child = append(root.Child, expr) + } + } else if expr != nil { + root = expr + } + if root != nil { + roots = append(roots, root) + } + return roots, err +} + +func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 } +func (p *Parser) isStatement(n *Node) bool { return p.Spec[n.Content()].Flags&Stmt != 0 } +func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].Flags&ExprSep != 0 } +func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) } +func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator } +func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block } +func (p *Parser) hasBlockProp(n *Node, prop uint) bool { + return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0 +} +func (p *Parser) precedenceToken(t scanner.Token) int { + s := t.Content() + if l := t.Start(); l > 0 { + s = s[:l] + } + return p.Spec[s].Order +} + +func (p *Parser) canCallToken(t scanner.Token) bool { + return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0 +} |
