summaryrefslogtreecommitdiff
path: root/parser
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-08-09 11:47:39 +0200
committerGitHub <noreply@github.com>2023-08-09 11:47:39 +0200
commit947873b34aabe46dfb9f8d06214736cb11b5a6b2 (patch)
tree9fc4728cf39017ee0275d62a7578881cbb3073bb /parser
parent355750be61fbf4b90d132a9560e01113f22f4c38 (diff)
codegen: add a bytecode generator (#5)
* codegen: add a bytecode generator * cleaning scanner, parser and vm1.
Diffstat (limited to 'parser')
-rw-r--r--parser/node.go2
-rw-r--r--parser/parse.go50
-rw-r--r--parser/parse_test.go4
-rw-r--r--parser/readme.md60
4 files changed, 86 insertions, 30 deletions
diff --git a/parser/node.go b/parser/node.go
index f81285a..d2f13ef 100644
--- a/parser/node.go
+++ b/parser/node.go
@@ -6,8 +6,6 @@ type Node struct {
Child []*Node // sub-tree nodes
scanner.Token // token at origin of the node
Kind // Node kind, depends on the language spec
-
- unary bool // TODO(marc): get rid of it.
}
// TODO: remove it in favor of Walk2
diff --git a/parser/parse.go b/parser/parse.go
index cfbff27..4f186bd 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,8 +1,6 @@
package parser
-import (
- "github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/scanner"
const (
Stmt = 1 << iota
@@ -31,25 +29,29 @@ func (p *Parser) Parse(src string) (n []*Node, err error) {
return p.ParseTokens(tokens)
}
-func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
+func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) {
// TODO: error handling.
- var root *Node // current root node
- var expr *Node // current expression root node
- var prev, c *Node
- var lce *Node
- for i, t := range token {
+ var root *Node // current root node
+ var expr *Node // current expression root node
+ var prev, c *Node // previous and current nodes
+ var lce *Node // last complete expression node
+ unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
+
+ for i, t := range tokens {
prev = c
c = &Node{
Token: t,
Kind: p.Spec[t.Name()].Kind,
- unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator),
}
- if c.Kind == 0 {
+ if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
+ unaryOp[c] = true
+ }
+ if c.Kind == Undefined {
switch t.Kind() {
case scanner.Number:
- c.Kind = p.Spec["#num"].Kind
+ c.Kind = NumberLit
case scanner.Identifier:
- c.Kind = p.Spec["#id"].Kind
+ c.Kind = Ident
}
}
@@ -65,20 +67,20 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
continue
}
- if t.Kind() == scanner.Block {
+ if t.IsBlock() {
if expr != nil {
- if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) {
+ if p.hasProp(c, ExprSep) && p.isExprSep(root) {
// A bracket block may end a previous expression.
root.Child = append(root.Child, expr)
expr = nil
- } else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) {
+ } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
// Handle (possibly nested) call expressions.
if lce == nil || lce != expr { // TODO(marc): not general, fix it.
lce = prev
}
lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
lce.Token = scanner.NewToken("Call", c.Pos())
- lce.Kind = p.Spec["#call"].Kind
+ lce.Kind = CallExpr
}
}
tcont := t.Content()
@@ -91,10 +93,10 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
}
// Process the end of an expression or a statement.
- if t.Kind() == scanner.Separator {
+ if t.IsSeparator() {
if expr != nil && p.hasProp(root, Stmt) {
root.Child = append(root.Child, expr)
- if p.hasBlockProp(expr, ExprSep) {
+ if p.hasProp(expr, ExprSep) {
roots = append(roots, root)
root = nil
}
@@ -129,7 +131,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
ep := p.Spec[expr.Content()].Order
cp := p.Spec[c.Content()].Order
a := expr
- if c.unary {
+ if unaryOp[c] {
cp = 0
}
if cp != 0 {
@@ -143,7 +145,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
c1 := expr
for {
a = c1
- if c1.unary {
+ if unaryOp[c1] {
c1, c = c, c1
a = c1
if c == expr {
@@ -155,7 +157,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
break
}
c1 = c1.Child[1]
- if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order {
+ if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
break
}
}
@@ -194,9 +196,7 @@ func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].F
func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) }
func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator }
func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block }
-func (p *Parser) hasBlockProp(n *Node, prop uint) bool {
- return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0
-}
+
func (p *Parser) precedenceToken(t scanner.Token) int {
s := t.Content()
if l := t.Start(); l > 0 {
diff --git a/parser/parse_test.go b/parser/parse_test.go
index 3bf6955..838e1c8 100644
--- a/parser/parse_test.go
+++ b/parser/parse_test.go
@@ -57,19 +57,17 @@ var GoParser = &Parser{
"<": {InfOp, 0, 6},
":=": {DefOp, 0, 7},
"=": {AssignOp, 0, 7},
- "#Call": {CallExpr, 0, 0},
"if": {IfStmt, Stmt | ExprSep, 0},
"func": {FuncDecl, Decl | Call, 0},
"return": {ReturnStmt, Stmt, 0},
"{..}": {StmtBloc, ExprSep, 0},
- "{": {StmtBloc, ExprSep, 0},
- "(": {ParBloc, Call, 0},
"(..)": {ParBloc, Call, 0},
},
}
func TestParse(t *testing.T) {
for _, test := range goTests {
+ test := test
t.Run("", func(t *testing.T) {
var err error
errStr := ""
diff --git a/parser/readme.md b/parser/readme.md
new file mode 100644
index 0000000..19d8778
--- /dev/null
+++ b/parser/readme.md
@@ -0,0 +1,60 @@
+# Parser
+
+A parser takes an array of tokens (produced by the scanner) as input and
+returns a node representing a syntax tree. A node is an object
+containing a kind, the corresponding token, and ordered references to
+descendant nodes.
+
+A goal is to make the parser generic enough so it can generate syntax
+trees for most existing programming languages (no claim of generality
+yet), provided a small set of generating rules per language, and a small
+set of validating rules (yet to be defined) to detect invalid
+constructs.
+
+The input tokens are unusual in that they include classical
+lexical items such as words, separators, and numbers, but also strings and
+nested blocks, which are resolved at the scanning stage rather than the
+parsing stage. See the scanner for more details.
+
+The language specification includes the following:
+
+- a scanner specification, to produce the set of possible tokens.
+- a map of node specifications per token name. The node specification
+  defines some parameters influencing how the tree is generated.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] binary operator expressions
+- [x] unary operator (prefix) expressions
+- [ ] unary operator (suffix) expressions
+- [x] operator precedence rules
+- [x] parenthesis in expressions
+- [ ] semi-colon automatic insertion rules
+- [x] call expressions
+- [ ] nested calls
+- [x] index expressions
+- [x] single assignments
+- [ ] multi assignments
+- [x] simple `if` statement (no `else`)
+- [ ] full `if` statement (including `else`, `else if`)
+- [x] init expressions in `if` statements
+- [x] statement blocks
+- [ ] comments
+- [ ] for statement
+- [ ] switch statement
+- [ ] select statement
+- [x] return statement
+- [x] function declaration
+- [ ] method declaration
+- [ ] anonymous functions (closures)
+- [ ] type declaration
+- [ ] var, const, type single declaration
+- [ ] var, const, type multi declaration
+- [ ] type parametric expressions
+- [x] literal numbers (see scanner)
+- [x] literal strings
+- [ ] composite literal
+- [ ] import statements
+- [ ] go.mod syntax