summaryrefslogtreecommitdiff
path: root/parser
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-08-09 11:47:39 +0200
committerGitHub <noreply@github.com>2023-08-09 11:47:39 +0200
commit947873b34aabe46dfb9f8d06214736cb11b5a6b2 (patch)
tree9fc4728cf39017ee0275d62a7578881cbb3073bb /parser
parent355750be61fbf4b90d132a9560e01113f22f4c38 (diff)
codegen: add a bytecode generator (#5)
* codegen: add a bytecode generator * cleaning scanner, parser and vm1.
Diffstat (limited to 'parser')
-rw-r--r--parser/node.go2
-rw-r--r--parser/parse.go50
-rw-r--r--parser/parse_test.go4
-rw-r--r--parser/readme.md60
4 files changed, 86 insertions, 30 deletions
diff --git a/parser/node.go b/parser/node.go
index f81285a..d2f13ef 100644
--- a/parser/node.go
+++ b/parser/node.go
@@ -6,8 +6,6 @@ type Node struct {
Child []*Node // sub-tree nodes
scanner.Token // token at origin of the node
Kind // Node kind, depends on the language spec
-
- unary bool // TODO(marc): get rid of it.
}
// TODO: remove it in favor of Walk2
diff --git a/parser/parse.go b/parser/parse.go
index cfbff27..4f186bd 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -1,8 +1,6 @@
package parser
-import (
- "github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/scanner"
const (
Stmt = 1 << iota
@@ -31,25 +29,29 @@ func (p *Parser) Parse(src string) (n []*Node, err error) {
return p.ParseTokens(tokens)
}
-func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
+func (p *Parser) ParseTokens(tokens []scanner.Token) (roots []*Node, err error) {
// TODO: error handling.
- var root *Node // current root node
- var expr *Node // current expression root node
- var prev, c *Node
- var lce *Node
- for i, t := range token {
+ var root *Node // current root node
+ var expr *Node // current expression root node
+ var prev, c *Node // previous and current nodes
+ var lce *Node // last complete expression node
+ unaryOp := map[*Node]bool{} // unaryOp indicates if a node is an unary operator.
+
+ for i, t := range tokens {
prev = c
c = &Node{
Token: t,
Kind: p.Spec[t.Name()].Kind,
- unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator),
}
- if c.Kind == 0 {
+ if t.IsOperator() && (i == 0 || tokens[i-1].IsOperator()) {
+ unaryOp[c] = true
+ }
+ if c.Kind == Undefined {
switch t.Kind() {
case scanner.Number:
- c.Kind = p.Spec["#num"].Kind
+ c.Kind = NumberLit
case scanner.Identifier:
- c.Kind = p.Spec["#id"].Kind
+ c.Kind = Ident
}
}
@@ -65,20 +67,20 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
continue
}
- if t.Kind() == scanner.Block {
+ if t.IsBlock() {
if expr != nil {
- if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) {
+ if p.hasProp(c, ExprSep) && p.isExprSep(root) {
// A bracket block may end a previous expression.
root.Child = append(root.Child, expr)
expr = nil
- } else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) {
+ } else if p.hasProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(tokens[i-1]) {
// Handle (possibly nested) call expressions.
if lce == nil || lce != expr { // TODO(marc): not general, fix it.
lce = prev
}
lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}}
lce.Token = scanner.NewToken("Call", c.Pos())
- lce.Kind = p.Spec["#call"].Kind
+ lce.Kind = CallExpr
}
}
tcont := t.Content()
@@ -91,10 +93,10 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
}
// Process the end of an expression or a statement.
- if t.Kind() == scanner.Separator {
+ if t.IsSeparator() {
if expr != nil && p.hasProp(root, Stmt) {
root.Child = append(root.Child, expr)
- if p.hasBlockProp(expr, ExprSep) {
+ if p.hasProp(expr, ExprSep) {
roots = append(roots, root)
root = nil
}
@@ -129,7 +131,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
ep := p.Spec[expr.Content()].Order
cp := p.Spec[c.Content()].Order
a := expr
- if c.unary {
+ if unaryOp[c] {
cp = 0
}
if cp != 0 {
@@ -143,7 +145,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
c1 := expr
for {
a = c1
- if c1.unary {
+ if unaryOp[c1] {
c1, c = c, c1
a = c1
if c == expr {
@@ -155,7 +157,7 @@ func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) {
break
}
c1 = c1.Child[1]
- if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order {
+ if !c1.IsOperator() || unaryOp[c1] || cp > p.Spec[c1.Content()].Order {
break
}
}
@@ -194,9 +196,7 @@ func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].F
func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) }
func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator }
func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block }
-func (p *Parser) hasBlockProp(n *Node, prop uint) bool {
- return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0
-}
+
func (p *Parser) precedenceToken(t scanner.Token) int {
s := t.Content()
if l := t.Start(); l > 0 {
diff --git a/parser/parse_test.go b/parser/parse_test.go
index 3bf6955..838e1c8 100644
--- a/parser/parse_test.go
+++ b/parser/parse_test.go
@@ -57,19 +57,17 @@ var GoParser = &Parser{
"<": {InfOp, 0, 6},
":=": {DefOp, 0, 7},
"=": {AssignOp, 0, 7},
- "#Call": {CallExpr, 0, 0},
"if": {IfStmt, Stmt | ExprSep, 0},
"func": {FuncDecl, Decl | Call, 0},
"return": {ReturnStmt, Stmt, 0},
"{..}": {StmtBloc, ExprSep, 0},
- "{": {StmtBloc, ExprSep, 0},
- "(": {ParBloc, Call, 0},
"(..)": {ParBloc, Call, 0},
},
}
func TestParse(t *testing.T) {
for _, test := range goTests {
+ test := test
t.Run("", func(t *testing.T) {
var err error
errStr := ""
diff --git a/parser/readme.md b/parser/readme.md
new file mode 100644
index 0000000..19d8778
--- /dev/null
+++ b/parser/readme.md
@@ -0,0 +1,60 @@
+# Parser
+
+A parser takes an array of tokens (produced by the scanner) as input and
+returns a node representing a syntax tree. A node is an object
+containing a kind, the corresponding token, and ordered references to
+descendant nodes.
+
+A goal is to make the parser generic enough so it can generate syntax
+trees for most existing programming languages (no claim of generality
+yet), provided a small set of generating rules per language, and a small
+set of validating rules (yet to be defined) to detect invalid
+constructs.
+
+The input tokens are unusual in that they include classical
+lexical items such as words, separators, and numbers, but also strings and
+nested blocks, which are resolved at the scanning stage rather than the
+parsing stage. See the scanner for more details.
+
+The language specification includes the following:
+
+- a scanner specification, to produce the set of possible tokens.
+- a map of node specifications per token name. The node specification
+  defines some parameters influencing how the tree is generated.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] binary operator expressions
+- [x] unary operator (prefix) expressions
+- [ ] unary operator (suffix) expressions
+- [x] operator precedence rules
+- [x] parenthesis in expressions
+- [ ] semi-colon automatic insertion rules
+- [x] call expressions
+- [ ] nested calls
+- [x] index expressions
+- [x] single assignments
+- [ ] multi assignments
+- [x] simple `if` statement (no `else`)
+- [ ] full `if` statement (including `else`, `else if`)
+- [x] init expressions in `if` statements
+- [x] statement blocks
+- [ ] comments
+- [ ] for statement
+- [ ] switch statement
+- [ ] select statement
+- [x] return statement
+- [x] function declaration
+- [ ] method declaration
+- [ ] anonymous functions (closures)
+- [ ] type declaration
+- [ ] var, const, type single declaration
+- [ ] var, const, type multi declaration
+- [ ] type parametric expressions
+- [x] literal numbers (see scanner)
+- [x] literal strings
+- [ ] composite literal
+- [ ] import statements
+- [ ] go.mod syntax