diff options
| -rw-r--r-- | cmd/gint/main.go | 35 | ||||
| -rw-r--r-- | go.mod | 3 | ||||
| -rw-r--r-- | lang/golang/go.go | 90 | ||||
| -rw-r--r-- | parser/dot.go | 74 | ||||
| -rw-r--r-- | parser/node.go | 45 | ||||
| -rw-r--r-- | parser/parse.go | 210 | ||||
| -rw-r--r-- | parser/parse_test.go | 228 | ||||
| -rw-r--r-- | readme.md | 17 | ||||
| -rw-r--r-- | samples/fib | 6 | ||||
| -rw-r--r-- | samples/p00 | 1 | ||||
| -rw-r--r-- | samples/p01 | 2 | ||||
| -rw-r--r-- | samples/p02 | 7 | ||||
| -rw-r--r-- | samples/p03 | 3 | ||||
| -rw-r--r-- | samples/p04 | 4 | ||||
| -rw-r--r-- | samples/p05 | 10 | ||||
| -rw-r--r-- | samples/p06 | 7 | ||||
| -rw-r--r-- | scanner/scan.go | 265 | ||||
| -rw-r--r-- | scanner/scan_test.go | 95 | ||||
| -rw-r--r-- | vm0/func.go | 73 | ||||
| -rw-r--r-- | vm0/vm.go | 146 | ||||
| -rw-r--r-- | vm0/vm_test.go | 26 |
21 files changed, 1347 insertions, 0 deletions
diff --git a/cmd/gint/main.go b/cmd/gint/main.go new file mode 100644 index 0000000..44f59ac --- /dev/null +++ b/cmd/gint/main.go @@ -0,0 +1,35 @@ +package main + +import ( + "log" + "os" + + "github.com/gnolang/parscan/lang/golang" + "github.com/gnolang/parscan/vm0" +) + +func main() { + log.SetFlags(log.Lshortfile) + buf, err := os.ReadFile("/dev/stdin") + if err != nil { + log.Fatal(err) + } + if err := runSrc(string(buf)); err != nil { + log.Fatal(err) + } +} + +func runSrc(src string) error { + i := vm0.New(golang.GoParser) + nodes, err := i.Parse(src) + if err != nil { + return err + } + i.Adot(nodes, os.Getenv("DOT")) + for _, n := range nodes { + if _, err := i.Run(n, ""); err != nil { + return err + } + } + return nil +} @@ -0,0 +1,3 @@ +module github.com/gnolang/parscan + +go 1.20 diff --git a/lang/golang/go.go b/lang/golang/go.go new file mode 100644 index 0000000..cc13a24 --- /dev/null +++ b/lang/golang/go.go @@ -0,0 +1,90 @@ +package golang + +import ( + "github.com/gnolang/parscan/parser" + "github.com/gnolang/parscan/scanner" +) + +var GoScanner = &scanner.Scanner{ + CharProp: [scanner.ASCIILen]uint{ + '\t': scanner.CharSep, + '\n': scanner.CharLineSep, + ' ': scanner.CharSep, + '!': scanner.CharOp, + '"': scanner.CharStr, + '%': scanner.CharOp, + '&': scanner.CharOp, + '\'': scanner.CharStr, + '(': scanner.CharBlock, + '*': scanner.CharOp, + '+': scanner.CharOp, + ',': scanner.CharGroupSep, + '-': scanner.CharOp, + '.': scanner.CharOp, + '/': scanner.CharOp, + ':': scanner.CharOp, + ';': scanner.CharGroupSep, + '<': scanner.CharOp, + '=': scanner.CharOp, + '>': scanner.CharOp, + '[': scanner.CharBlock, + '^': scanner.CharOp, + '{': scanner.CharBlock, + '|': scanner.CharOp, + '~': scanner.CharOp, + }, + End: map[string]string{ + "(": ")", + "{": "}", + "[": "]", + "/*": "*/", + `"`: `"`, + "'": "'", + "`": "`", + "//": "\n", + }, +} + +const ( + Undefined = parser.Kind(iota) + FuncDecl + CallExpr + IfStmt + StmtBloc + ReturnStmt + Ident + StringLit + NumberLit + ParBloc + DotOp + MulOp + AddOp + SubOp + AssignOp + DefOp + InfOp +) + +var GoParser = &parser.Parser{ + Scanner: GoScanner, + Spec: map[string]parser.NodeSpec{ + ".": {DotOp, parser.Call, 3}, + "*": {MulOp, 0, 4}, + "+": {AddOp, 0, 5}, + "-": {SubOp, 0, 5}, + "<": {InfOp, 0, 6}, + ":=": {DefOp, 0, 7}, + "=": {AssignOp, 0, 7}, + "#call": {CallExpr, 0, 0}, + "#id": {Ident, 0, 0}, + "#num": {NumberLit, 0, 0}, + "if": {IfStmt, parser.Stmt | parser.ExprSep, 0}, + "func": {FuncDecl, parser.Decl | parser.Call, 0}, + "return": {ReturnStmt, parser.Stmt, 0}, + "{..}": {StmtBloc, parser.ExprSep, 0}, + "{": {StmtBloc, parser.ExprSep, 0}, // FIXME: redundant with above + "(..)": {ParBloc, parser.Call, 0}, + "(": {ParBloc, parser.Call, 0}, // FIXME: redundant with above + `".."`: {StringLit, 0, 0}, + }, +} diff --git a/parser/dot.go b/parser/dot.go new file mode 100644 index 0000000..cef1acb --- /dev/null +++ b/parser/dot.go @@ -0,0 +1,74 @@ +package parser + +import ( + "bytes" + "fmt" + "io" + "log" + "os/exec" + "strings" +) + +func (*Parser) Adot(nodes []*Node, c string) { + if c == "" { + return + } + n := &Node{Child: nodes} + n.Dot(c, "") +} + +func (n *Node) Dot(c, s string) { n.astDot(dotWriter(c), s) } + +func (n *Node) Sdot(s string) string { + var buf bytes.Buffer + n.astDot(&buf, s) + return buf.String() +} + +// TODO: rewrite it using Walk2 +func (n *Node) astDot(out io.Writer, label string) { + fmt.Fprintf(out, "digraph ast { ") + if label != "" { + fmt.Fprintf(out, "labelloc=\"t\"; label=\"%s\";", label) + } + anc := map[*Node]*Node{} + index := map[*Node]int{} + count := 0 + n.Walk(func(nod *Node) bool { + index[nod] = count + count++ + + for _, c := range nod.Child { + anc[c] = nod + } + name := strings.ReplaceAll(nod.Name(), `"`, `\"`) + fmt.Fprintf(out, "%d [label=\"%s\"]; ", index[nod], name) + if anc[nod] != nil { + fmt.Fprintf(out, "%d -> %d; ", index[anc[nod]], index[nod]) + } + return true + }, nil) + fmt.Fprintf(out, "}") +} + +type nopCloser struct { + io.Writer +} + +func (nopCloser) Close() error { return nil } + +func dotWriter(dotCmd string) io.WriteCloser { + if dotCmd == "" { + return nopCloser{io.Discard} + } + fields := strings.Fields(dotCmd) + cmd := exec.Command(fields[0], fields[1:]...) + dotin, err := cmd.StdinPipe() + if err != nil { + log.Fatal(err) + } + if err = cmd.Start(); err != nil { + log.Fatal(err) + } + return dotin +} diff --git a/parser/node.go b/parser/node.go new file mode 100644 index 0000000..6ebc2a7 --- /dev/null +++ b/parser/node.go @@ -0,0 +1,45 @@ +package parser + +import "github.com/gnolang/parscan/scanner" + +type Kind int + +type Node struct { + Child []*Node // sub-tree nodes + scanner.Token // token at origin of the node + Kind // Node kind, depends on the language spec + + unary bool // TODO(marc): get rid of it. +} + +// TODO: remove it in favor of Walk2 +func (n *Node) Walk(in, out func(*Node) bool) (stop bool) { + if in != nil && !in(n) { + return true + } + for _, child := range n.Child { + if child.Walk(in, out) { + return + } + } + if out != nil { + stop = !out(n) + } + return +} + +// Idem to walk, but also propagates the ancestor of visited node and child index. +func (n *Node) Walk2(a *Node, i int, in, out func(*Node, *Node, int) bool) (stop bool) { + if in != nil && !in(n, a, i) { + return true + } + for j, child := range n.Child { + if child.Walk2(n, j, in, out) { + return + } + } + if out != nil { + stop = !out(n, a, i) + } + return +} diff --git a/parser/parse.go b/parser/parse.go new file mode 100644 index 0000000..cfbff27 --- /dev/null +++ b/parser/parse.go @@ -0,0 +1,210 @@ +package parser + +import ( + "github.com/gnolang/parscan/scanner" +) + +const ( + Stmt = 1 << iota + ExprSep + Call + Index + Decl +) + +type NodeSpec struct { + Kind // AST node kind + Flags uint // composable properties used for AST generation + Order int // operator precedence order +} + +type Parser struct { + *scanner.Scanner + Spec map[string]NodeSpec +} + +func (p *Parser) Parse(src string) (n []*Node, err error) { + tokens, err := p.Scan(src) + if err != nil { + return + } + return p.ParseTokens(tokens) +} + +func (p *Parser) ParseTokens(token []scanner.Token) (roots []*Node, err error) { + // TODO: error handling. + var root *Node // current root node + var expr *Node // current expression root node + var prev, c *Node + var lce *Node + for i, t := range token { + prev = c + c = &Node{ + Token: t, + Kind: p.Spec[t.Name()].Kind, + unary: t.Kind() == scanner.Operator && (i == 0 || token[i-1].Kind() == scanner.Operator), + } + if c.Kind == 0 { + switch t.Kind() { + case scanner.Number: + c.Kind = p.Spec["#num"].Kind + case scanner.Identifier: + c.Kind = p.Spec["#id"].Kind + } + } + + if root == nil { + if p.isSep(c) { + continue + } + lce = nil + root = c + if p.isExpr(c) { + expr = c + } + continue + } + + if t.Kind() == scanner.Block { + if expr != nil { + if p.hasBlockProp(c, ExprSep) && p.isExprSep(root) { + // A bracket block may end a previous expression. + root.Child = append(root.Child, expr) + expr = nil + } else if p.hasBlockProp(c, Call) && !p.hasProp(root, Decl) && p.canCallToken(token[i-1]) { + // Handle (possibly nested) call expressions. + if lce == nil || lce != expr { // TODO(marc): not general, fix it. + lce = prev + } + lce.Child = []*Node{{Token: lce.Token, Child: lce.Child, Kind: lce.Kind}} + lce.Token = scanner.NewToken("Call", c.Pos()) + lce.Kind = p.Spec["#call"].Kind + } + } + tcont := t.Content() + s := tcont[t.Start() : len(tcont)-t.End()] + n2, err := p.Parse(s) + if err != nil { + return nil, err + } + c.Child = append(c.Child, n2...) + } + + // Process the end of an expression or a statement. + if t.Kind() == scanner.Separator { + if expr != nil && p.hasProp(root, Stmt) { + root.Child = append(root.Child, expr) + if p.hasBlockProp(expr, ExprSep) { + roots = append(roots, root) + root = nil + } + expr = nil + } else { + if expr != nil { + root = expr + } + roots = append(roots, root) + expr = nil + root = nil + } + continue + } + + // We assume from now that current node is part of an expression subtree. + if expr == nil { + if p.isStatement(root) { + expr = c + continue + } + expr = root + } + + // Update the expression subtree according to binary operator precedence rules. + // - operators are binary infix by default. + // - if an operator follows another, then it's unary prefix. + // - if an expression starts by an operator, then it's unary prefix. + // - non operator nodes have a default precedence of 0. + // TODO: handle postfix unary (i.e. ++) and ternary (i.e. ?:) + // + ep := p.Spec[expr.Content()].Order + cp := p.Spec[c.Content()].Order + a := expr + if c.unary { + cp = 0 + } + if cp != 0 { + if cp > ep { + // Perform an operator permutation at expr root as required by precedence. + // TODO(marc): maybe it can be generalized in below else branch. + expr, c = c, expr + a = expr // Temporary ancestor: its children may have to be permuted. + } else { + // Findout if an operator permutation is necessary in subtree. + c1 := expr + for { + a = c1 + if c1.unary { + c1, c = c, c1 + a = c1 + if c == expr { + expr = a + } + break + } + if len(c1.Child) < 2 { + break + } + c1 = c1.Child[1] + if c1.Token.Kind() != scanner.Operator || c1.unary || cp > p.Spec[c1.Content()].Order { + break + } + } + // No permutation occured. Append current to last visited ancestor. + if len(a.Child) > 1 { + a.Child = a.Child[:1] + c.Child = append(c.Child, c1) + } + } + } else if ep != 0 { + for len(a.Child) > 1 { + a = a.Child[1] + } + } + a.Child = append(a.Child, c) + if p.hasProp(a, Call) { + lce = a + } + } + if root != nil && p.isStatement(root) { + if expr != nil { + root.Child = append(root.Child, expr) + } + } else if expr != nil { + root = expr + } + if root != nil { + roots = append(roots, root) + } + return roots, err +} + +func (p *Parser) hasProp(n *Node, prop uint) bool { return p.Spec[n.Name()].Flags&prop != 0 } +func (p *Parser) isStatement(n *Node) bool { return p.Spec[n.Content()].Flags&Stmt != 0 } +func (p *Parser) isExprSep(n *Node) bool { return p.Spec[n.Content()].Flags&ExprSep != 0 } +func (p *Parser) isExpr(n *Node) bool { return !p.isStatement(n) && !p.isExprSep(n) } +func (p *Parser) isSep(n *Node) bool { return n.Token.Kind() == scanner.Separator } +func (p *Parser) IsBlock(n *Node) bool { return n.Token.Kind() == scanner.Block } +func (p *Parser) hasBlockProp(n *Node, prop uint) bool { + return p.Spec[n.Content()[:n.Start()]].Flags&prop != 0 +} +func (p *Parser) precedenceToken(t scanner.Token) int { + s := t.Content() + if l := t.Start(); l > 0 { + s = s[:l] + } + return p.Spec[s].Order +} + +func (p *Parser) canCallToken(t scanner.Token) bool { + return p.precedenceToken(t) == 0 || p.Spec[t.Name()].Flags&Call != 0 +} diff --git a/parser/parse_test.go b/parser/parse_test.go new file mode 100644 index 0000000..dc363cb --- /dev/null +++ b/parser/parse_test.go @@ -0,0 +1,228 @@ +package parser + +import ( + "os" + "testing" + + "github.com/gnolang/parscan/scanner" +) + +var GoScanner = &scanner.Scanner{ + CharProp: [scanner.ASCIILen]uint{ + '\t': scanner.CharSep, + '\n': scanner.CharLineSep, + ' ': scanner.CharSep, + '!': scanner.CharOp, + '"': scanner.CharStr, + '%': scanner.CharOp, + '&': scanner.CharOp, + '\'': scanner.CharStr, + '(': scanner.CharBlock, + '*': scanner.CharOp, + '+': scanner.CharOp, + ',': scanner.CharGroupSep, + '-': scanner.CharOp, + '.': scanner.CharOp, + '/': scanner.CharOp, + ':': scanner.CharOp, + ';': scanner.CharGroupSep, + '<': scanner.CharOp, + '=': scanner.CharOp, + '>': scanner.CharOp, + '[': scanner.CharBlock, + '^': scanner.CharOp, + '{': scanner.CharBlock, + '|': scanner.CharOp, + '~': scanner.CharOp, + }, + End: map[string]string{ + "(": ")", + "{": "}", + "[": "]", + "/*": "*/", + `"`: `"`, + "'": "'", + "`": "`", + "//": "\n", + }, +} + +const ( + Undefined = Kind(iota) + FuncDecl + CallExpr + IfStmt + StmtBloc + ReturnStmt + ParBloc + MulOp + AddOp + SubOp + DotOp + AssignOp + DefOp + InfOp +) + +var GoParser = &Parser{ + Scanner: GoScanner, + Spec: map[string]NodeSpec{ + ".": {DotOp, Call, 3}, + "*": {MulOp, 0, 4}, + "+": {AddOp, 0, 5}, + "-": {SubOp, 0, 5}, + "<": {InfOp, 0, 6}, + ":=": {DefOp, 0, 7}, + "=": {AssignOp, 0, 7}, + "#Call": {CallExpr, 0, 0}, + "if": {IfStmt, Stmt | ExprSep, 0}, + "func": {FuncDecl, Decl | Call, 0}, + "return": {ReturnStmt, Stmt, 0}, + "{..}": {StmtBloc, ExprSep, 0}, + "{": {StmtBloc, ExprSep, 0}, + "(": {ParBloc, Call, 0}, + "(..)": {ParBloc, Call, 0}, + }, +} + +func TestParse(t *testing.T) { + for _, test := range goTests { + t.Run("", func(t *testing.T) { + var err error + errStr := "" + n := &Node{} + if n.Child, err = GoParser.Parse(test.src); err != nil { + errStr = err.Error() + } + if errStr != test.err { + t.Errorf("got error %#v, want error %#v", errStr, test.err) + } + if dot := n.Sdot(""); dot != test.dot { + t.Errorf("got %#v, want %#v", dot, test.dot) + } + t.Log(test.src) + t.Log(n.Sdot("")) + if dotCmd := os.Getenv("DOT"); dotCmd != "" { + n.Dot(dotCmd, "") + } + }) + } +} + +var goTests = []struct { + src, dot, err string + skip bool +}{{ // #00 + src: "", + dot: `digraph ast { 0 [label=""]; }`, +}, { // #01 + src: "12", + dot: `digraph ast { 0 [label=""]; 1 [label="12"]; 0 -> 1; }`, +}, { // #02 + src: "1 + 2", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; }`, +}, { // #03 + src: "1 + 2 * 3", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="3"]; 3 -> 5; }`, +}, { // #04 + src: "1 * 2 + 3", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="1"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="3"]; 1 -> 5; }`, +}, { // #05 + src: "a := 2 + 5", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="5"]; 3 -> 5; }`, +}, { // #06 + src: "a := 1 * 2 + 3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="3"]; 3 -> 7; }`, +}, { // #07 + src: "a := 1 + 2 * 3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="*"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="3"]; 5 -> 7; }`, +}, { // #08 + src: "a := 1 + 2 * 3 + 4", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="*"]; 5 -> 6; 7 [label="2"]; 6 -> 7; 8 [label="3"]; 6 -> 8; 9 [label="4"]; 5 -> 9; }`, +}, { // #09 + src: "a := 1 + 2 + 3 * 4", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="1"]; 3 -> 4; 5 [label="+"]; 3 -> 5; 6 [label="2"]; 5 -> 6; 7 [label="*"]; 5 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`, +}, { // #10 + src: "a := 1 * 2 + 3 * 4", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="*"]; 3 -> 4; 5 [label="1"]; 4 -> 5; 6 [label="2"]; 4 -> 6; 7 [label="*"]; 3 -> 7; 8 [label="3"]; 7 -> 8; 9 [label="4"]; 7 -> 9; }`, +}, { // #11 + src: "a := (1 + 2) * 3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="(..)"]; 3 -> 4; 5 [label="+"]; 4 -> 5; 6 [label="1"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="3"]; 3 -> 8; }`, +}, { // #12 + src: "a := 2 * -3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="*"]; 1 -> 3; 4 [label="2"]; 3 -> 4; 5 [label="-"]; 3 -> 5; 6 [label="3"]; 5 -> 6; }`, +}, { // #13 + src: "-5 + 4", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="4"]; 1 -> 4; }`, +}, { // #14 + src: "-5 + -4", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="-"]; 1 -> 2; 3 [label="5"]; 2 -> 3; 4 [label="-"]; 1 -> 4; 5 [label="4"]; 4 -> 5; }`, +}, { // #15 + src: "a := -5 + -4", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="-"]; 3 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 3 -> 6; 7 [label="4"]; 6 -> 7; }`, +}, { // #16 + src: "*a := 5 * -3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="5"]; 4 -> 5; 6 [label="-"]; 4 -> 6; 7 [label="3"]; 6 -> 7; }`, +}, { // #17 + src: "*a := -5 * 3", + dot: `digraph ast { 0 [label=""]; 1 [label=":="]; 0 -> 1; 2 [label="*"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="*"]; 1 -> 4; 5 [label="-"]; 4 -> 5; 6 [label="5"]; 5 -> 6; 7 [label="3"]; 4 -> 7; }`, +}, { // #18 + src: "1+2\n3-4", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="1"]; 1 -> 2; 3 [label="2"]; 1 -> 3; 4 [label="-"]; 0 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="4"]; 4 -> 6; }`, +}, { // #19 + src: "i = i+1", + dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="i"]; 1 -> 2; 3 [label="+"]; 1 -> 3; 4 [label="i"]; 3 -> 4; 5 [label="1"]; 3 -> 5; }`, +}, { // #20 + src: "a[12] = 5", + dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="5"]; 1 -> 5; }`, +}, { // #21 + src: "a[12][0] = 3", + dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="a"]; 1 -> 2; 3 [label="[..]"]; 2 -> 3; 4 [label="12"]; 3 -> 4; 5 [label="[..]"]; 2 -> 5; 6 [label="0"]; 5 -> 6; 7 [label="3"]; 1 -> 7; }`, +}, { // #22 + src: "a.b = 34", + dot: `digraph ast { 0 [label=""]; 1 [label="="]; 0 -> 1; 2 [label="."]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="b"]; 2 -> 4; 5 [label="34"]; 1 -> 5; }`, +}, { // #23 + src: "if i < 2 { return j }", + dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label="<"]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="2"]; 2 -> 4; 5 [label="{..}"]; 1 -> 5; 6 [label="return"]; 5 -> 6; 7 [label="j"]; 6 -> 7; }`, +}, { // #24 + src: "if i:=1; i < 2 { return j }", + dot: `digraph ast { 0 [label=""]; 1 [label="if"]; 0 -> 1; 2 [label=":="]; 1 -> 2; 3 [label="i"]; 2 -> 3; 4 [label="1"]; 2 -> 4; 5 [label="<"]; 1 -> 5; 6 [label="i"]; 5 -> 6; 7 [label="2"]; 5 -> 7; 8 [label="{..}"]; 1 -> 8; 9 [label="return"]; 8 -> 9; 10 [label="j"]; 9 -> 10; }`, +}, { // #25 + src: "f(i) + f(j)", + dot: `digraph ast { 0 [label=""]; 1 [label="+"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="f"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="i"]; 4 -> 5; 6 [label="Call"]; 1 -> 6; 7 [label="f"]; 6 -> 7; 8 [label="(..)"]; 6 -> 8; 9 [label="j"]; 8 -> 9; }`, + //src: "f(i) + f(j)(4)", // not ok + /* + }, { // #26 + src: "if i < 2 {return i}; return f(i-2) + f(i-1)", + }, { // #27 + src: "for i < 2 { println(i) }", + }, { // #28 + src: "func f(i int) (int) { if i < 2 { return i}; return f(i-2) + f(i-1) }", + }, { // #29 + src: "a := []int{3, 4}", + }, { // #30 + //src: "a := struct{int}", + src: "a, b = c, d", + }, { // #31 + //src: "a := [2]int{3, 4}", + src: `fmt.Println("Hello")`, + //src: "(1 + 2) * (3 - 4)", + //src: "1 + (1 + 2)", + }, { // #32 + //src: `a(3)(4)`, + //src: `3 + 2 * a(3) + 5`, + //src: `3 + 2 * a(3)(4) + (5)`, + //src: `(a(3))(4)`, + src: `a(3)(4)`, + dot: `digraph ast { 0 [label=""]; 1 [label="Call"]; 0 -> 1; 2 [label="Call"]; 1 -> 2; 3 [label="a"]; 2 -> 3; 4 [label="(..)"]; 2 -> 4; 5 [label="3"]; 4 -> 5; 6 [label="(..)"]; 1 -> 6; 7 [label="4"]; 6 -> 7; }`, + //src: `println("Hello")`, + //src: `a.b.c + 3`, + }, { // #33 + src: `func f(a int, b int) {return a + b}; f(1+2)`, + }, { // #34 + src: `if a == 1 { + println(2) + } + println("bye")`, + */ +}} diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..3f58f6d --- /dev/null +++ b/readme.md @@ -0,0 +1,17 @@ +# parscan + +Parscan is an experimental project to test a single parser to multiple +languages and virtual machines. + +The first language definition is a subset of Go, enough to implement +simple benchmarks, as fibonacci numbers. + +The first VM is a stack machine, operated by walking directly the AST. + +The next step is to add a byte-code based VM and the corresponding byte code +generator. + +Further steps is to get closer to full Go spec and / or introduce new +languages definitions and new VM implementations. + +Note: this is highly experimental and unstable. diff --git a/samples/fib b/samples/fib new file mode 100644 index 0000000..0ff570c --- /dev/null +++ b/samples/fib @@ -0,0 +1,6 @@ +func fib(i int) int { + if i < 2 { return i } + return fib(i-2) + fib(i-1) +} + +println(fib(20)) diff --git a/samples/p00 b/samples/p00 new file mode 100644 index 0000000..19f5084 --- /dev/null +++ b/samples/p00 @@ -0,0 +1 @@ +1+2 diff --git a/samples/p01 b/samples/p01 new file mode 100644 index 0000000..baafaa9 --- /dev/null +++ b/samples/p01 @@ -0,0 +1,2 @@ +s := "Hello" +println(s, 5+2) diff --git a/samples/p02 b/samples/p02 new file mode 100644 index 0000000..1aeb4a9 --- /dev/null +++ b/samples/p02 @@ -0,0 +1,7 @@ +a := 1 + +if a < 2 { + println("yep") +} + +println("ok") diff --git a/samples/p03 b/samples/p03 new file mode 100644 index 0000000..39a3cda --- /dev/null +++ b/samples/p03 @@ -0,0 +1,3 @@ +func f(a int, b int) int { return a + b } + +println("f:", f(3, 4), f(5, 6)) diff --git a/samples/p04 b/samples/p04 new file mode 100644 index 0000000..f2a85c8 --- /dev/null +++ b/samples/p04 @@ -0,0 +1,4 @@ +func f() int { println(a) } + +a := 4 +f() diff --git a/samples/p05 b/samples/p05 new file mode 100644 index 0000000..51c2c9b --- /dev/null +++ b/samples/p05 @@ -0,0 +1,10 @@ +func f(i int) int { + if i < 2 { + if i == 1 { + return + } } + println("i", i) +} + +f(1) +println("bye") diff --git a/samples/p06 b/samples/p06 new file mode 100644 index 0000000..88029cc --- /dev/null +++ b/samples/p06 @@ -0,0 +1,7 @@ +func f(i int) { + if i < 2 { return } + println("i > 1:", i) +} + +f(1) +println("bye") diff --git a/scanner/scan.go b/scanner/scan.go new file mode 100644 index 0000000..854df6d --- /dev/null +++ b/scanner/scan.go @@ -0,0 +1,265 @@ +package scanner + +import ( + "errors" + "fmt" + "strings" +) + +// Kind is the token type kind. +type Kind uint + +const ( + Undefined Kind = iota + Identifier + Number + Operator + Separator + String + Block + Custom +) + +const ( + CharOp = 1 << iota + CharNum + CharAlpha + CharSep + CharLineSep + CharGroupSep + CharStr + CharBlock + StrEsc + StrNonl +) + +var ErrBlock = errors.New("block not terminated") + +// Token defines a scanner token. +type Token struct { + pos int + kind Kind + content string + start int + end int +} + +func (t *Token) Kind() Kind { return t.kind } +func (t *Token) Content() string { return t.content } +func (t *Token) Start() int { return t.start } +func (t *Token) End() int { return t.end } +func (t *Token) Pos() int { return t.pos } +func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] } +func (t *Token) Prefix() string { return t.content[:t.start] } + +func (t *Token) Name() string { + name := t.content + if t.start > 0 { + name = name[:t.start] + ".." + name[len(name)-t.end:] + } + return name +} + +func NewToken(content string, pos int) Token { + return Token{pos, Custom, content, 0, 0} +} + +const ASCIILen = 1 << 7 // 128 + +// Scanner contains the scanner rules for a language. +type Scanner struct { + CharProp [ASCIILen]uint // Special Character properties. + End map[string]string // End delimiters. + DotNum bool // True if a number can start with '.'. + IdAscii bool // True if an identifier can be in ASCII only. + Num_ bool // True if a number can contain _ character. +} + +func (sc *Scanner) HasProp(r rune, p uint) bool { + if r >= ASCIILen { + return false + } + return sc.CharProp[r]&p != 0 +} + +func (sc *Scanner) IsOp(r rune) bool { return sc.HasProp(r, CharOp) } +func (sc *Scanner) IsSep(r rune) bool { return sc.HasProp(r, CharSep) } +func (sc *Scanner) IsLineSep(r rune) bool { return sc.HasProp(r, CharLineSep) } +func (sc *Scanner) IsGroupSep(r rune) bool { return sc.HasProp(r, CharGroupSep) } +func (sc *Scanner) IsStr(r rune) bool { return sc.HasProp(r, CharStr) } +func (sc *Scanner) IsBlock(r rune) bool { return sc.HasProp(r, CharBlock) } +func (sc *Scanner) IsId(r rune) bool { + return !sc.HasProp(r, CharOp|CharSep|CharLineSep|CharGroupSep|CharStr|CharBlock) +} + +func IsNum(r rune) bool { return '0' <= r && r <= '9' } + +func (sc *Scanner) Scan(src string) (tokens []Token, err error) { + offset := 0 + s := src + for len(s) > 0 { + t, err := sc.Next(s) + if err != nil { + //return nil, fmt.Errorf("%s: %w '%s'", loc(src, offset+t.pos), err, t.delim) + return nil, fmt.Errorf("%s: %w", loc(src, offset+t.pos), err) + } + if t.kind == Undefined { + break + } + nr := t.pos + len(t.content) + s = s[nr:] + t.pos += offset + offset += nr + tokens = append(tokens, t) + } + return tokens, nil +} + +func loc(s string, p int) string { + s = s[:p] + l := strings.Count(s, "\n") + li := strings.LastIndex(s, "\n") + if li < 0 { + li = 0 + } + return fmt.Sprintf("%d:%d", 1+l, 1+len(s)-li) +} + +// Next returns the next token in string. +func (sc *Scanner) Next(src string) (tok Token, err error) { + p := 0 + + // Skip initial separators. + for i, r := range src { + p = i + if !sc.IsSep(r) { + break + } + } + src = src[p:] + + // Get token according to its first characters. + for i, r := range src { + switch { + case sc.IsSep(r): + return Token{}, nil + case sc.IsGroupSep(r): + // TODO: handle group separators. + return Token{kind: Separator, pos: p + i, content: string(r)}, nil + case sc.IsLineSep(r): + return Token{kind: Separator, pos: p + i, content: " "}, nil + case sc.IsStr(r): + s, ok := sc.GetStr(src[i:]) + if !ok { + err = ErrBlock + } + return Token{kind: String, pos: p + i, content: s, start: 1, end: 1}, err + case sc.IsBlock(r): + b, ok := sc.GetBlock(src[i:]) + if !ok { + err = ErrBlock + } + return Token{kind: Block, pos: p + i, content: b, start: 1, end: 1}, err + case sc.IsOp(r): + return Token{kind: Operator, pos: p + i, content: sc.GetOp(src[i:])}, nil + case IsNum(r): + return Token{kind: Number, pos: p + i, content: sc.GetNum(src[i:])}, nil + default: + return Token{kind: Identifier, pos: p + i, content: sc.GetId(src[i:])}, nil + } + } + return Token{}, nil +} + +func (sc *Scanner) GetId(src string) (s string) { + for _, r := range src { + if !sc.IsId(r) { + break + } + s += string(r) + } + return s +} + +func (sc *Scanner) GetOp(src string) (s string) { + for _, r := range src { + if !sc.IsOp(r) { + break + } + s += string(r) + } + return s +} + +func (sc *Scanner) GetNum(src string) (s string) { + // TODO: handle hexa, binary, octal, float and eng notations. + for _, r := range src { + if !IsNum(r) { + break + } + s += string(r) + } + return s +} + +func (sc *Scanner) GetGroupSep(src string) (s string) { + for _, r := range src { + return string(r) + } + return s +} + +func (sc *Scanner) GetStr(src string) (s string, ok bool) { + // TODO: handle long delimiters. + var delim rune + var esc, canEscape, nonl bool + for i, r := range src { + s += string(r) + if i == 0 { + delim = r + canEscape = sc.HasProp(r, StrEsc) + nonl = sc.HasProp(r, StrNonl) + continue + } + if r == '\n' && nonl { + return + } + if r == delim && !(esc && canEscape) { + return s, true + } + esc = r == '\\' && !esc + } + return +} + +func (sc *Scanner) GetBlock(src string) (s string, ok bool) { + // TODO: handle long and word delimiters. + var start, end rune + skip := 0 + n := 1 + for i, r := range src { + s += string(r) + if i == 0 { + start = r + end = rune(sc.End[string(r)][0]) // FIXME: not robust. + continue + } + if i < skip { + continue + } else if r == start { + n++ + } else if r == end { + n-- + } else if sc.IsStr(r) { + str, ok := sc.GetStr(src[i:]) + if !ok { + return s, false + } + skip = i + len(str) + } + if n == 0 { + break + } + } + return s, n == 0 +} diff --git a/scanner/scan_test.go b/scanner/scan_test.go new file mode 100644 index 0000000..dd48faf --- /dev/null +++ b/scanner/scan_test.go @@ -0,0 +1,95 @@ +package scanner + +import ( + "fmt" + "testing" +) + +var GoScanner = &Scanner{ + CharProp: [ASCIILen]uint{ + '\t': CharSep, + '\n': CharLineSep, + ' ': CharSep, + '!': CharOp, + '"': CharStr | StrEsc | StrNonl, + '%': CharOp, + '&': CharOp, + '\'': CharStr | StrEsc, + '(': CharBlock, + '*': CharOp, + '+': CharOp, + ',': CharGroupSep, + '-': CharOp, + '`': CharStr, + '.': CharOp, + '/': CharOp, + ':': CharOp, + ';': CharGroupSep, + '<': CharOp, + '=': CharOp, + '>': CharOp, + '[': CharBlock, + '^': CharOp, + '{': CharBlock, + '|': CharOp, + '~': CharOp, + }, + End: map[string]string{ + "(": ")", + "{": "}", + "[": "]", + "/*": "*/", + `"`: `"`, + "'": "'", + "`": "`", + "//": "\n", + }, +} + +func TestScan(t *testing.T) { + tests := []struct{ src, result, errStr string }{ + // Simple tokens: separators, identifiers, numbers, operators. + {"", "[]", ""}, + {" abc + 5", "[{3 1 abc 0 0} {7 3 + 0 0} {9 2 5 0 0}]", ""}, + {"abc0+5 ", "[{0 1 abc0 0 0} {4 3 + 0 0} {5 2 5 0 0}]", ""}, + {"a+5\na=x-4", "[{0 1 a 0 0} {1 3 + 0 0} {2 2 5 0 0} {3 4 0 0} {4 1 a 0 0} {5 3 = 0 0} {6 1 x 0 0} {7 3 - 0 0} {8 2 4 0 0}]", ""}, + + // Strings. + {`return "hello world" + 4`, `[{0 1 return 0 0} {7 5 "hello world" 1 1} {21 3 + 0 0} {23 2 4 0 0}]`, ""}, + {`print(4 * (3+7))`, "[{0 1 print 0 0} {5 6 (4 * (3+7)) 1 1}]", ""}, + {`"foo`, "[]", "1:1: block not terminated"}, + {`abc +def "foo truc`, "[]", "2:6: block not terminated"}, + {`"ab\"`, "[]", "1:1: block not terminated"}, + {`"ab\\"`, `[{0 5 "ab\\" 1 1}]`, ""}, + {`"ab\\\"`, "[]", "1:1: block not terminated"}, + {`"ab\\\\"`, `[{0 5 "ab\\\\" 1 1}]`, ""}, + {`"abc +def"`, "[]", "1:1: block not terminated"}, + {"`hello\nworld`", "[{0 5 `hello\nworld` 1 1}]", ""}, + + // Nested blocks. + // {`f("a)bc")+1, 3)`, "[{0 1 f } {1 6 (\"a)bc\", 3) (}]", ""}, + {"2* (3+4", "[]", "1:4: block not terminated"}, + {`("fo)o")+1`, "[{0 6 (\"fo)o\") 1 1} {8 3 + 0 0} {9 2 1 0 0}]", ""}, + {`"foo""bar"`, "[{0 5 \"foo\" 1 1} {5 5 \"bar\" 1 1}]", ""}, + } + + for _, test := range tests { + t.Run("", func(t *testing.T) { + errStr := "" + token, err := GoScanner.Scan(test.src) + if err != nil { + errStr = err.Error() + } + if errStr != test.errStr { + t.Errorf("got error %#v, want error %#v", errStr, test.errStr) + } + result := fmt.Sprintf("%v", token) + t.Logf("%#v\n%v %v\n", test.src, result, errStr) + if result != test.result { + t.Errorf("got %#v, want %#v", result, test.result) + } + }) + } +} diff --git a/vm0/func.go b/vm0/func.go new file mode 100644 index 0000000..6c95383 --- /dev/null +++ b/vm0/func.go @@ -0,0 +1,73 @@ +package vm0 + +import ( + "reflect" + "strings" + + "github.com/gnolang/parscan/parser" +) + +var types = map[string]reflect.Type{ + "int": reflect.TypeOf(0), + "string": reflect.TypeOf(""), +} + +func (i *Interp) callFunc(n *parser.Node) { + fp := i.fp + l := len(i.stack) + nargs := i.stack[l-1].(int) + args := make([]reflect.Value, nargs) + for j := range args { + args[nargs-j-1] = reflect.ValueOf(i.stack[l-2-j]) + } + f := reflect.ValueOf(i.stack[l-2-nargs]) + out := f.Call(args) + i.fp = fp + i.stack = i.stack[:l-2-nargs] + for _, v := range out { + i.push(v.Interface()) + } +} + +func (i *Interp) declareFunc(r *parser.Node, scope string) { + fname := r.Child[0].Content() + + // Add symbols for input and output function arguments. + inArgs := r.Child[1].Child + fscope := strings.TrimPrefix(scope+"/"+fname+"/", "/") + in := make([]reflect.Type, len(inArgs)) + for j, c := range inArgs { + i.sym[fscope+c.Content()] = j + in[j] = types[c.Child[0].Content()] + } + var out []reflect.Type + if len(r.Child) > 3 { // function has return values + if i.IsBlock(r.Child[2]) { + outArgs := r.Child[2].Child + out = make([]reflect.Type, len(outArgs)) + for j, c := range outArgs { + out[j] = types[c.Content()] + } + } else { + out = []reflect.Type{types[r.Child[2].Content()]} + } + } + funT := reflect.FuncOf(in, out, false) + + // Generate a wrapper function which will run function body AST. + f := reflect.MakeFunc(funT, func(args []reflect.Value) (res []reflect.Value) { + i.fp = len(i.stack) // fp will be restored by caller (callFunc). + for _, arg := range args { + i.push(arg.Interface()) + } + i.Run(r.Child[len(r.Child)-1], fscope) + b := len(i.stack) - len(out) + for j := range out { + res = append(res, reflect.ValueOf(i.stack[b+j])) + } + return res + }) + + // Add a symbol for newly created func. + i.sym[scope+fname] = i.push(f.Interface()) - i.fp +} diff --git a/vm0/vm.go b/vm0/vm.go new file mode 100644 index 0000000..4c726ed --- /dev/null +++ b/vm0/vm.go @@ -0,0 +1,146 @@ +package vm0 + +import ( + "fmt" + "strconv" + "strings" + + "github.com/gnolang/parscan/lang/golang" + "github.com/gnolang/parscan/parser" +) + +type Interp struct { + *parser.Parser + stack []any // stack memory space + fp int // frame pointer: index of current frame in stack + sym map[string]int // symbol table, maps scoped identifiers to offsets relative to fp +} + +func New(p *parser.Parser) (i *Interp) { + i = &Interp{Parser: p, stack: []any{}, sym: map[string]int{}} + i.sym["println"] = i.push(fmt.Println) + return i +} + +func (i *Interp) Eval(src string) (r []any, err error) { + n, err := i.Parse(src) + if err != nil { + return nil, err + } + for _, nod := range n { + r, err = i.Run(nod, "") + if err != nil { + break + } + } + return +} + +// Run implements a stack based virtual machine which directly walks the AST. +func (i *Interp) Run(node *parser.Node, scope string) ([]any, error) { + stop := false + + node.Walk2(nil, 0, func(n, a *parser.Node, k int) (ok bool) { + // Node pre-order processing. + switch n.Kind { + case golang.StmtBloc: + if a != nil && a.Kind == golang.IfStmt { + // Control-flow in 'if' sub-tree + if k == 1 { + // 'if' first body branch, evaluated when condition is true. + if len(a.Child) > 2 { + return i.peek().(bool) // keep condition on stack for else branch + } + return i.pop().(bool) + } + // 'else' body branch, evaluated when condition is false. + return !i.pop().(bool) + } + case golang.FuncDecl: + i.declareFunc(n, scope) + return false + } + return true + }, func(n, a *parser.Node, k int) (ok bool) { + // Node post-order processing. + if stop { + return false + } + l := len(i.stack) + switch n.Kind { + case golang.NumberLit: + num, _ := strconv.Atoi(n.Content()) // TODO(marc): compute num value at scanning. + i.push(num) + case golang.StringLit: + i.push(n.Block()) + case golang.InfOp: + i.stack[l-2] = i.stack[l-2].(int) < i.stack[l-1].(int) + i.stack = i.stack[:l-1] + case golang.AddOp: + i.stack[l-2] = i.stack[l-2].(int) + i.stack[l-1].(int) + i.stack = i.stack[:l-1] + case golang.SubOp: + i.stack[l-2] = i.stack[l-2].(int) - i.stack[l-1].(int) + i.stack = i.stack[:l-1] + case golang.MulOp: + i.stack[l-2] = i.stack[l-2].(int) * i.stack[l-1].(int) + i.stack = i.stack[:l-1] + case golang.AssignOp, golang.DefOp: + i.stack[i.stack[l-2].(int)] = i.stack[l-1] + i.stack = i.stack[:l-2] + case golang.ReturnStmt: + stop = true + return false + case golang.CallExpr: + i.push(len(n.Child[1].Child)) // number of arguments to call + i.callFunc(n) + case golang.Ident: + name := n.Content() + v, sc, ok := i.getSym(name, scope) + fp := i.fp + if sc == "" { + fp = 0 + } + if ok { + if a.Content() == ":=" { + i.push(fp + v) // reference for assign (absolute index) + break + } + i.push(i.stack[fp+v]) // value + break + } + if a.Content() != ":=" { + fmt.Println("error: undefined:", name, "scope:", scope) + } + v = i.push(any(nil)) - i.fp // v is the address of new value, relative to frame pointer + i.sym[scope+name] = v // bind scoped name to address in symbol table + i.push(v) + } + return true + }) + return nil, nil +} + +// getSym searches for an existing symbol starting from the deepest scope. +func (i *Interp) getSym(name, scope string) (index int, sc string, ok bool) { + for { + if index, ok = i.sym[scope+name]; ok { + return index, scope, ok + } + scope = strings.TrimSuffix(scope, "/") + j := strings.LastIndex(scope, "/") + if j == -1 { + scope = "" + break + } + if scope = scope[:j]; scope == "" { + break + } + } + index, ok = i.sym[name] + return index, scope, ok +} + +func (i *Interp) peek() any { return i.stack[len(i.stack)-1] } +func (i *Interp) push(v any) (l int) { l = len(i.stack); i.stack = append(i.stack, v); return } +func (i *Interp) pop() (v any) { l := len(i.stack) - 1; v = i.stack[l]; i.stack = i.stack[:l]; return } diff --git a/vm0/vm_test.go b/vm0/vm_test.go new file mode 100644 index 0000000..0e8896b --- /dev/null +++ b/vm0/vm_test.go @@ -0,0 +1,26 @@ +package vm0 + +import ( + "os" + "testing" + + "github.com/gnolang/parscan/lang/golang" +) + +func TestEval(t *testing.T) { + i := New(golang.GoParser) + t.Logf("%#v\n", i.Parser) + //i.Eval("println(2*5)") + //n, _ := i.Parse("println(2*5)") + //n, _ := i.Parse(`a := 2 + 5`) + src := `a := 2` + nodes, err := i.Parse(src) + if err != nil { + t.Errorf("error %v", err) + } + i.Adot(nodes, os.Getenv("DOT")) + for _, n := range nodes { + v, err := i.Run(n, "") + t.Log(v, err) + } +} |
