diff options
| -rw-r--r-- | interp/interpreter_test.go | 58 | ||||
| -rw-r--r-- | lang/golang/go.go | 239 | ||||
| -rw-r--r-- | lang/spec.go | 28 | ||||
| -rw-r--r-- | lang/token.go | 3 | ||||
| -rw-r--r-- | parser/expr.go | 46 | ||||
| -rw-r--r-- | parser/parse.go | 2 | ||||
| -rw-r--r-- | scanner/scan.go | 16 |
7 files changed, 281 insertions, 111 deletions
diff --git a/interp/interpreter_test.go b/interp/interpreter_test.go index bc353bd..bce97b9 100644 --- a/interp/interpreter_test.go +++ b/interp/interpreter_test.go @@ -48,39 +48,41 @@ func run(t *testing.T, tests []etest) { func TestExpr(t *testing.T) { run(t, []etest{ - {src: "", res: "<invalid reflect.Value>"}, - {src: "1+2", res: "3"}, - {src: "1+", err: "block not terminated"}, - {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, - {src: "1+(2+3)", res: "6"}, - {src: "(1+2)+3", res: "6"}, - {src: "(6+(1+2)+3)+5", res: "17"}, - {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, - {src: "a := 2; a = 3; a", res: "3"}, - {src: "2 * 3 + 1 == 7", res: "true"}, - {src: "7 == 2 * 3 + 1", res: "true"}, - {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, - {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, - {src: "-2", res: "-2"}, - {src: "-2 + 5", res: "3"}, - {src: "5 + -2", res: "3"}, - {src: "!false", res: "true"}, - {src: `a := "hello"`, res: "hello"}, + {src: "", res: "<invalid reflect.Value>"}, // #00 + {src: "1+2", res: "3"}, // #01 + {src: "1+", err: "block not terminated"}, // #02 + {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, // #03 + {src: "1+(2+3)", res: "6"}, // #04 + {src: "(1+2)+3", res: "6"}, // #05 + {src: "(6+(1+2)+3)+5", res: "17"}, // #06 + {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, // #07 + {src: "a := 2; a = 3; a", res: "3"}, // #08 + {src: "2 * 3 + 1 == 7", res: "true"}, // #09 + {src: "7 == 2 * 3 + 1", res: "true"}, // #10 + {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, // #11 + {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, // #12 + {src: "-2", res: "-2"}, // #13 + {src: "-2 + 5", res: "3"}, // #14 + {src: "5 + -2", res: "3"}, // #15 + {src: "!false", res: "true"}, // #16 + {src: `a := "hello"`, res: "hello"}, // #17 }) } func TestLogical(t *testing.T) { run(t, []etest{ - {src: "true && false", res: "false"}, - {src: "true && true", res: "true"}, - {src: "true && true && false", res: "false"}, - {src: "false || true && true", res: "true"}, - {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, - {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, - {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, + {src: "true", res: "true"}, // #00 + {src: "false", res: "false"}, // #01 + {src: "true && false", res: "false"}, // #02 + {src: "true && true", res: "true"}, // #03 + {src: "true && true && false", res: "false"}, // #04 + {src: "false || true && true", res: "true"}, // #05 + {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, // #06 + {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, // #07 + {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, // #08 + {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, // #09 + {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, // #10 + {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, // #11 }) } diff --git a/lang/golang/go.go b/lang/golang/go.go index 47baee2..b1213e5 100644 --- a/lang/golang/go.go +++ b/lang/golang/go.go @@ -53,75 +53,180 @@ var GoSpec = &lang.Spec{ "/*": lang.CharStr, "//": lang.CharStr | lang.ExcludeEnd | lang.EosValidEnd, }, - TokenProps: map[string]lang.TokenProp{ - // Block tokens (can be nested) - "{..}": {Token: lang.BraceBlock}, - "[..]": {Token: lang.BracketBlock}, - "(..)": {Token: lang.ParenBlock}, + Tokens: map[string]lang.Token{ + "{..}": lang.BraceBlock, + "[..]": lang.BracketBlock, + "(..)": lang.ParenBlock, + "//..": lang.Comment, + "/*..": lang.Comment, + `".."`: lang.String, + "`..`": lang.String, + ",": lang.Comma, + ";": lang.Semicolon, + ".": lang.Period, + ":": lang.Colon, + "&": lang.And, + "*": lang.Mul, + "/": lang.Quo, + "%": lang.Rem, + "<<": lang.Shl, + ">>": lang.Shr, + "+": lang.Add, + "-": lang.Sub, + "=": lang.Assign, + "+=": lang.AddAssign, + "<": lang.Less, + ">": lang.Greater, + "^": lang.Xor, + "~": lang.Tilde, + "&&": lang.Land, + "||": lang.Lor, + ":=": lang.Define, + "==": lang.Equal, + "<=": lang.LessEqual, + ">=": lang.GreaterEqual, + "->": lang.Arrow, + "!": lang.Not, + "++": lang.Inc, + "--": lang.Dec, + "break": lang.Break, + "case": lang.Case, + "chan": lang.Chan, + "const": lang.Const, + "continue": lang.Continue, + "default": lang.Default, + "defer": lang.Defer, + "else": lang.Else, + "fallthrough": lang.Fallthrough, + "for": lang.For, + "func": lang.Func, + "go": lang.Go, + "goto": lang.Goto, + "if": lang.If, + "import": lang.Import, + "interface": lang.Interface, + "map": lang.Map, + "package": lang.Package, + "range": lang.Range, + "return": lang.Return, + "select": lang.Select, + "struct": lang.Struct, + "switch": lang.Switch, + "type": lang.Type, + "var": lang.Var, + }, + TokenProps: []lang.TokenProp{ + lang.And: {Precedence: 5}, + lang.Mul: {Precedence: 5}, + lang.Quo: {Precedence: 5}, + lang.Rem: {Precedence: 5}, + lang.Shl: {Precedence: 5}, + lang.Shr: {Precedence: 5}, + lang.Add: {Precedence: 4}, + lang.Sub: {Precedence: 4}, + lang.Xor: {Precedence: 4}, + lang.Or: {Precedence: 4}, + lang.LessEqual: {Precedence: 3}, + lang.GreaterEqual: {Precedence: 3}, + lang.Less: {Precedence: 3}, + lang.Greater: {Precedence: 3}, + lang.Land: {Precedence: 2}, + lang.Lor: {Precedence: 1}, + lang.Inc: {SkipSemi: true}, + lang.Dec: {SkipSemi: true}, + lang.Case: {SkipSemi: true}, + lang.Chan: {SkipSemi: true}, + lang.Const: {SkipSemi: true}, + lang.Default: {SkipSemi: true}, + lang.Defer: {SkipSemi: true}, + lang.Else: {SkipSemi: true}, + lang.For: {SkipSemi: true}, + lang.Func: {SkipSemi: true}, + lang.Go: {SkipSemi: true}, + lang.Goto: {SkipSemi: true}, + lang.If: {SkipSemi: true}, + lang.Import: {SkipSemi: true}, + lang.Interface: {SkipSemi: true}, + lang.Map: {SkipSemi: true}, + lang.Package: {SkipSemi: true}, + lang.Range: {SkipSemi: true}, + lang.Select: {SkipSemi: true}, + lang.Struct: {SkipSemi: true}, + lang.Switch: {SkipSemi: true}, + lang.Type: {SkipSemi: true}, + lang.Var: {SkipSemi: true}, + }, + /* + TokenProps: map[string]lang.TokenProp{ + // Block tokens (can be nested) + "{..}": {Token: lang.BraceBlock}, + "[..]": {Token: lang.BracketBlock}, + "(..)": {Token: lang.ParenBlock}, - // String tokens (not nested) - "//..": {Token: lang.Comment}, - "/*..": {Token: lang.Comment}, - `".."`: {Token: lang.String}, - "`..`": {Token: lang.String}, + // String tokens (not nested) + "//..": {Token: lang.Comment}, + "/*..": {Token: lang.Comment}, + `".."`: {Token: lang.String}, + "`..`": {Token: lang.String}, - // Separators - ",": {Token: lang.Comma}, - ";": {Token: lang.Semicolon}, - ".": {Token: lang.Period}, - ":": {Token: lang.Colon}, + // Separators + ",": {Token: lang.Comma}, + ";": {Token: lang.Semicolon}, + ".": {Token: lang.Period}, + ":": {Token: lang.Colon}, - // Operators - "&": {Token: lang.And, Precedence: 1}, - "*": {Token: lang.Mul, Precedence: 1}, - "/": {Token: lang.Quo, Precedence: 1}, - "%": {Token: lang.Rem, Precedence: 1}, - "<<": {Token: lang.Shl, Precedence: 1}, - ">>": {Token: lang.Shr, Precedence: 1}, - "+": {Token: lang.Add, Precedence: 2}, - "-": {Token: lang.Sub, Precedence: 2}, - "=": {Token: lang.Assign, Precedence: 6}, - "+=": {Token: lang.AddAssign, Precedence: 6}, - "<": {Token: lang.Less, Precedence: 3}, - ">": {Token: lang.Greater, Precedence: 3}, - "^": {Token: lang.Xor, Precedence: 2}, - "~": {Token: lang.Tilde}, - "&&": {Token: lang.Land, Precedence: 4}, - "||": {Token: lang.Lor, Precedence: 5}, - ":=": {Token: lang.Define, Precedence: 6}, - "==": {Token: lang.Equal, Precedence: 3}, - "<=": {Token: lang.LessEqual, Precedence: 3}, - ">=": {Token: lang.GreaterEqual, Precedence: 3}, - "->": {Token: lang.Arrow}, - "!": {Token: lang.Not}, - "++": {Token: lang.Inc, SkipSemi: true}, - "--": {Token: lang.Dec, SkipSemi: true}, + // Operators + "&": {Token: lang.And, Precedence: 1}, + "*": {Token: lang.Mul, Precedence: 3}, + "/": {Token: lang.Quo, Precedence: 1}, + "%": {Token: lang.Rem, Precedence: 1}, + "<<": {Token: lang.Shl, Precedence: 1}, + ">>": {Token: lang.Shr, Precedence: 1}, + "+": {Token: lang.Add, Precedence: 2}, + "-": {Token: lang.Sub, Precedence: 2}, + "=": {Token: lang.Assign, Precedence: 1}, + "+=": {Token: lang.AddAssign, Precedence: 1}, + "<": {Token: lang.Less, Precedence: 1}, + ">": {Token: lang.Greater, Precedence: 1}, + "^": {Token: lang.Xor, Precedence: 2}, + "~": {Token: lang.Tilde}, + "&&": {Token: lang.Land, Precedence: 4}, + "||": {Token: lang.Lor, Precedence: 5}, + ":=": {Token: lang.Define, Precedence: 1}, + "==": {Token: lang.Equal, Precedence: 1}, + "<=": {Token: lang.LessEqual, Precedence: 1}, + ">=": {Token: lang.GreaterEqual, Precedence: 1}, + "->": {Token: lang.Arrow}, + "!": {Token: lang.Not}, + "++": {Token: lang.Inc, SkipSemi: true}, + "--": {Token: lang.Dec, SkipSemi: true}, - // Reserved keywords - "break": {Token: lang.Break}, - "case": {Token: lang.Case, SkipSemi: true}, - "chan": {Token: lang.Chan, SkipSemi: true}, - "const": {Token: lang.Const, SkipSemi: true}, - "continue": {Token: lang.Continue}, - "default": {Token: lang.Case, SkipSemi: true}, - "defer": {Token: lang.Defer, SkipSemi: true}, - "else": {Token: lang.Else, SkipSemi: true}, - "fallthrough": {Token: lang.Fallthrough}, - "for": {Token: lang.For, SkipSemi: true}, - "func": {Token: lang.Func, SkipSemi: true}, - "go": {Token: lang.Go, SkipSemi: true}, - "goto": {Token: lang.Goto, SkipSemi: true}, - "if": {Token: lang.If, SkipSemi: true}, - "import": {Token: lang.Import, SkipSemi: true}, - "interface": {Token: lang.Interface, SkipSemi: true}, - "map": {Token: lang.Map, SkipSemi: true}, - "package": {Token: lang.Package, SkipSemi: true}, - "range": {Token: lang.Range, SkipSemi: true}, - "return": {Token: lang.Return}, - "select": {Token: lang.Select, SkipSemi: true}, - "struct": {Token: lang.Struct, SkipSemi: true}, - "switch": {Token: lang.Switch, SkipSemi: true}, - "type": {Token: lang.Type, SkipSemi: true}, - "var": {Token: lang.Var, SkipSemi: true}, - }, + // Reserved keywords + "break": {Token: lang.Break}, + "case": {Token: lang.Case, SkipSemi: true}, + "chan": {Token: lang.Chan, SkipSemi: true}, + "const": {Token: lang.Const, SkipSemi: true}, + "continue": {Token: lang.Continue}, + "default": {Token: lang.Case, SkipSemi: true}, + "defer": {Token: lang.Defer, SkipSemi: true}, + "else": {Token: lang.Else, SkipSemi: true}, + "fallthrough": {Token: lang.Fallthrough}, + "for": {Token: lang.For, SkipSemi: true}, + "func": {Token: lang.Func, SkipSemi: true}, + "go": {Token: lang.Go, SkipSemi: true}, + "goto": {Token: lang.Goto, SkipSemi: true}, + "if": {Token: lang.If, SkipSemi: true}, + "import": {Token: lang.Import, SkipSemi: true}, + "interface": {Token: lang.Interface, SkipSemi: true}, + "map": {Token: lang.Map, SkipSemi: true}, + "package": {Token: lang.Package, SkipSemi: true}, + "range": {Token: lang.Range, SkipSemi: true}, + "return": {Token: lang.Return}, + "select": {Token: lang.Select, SkipSemi: true}, + "struct": {Token: lang.Struct, SkipSemi: true}, + "switch": {Token: lang.Switch, SkipSemi: true}, + "type": {Token: lang.Type, SkipSemi: true}, + "var": {Token: lang.Var, SkipSemi: true}, + }, + */ } diff --git a/lang/spec.go b/lang/spec.go index b1b2580..37017e7 100644 --- a/lang/spec.go +++ b/lang/spec.go @@ -21,22 +21,36 @@ const ( // ASCIILen is the length of the ASCII characters set. const ASCIILen = 1 << 7 // 128 +// Associativity represent the associativity rule of an operator. +type Associativity int + +// Associativity kinds for operators. +const ( + Aboth Associativity = iota // both left and right associative + Aleft // left associative only + Aright // right associative only + Anon // non associative +) + // TokenProp represent token properties for parsing. type TokenProp struct { Token SkipSemi bool // automatic semicolon insertion after newline Precedence int // operator precedence + Associativity } // Spec represents the language specification for scanning. type Spec struct { - CharProp [ASCIILen]uint // special Character properties - End map[string]string // end delimiters, indexed by start - BlockProp map[string]uint // block properties - TokenProps map[string]TokenProp // token properties - DotNum bool // true if a number can start with '.' - IdentASCII bool // true if an identifier can be in ASCII only - NumUnder bool // true if a number can contain _ character + CharProp [ASCIILen]uint // special Character properties + End map[string]string // end delimiters, indexed by start + BlockProp map[string]uint // block properties + Tokens map[string]Token // token per string + TokenProps []TokenProp // token properties, indexed by token + DotNum bool // true if a number can start with '.' + IdentASCII bool // true if an identifier can be in ASCII only + NumUnder bool // true if a number can contain _ character + // TokenProps map[string]TokenProp // token properties } // HasInit stores if a statement may contain a simple init statement. diff --git a/lang/token.go b/lang/token.go index 41c8439..4f5da35 100644 --- a/lang/token.go +++ b/lang/token.go @@ -120,6 +120,9 @@ const ( JumpSetTrue Label New + + // This must be the last token value. + MaxTok ) // UnaryOp contains the set of unary operators. diff --git a/parser/expr.go b/parser/expr.go index 5958279..d3161a5 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -11,7 +11,53 @@ import ( "github.com/mvertes/parscan/vm" ) +// parseExpr transform an infix expression into a postfix notation func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { + log.Println("parseExpr2 in:", in) + var ops Tokens + + popop := func() (t scanner.Token) { l := len(ops) - 1; t = ops[l]; ops = ops[:l]; return t } + + for i, t := range in { + switch t.Tok { + case lang.Int, lang.String: + out = append(out, t) + + case lang.Add, lang.Assign, lang.Define, lang.Equal, lang.Greater, lang.Less, lang.Mul: + // Apply operator precedence rule. + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + ops = append(ops, t) + + case lang.Ident: + _, sc, ok := p.Symbols.Get(t.Str, p.scope) + if ok { + // t.Str = sc + "/" + t.Str + _ = sc + } + out = append(out, t) + if i+1 < len(in) && in[i+1].Tok == lang.Define { + log.Println("t:", t.Str) + p.Symbols.Add(symbol.UnsetAddr, t.Str, vm.Value{}, symbol.Var, nil, false) + } + + case lang.ParenBlock: + toks, err := p.parseExprStr(t.Block()) + if err != nil { + return out, err + } + out = append(out, toks...) + } + } + for len(ops) > 0 { + out = append(out, popop()) + } + log.Println("Final out:", out) + return out, err +} + +func (p *Parser) parseExpr2(in Tokens) (out Tokens, err error) { log.Println("parseExpr in:", in) var ops, selectors Tokens var vl int diff --git a/parser/parse.go b/parser/parse.go index e7d5399..75bc963 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -519,5 +519,5 @@ func (p *Parser) popScope() { } func (p *Parser) precedence(t scanner.Token) int { - return p.TokenProps[t.Str].Precedence + return p.TokenProps[t.Tok].Precedence } diff --git a/scanner/scan.go b/scanner/scan.go index edd8b1c..33c198c 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -119,8 +119,8 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) { if len(tokens) > 0 && t.Str == "\n" { // Check for automatic semi-colon insertion after newline. last := tokens[len(tokens)-1] - if last.Tok.IsKeyword() && sc.TokenProps[last.Str].SkipSemi || - last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi { + if last.Tok.IsKeyword() && sc.TokenProps[last.Tok].SkipSemi || + last.Tok.IsOperator() && !sc.TokenProps[last.Tok].SkipSemi { skip = true } else { t.Tok = lang.Semicolon @@ -142,8 +142,8 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) { if last.Str == ";" { return tokens, nil } - if last.Tok == lang.Ident && sc.TokenProps[last.Str].SkipSemi || - last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi { + if last.Tok == lang.Ident && sc.TokenProps[last.Tok].SkipSemi || + last.Tok.IsOperator() && !sc.TokenProps[last.Tok].SkipSemi { return tokens, nil } tokens = append(tokens, Token{Tok: lang.Semicolon, Str: ";"}) @@ -179,7 +179,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { return Token{}, nil case sc.isGroupSep(r): // TODO: handle group separators. - return Token{Tok: sc.TokenProps[string(r)].Token, Pos: p + i, Str: string(r)}, nil + return Token{Tok: sc.Tokens[string(r)], Pos: p + i, Str: string(r)}, nil case sc.isLineSep(r): return Token{Pos: p + i, Str: "\n"}, nil case sc.isStr(r): @@ -194,12 +194,12 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { err = ErrBlock } tok := Token{Pos: p + i, Str: b, Beg: 1, End: 1} - tok.Tok = sc.TokenProps[tok.Name()].Token + tok.Tok = sc.Tokens[tok.Name()] return tok, err case sc.isOp(r): op, isOp := sc.getOp(src[i:]) if isOp { - t := sc.TokenProps[op].Token + t := sc.Tokens[op] if t == lang.Illegal { err = fmt.Errorf("%w: %s", ErrIllegal, op) } @@ -218,7 +218,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { default: t, isDefined := sc.getToken(src[i:]) if isDefined { - ident := sc.TokenProps[t].Token + ident := sc.Tokens[t] if ident == lang.Illegal { ident = lang.Ident } |
