summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2025-12-19 18:00:34 +0100
committerMarc Vertes <mvertes@free.fr>2025-12-19 18:00:34 +0100
commit3cf8207c708f23d1bd8400de5483b6b8eadb01ca (patch)
tree7b10d04e1075490272e99c3e9bac6118406bc4ab
parentf07fc0178831432b68f1b9bd6c96b257aa2e9abe (diff)
fix lang: attribute properties to tokens, not strings
-rw-r--r--interp/interpreter_test.go58
-rw-r--r--lang/golang/go.go239
-rw-r--r--lang/spec.go28
-rw-r--r--lang/token.go3
-rw-r--r--parser/expr.go46
-rw-r--r--parser/parse.go2
-rw-r--r--scanner/scan.go16
7 files changed, 281 insertions, 111 deletions
diff --git a/interp/interpreter_test.go b/interp/interpreter_test.go
index bc353bd..bce97b9 100644
--- a/interp/interpreter_test.go
+++ b/interp/interpreter_test.go
@@ -48,39 +48,41 @@ func run(t *testing.T, tests []etest) {
func TestExpr(t *testing.T) {
run(t, []etest{
- {src: "", res: "<invalid reflect.Value>"},
- {src: "1+2", res: "3"},
- {src: "1+", err: "block not terminated"},
- {src: "a := 1 + 2; b := 0; a + 1", res: "4"},
- {src: "1+(2+3)", res: "6"},
- {src: "(1+2)+3", res: "6"},
- {src: "(6+(1+2)+3)+5", res: "17"},
- {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"},
- {src: "a := 2; a = 3; a", res: "3"},
- {src: "2 * 3 + 1 == 7", res: "true"},
- {src: "7 == 2 * 3 + 1", res: "true"},
- {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"},
- {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"},
- {src: "-2", res: "-2"},
- {src: "-2 + 5", res: "3"},
- {src: "5 + -2", res: "3"},
- {src: "!false", res: "true"},
- {src: `a := "hello"`, res: "hello"},
+ {src: "", res: "<invalid reflect.Value>"}, // #00
+ {src: "1+2", res: "3"}, // #01
+ {src: "1+", err: "block not terminated"}, // #02
+ {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, // #03
+ {src: "1+(2+3)", res: "6"}, // #04
+ {src: "(1+2)+3", res: "6"}, // #05
+ {src: "(6+(1+2)+3)+5", res: "17"}, // #06
+ {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, // #07
+ {src: "a := 2; a = 3; a", res: "3"}, // #08
+ {src: "2 * 3 + 1 == 7", res: "true"}, // #09
+ {src: "7 == 2 * 3 + 1", res: "true"}, // #10
+ {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, // #11
+ {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, // #12
+ {src: "-2", res: "-2"}, // #13
+ {src: "-2 + 5", res: "3"}, // #14
+ {src: "5 + -2", res: "3"}, // #15
+ {src: "!false", res: "true"}, // #16
+ {src: `a := "hello"`, res: "hello"}, // #17
})
}
func TestLogical(t *testing.T) {
run(t, []etest{
- {src: "true && false", res: "false"},
- {src: "true && true", res: "true"},
- {src: "true && true && false", res: "false"},
- {src: "false || true && true", res: "true"},
- {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"},
- {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"},
- {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"},
- {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"},
- {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"},
- {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"},
+ {src: "true", res: "true"}, // #00
+ {src: "false", res: "false"}, // #01
+ {src: "true && false", res: "false"}, // #02
+ {src: "true && true", res: "true"}, // #03
+ {src: "true && true && false", res: "false"}, // #04
+ {src: "false || true && true", res: "true"}, // #05
+ {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, // #06
+ {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, // #07
+ {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, // #08
+ {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, // #09
+ {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, // #10
+ {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, // #11
})
}
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 47baee2..b1213e5 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -53,75 +53,180 @@ var GoSpec = &lang.Spec{
"/*": lang.CharStr,
"//": lang.CharStr | lang.ExcludeEnd | lang.EosValidEnd,
},
- TokenProps: map[string]lang.TokenProp{
- // Block tokens (can be nested)
- "{..}": {Token: lang.BraceBlock},
- "[..]": {Token: lang.BracketBlock},
- "(..)": {Token: lang.ParenBlock},
+ Tokens: map[string]lang.Token{
+ "{..}": lang.BraceBlock,
+ "[..]": lang.BracketBlock,
+ "(..)": lang.ParenBlock,
+ "//..": lang.Comment,
+ "/*..": lang.Comment,
+ `".."`: lang.String,
+ "`..`": lang.String,
+ ",": lang.Comma,
+ ";": lang.Semicolon,
+ ".": lang.Period,
+ ":": lang.Colon,
+ "&": lang.And,
+ "*": lang.Mul,
+ "/": lang.Quo,
+ "%": lang.Rem,
+ "<<": lang.Shl,
+ ">>": lang.Shr,
+ "+": lang.Add,
+ "-": lang.Sub,
+ "=": lang.Assign,
+ "+=": lang.AddAssign,
+ "<": lang.Less,
+ ">": lang.Greater,
+ "^": lang.Xor,
+ "~": lang.Tilde,
+ "&&": lang.Land,
+ "||": lang.Lor,
+ ":=": lang.Define,
+ "==": lang.Equal,
+ "<=": lang.LessEqual,
+ ">=": lang.GreaterEqual,
+ "->": lang.Arrow,
+ "!": lang.Not,
+ "++": lang.Inc,
+ "--": lang.Dec,
+ "break": lang.Break,
+ "case": lang.Case,
+ "chan": lang.Chan,
+ "const": lang.Const,
+ "continue": lang.Continue,
+ "default": lang.Default,
+ "defer": lang.Defer,
+ "else": lang.Else,
+ "fallthrough": lang.Fallthrough,
+ "for": lang.For,
+ "func": lang.Func,
+ "go": lang.Go,
+ "goto": lang.Goto,
+ "if": lang.If,
+ "import": lang.Import,
+ "interface": lang.Interface,
+ "map": lang.Map,
+ "package": lang.Package,
+ "range": lang.Range,
+ "return": lang.Return,
+ "select": lang.Select,
+ "struct": lang.Struct,
+ "switch": lang.Switch,
+ "type": lang.Type,
+ "var": lang.Var,
+ },
+ TokenProps: []lang.TokenProp{
+ lang.And: {Precedence: 5},
+ lang.Mul: {Precedence: 5},
+ lang.Quo: {Precedence: 5},
+ lang.Rem: {Precedence: 5},
+ lang.Shl: {Precedence: 5},
+ lang.Shr: {Precedence: 5},
+ lang.Add: {Precedence: 4},
+ lang.Sub: {Precedence: 4},
+ lang.Xor: {Precedence: 4},
+ lang.Or: {Precedence: 4},
+ lang.LessEqual: {Precedence: 3},
+ lang.GreaterEqual: {Precedence: 3},
+ lang.Less: {Precedence: 3},
+ lang.Greater: {Precedence: 3},
+ lang.Land: {Precedence: 2},
+ lang.Lor: {Precedence: 1},
+ lang.Inc: {SkipSemi: true},
+ lang.Dec: {SkipSemi: true},
+ lang.Case: {SkipSemi: true},
+ lang.Chan: {SkipSemi: true},
+ lang.Const: {SkipSemi: true},
+ lang.Default: {SkipSemi: true},
+ lang.Defer: {SkipSemi: true},
+ lang.Else: {SkipSemi: true},
+ lang.For: {SkipSemi: true},
+ lang.Func: {SkipSemi: true},
+ lang.Go: {SkipSemi: true},
+ lang.Goto: {SkipSemi: true},
+ lang.If: {SkipSemi: true},
+ lang.Import: {SkipSemi: true},
+ lang.Interface: {SkipSemi: true},
+ lang.Map: {SkipSemi: true},
+ lang.Package: {SkipSemi: true},
+ lang.Range: {SkipSemi: true},
+ lang.Select: {SkipSemi: true},
+ lang.Struct: {SkipSemi: true},
+ lang.Switch: {SkipSemi: true},
+ lang.Type: {SkipSemi: true},
+ lang.Var: {SkipSemi: true},
+ },
+ /*
+ TokenProps: map[string]lang.TokenProp{
+ // Block tokens (can be nested)
+ "{..}": {Token: lang.BraceBlock},
+ "[..]": {Token: lang.BracketBlock},
+ "(..)": {Token: lang.ParenBlock},
- // String tokens (not nested)
- "//..": {Token: lang.Comment},
- "/*..": {Token: lang.Comment},
- `".."`: {Token: lang.String},
- "`..`": {Token: lang.String},
+ // String tokens (not nested)
+ "//..": {Token: lang.Comment},
+ "/*..": {Token: lang.Comment},
+ `".."`: {Token: lang.String},
+ "`..`": {Token: lang.String},
- // Separators
- ",": {Token: lang.Comma},
- ";": {Token: lang.Semicolon},
- ".": {Token: lang.Period},
- ":": {Token: lang.Colon},
+ // Separators
+ ",": {Token: lang.Comma},
+ ";": {Token: lang.Semicolon},
+ ".": {Token: lang.Period},
+ ":": {Token: lang.Colon},
- // Operators
- "&": {Token: lang.And, Precedence: 1},
- "*": {Token: lang.Mul, Precedence: 1},
- "/": {Token: lang.Quo, Precedence: 1},
- "%": {Token: lang.Rem, Precedence: 1},
- "<<": {Token: lang.Shl, Precedence: 1},
- ">>": {Token: lang.Shr, Precedence: 1},
- "+": {Token: lang.Add, Precedence: 2},
- "-": {Token: lang.Sub, Precedence: 2},
- "=": {Token: lang.Assign, Precedence: 6},
- "+=": {Token: lang.AddAssign, Precedence: 6},
- "<": {Token: lang.Less, Precedence: 3},
- ">": {Token: lang.Greater, Precedence: 3},
- "^": {Token: lang.Xor, Precedence: 2},
- "~": {Token: lang.Tilde},
- "&&": {Token: lang.Land, Precedence: 4},
- "||": {Token: lang.Lor, Precedence: 5},
- ":=": {Token: lang.Define, Precedence: 6},
- "==": {Token: lang.Equal, Precedence: 3},
- "<=": {Token: lang.LessEqual, Precedence: 3},
- ">=": {Token: lang.GreaterEqual, Precedence: 3},
- "->": {Token: lang.Arrow},
- "!": {Token: lang.Not},
- "++": {Token: lang.Inc, SkipSemi: true},
- "--": {Token: lang.Dec, SkipSemi: true},
+ // Operators
+ "&": {Token: lang.And, Precedence: 1},
+ "*": {Token: lang.Mul, Precedence: 3},
+ "/": {Token: lang.Quo, Precedence: 1},
+ "%": {Token: lang.Rem, Precedence: 1},
+ "<<": {Token: lang.Shl, Precedence: 1},
+ ">>": {Token: lang.Shr, Precedence: 1},
+ "+": {Token: lang.Add, Precedence: 2},
+ "-": {Token: lang.Sub, Precedence: 2},
+ "=": {Token: lang.Assign, Precedence: 1},
+ "+=": {Token: lang.AddAssign, Precedence: 1},
+ "<": {Token: lang.Less, Precedence: 1},
+ ">": {Token: lang.Greater, Precedence: 1},
+ "^": {Token: lang.Xor, Precedence: 2},
+ "~": {Token: lang.Tilde},
+ "&&": {Token: lang.Land, Precedence: 4},
+ "||": {Token: lang.Lor, Precedence: 5},
+ ":=": {Token: lang.Define, Precedence: 1},
+ "==": {Token: lang.Equal, Precedence: 1},
+ "<=": {Token: lang.LessEqual, Precedence: 1},
+ ">=": {Token: lang.GreaterEqual, Precedence: 1},
+ "->": {Token: lang.Arrow},
+ "!": {Token: lang.Not},
+ "++": {Token: lang.Inc, SkipSemi: true},
+ "--": {Token: lang.Dec, SkipSemi: true},
- // Reserved keywords
- "break": {Token: lang.Break},
- "case": {Token: lang.Case, SkipSemi: true},
- "chan": {Token: lang.Chan, SkipSemi: true},
- "const": {Token: lang.Const, SkipSemi: true},
- "continue": {Token: lang.Continue},
- "default": {Token: lang.Case, SkipSemi: true},
- "defer": {Token: lang.Defer, SkipSemi: true},
- "else": {Token: lang.Else, SkipSemi: true},
- "fallthrough": {Token: lang.Fallthrough},
- "for": {Token: lang.For, SkipSemi: true},
- "func": {Token: lang.Func, SkipSemi: true},
- "go": {Token: lang.Go, SkipSemi: true},
- "goto": {Token: lang.Goto, SkipSemi: true},
- "if": {Token: lang.If, SkipSemi: true},
- "import": {Token: lang.Import, SkipSemi: true},
- "interface": {Token: lang.Interface, SkipSemi: true},
- "map": {Token: lang.Map, SkipSemi: true},
- "package": {Token: lang.Package, SkipSemi: true},
- "range": {Token: lang.Range, SkipSemi: true},
- "return": {Token: lang.Return},
- "select": {Token: lang.Select, SkipSemi: true},
- "struct": {Token: lang.Struct, SkipSemi: true},
- "switch": {Token: lang.Switch, SkipSemi: true},
- "type": {Token: lang.Type, SkipSemi: true},
- "var": {Token: lang.Var, SkipSemi: true},
- },
+ // Reserved keywords
+ "break": {Token: lang.Break},
+ "case": {Token: lang.Case, SkipSemi: true},
+ "chan": {Token: lang.Chan, SkipSemi: true},
+ "const": {Token: lang.Const, SkipSemi: true},
+ "continue": {Token: lang.Continue},
+ "default": {Token: lang.Case, SkipSemi: true},
+ "defer": {Token: lang.Defer, SkipSemi: true},
+ "else": {Token: lang.Else, SkipSemi: true},
+ "fallthrough": {Token: lang.Fallthrough},
+ "for": {Token: lang.For, SkipSemi: true},
+ "func": {Token: lang.Func, SkipSemi: true},
+ "go": {Token: lang.Go, SkipSemi: true},
+ "goto": {Token: lang.Goto, SkipSemi: true},
+ "if": {Token: lang.If, SkipSemi: true},
+ "import": {Token: lang.Import, SkipSemi: true},
+ "interface": {Token: lang.Interface, SkipSemi: true},
+ "map": {Token: lang.Map, SkipSemi: true},
+ "package": {Token: lang.Package, SkipSemi: true},
+ "range": {Token: lang.Range, SkipSemi: true},
+ "return": {Token: lang.Return},
+ "select": {Token: lang.Select, SkipSemi: true},
+ "struct": {Token: lang.Struct, SkipSemi: true},
+ "switch": {Token: lang.Switch, SkipSemi: true},
+ "type": {Token: lang.Type, SkipSemi: true},
+ "var": {Token: lang.Var, SkipSemi: true},
+ },
+ */
}
diff --git a/lang/spec.go b/lang/spec.go
index b1b2580..37017e7 100644
--- a/lang/spec.go
+++ b/lang/spec.go
@@ -21,22 +21,36 @@ const (
// ASCIILen is the length of the ASCII characters set.
const ASCIILen = 1 << 7 // 128
+// Associativity represents the associativity rule of an operator.
+type Associativity int
+
+// Associativity kinds for operators.
+const (
+ Aboth Associativity = iota // both left and right associative
+ Aleft // left associative only
+ Aright // right associative only
+ Anon // non-associative
+)
+
// TokenProp represent token properties for parsing.
type TokenProp struct {
Token
SkipSemi bool // automatic semicolon insertion after newline
Precedence int // operator precedence
+ Associativity
}
// Spec represents the language specification for scanning.
type Spec struct {
- CharProp [ASCIILen]uint // special Character properties
- End map[string]string // end delimiters, indexed by start
- BlockProp map[string]uint // block properties
- TokenProps map[string]TokenProp // token properties
- DotNum bool // true if a number can start with '.'
- IdentASCII bool // true if an identifier can be in ASCII only
- NumUnder bool // true if a number can contain _ character
+ CharProp [ASCIILen]uint // special Character properties
+ End map[string]string // end delimiters, indexed by start
+ BlockProp map[string]uint // block properties
+ Tokens map[string]Token // token per string
+ TokenProps []TokenProp // token properties, indexed by token
+ DotNum bool // true if a number can start with '.'
+ IdentASCII bool // true if an identifier can be in ASCII only
+ NumUnder bool // true if a number can contain _ character
+ // TokenProps map[string]TokenProp // token properties
}
// HasInit stores if a statement may contain a simple init statement.
diff --git a/lang/token.go b/lang/token.go
index 41c8439..4f5da35 100644
--- a/lang/token.go
+++ b/lang/token.go
@@ -120,6 +120,9 @@ const (
JumpSetTrue
Label
New
+
+ // This must be the last token value.
+ MaxTok
)
// UnaryOp contains the set of unary operators.
diff --git a/parser/expr.go b/parser/expr.go
index 5958279..d3161a5 100644
--- a/parser/expr.go
+++ b/parser/expr.go
@@ -11,7 +11,53 @@ import (
"github.com/mvertes/parscan/vm"
)
+// parseExpr transforms an infix expression into postfix notation.
func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) {
+ log.Println("parseExpr2 in:", in)
+ var ops Tokens
+
+ popop := func() (t scanner.Token) { l := len(ops) - 1; t = ops[l]; ops = ops[:l]; return t }
+
+ for i, t := range in {
+ switch t.Tok {
+ case lang.Int, lang.String:
+ out = append(out, t)
+
+ case lang.Add, lang.Assign, lang.Define, lang.Equal, lang.Greater, lang.Less, lang.Mul:
+ // Apply operator precedence rule.
+ for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) {
+ out = append(out, popop())
+ }
+ ops = append(ops, t)
+
+ case lang.Ident:
+ _, sc, ok := p.Symbols.Get(t.Str, p.scope)
+ if ok {
+ // t.Str = sc + "/" + t.Str
+ _ = sc
+ }
+ out = append(out, t)
+ if i+1 < len(in) && in[i+1].Tok == lang.Define {
+ log.Println("t:", t.Str)
+ p.Symbols.Add(symbol.UnsetAddr, t.Str, vm.Value{}, symbol.Var, nil, false)
+ }
+
+ case lang.ParenBlock:
+ toks, err := p.parseExprStr(t.Block())
+ if err != nil {
+ return out, err
+ }
+ out = append(out, toks...)
+ }
+ }
+ for len(ops) > 0 {
+ out = append(out, popop())
+ }
+ log.Println("Final out:", out)
+ return out, err
+}
+
+func (p *Parser) parseExpr2(in Tokens) (out Tokens, err error) {
log.Println("parseExpr in:", in)
var ops, selectors Tokens
var vl int
diff --git a/parser/parse.go b/parser/parse.go
index e7d5399..75bc963 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -519,5 +519,5 @@ func (p *Parser) popScope() {
}
func (p *Parser) precedence(t scanner.Token) int {
- return p.TokenProps[t.Str].Precedence
+ return p.TokenProps[t.Tok].Precedence
}
diff --git a/scanner/scan.go b/scanner/scan.go
index edd8b1c..33c198c 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -119,8 +119,8 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
if len(tokens) > 0 && t.Str == "\n" {
// Check for automatic semi-colon insertion after newline.
last := tokens[len(tokens)-1]
- if last.Tok.IsKeyword() && sc.TokenProps[last.Str].SkipSemi ||
- last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi {
+ if last.Tok.IsKeyword() && sc.TokenProps[last.Tok].SkipSemi ||
+ last.Tok.IsOperator() && !sc.TokenProps[last.Tok].SkipSemi {
skip = true
} else {
t.Tok = lang.Semicolon
@@ -142,8 +142,8 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
if last.Str == ";" {
return tokens, nil
}
- if last.Tok == lang.Ident && sc.TokenProps[last.Str].SkipSemi ||
- last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi {
+ if last.Tok == lang.Ident && sc.TokenProps[last.Tok].SkipSemi ||
+ last.Tok.IsOperator() && !sc.TokenProps[last.Tok].SkipSemi {
return tokens, nil
}
tokens = append(tokens, Token{Tok: lang.Semicolon, Str: ";"})
@@ -179,7 +179,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
return Token{}, nil
case sc.isGroupSep(r):
// TODO: handle group separators.
- return Token{Tok: sc.TokenProps[string(r)].Token, Pos: p + i, Str: string(r)}, nil
+ return Token{Tok: sc.Tokens[string(r)], Pos: p + i, Str: string(r)}, nil
case sc.isLineSep(r):
return Token{Pos: p + i, Str: "\n"}, nil
case sc.isStr(r):
@@ -194,12 +194,12 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
err = ErrBlock
}
tok := Token{Pos: p + i, Str: b, Beg: 1, End: 1}
- tok.Tok = sc.TokenProps[tok.Name()].Token
+ tok.Tok = sc.Tokens[tok.Name()]
return tok, err
case sc.isOp(r):
op, isOp := sc.getOp(src[i:])
if isOp {
- t := sc.TokenProps[op].Token
+ t := sc.Tokens[op]
if t == lang.Illegal {
err = fmt.Errorf("%w: %s", ErrIllegal, op)
}
@@ -218,7 +218,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
default:
t, isDefined := sc.getToken(src[i:])
if isDefined {
- ident := sc.TokenProps[t].Token
+ ident := sc.Tokens[t]
if ident == lang.Illegal {
ident = lang.Ident
}