From aa4cc2b45ebadf4cea16d1e27149e13669f3a5fc Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Mon, 24 Jul 2023 14:18:27 +0200 Subject: scanner: compute numerical values (#2) The conversion to numerical values is done by the scanner so it's only done once. This will simplify and accelerate vm0 and the code generator. --- scanner/scan.go | 21 +++++++++++++++++---- scanner/scan_test.go | 20 ++++++++++---------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/scanner/scan.go b/scanner/scan.go index 854df6d..89d660e 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -3,6 +3,7 @@ package scanner import ( "errors" "fmt" + "strconv" "strings" ) @@ -42,6 +43,7 @@ type Token struct { content string start int end int + value any } func (t *Token) Kind() Kind { return t.kind } @@ -51,6 +53,7 @@ func (t *Token) End() int { return t.end } func (t *Token) Pos() int { return t.pos } func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] } func (t *Token) Prefix() string { return t.content[:t.start] } +func (t *Token) Value() any { return t.value } func (t *Token) Name() string { name := t.content @@ -61,7 +64,7 @@ func (t *Token) Name() string { } func NewToken(content string, pos int) Token { - return Token{pos, Custom, content, 0, 0} + return Token{pos, Custom, content, 0, 0, nil} } const ASCIILen = 1 << 7 // 128 @@ -163,7 +166,8 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { case sc.IsOp(r): return Token{kind: Operator, pos: p + i, content: sc.GetOp(src[i:])}, nil case IsNum(r): - return Token{kind: Number, pos: p + i, content: sc.GetNum(src[i:])}, nil + c, v := sc.GetNum(src[i:]) + return Token{kind: Number, pos: p + i, content: c, value: v}, nil default: return Token{kind: Identifier, pos: p + i, content: sc.GetId(src[i:])}, nil } @@ -191,7 +195,7 @@ func (sc *Scanner) GetOp(src string) (s string) { return s } -func (sc *Scanner) GetNum(src string) (s string) { +func (sc *Scanner) GetNum(src string) (s string, v any) { // TODO: handle hexa, binary, octal, float and eng notations. for _, r := range src { if !IsNum(r) { @@ -199,7 +203,16 @@ func (sc *Scanner) GetNum(src string) (s string) { } s += string(r) } - return s + var err error + if strings.ContainsRune(s, '.') { + v, err = strconv.ParseFloat(s, 64) + } else { + v, err = strconv.ParseInt(s, 0, 64) + } + if err != nil { + v = err + } + return s, v } func (sc *Scanner) GetGroupSep(src string) (s string) { diff --git a/scanner/scan_test.go b/scanner/scan_test.go index dd48faf..6a54d8e 100644 --- a/scanner/scan_test.go +++ b/scanner/scan_test.go @@ -50,29 +50,29 @@ func TestScan(t *testing.T) { tests := []struct{ src, result, errStr string }{ // Simple tokens: separators, identifiers, numbers, operators. {"", "[]", ""}, - {" abc + 5", "[{3 1 abc 0 0} {7 3 + 0 0} {9 2 5 0 0}]", ""}, - {"abc0+5 ", "[{0 1 abc0 0 0} {4 3 + 0 0} {5 2 5 0 0}]", ""}, - {"a+5\na=x-4", "[{0 1 a 0 0} {1 3 + 0 0} {2 2 5 0 0} {3 4 0 0} {4 1 a 0 0} {5 3 = 0 0} {6 1 x 0 0} {7 3 - 0 0} {8 2 4 0 0}]", ""}, + {" abc + 5", "[{3 1 abc 0 0 } {7 3 + 0 0 } {9 2 5 0 0 5}]", ""}, + {"abc0+5 ", "[{0 1 abc0 0 0 } {4 3 + 0 0 } {5 2 5 0 0 5}]", ""}, + {"a+5\na=x-4", "[{0 1 a 0 0 } {1 3 + 0 0 } {2 2 5 0 0 5} {3 4 0 0 } {4 1 a 0 0 } {5 3 = 0 0 } {6 1 x 0 0 } {7 3 - 0 0 } {8 2 4 0 0 4}]", ""}, // Strings. - {`return "hello world" + 4`, `[{0 1 return 0 0} {7 5 "hello world" 1 1} {21 3 + 0 0} {23 2 4 0 0}]`, ""}, - {`print(4 * (3+7))`, "[{0 1 print 0 0} {5 6 (4 * (3+7)) 1 1}]", ""}, + {`return "hello world" + 4`, `[{0 1 return 0 0 } {7 5 "hello world" 1 1 } {21 3 + 0 0 } {23 2 4 0 0 4}]`, ""}, + {`print(4 * (3+7))`, "[{0 1 print 0 0 } {5 6 (4 * (3+7)) 1 1 }]", ""}, {`"foo`, "[]", "1:1: block not terminated"}, {`abc def "foo truc`, "[]", "2:6: block not terminated"}, {`"ab\"`, "[]", "1:1: block not terminated"}, - {`"ab\\"`, `[{0 5 "ab\\" 1 1}]`, ""}, + {`"ab\\"`, `[{0 5 "ab\\" 1 1 }]`, ""}, {`"ab\\\"`, "[]", "1:1: block not terminated"}, - {`"ab\\\\"`, `[{0 5 "ab\\\\" 1 1}]`, ""}, + {`"ab\\\\"`, `[{0 5 "ab\\\\" 1 1 }]`, ""}, {`"abc def"`, "[]", "1:1: block not terminated"}, - {"`hello\nworld`", "[{0 5 `hello\nworld` 1 1}]", ""}, + {"`hello\nworld`", "[{0 5 `hello\nworld` 1 1 }]", ""}, // Nested blocks. // {`f("a)bc")+1, 3)`, "[{0 1 f } {1 6 (\"a)bc\", 3) (}]", ""}, {"2* (3+4", "[]", "1:4: block not terminated"}, - {`("fo)o")+1`, "[{0 6 (\"fo)o\") 1 1} {8 3 + 0 0} {9 2 1 0 0}]", ""}, - {`"foo""bar"`, "[{0 5 \"foo\" 1 1} {5 5 \"bar\" 1 1}]", ""}, + {`("fo)o")+1`, "[{0 6 (\"fo)o\") 1 1 } {8 3 + 0 0 } {9 2 1 0 0 1}]", ""}, + {`"foo""bar"`, "[{0 5 \"foo\" 1 1 } {5 5 \"bar\" 1 1 }]", ""}, } for _, test := range tests { -- cgit v1.2.3