From a21b9b12ad865a19ff687645082f9093c4101039 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Fri, 8 Sep 2023 11:48:35 +0200 Subject: scanner: automatic semi-colon insertion at EOF --- scanner/scan.go | 31 +++++++++++++++++++++++++++-- scanner/scan_test.go | 56 ++++++++++++++++++++++------------------------------ 2 files changed, 53 insertions(+), 34 deletions(-) (limited to 'scanner') diff --git a/scanner/scan.go b/scanner/scan.go index 5579dc3..e0a32ca 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -49,6 +49,15 @@ type Token struct { value any } +type Tokens []*Token + +func (tks Tokens) String() (s string) { + for _, t := range tks { + s += fmt.Sprintf("%#v ", t.content) + } + return +} + func (t *Token) Kind() Kind { return t.kind } func (t *Token) Content() string { return t.content } func (t *Token) Start() int { return t.start } @@ -127,7 +136,7 @@ func (sc *Scanner) Init() { func isNum(r rune) bool { return '0' <= r && r <= '9' } -func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { +func (sc *Scanner) Scan(src string) (tokens Tokens, err error) { offset := 0 s := src for len(s) > 0 { @@ -139,7 +148,7 @@ func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { break } skip := false - if t.kind == Separator && t.content == " " && len(sc.SkipSemi) > 0 { + if len(tokens) > 0 && len(sc.SkipSemi) > 0 && t.kind == Separator && t.content == " " { // Check for automatic semi-colon insertion after newline. last := tokens[len(tokens)-1] if last.kind == Identifier && sc.SkipSemi[last.content] || @@ -157,6 +166,14 @@ func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { tokens = append(tokens, t) } } + // Insertion of semi-colon at the end of the token stream. + if len(tokens) > 0 && len(sc.SkipSemi) > 0 { + last := tokens[len(tokens)-1] + if !(last.kind == Identifier && sc.SkipSemi[last.content] || + last.kind == Operator && !sc.SkipSemi[last.content]) { + tokens = append(tokens, &Token{kind: Separator, content: ";"}) + } + } return tokens, nil } @@ -351,3 +368,13 @@ func (sc *Scanner) getBlock(src string, nstart int) (s string, ok bool) { ok = prop&EosValidEnd != 0 return s, ok } + +// Index returns the index of the first instance with content s in tokens, or -1 if not found. +func Index(tokens []*Token, s string) int { + for i, t := range tokens { + if t.content == s { + return i + } + } + return -1 +} diff --git a/scanner/scan_test.go b/scanner/scan_test.go index 695b2a3..bd0a13c 100644 --- a/scanner/scan_test.go +++ b/scanner/scan_test.go @@ -1,7 +1,6 @@ package scanner import ( - "fmt" "log" "testing" ) @@ -89,46 +88,39 @@ func TestScan(t *testing.T) { test := test t.Run("", func(t *testing.T) { errStr := "" - token, err := GoScanner.Scan(test.src) + tokens, err := GoScanner.Scan(test.src) if err != nil { errStr = err.Error() } if errStr != test.err { t.Errorf("got error %v, want error %#v", errStr, test.err) } - if result := tokStr(token); result != test.tok { + if result := tokens.String(); result != test.tok { t.Errorf("got %v, want %v", result, test.tok) } }) } } -func tokStr(tokens []*Token) (s string) { - for _, t := range tokens { - s += fmt.Sprintf("%#v ", t.content) - } - return s -} - var tests = []struct { src, tok, err string }{{ // #00 src: "", }, { // #01 src: " abc + 5", - tok: `"abc" "+" "5" `, + tok: `"abc" "+" "5" ";" `, }, { // #02 src: "abc0+5 ", - tok: `"abc0" "+" "5" `, + tok: `"abc0" "+" "5" ";" `, }, { // #03 src: "a+5\na=x-4", - tok: `"a" "+" "5" ";" "a" "=" "x" "-" "4" `, + tok: `"a" "+" "5" ";" "a" "=" "x" "-" "4" ";" `, }, { // #04 src: `return "hello world" + 4`, - tok: `"return" "\"hello world\"" "+" "4" `, + tok: `"return" "\"hello world\"" "+" "4" ";" `, }, { // #05 src: `print(4 * (3+7))`, - tok: `"print" "(4 * (3+7))" `, + tok: `"print" "(4 * (3+7))" ";" `, }, { // #06 src: `"foo`, err: "1:1: block not terminated", @@ -141,63 +133,63 @@ def "foo truc`, err: "1:1: block not terminated", }, { // #09 src: `"ab\\"`, - tok: `"\"ab\\\\\"" `, + tok: `"\"ab\\\\\"" ";" `, }, { // #10 src: `"ab\\\"`, err: "1:1: block not terminated", }, { // #11 src: `"ab\\\\"`, - tok: `"\"ab\\\\\\\\\"" `, + tok: `"\"ab\\\\\\\\\"" ";" `, }, { // #12 src: `"abc def"`, err: "1:1: block not terminated", }, { // #13 src: "`hello\nworld`", - tok: "\"`hello\\nworld`\" ", + tok: "\"`hello\\nworld`\" \";\" ", }, { // #14 src: "2* (3+4", err: "1:4: block not terminated", }, { // #15 src: `("fo)o")+1`, - tok: `"(\"fo)o\")" "+" "1" `, + tok: `"(\"fo)o\")" "+" "1" ";" `, }, { // #16 src: `"foo""bar"`, - tok: `"\"foo\"" "\"bar\"" `, + tok: `"\"foo\"" "\"bar\"" ";" `, }, { // #17 src: "/* a comment */ a = 2", - tok: `"/* a comment */" "a" "=" "2" `, + tok: `"/* a comment */" "a" "=" "2" ";" `, }, { // #18 src: "return // quit\nbegin", - tok: `"return" "// quit" ";" "begin" `, + tok: `"return" "// quit" ";" "begin" ";" `, }, { // #19 src: "return // quit", - tok: `"return" "// quit" `, + tok: `"return" "// quit" ";" `, }, { // #20 src: "println(3 /* argh ) */)", - tok: `"println" "(3 /* argh ) */)" `, + tok: `"println" "(3 /* argh ) */)" ";" `, }, { // #21 src: `println("in f")`, - tok: `"println" "(\"in f\")" `, + tok: `"println" "(\"in f\")" ";" `, }, { // #22 src: "a, b = 1, 2", - tok: `"a" "," "b" "=" "1" "," "2" `, + tok: `"a" "," "b" "=" "1" "," "2" ";" `, }, { // #23 src: "1 + \n2 + 3", - tok: `"1" "+" "2" "+" "3" `, + tok: `"1" "+" "2" "+" "3" ";" `, }, { // #24 src: "i++\n2 + 3", - tok: `"i" "++" ";" "2" "+" "3" `, + tok: `"i" "++" ";" "2" "+" "3" ";" `, }, { // #25 src: "return\na = 1", - tok: `"return" ";" "a" "=" "1" `, + tok: `"return" ";" "a" "=" "1" ";" `, }, { // #26 src: "if\na == 2 { return }", - tok: `"if" "a" "==" "2" "{ return }" `, + tok: `"if" "a" "==" "2" "{ return }" ";" `, }, { // #27 src: "f(4)\nreturn", - tok: `"f" "(4)" ";" "return" `, + tok: `"f" "(4)" ";" "return" ";" `, }, { // #28 src: "f(3).\nfield", - tok: `"f" "(3)" "." "field" `, + tok: `"f" "(3)" "." "field" ";" `, }} -- cgit v1.2.3