summaryrefslogtreecommitdiff
path: root/scanner
diff options
context:
space:
mode:
author Marc Vertes <mvertes@free.fr> 2023-09-08 11:48:35 +0200
committer Marc Vertes <mvertes@free.fr> 2023-09-08 11:48:35 +0200
commit a21b9b12ad865a19ff687645082f9093c4101039 (patch)
tree 2b4863b152f8947b0cfb074f233fb127d4e6deca /scanner
parent e141f7da4832580aed8c238197e06f6d00737615 (diff)
scanner: automatic semi-colon insertion at EOF
Diffstat (limited to 'scanner')
-rw-r--r-- scanner/scan.go 31
-rw-r--r-- scanner/scan_test.go 56
2 files changed, 53 insertions, 34 deletions
diff --git a/scanner/scan.go b/scanner/scan.go
index 5579dc3..e0a32ca 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -49,6 +49,15 @@ type Token struct {
value any
}
+// Tokens is a sequence of scanned tokens.
+type Tokens []*Token
+
+// String renders the content of every token in Go syntax (%#v), each one
+// followed by a single space. An empty sequence yields the empty string.
+func (tks Tokens) String() (s string) {
+	for i := range tks {
+		quoted := fmt.Sprintf("%#v", tks[i].content)
+		s += quoted + " "
+	}
+	return s
+}
+
func (t *Token) Kind() Kind { return t.kind }
func (t *Token) Content() string { return t.content }
func (t *Token) Start() int { return t.start }
@@ -127,7 +136,7 @@ func (sc *Scanner) Init() {
func isNum(r rune) bool { return '0' <= r && r <= '9' }
-func (sc *Scanner) Scan(src string) (tokens []*Token, err error) {
+func (sc *Scanner) Scan(src string) (tokens Tokens, err error) {
offset := 0
s := src
for len(s) > 0 {
@@ -139,7 +148,7 @@ func (sc *Scanner) Scan(src string) (tokens []*Token, err error) {
break
}
skip := false
- if t.kind == Separator && t.content == " " && len(sc.SkipSemi) > 0 {
+ if len(tokens) > 0 && len(sc.SkipSemi) > 0 && t.kind == Separator && t.content == " " {
// Check for automatic semi-colon insertion after newline.
last := tokens[len(tokens)-1]
if last.kind == Identifier && sc.SkipSemi[last.content] ||
@@ -157,6 +166,14 @@ func (sc *Scanner) Scan(src string) (tokens []*Token, err error) {
tokens = append(tokens, t)
}
}
+ // Insertion of semi-colon at the end of the token stream.
+ if len(tokens) > 0 && len(sc.SkipSemi) > 0 {
+ last := tokens[len(tokens)-1]
+ if !(last.kind == Identifier && sc.SkipSemi[last.content] ||
+ last.kind == Operator && !sc.SkipSemi[last.content]) {
+ tokens = append(tokens, &Token{kind: Separator, content: ";"})
+ }
+ }
return tokens, nil
}
@@ -351,3 +368,13 @@ func (sc *Scanner) getBlock(src string, nstart int) (s string, ok bool) {
ok = prop&EosValidEnd != 0
return s, ok
}
+
+// Index reports the position of the first token in tokens whose content
+// equals s. It returns -1 when no token matches.
+func Index(tokens []*Token, s string) int {
+	for i := 0; i < len(tokens); i++ {
+		if tokens[i].content == s {
+			return i
+		}
+	}
+	return -1
+}
diff --git a/scanner/scan_test.go b/scanner/scan_test.go
index 695b2a3..bd0a13c 100644
--- a/scanner/scan_test.go
+++ b/scanner/scan_test.go
@@ -1,7 +1,6 @@
package scanner
import (
- "fmt"
"log"
"testing"
)
@@ -89,46 +88,39 @@ func TestScan(t *testing.T) {
test := test
t.Run("", func(t *testing.T) {
errStr := ""
- token, err := GoScanner.Scan(test.src)
+ tokens, err := GoScanner.Scan(test.src)
if err != nil {
errStr = err.Error()
}
if errStr != test.err {
t.Errorf("got error %v, want error %#v", errStr, test.err)
}
- if result := tokStr(token); result != test.tok {
+ if result := tokens.String(); result != test.tok {
t.Errorf("got %v, want %v", result, test.tok)
}
})
}
}
-func tokStr(tokens []*Token) (s string) {
- for _, t := range tokens {
- s += fmt.Sprintf("%#v ", t.content)
- }
- return s
-}
-
var tests = []struct {
src, tok, err string
}{{ // #00
src: "",
}, { // #01
src: " abc + 5",
- tok: `"abc" "+" "5" `,
+ tok: `"abc" "+" "5" ";" `,
}, { // #02
src: "abc0+5 ",
- tok: `"abc0" "+" "5" `,
+ tok: `"abc0" "+" "5" ";" `,
}, { // #03
src: "a+5\na=x-4",
- tok: `"a" "+" "5" ";" "a" "=" "x" "-" "4" `,
+ tok: `"a" "+" "5" ";" "a" "=" "x" "-" "4" ";" `,
}, { // #04
src: `return "hello world" + 4`,
- tok: `"return" "\"hello world\"" "+" "4" `,
+ tok: `"return" "\"hello world\"" "+" "4" ";" `,
}, { // #05
src: `print(4 * (3+7))`,
- tok: `"print" "(4 * (3+7))" `,
+ tok: `"print" "(4 * (3+7))" ";" `,
}, { // #06
src: `"foo`,
err: "1:1: block not terminated",
@@ -141,63 +133,63 @@ def "foo truc`,
err: "1:1: block not terminated",
}, { // #09
src: `"ab\\"`,
- tok: `"\"ab\\\\\"" `,
+ tok: `"\"ab\\\\\"" ";" `,
}, { // #10
src: `"ab\\\"`,
err: "1:1: block not terminated",
}, { // #11
src: `"ab\\\\"`,
- tok: `"\"ab\\\\\\\\\"" `,
+ tok: `"\"ab\\\\\\\\\"" ";" `,
}, { // #12
src: `"abc
def"`,
err: "1:1: block not terminated",
}, { // #13
src: "`hello\nworld`",
- tok: "\"`hello\\nworld`\" ",
+ tok: "\"`hello\\nworld`\" \";\" ",
}, { // #14
src: "2* (3+4",
err: "1:4: block not terminated",
}, { // #15
src: `("fo)o")+1`,
- tok: `"(\"fo)o\")" "+" "1" `,
+ tok: `"(\"fo)o\")" "+" "1" ";" `,
}, { // #16
src: `"foo""bar"`,
- tok: `"\"foo\"" "\"bar\"" `,
+ tok: `"\"foo\"" "\"bar\"" ";" `,
}, { // #17
src: "/* a comment */ a = 2",
- tok: `"/* a comment */" "a" "=" "2" `,
+ tok: `"/* a comment */" "a" "=" "2" ";" `,
}, { // #18
src: "return // quit\nbegin",
- tok: `"return" "// quit" ";" "begin" `,
+ tok: `"return" "// quit" ";" "begin" ";" `,
}, { // #19
src: "return // quit",
- tok: `"return" "// quit" `,
+ tok: `"return" "// quit" ";" `,
}, { // #20
src: "println(3 /* argh ) */)",
- tok: `"println" "(3 /* argh ) */)" `,
+ tok: `"println" "(3 /* argh ) */)" ";" `,
}, { // #21
src: `println("in f")`,
- tok: `"println" "(\"in f\")" `,
+ tok: `"println" "(\"in f\")" ";" `,
}, { // #22
src: "a, b = 1, 2",
- tok: `"a" "," "b" "=" "1" "," "2" `,
+ tok: `"a" "," "b" "=" "1" "," "2" ";" `,
}, { // #23
src: "1 + \n2 + 3",
- tok: `"1" "+" "2" "+" "3" `,
+ tok: `"1" "+" "2" "+" "3" ";" `,
}, { // #24
src: "i++\n2 + 3",
- tok: `"i" "++" ";" "2" "+" "3" `,
+ tok: `"i" "++" ";" "2" "+" "3" ";" `,
}, { // #25
src: "return\na = 1",
- tok: `"return" ";" "a" "=" "1" `,
+ tok: `"return" ";" "a" "=" "1" ";" `,
}, { // #26
src: "if\na == 2 { return }",
- tok: `"if" "a" "==" "2" "{ return }" `,
+ tok: `"if" "a" "==" "2" "{ return }" ";" `,
}, { // #27
src: "f(4)\nreturn",
- tok: `"f" "(4)" ";" "return" `,
+ tok: `"f" "(4)" ";" "return" ";" `,
}, { // #28
src: "f(3).\nfield",
- tok: `"f" "(3)" "." "field" `,
+ tok: `"f" "(3)" "." "field" ";" `,
}}