diff options
| author | Marc Vertes <mvertes@free.fr> | 2023-09-06 11:55:09 +0200 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2023-09-06 11:55:09 +0200 |
| commit | 555e5bfc03daaf40c78eb2ad540cd73ec4ae2831 (patch) | |
| tree | 9261e96271d544d2cc9ef8c4402cf065d6c4de63 /scanner | |
| parent | 6dd78f44adf6fb032d0ecd9db813651b9524fcac (diff) | |
scanner: add automatic insertion of semi-colon after newline
As specified in the Go specification, adapted to the following:
- the scanner recognise blocks as tokens
- the scanner is multi-language: define keywords in scanner spec
- as a result, we define how to skip semi-colon insertion rather
than how to add it.
Diffstat (limited to 'scanner')
| -rw-r--r-- | scanner/README.md | 1 | ||||
| -rw-r--r-- | scanner/scan.go | 54 | ||||
| -rw-r--r-- | scanner/scan_test.go | 50 |
3 files changed, 81 insertions, 24 deletions
diff --git a/scanner/README.md b/scanner/README.md index a51d8a7..76c174b 100644 --- a/scanner/README.md +++ b/scanner/README.md @@ -49,6 +49,7 @@ A successful test must be provided to check the status. - [x] single character block/string delimiters - [x] arbitrarly nested blocks and strings - [x] multiple characters block/string delimiters +- [x] semi-colon automatic insertion after newline - [x] blocks delimited by operator characters - [ ] blocks delimited by identifiers - [x] blocks with delimiter inclusion/exclusion rules diff --git a/scanner/scan.go b/scanner/scan.go index 27b6669..5579dc3 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -83,6 +83,7 @@ type Scanner struct { CharProp [ASCIILen]uint // special Character properties End map[string]string // end delimiters, indexed by start BlockProp map[string]uint // block properties + SkipSemi map[string]bool // words skipping automatic semicolon insertion after newline DotNum bool // true if a number can start with '.' IdAscii bool // true if an identifier can be in ASCII only Num_ bool // true if a number can contain _ character @@ -97,13 +98,13 @@ func (sc *Scanner) HasProp(r rune, p uint) bool { return sc.CharProp[r]&p != 0 } -func (sc *Scanner) IsOp(r rune) bool { return sc.HasProp(r, CharOp) } -func (sc *Scanner) IsSep(r rune) bool { return sc.HasProp(r, CharSep) } -func (sc *Scanner) IsLineSep(r rune) bool { return sc.HasProp(r, CharLineSep) } -func (sc *Scanner) IsGroupSep(r rune) bool { return sc.HasProp(r, CharGroupSep) } -func (sc *Scanner) IsStr(r rune) bool { return sc.HasProp(r, CharStr) } -func (sc *Scanner) IsBlock(r rune) bool { return sc.HasProp(r, CharBlock) } -func (sc *Scanner) IsId(r rune) bool { +func (sc *Scanner) isOp(r rune) bool { return sc.HasProp(r, CharOp) } +func (sc *Scanner) isSep(r rune) bool { return sc.HasProp(r, CharSep) } +func (sc *Scanner) isLineSep(r rune) bool { return sc.HasProp(r, CharLineSep) } +func (sc *Scanner) isGroupSep(r rune) bool { return sc.HasProp(r, CharGroupSep) } +func (sc *Scanner) isStr(r rune) bool { return sc.HasProp(r, CharStr) } +func (sc *Scanner) isBlock(r rune) bool { return sc.HasProp(r, CharBlock) } +func (sc *Scanner) isDir(r rune) bool { return !sc.HasProp(r, CharOp|CharSep|CharLineSep|CharGroupSep|CharStr|CharBlock) } @@ -124,7 +125,7 @@ func (sc *Scanner) Init() { sc.sdre = regexp.MustCompile(re) } -func IsNum(r rune) bool { return '0' <= r && r <= '9' } +func isNum(r rune) bool { return '0' <= r && r <= '9' } func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { offset := 0 @@ -137,11 +138,24 @@ func (sc *Scanner) Scan(src string) (tokens []*Token, err error) { if t.kind == Undefined { break } + skip := false + if t.kind == Separator && t.content == " " && len(sc.SkipSemi) > 0 { + // Check for automatic semi-colon insertion after newline. + last := tokens[len(tokens)-1] + if last.kind == Identifier && sc.SkipSemi[last.content] || + last.kind == Operator && !sc.SkipSemi[last.content] { + skip = true + } else { + t.content = ";" + } + } nr := t.pos + len(t.content) s = s[nr:] t.pos += offset offset += nr - tokens = append(tokens, t) + if !skip { + tokens = append(tokens, t) + } } return tokens, nil } @@ -163,7 +177,7 @@ func (sc *Scanner) Next(src string) (tok *Token, err error) { // Skip initial separators. for i, r := range src { p = i - if !sc.IsSep(r) { + if !sc.isSep(r) { break } } @@ -172,26 +186,26 @@ func (sc *Scanner) Next(src string) (tok *Token, err error) { // Get token according to its first characters. for i, r := range src { switch { - case sc.IsSep(r): + case sc.isSep(r): return &Token{}, nil - case sc.IsGroupSep(r): + case sc.isGroupSep(r): // TODO: handle group separators. return &Token{kind: Separator, pos: p + i, content: string(r)}, nil - case sc.IsLineSep(r): + case sc.isLineSep(r): return &Token{kind: Separator, pos: p + i, content: " "}, nil - case sc.IsStr(r): + case sc.isStr(r): s, ok := sc.getStr(src[i:], 1) if !ok { err = ErrBlock } return &Token{kind: String, pos: p + i, content: s, start: 1, end: 1}, err - case sc.IsBlock(r): + case sc.isBlock(r): b, ok := sc.getBlock(src[i:], 1) if !ok { err = ErrBlock } return &Token{kind: Block, pos: p + i, content: b, start: 1, end: 1}, err - case sc.IsOp(r): + case sc.isOp(r): op, isOp := sc.getOp(src[i:]) if isOp { return &Token{kind: Operator, pos: p + i, content: op}, nil @@ -204,7 +218,7 @@ func (sc *Scanner) Next(src string) (tok *Token, err error) { } return &Token{kind: String, pos: p + i, content: s, start: len(op), end: len(op)}, err } - case IsNum(r): + case isNum(r): c, v := sc.getNum(src[i:]) return &Token{kind: Number, pos: p + i, content: c, value: v}, nil default: @@ -235,7 +249,7 @@ func (sc *Scanner) getId(src string) (s string, isId bool) { func (sc *Scanner) nextId(src string) (s string) { for i, r := range src { - if !sc.IsId(r) { + if !sc.isDir(r) { break } s = src[:i+1] @@ -245,7 +259,7 @@ func (sc *Scanner) nextId(src string) (s string) { func (sc *Scanner) getOp(src string) (s string, isOp bool) { for i, r := range src { - if !sc.IsOp(r) { + if !sc.isOp(r) { break } s = src[:i+1] @@ -259,7 +273,7 @@ func (sc *Scanner) getOp(src string) (s string, isOp bool) { func (sc *Scanner) getNum(src string) (s string, v any) { // TODO: handle hexa, binary, octal, float and eng notations. for i, r := range src { - if !IsNum(r) { + if !isNum(r) { break } s = src[:i+1] diff --git a/scanner/scan_test.go b/scanner/scan_test.go index 41ee62e..695b2a3 100644 --- a/scanner/scan_test.go +++ b/scanner/scan_test.go @@ -55,6 +55,31 @@ var GoScanner = &Scanner{ "/*": CharStr, "//": CharStr | ExcludeEnd | EosValidEnd, }, + SkipSemi: map[string]bool{ + "++": true, + "--": true, + "case": true, + "chan": true, + "const": true, + "default": true, + "defer": true, + "else": true, + "for": true, + "func": true, + "go": true, + "goto": true, + "if": true, + "import": true, + "interface": true, + "map": true, + "package": true, + "range": true, + "select": true, + "struct": true, + "switch": true, + "type": true, + "var": true, + }, } func TestScan(t *testing.T) { @@ -69,9 +94,8 @@ func TestScan(t *testing.T) { errStr = err.Error() } if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) + t.Errorf("got error %v, want error %#v", errStr, test.err) } - t.Logf("%#v\n%v %v\n", test.src, token, errStr) if result := tokStr(token); result != test.tok { t.Errorf("got %v, want %v", result, test.tok) } @@ -98,7 +122,7 @@ var tests = []struct { tok: `"abc0" "+" "5" `, }, { // #03 src: "a+5\na=x-4", - tok: `"a" "+" "5" " " "a" "=" "x" "-" "4" `, + tok: `"a" "+" "5" ";" "a" "=" "x" "-" "4" `, }, { // #04 src: `return "hello world" + 4`, tok: `"return" "\"hello world\"" "+" "4" `, @@ -145,7 +169,7 @@ def"`, tok: `"/* a comment */" "a" "=" "2" `, }, { // #18 src: "return // quit\nbegin", - tok: `"return" "// quit" " " "begin" `, + tok: `"return" "// quit" ";" "begin" `, }, { // #19 src: "return // quit", tok: `"return" "// quit" `, @@ -158,4 +182,22 @@ def"`, }, { // #22 src: "a, b = 1, 2", tok: `"a" "," "b" "=" "1" "," "2" `, +}, { // #23 + src: "1 + \n2 + 3", + tok: `"1" "+" "2" "+" "3" `, +}, { // #24 + src: "i++\n2 + 3", + tok: `"i" "++" ";" "2" "+" "3" `, +}, { // #25 + src: "return\na = 1", + tok: `"return" ";" "a" "=" "1" `, +}, { // #26 + src: "if\na == 2 { return }", + tok: `"if" "a" "==" "2" "{ return }" `, +}, { // #27 + src: "f(4)\nreturn", + tok: `"f" "(4)" ";" "return" `, +}, { // #28 + src: "f(3).\nfield", + tok: `"f" "(3)" "." "field" `, }} |
