diff options
Diffstat (limited to 'scanner')
| -rw-r--r-- | scanner/readme.md | 42 | ||||
| -rw-r--r-- | scanner/scan.go | 19 | ||||
| -rw-r--r-- | scanner/scan_test.go | 1 |
3 files changed, 54 insertions, 8 deletions
diff --git a/scanner/readme.md b/scanner/readme.md new file mode 100644 index 0000000..b8b31fb --- /dev/null +++ b/scanner/readme.md @@ -0,0 +1,42 @@ +# Scanner + +A scanner takes a string as input and returns an array of tokens. + +Tokens can be of the following kinds: +- identifier +- number +- operator +- separator +- string +- block + +Resolving nested blocks in the scanner makes the parser simple +and generic, without having to resort to parse tables. + +The lexical rules are provided by a language specification at language +level which includes the following: + +- a set of composable properties (1 per bit, on an integer) for each + character in the ASCII range (where all separators, operators and + reserved keywords must be defined). +- for each block or string, the specification of starting and ending + delimiters. + +## Development status + +A successful test must be provided to check the status. + +- [x] numbers starting with a digit +- [ ] numbers starting otherwise +- [x] unescaped strings (including multiline) +- [x] escaped strings (including multiline) +- [x] separators (in UTF-8 range) +- [ ] single line string (\n not allowed) +- [x] identifiers (in UTF-8 range) +- [x] operators, concatenated or not +- [x] single character block/string delimiters +- [x] arbitrarily nested blocks and strings +- [ ] multiple characters block/string delimiters +- [ ] blocks delimited by identifiers/operators/separators +- [ ] blocks with delimiter inclusion/exclusion rules +- [ ] blocks delimited by indentation level diff --git a/scanner/scan.go b/scanner/scan.go index 89d660e..066fc2a 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -46,14 +46,17 @@ type Token struct { value any } -func (t *Token) Kind() Kind { return t.kind } -func (t *Token) Content() string { return t.content } -func (t *Token) Start() int { return t.start } -func (t *Token) End() int { return t.end } -func (t *Token) Pos() int { return t.pos } -func (t *Token) Block() string { return 
t.content[t.start : len(t.content)-t.end] } -func (t *Token) Prefix() string { return t.content[:t.start] } -func (t *Token) Value() any { return t.value } +func (t *Token) Kind() Kind { return t.kind } +func (t *Token) Content() string { return t.content } +func (t *Token) Start() int { return t.start } +func (t *Token) End() int { return t.end } +func (t *Token) Pos() int { return t.pos } +func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] } +func (t *Token) Prefix() string { return t.content[:t.start] } +func (t *Token) Value() any { return t.value } +func (t *Token) IsBlock() bool { return t.kind == Block } +func (t *Token) IsOperator() bool { return t.kind == Operator } +func (t *Token) IsSeparator() bool { return t.kind == Separator } func (t *Token) Name() string { name := t.content diff --git a/scanner/scan_test.go b/scanner/scan_test.go index 6a54d8e..6be60a4 100644 --- a/scanner/scan_test.go +++ b/scanner/scan_test.go @@ -76,6 +76,7 @@ def"`, "[]", "1:1: block not terminated"}, } for _, test := range tests { + test := test t.Run("", func(t *testing.T) { errStr := "" token, err := GoScanner.Scan(test.src) |
