summaryrefslogtreecommitdiff
path: root/scanner
diff options
context:
space:
mode:
Diffstat (limited to 'scanner')
-rw-r--r--scanner/readme.md42
-rw-r--r--scanner/scan.go19
-rw-r--r--scanner/scan_test.go1
3 files changed, 54 insertions, 8 deletions
diff --git a/scanner/readme.md b/scanner/readme.md
new file mode 100644
index 0000000..b8b31fb
--- /dev/null
+++ b/scanner/readme.md
@@ -0,0 +1,42 @@
+# Scanner
+
+A scanner takes a string as input and returns an array of tokens.
+
+Tokens can be of the following kinds:
+- identifier
+- number
+- operator
+- separator
+- string
+- block
+
+Resolving nested blocks in the scanner makes the parser simple
+and generic, without having to resort to parse tables.
+
+The lexical rules are provided by a language specification at language
+level which includes the following:
+
+- a set of composable properties (1 per bit, on an integer) for each
+ character in the ASCII range (where all separators, operators and
+ reserved keywords must be defined).
+- for each block or string, the specification of starting and ending
+ delimiters.
+
+## Development status
+
+A successful test must be provided to check the status.
+
+- [x] numbers starting with a digit
+- [ ] numbers starting otherwise
+- [x] unescaped strings (including multiline)
+- [x] escaped strings (including multiline)
+- [x] separators (in UTF-8 range)
+- [ ] single-line strings (\n not allowed)
+- [x] identifiers (in UTF-8 range)
+- [x] operators, concatenated or not
+- [x] single character block/string delimiters
+- [x] arbitrarily nested blocks and strings
+- [ ] multi-character block/string delimiters
+- [ ] blocks delimited by identifiers/operators/separators
+- [ ] blocks with delimiter inclusion/exclusion rules
+- [ ] blocks delimited by indentation level
diff --git a/scanner/scan.go b/scanner/scan.go
index 89d660e..066fc2a 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -46,14 +46,17 @@ type Token struct {
value any
}
-func (t *Token) Kind() Kind { return t.kind }
-func (t *Token) Content() string { return t.content }
-func (t *Token) Start() int { return t.start }
-func (t *Token) End() int { return t.end }
-func (t *Token) Pos() int { return t.pos }
-func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] }
-func (t *Token) Prefix() string { return t.content[:t.start] }
-func (t *Token) Value() any { return t.value }
+func (t *Token) Kind() Kind { return t.kind }
+func (t *Token) Content() string { return t.content }
+func (t *Token) Start() int { return t.start }
+func (t *Token) End() int { return t.end }
+func (t *Token) Pos() int { return t.pos }
+func (t *Token) Block() string { return t.content[t.start : len(t.content)-t.end] }
+func (t *Token) Prefix() string { return t.content[:t.start] }
+func (t *Token) Value() any { return t.value }
+func (t *Token) IsBlock() bool { return t.kind == Block }
+func (t *Token) IsOperator() bool { return t.kind == Operator }
+func (t *Token) IsSeparator() bool { return t.kind == Separator }
func (t *Token) Name() string {
name := t.content
diff --git a/scanner/scan_test.go b/scanner/scan_test.go
index 6a54d8e..6be60a4 100644
--- a/scanner/scan_test.go
+++ b/scanner/scan_test.go
@@ -76,6 +76,7 @@ def"`, "[]", "1:1: block not terminated"},
}
for _, test := range tests {
+ test := test
t.Run("", func(t *testing.T) {
errStr := ""
token, err := GoScanner.Scan(test.src)