From 8080afb5a9e38116646d69155365fb6bd09ea40c Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Thu, 24 Aug 2023 10:59:39 +0200 Subject: fix: parser must be initialized before use --- lang/golang/go.go | 2 ++ parser/README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ parser/parse_test.go | 2 ++ parser/readme.md | 60 ---------------------------------------------------- scanner/README.md | 43 +++++++++++++++++++++++++++++++++++++ scanner/readme.md | 43 ------------------------------------- 6 files changed, 107 insertions(+), 103 deletions(-) create mode 100644 parser/README.md delete mode 100644 parser/readme.md create mode 100644 scanner/README.md delete mode 100644 scanner/readme.md diff --git a/lang/golang/go.go b/lang/golang/go.go index 1517689..604f24c 100644 --- a/lang/golang/go.go +++ b/lang/golang/go.go @@ -63,3 +63,5 @@ var GoParser = &parser.Parser{ `".."`: {parser.StringLit, 0, 0}, }, } + +func init() { GoParser.Init() } diff --git a/parser/README.md b/parser/README.md new file mode 100644 index 0000000..19d8778 --- /dev/null +++ b/parser/README.md @@ -0,0 +1,60 @@ +# Parser + +A parser takes an array of tokens (produced by the scanner) in input and +returns a node representing a syntax tree. A node is an object +containing a kind, the corresponding token and the ordered references to +descendent nodes. + +A goal is to make the parser generic enough so it can generate syntax +trees for most of existing programming languages (no claim of generality +yet), provided a small set of generating rules per language, and a small +set of validating rules (yet to be defined) to detect invalid +constructs. + +The input tokens are particular in the sense that they include classical +lexical items such as words, separators, numbers, but also strings and +nested blocks, which are resolved at scanning stage rather than parsing +stage. See the scanner for more details. 
+ +The language specification includes the following: + +- a scanner specification, to produce the set of possible tokens. +- a map of node specification per token name. The node specification + defines some parameters influencing how the tree is generated. + +## Development status + +A successful test must be provided to check the status. + +- [x] binary operator expressions +- [x] unary operator (prefix) expressions +- [ ] unary operator (suffix) expressions +- [x] operator precedence rules +- [x] parentheses in expressions +- [ ] semi-colon automatic insertion rules +- [x] call expressions +- [ ] nested calls +- [x] index expressions +- [x] single assignments +- [ ] multi assignments +- [x] simple `if` statement (no `else`) +- [ ] full `if` statement (including `else`, `else if`) +- [x] init expressions in `if` statements +- [x] statement blocks +- [ ] comments +- [ ] for statement +- [ ] switch statement +- [ ] select statement +- [x] return statement +- [x] function declaration +- [ ] method declaration +- [ ] anonymous functions (closures) +- [ ] type declaration +- [ ] var, const, type single declaration +- [ ] var, const, type multi declaration +- [ ] type parametric expressions +- [x] literal numbers (see scanner) +- [x] literal strings +- [ ] composite literal +- [ ] import statements +- [ ] go.mod syntax diff --git a/parser/parse_test.go b/parser/parse_test.go index 838e1c8..d13f893 100644 --- a/parser/parse_test.go +++ b/parser/parse_test.go @@ -65,6 +65,8 @@ var GoParser = &Parser{ }, } +func init() { GoParser.Init() } + func TestParse(t *testing.T) { for _, test := range goTests { test := test diff --git a/parser/readme.md b/parser/readme.md deleted file mode 100644 index 19d8778..0000000 --- a/parser/readme.md +++ /dev/null @@ -1,60 +0,0 @@ -# Parser - -A parser takes an array of tokens (produced by the scanner) in input and -returns a node representing a syntax tree. 
A node is an object -containing a kind, the corresponding token and the ordered references to -descendent nodes. - -A goal is to make the parser generic enough so it can generate syntax -trees for most of existing programming languages (no claim of generality -yet), provided a small set of generating rules per language, and a small -set of validating rules (yet to be defined) to detect invalid -constructs. - -The input tokens are particular in the sense that they include classical -lexical items such as words, separators, numbers, but also strings and -nested blocks, which are resolved at scanning stage rather than parsing -stage. See the scanner for more details. - -The language specification includes the following: - -- a scanner specification, to produce the set of possible tokens. -- a map of node specification per token name. The node specification - defines some parameters influing how the tree is generated. - -## Development status - -A successful test must be provided to check the status. 
- -- [x] binary operator expressions -- [x] unary operator (prefix) expressions -- [ ] unary operator (suffix) expressions -- [x] operator precedence rules -- [x] parenthesis in expressions -- [ ] semi-colon automatic insertion rules -- [x] call expressions -- [ ] nested calls -- [x] index expressions -- [x] single assignments -- [ ] multi assignments -- [x] simple `if` statement (no `else`) -- [ ] full `if` statement (including `else`, `else if`) -- [x] init expressions in `if` statements -- [x] statement blocks -- [ ] comments -- [ ] for statement -- [ ] switch statement -- [ ] select statement -- [x] return statement -- [x] function declaration -- [ ] method declaration -- [ ] anonymous functions (closures) -- [ ] type declaration -- [ ] var, const, type single declaration -- [ ] var, const, type multi declaration -- [ ] type parametric expressions -- [x] literal numbers (see scanner) -- [x] literal strings -- [ ] composite literal -- [ ] import statements -- [ ] go.mod syntax diff --git a/scanner/README.md b/scanner/README.md new file mode 100644 index 0000000..c131a9f --- /dev/null +++ b/scanner/README.md @@ -0,0 +1,43 @@ +# Scanner + +A scanner takes a string in input and returns an array of tokens. + +Tokens can be of the following kinds: +- identifier +- number +- operator +- separator +- string +- block + +Resolving nested blocks in the scanner is making the parser simple +and generic, without having to resort to parse tables. + +The lexical rules are provided by a language specification at language +level which includes the following: + +- a set of composable properties (1 per bit, on an integer) for each + character in the ASCII range (where all separator, operators and + reserved keywords must be defined). +- for each block or string, the specification of starting and ending + delimiter. + +## Development status + +A successful test must be provided to check the status. 
+ +- [x] numbers starting with a digit +- [ ] numbers starting otherwise +- [x] unescaped strings (including multiline) +- [x] escaped string (including multiline) +- [x] separators (in UTF-8 range) +- [x] single line string (\n not allowed) +- [x] identifiers (in UTF-8 range) +- [x] operators, concatenated or not +- [x] single character block/string delimiters +- [x] arbitrarily nested blocks and strings +- [x] multiple characters block/string delimiters +- [x] blocks delimited by operator characters +- [ ] blocks delimited by identifiers +- [x] blocks with delimiter inclusion/exclusion rules +- [ ] blocks delimited by indentation level (python, yaml, ...) diff --git a/scanner/readme.md b/scanner/readme.md deleted file mode 100644 index c131a9f..0000000 --- a/scanner/readme.md +++ /dev/null @@ -1,43 +0,0 @@ -# Scanner - -A scanner takes a string in input and returns an array of tokens. - -Tokens can be of the following kinds: -- identifier -- number -- operator -- separator -- string -- block - -Resolving nested blocks in the scanner is making the parser simple -and generic, without having to resort to parse tables. - -The lexical rules are provided by a language specification at language -level which includes the following: - -- a set of composable properties (1 per bit, on an integer) for each - character in the ASCII range (where all separator, operators and - reserved keywords must be defined). -- for each block or string, the specification of starting and ending - delimiter. - -## Development status - -A successful test must be provided to check the status. 
- -- [x] numbers starting with a digit -- [ ] numbers starting otherwise -- [x] unescaped strings (including multiline) -- [x] escaped string (including multiline) -- [x] separators (in UTF-8 range) -- [x] single line string (\n not allowed) -- [x] identifiers (in UTF-8 range) -- [x] operators, concatenated or not -- [x] single character block/string delimiters -- [x] arbitrarly nested blocks and strings -- [x] multiple characters block/string delimiters -- [x] blocks delimited by operator characters -- [ ] blocks delimited by identifiers -- [x] blocks with delimiter inclusion/exclusion rules -- [ ] blocks delimited by indentation level (python, yaml, ...) -- cgit v1.2.3