diff options
| author | Marc Vertes <mvertes@free.fr> | 2024-04-02 11:27:13 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-02 11:27:13 +0200 |
| commit | 1bff92c52b27b9a516599e172fe9852c3d99be38 (patch) | |
| tree | 26b30b5ec1a5537dcafd806d23e03a062475705d | |
| parent | 362f7c9c45598b429c92e67756f41b690043e0c4 (diff) | |
chore: add linters and some lint fixes (#8)
* chore: add linters and some lint fixes
Configure some golangci-lint linters to get the code quality right.
Apply the first fixes.
The next step will be to add GitHub Actions to run lint and tests in
GitHub CI.
* chore: more lint, fixed comments and variable names. No semantic change.
* chore: add Makefile
This Makefile is intended to be used as a local substitute for GitHub
Actions.
| -rw-r--r-- | .golangci.yaml | 10 | ||||
| -rw-r--r-- | Makefile | 11 | ||||
| -rw-r--r-- | lang/golang/go.go | 122 | ||||
| -rw-r--r-- | lang/spec.go | 15 | ||||
| -rw-r--r-- | lang/token.go | 64 | ||||
| -rw-r--r-- | lang/token_string.go (renamed from lang/tokenid_string.go) | 14 | ||||
| -rw-r--r-- | main.go | 11 | ||||
| -rw-r--r-- | parser/compiler.go | 18 | ||||
| -rw-r--r-- | parser/decl.go | 42 | ||||
| -rw-r--r-- | parser/expr.go | 68 | ||||
| -rw-r--r-- | parser/interpreter.go | 3 | ||||
| -rw-r--r-- | parser/parse.go | 161 | ||||
| -rw-r--r-- | parser/symbol.go | 1 | ||||
| -rw-r--r-- | parser/tokens.go | 21 | ||||
| -rw-r--r-- | parser/type.go | 33 | ||||
| -rw-r--r-- | scanner/scan.go | 92 | ||||
| -rw-r--r-- | vm/vm.go | 6 |
17 files changed, 386 insertions, 306 deletions
diff --git a/.golangci.yaml b/.golangci.yaml new file mode 100644 index 0000000..64e12a5 --- /dev/null +++ b/.golangci.yaml @@ -0,0 +1,10 @@ +linters: + enable: + - gocritic + - godot + - gofumpt + - gosec + - misspell + - predeclared + - reassign + - revive diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a55652e --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +# Static linting of source files. See .golangci.yaml for options. +lint: + golangci-lint run + +# Run tests with race detector, measure coverage. +test: + go test -race -covermode=atomic -coverprofile=cover.out ./... + +# Open coverage info in browser +cover: test + go tool cover -html=cover.out diff --git a/lang/golang/go.go b/lang/golang/go.go index 5b0ffa6..47baee2 100644 --- a/lang/golang/go.go +++ b/lang/golang/go.go @@ -1,7 +1,9 @@ +// Package golang provides the lexical specification of Go language. package golang import "github.com/mvertes/parscan/lang" +// GoSpec contains the lexical specification of Go. 
var GoSpec = &lang.Spec{ CharProp: [lang.ASCIILen]uint{ '\t': lang.CharSep, @@ -53,73 +55,73 @@ var GoSpec = &lang.Spec{ }, TokenProps: map[string]lang.TokenProp{ // Block tokens (can be nested) - "{..}": {TokenId: lang.BraceBlock}, - "[..]": {TokenId: lang.BracketBlock}, - "(..)": {TokenId: lang.ParenBlock}, + "{..}": {Token: lang.BraceBlock}, + "[..]": {Token: lang.BracketBlock}, + "(..)": {Token: lang.ParenBlock}, // String tokens (not nested) - "//..": {TokenId: lang.Comment}, - "/*..": {TokenId: lang.Comment}, - `".."`: {TokenId: lang.String}, - "`..`": {TokenId: lang.String}, + "//..": {Token: lang.Comment}, + "/*..": {Token: lang.Comment}, + `".."`: {Token: lang.String}, + "`..`": {Token: lang.String}, // Separators - ",": {TokenId: lang.Comma}, - ";": {TokenId: lang.Semicolon}, - ".": {TokenId: lang.Period}, - ":": {TokenId: lang.Colon}, + ",": {Token: lang.Comma}, + ";": {Token: lang.Semicolon}, + ".": {Token: lang.Period}, + ":": {Token: lang.Colon}, // Operators - "&": {TokenId: lang.And, Precedence: 1}, - "*": {TokenId: lang.Mul, Precedence: 1}, - "/": {TokenId: lang.Quo, Precedence: 1}, - "%": {TokenId: lang.Rem, Precedence: 1}, - "<<": {TokenId: lang.Shl, Precedence: 1}, - ">>": {TokenId: lang.Shr, Precedence: 1}, - "+": {TokenId: lang.Add, Precedence: 2}, - "-": {TokenId: lang.Sub, Precedence: 2}, - "=": {TokenId: lang.Assign, Precedence: 6}, - "+=": {TokenId: lang.AddAssign, Precedence: 6}, - "<": {TokenId: lang.Less, Precedence: 3}, - ">": {TokenId: lang.Greater, Precedence: 3}, - "^": {TokenId: lang.Xor, Precedence: 2}, - "~": {TokenId: lang.Tilde}, - "&&": {TokenId: lang.Land, Precedence: 4}, - "||": {TokenId: lang.Lor, Precedence: 5}, - ":=": {TokenId: lang.Define, Precedence: 6}, - "==": {TokenId: lang.Equal, Precedence: 3}, - "<=": {TokenId: lang.LessEqual, Precedence: 3}, - ">=": {TokenId: lang.GreaterEqual, Precedence: 3}, - "->": {TokenId: lang.Arrow}, - "!": {TokenId: lang.Not}, - "++": {TokenId: lang.Inc, SkipSemi: true}, - "--": 
{TokenId: lang.Dec, SkipSemi: true}, + "&": {Token: lang.And, Precedence: 1}, + "*": {Token: lang.Mul, Precedence: 1}, + "/": {Token: lang.Quo, Precedence: 1}, + "%": {Token: lang.Rem, Precedence: 1}, + "<<": {Token: lang.Shl, Precedence: 1}, + ">>": {Token: lang.Shr, Precedence: 1}, + "+": {Token: lang.Add, Precedence: 2}, + "-": {Token: lang.Sub, Precedence: 2}, + "=": {Token: lang.Assign, Precedence: 6}, + "+=": {Token: lang.AddAssign, Precedence: 6}, + "<": {Token: lang.Less, Precedence: 3}, + ">": {Token: lang.Greater, Precedence: 3}, + "^": {Token: lang.Xor, Precedence: 2}, + "~": {Token: lang.Tilde}, + "&&": {Token: lang.Land, Precedence: 4}, + "||": {Token: lang.Lor, Precedence: 5}, + ":=": {Token: lang.Define, Precedence: 6}, + "==": {Token: lang.Equal, Precedence: 3}, + "<=": {Token: lang.LessEqual, Precedence: 3}, + ">=": {Token: lang.GreaterEqual, Precedence: 3}, + "->": {Token: lang.Arrow}, + "!": {Token: lang.Not}, + "++": {Token: lang.Inc, SkipSemi: true}, + "--": {Token: lang.Dec, SkipSemi: true}, // Reserved keywords - "break": {TokenId: lang.Break}, - "case": {TokenId: lang.Case, SkipSemi: true}, - "chan": {TokenId: lang.Chan, SkipSemi: true}, - "const": {TokenId: lang.Const, SkipSemi: true}, - "continue": {TokenId: lang.Continue}, - "default": {TokenId: lang.Case, SkipSemi: true}, - "defer": {TokenId: lang.Defer, SkipSemi: true}, - "else": {TokenId: lang.Else, SkipSemi: true}, - "fallthrough": {TokenId: lang.Fallthrough}, - "for": {TokenId: lang.For, SkipSemi: true}, - "func": {TokenId: lang.Func, SkipSemi: true}, - "go": {TokenId: lang.Go, SkipSemi: true}, - "goto": {TokenId: lang.Goto, SkipSemi: true}, - "if": {TokenId: lang.If, SkipSemi: true}, - "import": {TokenId: lang.Import, SkipSemi: true}, - "interface": {TokenId: lang.Interface, SkipSemi: true}, - "map": {TokenId: lang.Map, SkipSemi: true}, - "package": {TokenId: lang.Package, SkipSemi: true}, - "range": {TokenId: lang.Range, SkipSemi: true}, - "return": {TokenId: lang.Return}, - 
"select": {TokenId: lang.Select, SkipSemi: true}, - "struct": {TokenId: lang.Struct, SkipSemi: true}, - "switch": {TokenId: lang.Switch, SkipSemi: true}, - "type": {TokenId: lang.Type, SkipSemi: true}, - "var": {TokenId: lang.Var, SkipSemi: true}, + "break": {Token: lang.Break}, + "case": {Token: lang.Case, SkipSemi: true}, + "chan": {Token: lang.Chan, SkipSemi: true}, + "const": {Token: lang.Const, SkipSemi: true}, + "continue": {Token: lang.Continue}, + "default": {Token: lang.Case, SkipSemi: true}, + "defer": {Token: lang.Defer, SkipSemi: true}, + "else": {Token: lang.Else, SkipSemi: true}, + "fallthrough": {Token: lang.Fallthrough}, + "for": {Token: lang.For, SkipSemi: true}, + "func": {Token: lang.Func, SkipSemi: true}, + "go": {Token: lang.Go, SkipSemi: true}, + "goto": {Token: lang.Goto, SkipSemi: true}, + "if": {Token: lang.If, SkipSemi: true}, + "import": {Token: lang.Import, SkipSemi: true}, + "interface": {Token: lang.Interface, SkipSemi: true}, + "map": {Token: lang.Map, SkipSemi: true}, + "package": {Token: lang.Package, SkipSemi: true}, + "range": {Token: lang.Range, SkipSemi: true}, + "return": {Token: lang.Return}, + "select": {Token: lang.Select, SkipSemi: true}, + "struct": {Token: lang.Struct, SkipSemi: true}, + "switch": {Token: lang.Switch, SkipSemi: true}, + "type": {Token: lang.Type, SkipSemi: true}, + "var": {Token: lang.Var, SkipSemi: true}, }, } diff --git a/lang/spec.go b/lang/spec.go index a910f70..92d90f7 100644 --- a/lang/spec.go +++ b/lang/spec.go @@ -1,5 +1,7 @@ +// Package lang provides tokens for possibly multiple languages. package lang +// Lexical properties of tokens to allow scanning. const ( CharIllegal = 1 << iota CharOp @@ -16,26 +18,29 @@ const ( EosValidEnd // end of input string terminates block or string token ) +// ASCIILen is the length of the ASCII characters set. const ASCIILen = 1 << 7 // 128 +// TokenProp represent token properties for parsing. 
type TokenProp struct { - TokenId + Token SkipSemi bool // automatic semicolon insertion after newline Precedence int // operator precedence } +// Spec represents the token specification for scanning. type Spec struct { CharProp [ASCIILen]uint // special Character properties End map[string]string // end delimiters, indexed by start BlockProp map[string]uint // block properties TokenProps map[string]TokenProp // token properties DotNum bool // true if a number can start with '.' - IdAscii bool // true if an identifier can be in ASCII only - Num_ bool // true if a number can contain _ character + IdentASCII bool // true if an identifier can be in ASCII only + NumUnder bool // true if a number can contain _ character } -// HasInit stores if a statement may contain a simple init statement -var HasInit = map[TokenId]bool{ +// HasInit stores if a statement may contain a simple init statement. +var HasInit = map[Token]bool{ Case: true, For: true, If: true, diff --git a/lang/token.go b/lang/token.go index 613f2c6..7ad7bf1 100644 --- a/lang/token.go +++ b/lang/token.go @@ -1,23 +1,25 @@ package lang -//go:generate stringer -type=TokenId +//go:generate stringer -type=Token -type TokenId int +// Token represents a lexical token. +type Token int +// All known tokens for the set of supported languages. const ( - Illegal TokenId = iota + Illegal Token = iota Comment Ident - // Literal values + // Literal values. Char Float Imag Int String - // Binary operators (except indicated) - // Arithmetic and bitwise binary operators + // Binary operators (except indicated). + // Arithmetic and bitwise binary operators. Add // + Sub // - Mul // * @@ -31,7 +33,7 @@ const ( AndNot // &^ Period // . - // Binary operators returning a boolean + // Binary operators returning a boolean. Equal // == Greater // > GreaterEqual // >= @@ -41,7 +43,7 @@ const ( Lor // || NotEqual // != - // Assigment operators (arithmetic and bitwise) + // Assigment operators (arithmetic and bitwise). 
Define // := Assign // = AddAssign // += @@ -58,7 +60,7 @@ const ( Inc // ++ Dec // -- - // Unary operations + // Unary operations. Plus // unary + Minus // unary - Addr // unary & @@ -69,17 +71,17 @@ const ( Not // unary ! Tilde // unary ~ (underlying type) - // Separators (punctuation) + // Separators (punctuation). Comma // , Semicolon // ; Colon // : - // Block tokens + // Block tokens. ParenBlock // (..) BracketBlock // [..] BraceBlock // {..} - // Reserved keywords + // Reserved keywords. Break Case Chan @@ -106,7 +108,7 @@ const ( Type Var - // Internal virtual machine tokens (no corresponding keyword) + // Internal virtual machine tokens (no corresponding keyword). Call CallX EqualSet @@ -119,8 +121,9 @@ const ( New ) -// TODO: define UnaryOp per language -var UnaryOp = map[TokenId]TokenId{ +// UnaryOp contains the set of unary operators. +// TODO: define UnaryOp per language. +var UnaryOp = map[Token]Token{ Add: Plus, // + And: Addr, // & Not: Not, // ! @@ -130,11 +133,26 @@ var UnaryOp = map[TokenId]TokenId{ Xor: BitComp, // ^ } -func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var } -func (t TokenId) IsLiteral() bool { return t >= Char && t <= String } -func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde } -func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } -func (t TokenId) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not } -func (t TokenId) IsBinaryOp() bool { return t >= Add && t <= NotEqual } -func (t TokenId) IsUnaryOp() bool { return t >= Plus && t <= Tilde } -func (t TokenId) IsLogicalOp() bool { return t == Land || t == Lor } +// IsKeyword returns true if t is a keyword. +func (t Token) IsKeyword() bool { return t >= Break && t <= Var } + +// IsLiteral returns true if t is a literal value. +func (t Token) IsLiteral() bool { return t >= Char && t <= String } + +// IsOperator returns true if t is an operator. 
+func (t Token) IsOperator() bool { return t >= Add && t <= Tilde } + +// IsBlock returns true if t is a block kind of token. +func (t Token) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } + +// IsBoolOp returns true if t is boolean operator. +func (t Token) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not } + +// IsBinaryOp returns true if t is a binary operator (takes 2 operands). +func (t Token) IsBinaryOp() bool { return t >= Add && t <= NotEqual } + +// IsUnaryOp returns true if t is an unary operator (takes 1 operand). +func (t Token) IsUnaryOp() bool { return t >= Plus && t <= Tilde } + +// IsLogicalOp returns true if t is a logical operator. +func (t Token) IsLogicalOp() bool { return t == Land || t == Lor } diff --git a/lang/tokenid_string.go b/lang/token_string.go index 705edc6..6b19fca 100644 --- a/lang/tokenid_string.go +++ b/lang/token_string.go @@ -1,4 +1,4 @@ -// Code generated by "stringer -type=TokenId"; DO NOT EDIT. +// Code generated by "stringer -type=Token"; DO NOT EDIT. 
package lang @@ -103,13 +103,13 @@ func _() { _ = x[New-92] } -const _TokenId_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew" +const _Token_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew" -var _TokenId_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 454, 460, 466, 472, 476, 479, 483, 488, 496, 500, 505, 514, 526, 537, 542, 545} +var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 
368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 454, 460, 466, 472, 476, 479, 483, 488, 496, 500, 505, 514, 526, 537, 542, 545} -func (i TokenId) String() string { - if i < 0 || i >= TokenId(len(_TokenId_index)-1) { - return "TokenId(" + strconv.FormatInt(int64(i), 10) + ")" +func (i Token) String() string { + if i < 0 || i >= Token(len(_Token_index)-1) { + return "Token(" + strconv.FormatInt(int64(i), 10) + ")" } - return _TokenId_name[_TokenId_index[i]:_TokenId_index[i+1]] + return _Token_name[_Token_index[i]:_Token_index[i+1]] } @@ -44,14 +44,15 @@ func repl(interp Interpreter, in io.Reader) (err error) { for liner.Scan() { text += liner.Text() res, err := interp.Eval(text + "\n") - if err == nil { + switch { + case err == nil: if !res.IsNil() { fmt.Println(": ", res) } text, prompt = "", "> " - } else if errors.Is(err, scanner.ErrBlock) { + case errors.Is(err, scanner.ErrBlock): prompt = ">> " - } else { + default: fmt.Println("Error:", err) text, prompt = "", "> " } @@ -64,8 +65,8 @@ func run(arg []string) (err error) { rflag := flag.NewFlagSet("run", flag.ContinueOnError) rflag.Usage = func() { fmt.Println("Usage: parscan run [options] [path] [args]") - //fmt.Println("Options:") - //rflag.PrintDefaults() + // fmt.Println("Options:") + // rflag.PrintDefaults() } if err = rflag.Parse(arg); err != nil { return err diff --git a/parser/compiler.go b/parser/compiler.go index 57e176f..7a90597 100644 --- a/parser/compiler.go +++ b/parser/compiler.go @@ -12,6 +12,7 @@ import ( "github.com/mvertes/parscan/vm" ) +// Compiler represents the state of a compiler. type Compiler struct { *Parser vm.Code // produced code, to fill VM with @@ -21,6 +22,7 @@ type Compiler struct { strings map[string]int // locations of strings in Data } +// NewCompiler returns a new compiler state for a given scanner. 
func NewCompiler(scanner *scanner.Scanner) *Compiler { return &Compiler{ Parser: &Parser{Scanner: scanner, symbols: initUniverse(), framelen: map[string]int{}, labelCount: map[string]int{}}, @@ -29,6 +31,7 @@ func NewCompiler(scanner *scanner.Scanner) *Compiler { } } +// AddSym adds a new named value to the compiler symbol table, and returns its index in memory. func (c *Compiler) AddSym(name string, value vm.Value) int { p := len(c.Data) c.Data = append(c.Data, value) @@ -36,6 +39,7 @@ func (c *Compiler) AddSym(name string, value vm.Value) int { return p } +// Codegen generates vm code from parsed tokens. func (c *Compiler) Codegen(tokens Tokens) (err error) { log.Println("Codegen tokens:", tokens) fixList := Tokens{} // list of tokens to fix after we gathered all necessary information @@ -46,7 +50,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { pop := func() *symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } for i, t := range tokens { - switch t.Id { + switch t.Tok { case lang.Int: n, err := strconv.Atoi(t.Str) if err != nil { @@ -145,7 +149,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { case lang.Assign: st := tokens[i-1] - if st.Id == lang.Period || st.Id == lang.Index { + if st.Tok == lang.Period || st.Tok == lang.Index { emit(int64(t.Pos), vm.Vassign) break } @@ -182,7 +186,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { case lang.Ident: if i < len(tokens)-1 { - switch t1 := tokens[i+1]; t1.Id { + switch t1 := tokens[i+1]; t1.Tok { case lang.Define, lang.Assign, lang.Colon: continue } @@ -317,9 +321,10 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { return err } -func arithmeticOpType(s1, s2 *symbol) *vm.Type { return symtype(s1) } -func booleanOpType(s1, s2 *symbol) *vm.Type { return vm.TypeOf(true) } +func arithmeticOpType(s1, _ *symbol) *vm.Type { return symtype(s1) } +func booleanOpType(_, _ *symbol) *vm.Type { return vm.TypeOf(true) } +// PrintCode pretty prints the generated 
code in compiler. func (c *Compiler) PrintCode() { labels := map[int][]string{} // labels indexed by code location data := map[int]string{} // data indexed by frame location @@ -379,6 +384,7 @@ func (e entry) String() string { return e.name } +// PrintData pretty prints the generated global data symbols in compiler. func (c *Compiler) PrintData() { dict := c.symbolsByIndex() @@ -400,10 +406,12 @@ func (c *Compiler) symbolsByIndex() map[int]entry { return dict } +// Dump represents the state of a data dump. type Dump struct { Values []*DumpValue } +// DumpValue is a value of a dump state. type DumpValue struct { Index int Name string diff --git a/parser/decl.go b/parser/decl.go index 7638b75..b1cd13b 100644 --- a/parser/decl.go +++ b/parser/decl.go @@ -15,11 +15,11 @@ import ( var nilValue = vm.ValueOf(nil) -func (p *Parser) ParseConst(in Tokens) (out Tokens, err error) { +func (p *Parser) parseConst(in Tokens) (out Tokens, err error) { if len(in) < 2 { return out, errors.New("missing expression") } - if in[1].Id != lang.ParenBlock { + if in[1].Tok != lang.ParenBlock { return p.parseConstLine(in[1:]) } if in, err = p.Scan(in[1].Block(), false); err != nil { @@ -53,7 +53,7 @@ func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { } var vars []string if _, vars, err = p.parseParamTypes(decl, parseTypeVar); err != nil { - if errors.Is(err, MissingTypeErr) { + if errors.Is(err, ErrMissingType) { for _, lt := range decl.Split(lang.Comma) { vars = append(vars, lt[0].Str) name := strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/") @@ -68,7 +68,7 @@ func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { values = nil } for i, v := range values { - if v, err = p.ParseExpr(v); err != nil { + if v, err = p.parseExpr(v); err != nil { return out, err } cval, _, err := p.evalConstExpr(v) @@ -95,7 +95,7 @@ func (p *Parser) evalConstExpr(in Tokens) (cval constant.Value, length int, err return nil, 0, errors.New("missing argument") } t := in[l] - id := t.Id + 
id := t.Tok switch { case id.IsBinaryOp(): op1, l1, err := p.evalConstExpr(in[:l]) @@ -163,7 +163,7 @@ func constValue(c constant.Value) any { return nil } -var gotok = map[lang.TokenId]token.Token{ +var gotok = map[lang.Token]token.Token{ lang.Char: token.CHAR, lang.Imag: token.IMAG, lang.Int: token.INT, @@ -191,14 +191,14 @@ var gotok = map[lang.TokenId]token.Token{ lang.Not: token.NOT, } -func (p *Parser) ParseImport(in Tokens) (out Tokens, err error) { +func (p *Parser) parseImports(in Tokens) (out Tokens, err error) { if p.fname != "" { return out, errors.New("unexpected import") } if len(in) < 2 { return out, errors.New("missing expression") } - if in[1].Id != lang.ParenBlock { + if in[1].Tok != lang.ParenBlock { return p.parseImportLine(in[1:]) } if in, err = p.Scan(in[1].Block(), false); err != nil { @@ -219,7 +219,7 @@ func (p *Parser) parseImportLine(in Tokens) (out Tokens, err error) { if l != 1 && l != 2 { return out, errors.New("invalid number of arguments") } - if in[l-1].Id != lang.String { + if in[l-1].Tok != lang.String { return out, fmt.Errorf("invalid argument %v", in[0]) } pp := in[l-1].Block() @@ -248,11 +248,11 @@ func (p *Parser) parseImportLine(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseType(in Tokens) (out Tokens, err error) { +func (p *Parser) parseType(in Tokens) (out Tokens, err error) { if len(in) < 2 { - return out, MissingTypeErr + return out, ErrMissingType } - if in[1].Id != lang.ParenBlock { + if in[1].Tok != lang.ParenBlock { return p.parseTypeLine(in[1:]) } if in, err = p.Scan(in[1].Block(), false); err != nil { @@ -270,12 +270,12 @@ func (p *Parser) ParseType(in Tokens) (out Tokens, err error) { func (p *Parser) parseTypeLine(in Tokens) (out Tokens, err error) { if len(in) < 2 { - return out, MissingTypeErr + return out, ErrMissingType } - if in[0].Id != lang.Ident { + if in[0].Tok != lang.Ident { return out, errors.New("not an ident") } - isAlias := in[1].Id == lang.Assign + isAlias := 
in[1].Tok == lang.Assign toks := in[1:] if isAlias { toks = toks[1:] @@ -288,11 +288,11 @@ func (p *Parser) parseTypeLine(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseVar(in Tokens) (out Tokens, err error) { +func (p *Parser) parseVar(in Tokens) (out Tokens, err error) { if len(in) < 2 { return out, errors.New("missing expression") } - if in[1].Id != lang.ParenBlock { + if in[1].Tok != lang.ParenBlock { return p.parseVarLine(in[1:]) } if in, err = p.Scan(in[1].Block(), false); err != nil { @@ -316,7 +316,7 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) { } var vars []string if _, vars, err = p.parseParamTypes(decl, parseTypeVar); err != nil { - if errors.Is(err, MissingTypeErr) { + if errors.Is(err, ErrMissingType) { for _, lt := range decl.Split(lang.Comma) { vars = append(vars, lt[0].Str) name := strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/") @@ -336,13 +336,13 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) { values = nil } for i, v := range values { - if v, err = p.ParseExpr(v); err != nil { + if v, err = p.parseExpr(v); err != nil { return out, err } out = append(out, v...) out = append(out, - scanner.Token{Id: lang.Ident, Str: vars[i]}, - scanner.Token{Id: lang.Assign}) + scanner.Token{Tok: lang.Ident, Str: vars[i]}, + scanner.Token{Tok: lang.Assign}) } return out, err } diff --git a/parser/expr.go b/parser/expr.go index 4145240..9e96e42 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -9,33 +9,33 @@ import ( "github.com/mvertes/parscan/scanner" ) -func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { +func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { log.Println("ParseExpr in:", in) var ops, selectors Tokens var vl int - var selectorId string + var selectorIndex string // // Process tokens from last to first, the goal is to reorder the tokens in // a stack machine processing order, so it can be directly interpreted. 
// - if len(in) > 1 && in[0].Id == lang.Func { + if len(in) > 1 && in[0].Tok == lang.Func { // Function as value (i.e closure). - if out, err = p.ParseFunc(in); err != nil { + if out, err = p.parseFunc(in); err != nil { return out, err } // Get function label and use it as a symbol ident. fid := out[1] - fid.Id = lang.Ident + fid.Tok = lang.Ident out = append(out, fid) return out, err } for i := len(in) - 1; i >= 0; i-- { t := in[i] // temporary assumptions: binary operators, returning 1 value - switch t.Id { + switch t.Tok { case lang.Ident: - if i > 0 && in[i-1].Id == lang.Period { - selectorId = t.Str + if i > 0 && in[i-1].Tok == lang.Period { + selectorIndex = t.Str continue } // resolve symbol if not a selector rhs. @@ -48,16 +48,16 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { out = append(out, t) vl++ case lang.Period: - t.Str += selectorId + t.Str += selectorIndex selectors = append(Tokens{t}, selectors...) continue case lang.Int, lang.String: out = append(out, t) vl++ case lang.Define, lang.Add, lang.Sub, lang.Assign, lang.Equal, lang.Greater, lang.Less, lang.Mul, lang.Land, lang.Lor, lang.Shl, lang.Shr, lang.Not, lang.And: - if i == 0 || in[i-1].Id.IsOperator() { + if i == 0 || in[i-1].Tok.IsOperator() { // An operator preceded by an operator or no token is unary. - t.Id = lang.UnaryOp[t.Id] + t.Tok = lang.UnaryOp[t.Tok] j := len(out) - 1 l := out[j] if p.precedence(l) > 0 { @@ -73,7 +73,7 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { case lang.ParenBlock: // If the previous token is an arithmetic, logic or assign operator then // this parenthesis block is an enclosed expr, otherwise a call expr. 
- if i == 0 || in[i-1].Id.IsOperator() { + if i == 0 || in[i-1].Tok.IsOperator() { out = append(out, t) vl++ break @@ -83,15 +83,15 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { // func call: push args and func address then call out = append(out, t) vl++ - ops = append(ops, scanner.Token{Id: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) + ops = append(ops, scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) case lang.BracketBlock: out = append(out, t) vl++ - ops = append(ops, scanner.Token{Id: lang.Index, Pos: t.Pos}) + ops = append(ops, scanner.Token{Tok: lang.Index, Pos: t.Pos}) case lang.Comment: return out, nil default: - return nil, fmt.Errorf("expression not supported yet: %v: %q", t.Id, t.Str) + return nil, fmt.Errorf("expression not supported yet: %v: %q", t.Tok, t.Str) } if len(selectors) > 0 { out = append(out, selectors...) @@ -115,13 +115,13 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) // A logical operator (&&, ||) involves additional control flow operations. - if out, err = p.ParseLogical(out); err != nil { + if out, err = p.parseLogical(out); err != nil { return out, err } - if l := len(out) - 1; l >= 0 && (out[l].Id == lang.Define || out[l].Id == lang.Assign) { + if l := len(out) - 1; l >= 0 && (out[l].Tok == lang.Define || out[l].Tok == lang.Assign) { // Handle the assignment of a logical expression. 
s1 := p.subExprLen(out[:l]) - head, err := p.ParseLogical(out[:l-s1]) + head, err := p.parseLogical(out[:l-s1]) if err != nil { return out, err } @@ -132,9 +132,9 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { for i := len(out) - 1; i >= 0; i-- { t := out[i] var toks Tokens - switch t.Id { + switch t.Tok { case lang.ParenBlock, lang.BracketBlock: - if toks, err = p.ParseExprStr(t.Block()); err != nil { + if toks, err = p.parseExprStr(t.Block()); err != nil { return out, err } default: @@ -151,13 +151,13 @@ func (p *Parser) ParseExpr(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { +func (p *Parser) parseExprStr(s string) (tokens Tokens, err error) { if tokens, err = p.Scan(s, false); err != nil { return } var result Tokens for _, sub := range tokens.Split(lang.Comma) { - toks, err := p.ParseExpr(sub) + toks, err := p.parseExpr(sub) if err != nil { return result, err } @@ -166,34 +166,34 @@ func (p *Parser) ParseExprStr(s string) (tokens Tokens, err error) { return result, err } -// ParseLogical handles logical expressions with control flow (&& and ||) by +// parseLogical handles logical expressions with control flow (&& and ||) by // ensuring the left hand side is evaluated unconditionally first, then the // right hand side can be skipped or not by inserting a conditional jump and label. // If the last token is not a logical operator then the function is idempotent. 
-func (p *Parser) ParseLogical(in Tokens) (out Tokens, err error) { +func (p *Parser) parseLogical(in Tokens) (out Tokens, err error) { l := len(in) - 1 - if l < 0 || !in[l].Id.IsLogicalOp() { + if l < 0 || !in[l].Tok.IsLogicalOp() { return in, nil } xp := strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ rhsIndex := p.subExprLen(in[:l]) - lhs, err := p.ParseLogical(in[l-rhsIndex : l]) + lhs, err := p.parseLogical(in[l-rhsIndex : l]) if err != nil { return out, err } - rhs, err := p.ParseLogical(in[:l-rhsIndex]) + rhs, err := p.parseLogical(in[:l-rhsIndex]) if err != nil { return out, err } out = append(out, lhs...) - if in[l].Id == lang.Lor { - out = append(out, scanner.Token{Id: lang.JumpSetTrue, Str: p.scope + "x" + xp}) + if in[l].Tok == lang.Lor { + out = append(out, scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp}) } else { - out = append(out, scanner.Token{Id: lang.JumpSetFalse, Str: p.scope + "x" + xp}) + out = append(out, scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp}) } out = append(out, rhs...) - out = append(out, scanner.Token{Id: lang.Label, Str: p.scope + "x" + xp}) + out = append(out, scanner.Token{Tok: lang.Label, Str: p.scope + "x" + xp}) return out, err } @@ -201,7 +201,7 @@ func (p *Parser) ParseLogical(in Tokens) (out Tokens, err error) { func (p *Parser) subExprLen(in Tokens) int { l := len(in) - 1 last := in[l] - switch last.Id { + switch last.Tok { case lang.Int, lang.Float, lang.String, lang.Char, lang.Ident, lang.ParenBlock, lang.BracketBlock: return 1 case lang.Call: @@ -209,11 +209,11 @@ func (p *Parser) subExprLen(in Tokens) int { return 1 + s1 + p.subExprLen(in[:l-s1]) // TODO: add selector and index operators when ready } - if last.Id.IsBinaryOp() { + if last.Tok.IsBinaryOp() { s1 := p.subExprLen(in[:l]) return 1 + s1 + p.subExprLen(in[:l-s1]) } - if last.Id.IsUnaryOp() { + if last.Tok.IsUnaryOp() { return 1 + p.subExprLen(in[:l]) } return 0 // should not occur. TODO: diplay some error here. 
diff --git a/parser/interpreter.go b/parser/interpreter.go index d820416..e6ac95c 100644 --- a/parser/interpreter.go +++ b/parser/interpreter.go @@ -9,15 +9,18 @@ import ( const debug = true +// Interpreter represents the state of an interpreter. type Interpreter struct { *Compiler *vm.Machine } +// NewInterpreter returns a new interpreter state. func NewInterpreter(s *scanner.Scanner) *Interpreter { return &Interpreter{NewCompiler(s), &vm.Machine{}} } +// Eval interprets a src program and returns the last produced value if any, or an error. func (i *Interpreter) Eval(src string) (res reflect.Value, err error) { codeOffset := len(i.Code) dataOffset := 0 diff --git a/parser/parse.go b/parser/parse.go index ffcb8e2..bd19d81 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -1,3 +1,4 @@ +// Package parser implements a parser and compiler. package parser import ( @@ -11,6 +12,7 @@ import ( "github.com/mvertes/parscan/scanner" ) +// Parser represents the state of a parser. type Parser struct { *scanner.Scanner @@ -27,28 +29,30 @@ type Parser struct { clonum int // closure instance number } +// Scan performs lexical analysis on s and returns Tokens or an error. func (p *Parser) Scan(s string, endSemi bool) (Tokens, error) { return p.Scanner.Scan(s, endSemi) } +// Parse performs syntax analysis on s and returns Tokens or an error. func (p *Parser) Parse(src string) (out Tokens, err error) { log.Printf("Parse src: %#v\n", src) in, err := p.Scan(src, true) if err != nil { return out, err } - return p.ParseStmts(in) + return p.parseStmts(in) } -func (p *Parser) ParseStmts(in Tokens) (out Tokens, err error) { +func (p *Parser) parseStmts(in Tokens) (out Tokens, err error) { for len(in) > 0 { endstmt := in.Index(lang.Semicolon) if endstmt == -1 { return out, scanner.ErrBlock } // Skip over simple init statements for some tokens (if, for, ...)
- if lang.HasInit[in[0].Id] { - for in[endstmt-1].Id != lang.BraceBlock { + if lang.HasInit[in[0].Tok] { + for in[endstmt-1].Tok != lang.BraceBlock { e2 := in[endstmt+1:].Index(lang.Semicolon) if e2 == -1 { return out, scanner.ErrBlock @@ -56,7 +60,7 @@ func (p *Parser) ParseStmts(in Tokens) (out Tokens, err error) { endstmt += 1 + e2 } } - o, err := p.ParseStmt(in[:endstmt]) + o, err := p.parseStmt(in[:endstmt]) if err != nil { return out, err } @@ -66,58 +70,58 @@ func (p *Parser) ParseStmts(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) { +func (p *Parser) parseStmt(in Tokens) (out Tokens, err error) { if len(in) == 0 { return nil, nil } log.Println("ParseStmt in:", in) - switch t := in[0]; t.Id { + switch t := in[0]; t.Tok { case lang.Break: - return p.ParseBreak(in) + return p.parseBreak(in) case lang.Continue: - return p.ParseContinue(in) + return p.parseContinue(in) case lang.Const: - return p.ParseConst(in) + return p.parseConst(in) case lang.For: - return p.ParseFor(in) + return p.parseFor(in) case lang.Func: - return p.ParseFunc(in) + return p.parseFunc(in) case lang.Defer, lang.Go, lang.Fallthrough, lang.Select: - return out, fmt.Errorf("not yet implemented: %v", t.Id) + return out, fmt.Errorf("not yet implemented: %v", t.Tok) case lang.Goto: - return p.ParseGoto(in) + return p.parseGoto(in) case lang.If: - return p.ParseIf(in) + return p.parseIf(in) case lang.Import: - return p.ParseImport(in) + return p.parseImports(in) case lang.Package: // TODO: support packages return out, err case lang.Return: - return p.ParseReturn(in) + return p.parseReturn(in) case lang.Switch: - return p.ParseSwitch(in) + return p.parseSwitch(in) case lang.Type: - return p.ParseType(in) + return p.parseType(in) case lang.Var: - return p.ParseVar(in) + return p.parseVar(in) case lang.Ident: - if len(in) == 2 && in[1].Id == lang.Colon { - return p.ParseLabel(in) + if len(in) == 2 && in[1].Tok == lang.Colon { + 
return p.parseLabel(in) } fallthrough default: - return p.ParseExpr(in) + return p.parseExpr(in) } } -func (p *Parser) ParseBreak(in Tokens) (out Tokens, err error) { +func (p *Parser) parseBreak(in Tokens) (out Tokens, err error) { var label string switch len(in) { case 1: label = p.breakLabel case 2: - if in[1].Id != lang.Ident { + if in[1].Tok != lang.Ident { return nil, fmt.Errorf("invalid break statement") } // TODO: check validity of user provided label @@ -125,17 +129,17 @@ func (p *Parser) ParseBreak(in Tokens) (out Tokens, err error) { default: return nil, fmt.Errorf("invalid break statement") } - out = Tokens{{Id: lang.Goto, Str: label}} + out = Tokens{{Tok: lang.Goto, Str: label}} return out, err } -func (p *Parser) ParseContinue(in Tokens) (out Tokens, err error) { +func (p *Parser) parseContinue(in Tokens) (out Tokens, err error) { var label string switch len(in) { case 1: label = p.continueLabel case 2: - if in[1].Id != lang.Ident { + if in[1].Tok != lang.Ident { return nil, fmt.Errorf("invalid continue statement") } // TODO: check validity of user provided label @@ -143,19 +147,19 @@ func (p *Parser) ParseContinue(in Tokens) (out Tokens, err error) { default: return nil, fmt.Errorf("invalid continue statement") } - out = Tokens{{Id: lang.Goto, Str: label}} + out = Tokens{{Tok: lang.Goto, Str: label}} return out, err } -func (p *Parser) ParseGoto(in Tokens) (out Tokens, err error) { - if len(in) != 2 || in[1].Id != lang.Ident { +func (p *Parser) parseGoto(in Tokens) (out Tokens, err error) { + if len(in) != 2 || in[1].Tok != lang.Ident { return nil, fmt.Errorf("invalid goto statement") } // TODO: check validity of user provided label - return Tokens{{Id: lang.Goto, Str: p.funcScope + "/" + in[1].Str}}, nil + return Tokens{{Tok: lang.Goto, Str: p.funcScope + "/" + in[1].Str}}, nil } -func (p *Parser) ParseFor(in Tokens) (out Tokens, err error) { +func (p *Parser) parseFor(in Tokens) (out Tokens, err error) { // TODO: detect invalid code. 
fc := strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ @@ -177,42 +181,42 @@ func (p *Parser) ParseFor(in Tokens) (out Tokens, err error) { p.popScope() }() if len(init) > 0 { - if init, err = p.ParseStmt(init); err != nil { + if init, err = p.parseStmt(init); err != nil { return nil, err } out = init } - out = append(out, scanner.Token{Id: lang.Label, Str: p.scope + "b"}) + out = append(out, scanner.Token{Tok: lang.Label, Str: p.scope + "b"}) if len(cond) > 0 { - if cond, err = p.ParseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond); err != nil { return nil, err } out = append(out, cond...) - out = append(out, scanner.Token{Id: lang.JumpFalse, Str: p.scope + "e"}) + out = append(out, scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e"}) } if body, err = p.Parse(in[len(in)-1].Block()); err != nil { return nil, err } out = append(out, body...) if len(post) > 0 { - if post, err = p.ParseStmt(post); err != nil { + if post, err = p.parseStmt(post); err != nil { return nil, err } out = append(out, post...) } out = append(out, - scanner.Token{Id: lang.Goto, Str: p.scope + "b"}, - scanner.Token{Id: lang.Label, Str: p.scope + "e"}) + scanner.Token{Tok: lang.Goto, Str: p.scope + "b"}, + scanner.Token{Tok: lang.Label, Str: p.scope + "e"}) return out, err } -func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) { +func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) { // TODO: handle anonymous functions (no function name) // TODO: handle receiver (methods) // TODO: handle parametric types (generics) // TODO: handle variadic parameters var fname string - if in[1].Id == lang.Ident { + if in[1].Tok == lang.Ident { fname = in[1].Str } else { fname = "#f" + strconv.Itoa(p.clonum) @@ -237,8 +241,9 @@ func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) { }() out = Tokens{ - {Id: lang.Goto, Str: fname + "_end"}, // Skip function definition. 
- {Id: lang.Label, Pos: in[0].Pos, Str: fname}} + {Tok: lang.Goto, Str: fname + "_end"}, // Skip function definition. + {Tok: lang.Label, Pos: in[0].Pos, Str: fname}, + } bi := in.Index(lang.BraceBlock) if bi < 0 { @@ -257,23 +262,23 @@ func (p *Parser) ParseFunc(in Tokens) (out Tokens, err error) { return out, err } if l := p.framelen[p.funcScope] - 1; l > 0 { - out = append(out, scanner.Token{Id: lang.Grow, Beg: l}) + out = append(out, scanner.Token{Tok: lang.Grow, Beg: l}) } out = append(out, toks...) - if out[len(out)-1].Id != lang.Return { - // Ensure that a return statment is always added at end of function. + if out[len(out)-1].Tok != lang.Return { + // Ensure that a return statement is always added at end of function. // TODO: detect missing or wrong returns. - x, err := p.ParseReturn(nil) + x, err := p.parseReturn(nil) if err != nil { return out, err } out = append(out, x...) } - out = append(out, scanner.Token{Id: lang.Label, Str: fname + "_end"}) + out = append(out, scanner.Token{Tok: lang.Label, Str: fname + "_end"}) return out, err } -func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) { +func (p *Parser) parseIf(in Tokens) (out Tokens, err error) { label := "if" + strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ p.pushScope(label) @@ -282,7 +287,7 @@ func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) { // get the destination labels already computed when jumps are set. 
for sc, i := 0, len(in)-1; i > 0; sc++ { ssc := strconv.Itoa(sc) - if in[i].Id != lang.BraceBlock { + if in[i].Tok != lang.BraceBlock { return nil, fmt.Errorf("expected '{', got %v", in[i]) } pre, err := p.Parse(in[i].Block()) @@ -290,13 +295,13 @@ func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) { return nil, err } if sc > 0 { - pre = append(pre, scanner.Token{Id: lang.Goto, Str: p.scope + "e0"}) + pre = append(pre, scanner.Token{Tok: lang.Goto, Str: p.scope + "e0"}) } - pre = append(pre, scanner.Token{Id: lang.Label, Str: p.scope + "e" + ssc}) + pre = append(pre, scanner.Token{Tok: lang.Label, Str: p.scope + "e" + ssc}) out = append(pre, out...) i-- - if in[i].Id == lang.Else { // Step over final 'else'. + if in[i].Tok == lang.Else { // Step over final 'else'. i-- continue } @@ -311,26 +316,26 @@ func (p *Parser) ParseIf(in Tokens) (out Tokens, err error) { cond = initcond[ii+1:] } if len(init) > 0 { - if init, err = p.ParseStmt(init); err != nil { + if init, err = p.parseStmt(init); err != nil { return nil, err } pre = append(pre, init...) } - if cond, err = p.ParseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond); err != nil { return nil, err } pre = append(pre, cond...) - pre = append(pre, scanner.Token{Id: lang.JumpFalse, Str: p.scope + "e" + ssc}) + pre = append(pre, scanner.Token{Tok: lang.JumpFalse, Str: p.scope + "e" + ssc}) out = append(pre, out...) i = ifp - if i > 1 && in[i].Id == lang.If && in[i-1].Id == lang.Else { // Step over 'else if'. + if i > 1 && in[i].Tok == lang.If && in[i-1].Tok == lang.Else { // Step over 'else if'. 
i -= 2 } } return out, err } -func (p *Parser) ParseSwitch(in Tokens) (out Tokens, err error) { +func (p *Parser) parseSwitch(in Tokens) (out Tokens, err error) { var init, cond, clauses Tokens initcond := in[1 : len(in)-1] if ii := initcond.Index(lang.Semicolon); ii < 0 { @@ -349,14 +354,14 @@ func (p *Parser) ParseSwitch(in Tokens) (out Tokens, err error) { p.popScope() }() if len(init) > 0 { - if init, err = p.ParseStmt(init); err != nil { + if init, err = p.parseStmt(init); err != nil { return nil, err } out = init } condSwitch := false if len(cond) > 0 { - if cond, err = p.ParseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond); err != nil { return nil, err } out = append(out, cond...) @@ -368,7 +373,7 @@ func (p *Parser) ParseSwitch(in Tokens) (out Tokens, err error) { // Make sure that the default clause is the last. lsc := len(sc) - 1 for i, cl := range sc { - if cl[1].Id == lang.Colon && i != lsc { + if cl[1].Tok == lang.Colon && i != lsc { sc[i], sc[lsc] = sc[lsc], sc[i] break } @@ -376,30 +381,30 @@ func (p *Parser) ParseSwitch(in Tokens) (out Tokens, err error) { // Process each clause. nc := len(sc) - 1 for i, cl := range sc { - co, err := p.ParseCaseClause(cl, i, nc, condSwitch) + co, err := p.parseCaseClause(cl, i, nc, condSwitch) if err != nil { return nil, err } out = append(out, co...) } - out = append(out, scanner.Token{Id: lang.Label, Str: p.breakLabel}) + out = append(out, scanner.Token{Tok: lang.Label, Str: p.breakLabel}) return out, err } -func (p *Parser) ParseCaseClause(in Tokens, index, max int, condSwitch bool) (out Tokens, err error) { - in = append(in, scanner.Token{Id: lang.Semicolon}) // Force a ';' at the end of body clause. +func (p *Parser) parseCaseClause(in Tokens, index, max int, condSwitch bool) (out Tokens, err error) { + in = append(in, scanner.Token{Tok: lang.Semicolon}) // Force a ';' at the end of body clause. 
var conds, body Tokens tl := in.Split(lang.Colon) if len(tl) != 2 { return nil, errors.New("invalid case clause") } conds = tl[0][1:] - if body, err = p.ParseStmts(tl[1]); err != nil { + if body, err = p.parseStmts(tl[1]); err != nil { return out, err } lcond := conds.Split(lang.Comma) for i, cond := range lcond { - if cond, err = p.ParseExpr(cond); err != nil { + if cond, err = p.parseExpr(cond); err != nil { return out, err } txt := fmt.Sprintf("%sc%d.%d", p.scope, index, i) @@ -413,33 +418,33 @@ func (p *Parser) ParseCaseClause(in Tokens, index, max int, condSwitch bool) (ou } else { next = fmt.Sprintf("%sc%d.%d", p.scope, index, i+1) } - out = append(out, scanner.Token{Id: lang.Label, Str: txt}) + out = append(out, scanner.Token{Tok: lang.Label, Str: txt}) if len(cond) > 0 { out = append(out, cond...) if condSwitch { - out = append(out, scanner.Token{Id: lang.EqualSet}) + out = append(out, scanner.Token{Tok: lang.EqualSet}) } - out = append(out, scanner.Token{Id: lang.JumpFalse, Str: next}) + out = append(out, scanner.Token{Tok: lang.JumpFalse, Str: next}) } out = append(out, body...) if i != len(lcond)-1 || index != max { - out = append(out, scanner.Token{Id: lang.Goto, Str: p.scope + "e"}) + out = append(out, scanner.Token{Tok: lang.Goto, Str: p.scope + "e"}) } } return out, err } -func (p *Parser) ParseLabel(in Tokens) (out Tokens, err error) { - return Tokens{{Id: lang.Label, Str: p.funcScope + "/" + in[0].Str}}, nil +func (p *Parser) parseLabel(in Tokens) (out Tokens, err error) { + return Tokens{{Tok: lang.Label, Str: p.funcScope + "/" + in[0].Str}}, nil } -func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) { +func (p *Parser) parseReturn(in Tokens) (out Tokens, err error) { if l := len(in); l > 1 { - if out, err = p.ParseExpr(in[1:]); err != nil { + if out, err = p.parseExpr(in[1:]); err != nil { return out, err } } else if l == 0 { - in = Tokens{{Id: lang.Return}} // Implicit return in functions with no return parameters. 
+ in = Tokens{{Tok: lang.Return}} // Implicit return in functions with no return parameters. } // TODO: the function symbol should be already present in the parser context. @@ -451,7 +456,7 @@ func (p *Parser) ParseReturn(in Tokens) (out Tokens, err error) { return out, err } -func (p *Parser) numItems(s string, sep lang.TokenId) int { +func (p *Parser) numItems(s string, sep lang.Token) int { tokens, err := p.Scan(s, false) if err != nil { return -1 diff --git a/parser/symbol.go b/parser/symbol.go index c8d89db..499e121 100644 --- a/parser/symbol.go +++ b/parser/symbol.go @@ -40,6 +40,7 @@ func symtype(s *symbol) *vm.Type { return vm.TypeOf(s.value) } +// AddSym adds a new named value at memory position i in the parser symbol table. func (p *Parser) AddSym(i int, name string, v vm.Value) { p.addSym(i, name, v, symValue, nil, false) } diff --git a/parser/tokens.go b/parser/tokens.go index 51085d9..78467c6 100644 --- a/parser/tokens.go +++ b/parser/tokens.go @@ -5,6 +5,7 @@ import ( "github.com/mvertes/parscan/scanner" ) +// Tokens represents a slice of tokens. type Tokens []scanner.Token func (toks Tokens) String() (s string) { @@ -14,27 +15,30 @@ func (toks Tokens) String() (s string) { return s } -func (toks Tokens) Index(id lang.TokenId) int { +// Index returns the index in toks of the first matching tok, or -1. +func (toks Tokens) Index(tok lang.Token) int { for i, t := range toks { - if t.Id == id { + if t.Tok == tok { return i } } return -1 } -func (toks Tokens) LastIndex(id lang.TokenId) int { +// LastIndex returns the index in toks of the last matching tok, or -1. +func (toks Tokens) LastIndex(tok lang.Token) int { for i := len(toks) - 1; i >= 0; i-- { - if toks[i].Id == id { + if toks[i].Tok == tok { return i } } return -1 } -func (toks Tokens) Split(id lang.TokenId) (result []Tokens) { +// Split returns a slice of token arrays, separated by tok.
+func (toks Tokens) Split(tok lang.Token) (result []Tokens) { for { - i := toks.Index(id) + i := toks.Index(tok) if i < 0 { return append(result, toks) } @@ -43,9 +47,10 @@ func (toks Tokens) Split(id lang.TokenId) (result []Tokens) { } } -func (toks Tokens) SplitStart(id lang.TokenId) (result []Tokens) { +// SplitStart is similar to Split, except the first token in toks is skipped. +func (toks Tokens) SplitStart(tok lang.Token) (result []Tokens) { for { - i := toks[1:].Index(id) + i := toks[1:].Index(tok) if i < 0 { return append(result, toks) } diff --git a/parser/type.go b/parser/type.go index 16390e9..b9ab0f9 100644 --- a/parser/type.go +++ b/parser/type.go @@ -18,17 +18,18 @@ const ( parseTypeType ) +// Type parsing error definitions. var ( - InvalidTypeErr = errors.New("invalid type") - MissingTypeErr = errors.New("missing type") - SyntaxErr = errors.New("syntax error") - TypeNotImplementedErr = errors.New("not implemented") + ErrInvalidType = errors.New("invalid type") + ErrMissingType = errors.New("missing type") + ErrSyntax = errors.New("syntax error") + ErrTypeNotImplemented = errors.New("not implemented") ) // ParseTypeExpr parses a list of tokens defining a type expresssion and returns // the corresponding runtime type or an error. 
func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { - switch in[0].Id { + switch in[0].Tok { case lang.BracketBlock: typ, err := p.ParseTypeExpr(in[1:]) if err != nil { @@ -65,11 +66,11 @@ func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { var out Tokens var indexArgs int switch l, in1 := len(in), in[1]; { - case l >= 4 && in1.Id == lang.ParenBlock && in[2].Id == lang.Ident: + case l >= 4 && in1.Tok == lang.ParenBlock && in[2].Tok == lang.Ident: indexArgs, out = 3, in[4:] - case l >= 3 && in1.Id == lang.Ident: + case l >= 3 && in1.Tok == lang.Ident: indexArgs, out = 2, in[3:] - case l >= 2 && in1.Id == lang.ParenBlock: + case l >= 2 && in1.Tok == lang.ParenBlock: indexArgs, out = 1, in[2:] default: return nil, fmt.Errorf("invalid func signature") @@ -86,7 +87,7 @@ func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { return nil, err } // Output parameters may be empty, or enclosed or not by parenthesis. - if len(out) == 1 && out[0].Id == lang.ParenBlock { + if len(out) == 1 && out[0].Tok == lang.ParenBlock { if out, err = p.Scan(out[0].Block(), false); err != nil { return nil, err } @@ -101,13 +102,13 @@ func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { // TODO: selector expression (pkg.type) s, _, ok := p.getSym(in[0].Str, p.scope) if !ok || s.kind != symType { - return nil, fmt.Errorf("%w: %s", InvalidTypeErr, in[0].Str) + return nil, fmt.Errorf("%w: %s", ErrInvalidType, in[0].Str) } return s.Type, nil case lang.Struct: - if len(in) != 2 || in[1].Id != lang.BraceBlock { - return nil, fmt.Errorf("%w: %v", SyntaxErr, in) + if len(in) != 2 || in[1].Tok != lang.BraceBlock { + return nil, fmt.Errorf("%w: %v", ErrSyntax, in) } if in, err = p.Scan(in[1].Block(), false); err != nil { return nil, err @@ -126,7 +127,7 @@ func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { return vm.StructOf(fields), nil default: - return nil, fmt.Errorf("%w: %v", TypeNotImplementedErr, in[0].Name()) 
+ return nil, fmt.Errorf("%w: %v", ErrTypeNotImplemented, in[0].Name()) } } @@ -147,9 +148,9 @@ func (p *Parser) parseParamTypes(in Tokens, flag typeFlag) (types []*vm.Type, va t = t[1:] if len(t) == 0 { if len(types) == 0 { - return nil, nil, MissingTypeErr + return nil, nil, ErrMissingType } - // Type was ommitted, apply the previous one from the right. + // Type was omitted, apply the previous one from the right. types = append([]*vm.Type{types[0]}, types...) p.addSymVar(i, param, types[0], flag, local) vars = append(vars, param) @@ -189,7 +190,7 @@ func (p *Parser) addSymVar(index int, name string, typ *vm.Type, flag typeFlag, // hasFirstParam returns true if the first token of a list is a parameter name. func (p *Parser) hasFirstParam(in Tokens) bool { - if in[0].Id != lang.Ident { + if in[0].Tok != lang.Ident { return false } s, _, ok := p.getSym(in[0].Str, p.scope) diff --git a/scanner/scan.go b/scanner/scan.go index 4c787fb..6ea99a9 100644 --- a/scanner/scan.go +++ b/scanner/scan.go @@ -1,3 +1,4 @@ +// Package scanner provide a language independent scanner. package scanner import ( @@ -10,6 +11,7 @@ import ( "github.com/mvertes/parscan/lang" ) +// Error definitions. var ( ErrBlock = errors.New("block not terminated") ErrIllegal = errors.New("illegal token") @@ -17,16 +19,20 @@ var ( // Token defines a scanner token. type Token struct { - Id lang.TokenId // token identificator - Pos int // position in source - Str string // string in source - Beg int // length of begin delimiter (block, string) - End int // length of end delimiter (block, string) + Tok lang.Token // token identificator + Pos int // position in source + Str string // string in source + Beg int // length of begin delimiter (block, string) + End int // length of end delimiter (block, string) } -func (t *Token) Block() string { return t.Str[t.Beg : len(t.Str)-t.End] } +// Block return the block content of t. 
+func (t *Token) Block() string { return t.Str[t.Beg : len(t.Str)-t.End] } + +// Prefix returns the block starting delimiter of t. func (t *Token) Prefix() string { return t.Str[:t.Beg] } +// Name return the name of t (short string for debugging). func (t *Token) Name() string { name := t.Str if t.Beg > 1 { @@ -39,9 +45,9 @@ func (t *Token) Name() string { } func (t *Token) String() string { - s := t.Id.String() - if t.Id.IsLiteral() || t.Id.IsBlock() || t.Id == lang.Ident || t.Id == lang.Comment || - t.Id == lang.Period || t.Id == lang.Label || t.Id == lang.Goto { + s := t.Tok.String() + if t.Tok.IsLiteral() || t.Tok.IsBlock() || t.Tok == lang.Ident || t.Tok == lang.Comment || + t.Tok == lang.Period || t.Tok == lang.Label || t.Tok == lang.Goto { s += strconv.Quote(t.Str) } return s @@ -54,6 +60,7 @@ type Scanner struct { sdre *regexp.Regexp // string delimiters regular expression } +// NewScanner returns a new scanner for a given language specification. func NewScanner(spec *lang.Spec) *Scanner { sc := &Scanner{Spec: spec} @@ -77,25 +84,26 @@ func NewScanner(spec *lang.Spec) *Scanner { return sc } -func (sc *Scanner) HasProp(r rune, p uint) bool { +func (sc *Scanner) hasProp(r rune, p uint) bool { if r >= lang.ASCIILen { return false } return sc.CharProp[r]&p != 0 } -func (sc *Scanner) isOp(r rune) bool { return sc.HasProp(r, lang.CharOp) } -func (sc *Scanner) isSep(r rune) bool { return sc.HasProp(r, lang.CharSep) } -func (sc *Scanner) isLineSep(r rune) bool { return sc.HasProp(r, lang.CharLineSep) } -func (sc *Scanner) isGroupSep(r rune) bool { return sc.HasProp(r, lang.CharGroupSep) } -func (sc *Scanner) isStr(r rune) bool { return sc.HasProp(r, lang.CharStr) } -func (sc *Scanner) isBlock(r rune) bool { return sc.HasProp(r, lang.CharBlock) } +func (sc *Scanner) isOp(r rune) bool { return sc.hasProp(r, lang.CharOp) } +func (sc *Scanner) isSep(r rune) bool { return sc.hasProp(r, lang.CharSep) } +func (sc *Scanner) isLineSep(r rune) bool { return sc.hasProp(r, 
lang.CharLineSep) } +func (sc *Scanner) isGroupSep(r rune) bool { return sc.hasProp(r, lang.CharGroupSep) } +func (sc *Scanner) isStr(r rune) bool { return sc.hasProp(r, lang.CharStr) } +func (sc *Scanner) isBlock(r rune) bool { return sc.hasProp(r, lang.CharBlock) } func (sc *Scanner) isDir(r rune) bool { - return !sc.HasProp(r, lang.CharOp|lang.CharSep|lang.CharLineSep|lang.CharGroupSep|lang.CharStr|lang.CharBlock) + return !sc.hasProp(r, lang.CharOp|lang.CharSep|lang.CharLineSep|lang.CharGroupSep|lang.CharStr|lang.CharBlock) } func isNum(r rune) bool { return '0' <= r && r <= '9' } +// Scan performs a lexical analysis on src and returns tokens or an error. func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) { offset := 0 s := strings.TrimSpace(src) @@ -104,18 +112,18 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) { if err != nil { return nil, fmt.Errorf("%s: %w", loc(src, offset+t.Pos), err) } - if t.Id == lang.Illegal && t.Str == "" { + if t.Tok == lang.Illegal && t.Str == "" { break } skip := false if len(tokens) > 0 && t.Str == "\n" { // Check for automatic semi-colon insertion after newline. 
last := tokens[len(tokens)-1] - if last.Id.IsKeyword() && sc.TokenProps[last.Str].SkipSemi || - last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi { + if last.Tok.IsKeyword() && sc.TokenProps[last.Str].SkipSemi || + last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi { skip = true } else { - t.Id = lang.Semicolon + t.Tok = lang.Semicolon t.Str = ";" } } @@ -133,9 +141,9 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) { if last.Str == ";" { return tokens, nil } - if !(last.Id == lang.Ident && sc.TokenProps[last.Str].SkipSemi || - last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi) { - tokens = append(tokens, Token{Id: lang.Semicolon, Str: ";"}) + if !(last.Tok == lang.Ident && sc.TokenProps[last.Str].SkipSemi || + last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi) { + tokens = append(tokens, Token{Tok: lang.Semicolon, Str: ";"}) } } return tokens, nil @@ -171,7 +179,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { return Token{}, nil case sc.isGroupSep(r): // TODO: handle group separators. 
- return Token{Id: sc.TokenProps[string(r)].TokenId, Pos: p + i, Str: string(r)}, nil + return Token{Tok: sc.TokenProps[string(r)].Token, Pos: p + i, Str: string(r)}, nil case sc.isLineSep(r): return Token{Pos: p + i, Str: "\n"}, nil case sc.isStr(r): @@ -179,23 +187,23 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { if !ok { err = ErrBlock } - return Token{Id: lang.String, Pos: p + i, Str: s, Beg: 1, End: 1}, err + return Token{Tok: lang.String, Pos: p + i, Str: s, Beg: 1, End: 1}, err case sc.isBlock(r): b, ok := sc.getBlock(src[i:], 1) if !ok { err = ErrBlock } tok := Token{Pos: p + i, Str: b, Beg: 1, End: 1} - tok.Id = sc.TokenProps[tok.Name()].TokenId + tok.Tok = sc.TokenProps[tok.Name()].Token return tok, err case sc.isOp(r): op, isOp := sc.getOp(src[i:]) if isOp { - id := sc.TokenProps[op].TokenId - if id == lang.Illegal { + t := sc.TokenProps[op].Token + if t == lang.Illegal { err = fmt.Errorf("%w: %s", ErrIllegal, op) } - return Token{Id: id, Pos: p + i, Str: op}, err + return Token{Tok: t, Pos: p + i, Str: op}, err } flag := sc.BlockProp[op] if flag&lang.CharStr != 0 { @@ -203,41 +211,41 @@ func (sc *Scanner) Next(src string) (tok Token, err error) { if !ok { err = ErrBlock } - return Token{Id: lang.Comment, Pos: p + i, Str: s, Beg: len(op), End: len(op)}, err + return Token{Tok: lang.Comment, Pos: p + i, Str: s, Beg: len(op), End: len(op)}, err } case isNum(r): - return Token{Id: lang.Int, Pos: p + i, Str: sc.getNum(src[i:])}, nil + return Token{Tok: lang.Int, Pos: p + i, Str: sc.getNum(src[i:])}, nil default: - id, isId := sc.getId(src[i:]) - if isId { - ident := sc.TokenProps[id].TokenId + t, isDefined := sc.getToken(src[i:]) + if isDefined { + ident := sc.TokenProps[t].Token if ident == lang.Illegal { ident = lang.Ident } - return Token{Id: ident, Pos: p + i, Str: id}, nil + return Token{Tok: ident, Pos: p + i, Str: t}, nil } - flag := sc.BlockProp[id] + flag := sc.BlockProp[t] if flag&lang.CharBlock != 0 { - s, ok := 
sc.getBlock(src[i:], len(id)) + s, ok := sc.getBlock(src[i:], len(t)) if !ok { err = ErrBlock } - return Token{Pos: p + i, Str: s, Beg: len(id), End: len(id)}, err + return Token{Pos: p + i, Str: s, Beg: len(t), End: len(t)}, err } } } return Token{}, nil } -func (sc *Scanner) getId(src string) (s string, isId bool) { - s = sc.nextId(src) +func (sc *Scanner) getToken(src string) (s string, isDefined bool) { + s = sc.nextToken(src) if _, match := sc.BlockProp[s]; match { return s, false } return s, true } -func (sc *Scanner) nextId(src string) (s string) { +func (sc *Scanner) nextToken(src string) (s string) { for i, r := range src { if !sc.isDir(r) { break @@ -11,7 +11,7 @@ const debug = true // Byte-code instruction set. const ( - // instruction effect on stack: values consumed -- values produced + // Instruction effect on stack: values consumed -- values produced. Nop = iota // -- Add // n1 n2 -- sum ; sum = n1+n2 Addr // a -- &a ; @@ -229,7 +229,7 @@ func (m *Machine) Run() (err error) { case Pop: mem = mem[:sp-int(op[2])] case Push: - //mem = append(mem, reflect.ValueOf(int(op[2]))) + // mem = append(mem, reflect.ValueOf(int(op[2]))) mem = append(mem, NewValue(TypeOf(0))) mem[sp].Data.SetInt(op[2]) case Grow: @@ -268,12 +268,14 @@ func (m *Machine) Push(v ...Value) (l int) { m.mem = append(m.mem, v...) return l } + func (m *Machine) Pop() (v Value) { l := len(m.mem) - 1 v = m.mem[l] m.mem = m.mem[:l] return v } + func (m *Machine) Top() (v Value) { if l := len(m.mem); l > 0 { v = m.mem[l-1] |
