| field | value | date |
|---|---|---|
| author | Marc Vertes <mvertes@free.fr> | 2024-04-02 11:27:13 +0200 |
| committer | GitHub <noreply@github.com> | 2024-04-02 11:27:13 +0200 |
| commit | 1bff92c52b27b9a516599e172fe9852c3d99be38 | |
| tree | 26b30b5ec1a5537dcafd806d23e03a062475705d /scanner/scan.go | |
| parent | 362f7c9c45598b429c92e67756f41b690043e0c4 | |
chore: add linters and some lint fixes (#8)
* chore: add linters and some lint fixes
Configure some golangci-lint linters to get the code quality right.
Apply the first fixes.
The next step will be to add GitHub Actions to run lint and tests in CI.
* chore: more lint; fixed comments and variable names. No semantic change.
* chore: add Makefile
This Makefile is intended to be used as a local substitute for GitHub Actions.
Diffstat (limited to 'scanner/scan.go')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | scanner/scan.go | 92 |

1 file changed, 50 insertions, 42 deletions
```diff
diff --git a/scanner/scan.go b/scanner/scan.go
index 4c787fb..6ea99a9 100644
--- a/scanner/scan.go
+++ b/scanner/scan.go
@@ -1,3 +1,4 @@
+// Package scanner provide a language independent scanner.
 package scanner
 
 import (
@@ -10,6 +11,7 @@ import (
     "github.com/mvertes/parscan/lang"
 )
 
+// Error definitions.
 var (
     ErrBlock   = errors.New("block not terminated")
     ErrIllegal = errors.New("illegal token")
@@ -17,16 +19,20 @@ var (
 
 // Token defines a scanner token.
 type Token struct {
-    Id  lang.TokenId // token identificator
-    Pos int // position in source
-    Str string // string in source
-    Beg int // length of begin delimiter (block, string)
-    End int // length of end delimiter (block, string)
+    Tok lang.Token // token identificator
+    Pos int // position in source
+    Str string // string in source
+    Beg int // length of begin delimiter (block, string)
+    End int // length of end delimiter (block, string)
 }
 
-func (t *Token) Block() string { return t.Str[t.Beg : len(t.Str)-t.End] }
+// Block return the block content of t.
+func (t *Token) Block() string { return t.Str[t.Beg : len(t.Str)-t.End] }
+
+// Prefix returns the block starting delimiter of t.
 func (t *Token) Prefix() string { return t.Str[:t.Beg] }
 
+// Name return the name of t (short string for debugging).
 func (t *Token) Name() string {
     name := t.Str
     if t.Beg > 1 {
@@ -39,9 +45,9 @@ func (t *Token) Name() string {
 }
 
 func (t *Token) String() string {
-    s := t.Id.String()
-    if t.Id.IsLiteral() || t.Id.IsBlock() || t.Id == lang.Ident || t.Id == lang.Comment ||
-        t.Id == lang.Period || t.Id == lang.Label || t.Id == lang.Goto {
+    s := t.Tok.String()
+    if t.Tok.IsLiteral() || t.Tok.IsBlock() || t.Tok == lang.Ident || t.Tok == lang.Comment ||
+        t.Tok == lang.Period || t.Tok == lang.Label || t.Tok == lang.Goto {
         s += strconv.Quote(t.Str)
     }
     return s
@@ -54,6 +60,7 @@ type Scanner struct {
     sdre *regexp.Regexp // string delimiters regular expression
 }
 
+// NewScanner returns a new scanner for a given language specification.
 func NewScanner(spec *lang.Spec) *Scanner {
     sc := &Scanner{Spec: spec}
@@ -77,25 +84,26 @@ func NewScanner(spec *lang.Spec) *Scanner {
     return sc
 }
 
-func (sc *Scanner) HasProp(r rune, p uint) bool {
+func (sc *Scanner) hasProp(r rune, p uint) bool {
     if r >= lang.ASCIILen {
         return false
     }
     return sc.CharProp[r]&p != 0
 }
 
-func (sc *Scanner) isOp(r rune) bool { return sc.HasProp(r, lang.CharOp) }
-func (sc *Scanner) isSep(r rune) bool { return sc.HasProp(r, lang.CharSep) }
-func (sc *Scanner) isLineSep(r rune) bool { return sc.HasProp(r, lang.CharLineSep) }
-func (sc *Scanner) isGroupSep(r rune) bool { return sc.HasProp(r, lang.CharGroupSep) }
-func (sc *Scanner) isStr(r rune) bool { return sc.HasProp(r, lang.CharStr) }
-func (sc *Scanner) isBlock(r rune) bool { return sc.HasProp(r, lang.CharBlock) }
+func (sc *Scanner) isOp(r rune) bool { return sc.hasProp(r, lang.CharOp) }
+func (sc *Scanner) isSep(r rune) bool { return sc.hasProp(r, lang.CharSep) }
+func (sc *Scanner) isLineSep(r rune) bool { return sc.hasProp(r, lang.CharLineSep) }
+func (sc *Scanner) isGroupSep(r rune) bool { return sc.hasProp(r, lang.CharGroupSep) }
+func (sc *Scanner) isStr(r rune) bool { return sc.hasProp(r, lang.CharStr) }
+func (sc *Scanner) isBlock(r rune) bool { return sc.hasProp(r, lang.CharBlock) }
 
 func (sc *Scanner) isDir(r rune) bool {
-    return !sc.HasProp(r, lang.CharOp|lang.CharSep|lang.CharLineSep|lang.CharGroupSep|lang.CharStr|lang.CharBlock)
+    return !sc.hasProp(r, lang.CharOp|lang.CharSep|lang.CharLineSep|lang.CharGroupSep|lang.CharStr|lang.CharBlock)
 }
 
 func isNum(r rune) bool { return '0' <= r && r <= '9' }
 
+// Scan performs a lexical analysis on src and returns tokens or an error.
 func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
     offset := 0
     s := strings.TrimSpace(src)
@@ -104,18 +112,18 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
         if err != nil {
             return nil, fmt.Errorf("%s: %w", loc(src, offset+t.Pos), err)
         }
-        if t.Id == lang.Illegal && t.Str == "" {
+        if t.Tok == lang.Illegal && t.Str == "" {
             break
         }
         skip := false
         if len(tokens) > 0 && t.Str == "\n" {
             // Check for automatic semi-colon insertion after newline.
             last := tokens[len(tokens)-1]
-            if last.Id.IsKeyword() && sc.TokenProps[last.Str].SkipSemi ||
-                last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi {
+            if last.Tok.IsKeyword() && sc.TokenProps[last.Str].SkipSemi ||
+                last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi {
                 skip = true
             } else {
-                t.Id = lang.Semicolon
+                t.Tok = lang.Semicolon
                 t.Str = ";"
             }
         }
@@ -133,9 +141,9 @@ func (sc *Scanner) Scan(src string, semiEOF bool) (tokens []Token, err error) {
        if last.Str == ";" {
            return tokens, nil
        }
-       if !(last.Id == lang.Ident && sc.TokenProps[last.Str].SkipSemi ||
-           last.Id.IsOperator() && !sc.TokenProps[last.Str].SkipSemi) {
-           tokens = append(tokens, Token{Id: lang.Semicolon, Str: ";"})
+       if !(last.Tok == lang.Ident && sc.TokenProps[last.Str].SkipSemi ||
+           last.Tok.IsOperator() && !sc.TokenProps[last.Str].SkipSemi) {
+           tokens = append(tokens, Token{Tok: lang.Semicolon, Str: ";"})
        }
    }
    return tokens, nil
@@ -171,7 +179,7 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
        return Token{}, nil
    case sc.isGroupSep(r):
        // TODO: handle group separators.
-       return Token{Id: sc.TokenProps[string(r)].TokenId, Pos: p + i, Str: string(r)}, nil
+       return Token{Tok: sc.TokenProps[string(r)].Token, Pos: p + i, Str: string(r)}, nil
    case sc.isLineSep(r):
        return Token{Pos: p + i, Str: "\n"}, nil
    case sc.isStr(r):
@@ -179,23 +187,23 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
        if !ok {
            err = ErrBlock
        }
-       return Token{Id: lang.String, Pos: p + i, Str: s, Beg: 1, End: 1}, err
+       return Token{Tok: lang.String, Pos: p + i, Str: s, Beg: 1, End: 1}, err
    case sc.isBlock(r):
        b, ok := sc.getBlock(src[i:], 1)
        if !ok {
            err = ErrBlock
        }
        tok := Token{Pos: p + i, Str: b, Beg: 1, End: 1}
-       tok.Id = sc.TokenProps[tok.Name()].TokenId
+       tok.Tok = sc.TokenProps[tok.Name()].Token
        return tok, err
    case sc.isOp(r):
        op, isOp := sc.getOp(src[i:])
        if isOp {
-           id := sc.TokenProps[op].TokenId
-           if id == lang.Illegal {
+           t := sc.TokenProps[op].Token
+           if t == lang.Illegal {
                err = fmt.Errorf("%w: %s", ErrIllegal, op)
            }
-           return Token{Id: id, Pos: p + i, Str: op}, err
+           return Token{Tok: t, Pos: p + i, Str: op}, err
        }
        flag := sc.BlockProp[op]
        if flag&lang.CharStr != 0 {
@@ -203,41 +211,41 @@ func (sc *Scanner) Next(src string) (tok Token, err error) {
            if !ok {
                err = ErrBlock
            }
-           return Token{Id: lang.Comment, Pos: p + i, Str: s, Beg: len(op), End: len(op)}, err
+           return Token{Tok: lang.Comment, Pos: p + i, Str: s, Beg: len(op), End: len(op)}, err
        }
    case isNum(r):
-       return Token{Id: lang.Int, Pos: p + i, Str: sc.getNum(src[i:])}, nil
+       return Token{Tok: lang.Int, Pos: p + i, Str: sc.getNum(src[i:])}, nil
    default:
-       id, isId := sc.getId(src[i:])
-       if isId {
-           ident := sc.TokenProps[id].TokenId
+       t, isDefined := sc.getToken(src[i:])
+       if isDefined {
+           ident := sc.TokenProps[t].Token
            if ident == lang.Illegal {
                ident = lang.Ident
            }
-           return Token{Id: ident, Pos: p + i, Str: id}, nil
+           return Token{Tok: ident, Pos: p + i, Str: t}, nil
        }
-       flag := sc.BlockProp[id]
+       flag := sc.BlockProp[t]
        if flag&lang.CharBlock != 0 {
-           s, ok := sc.getBlock(src[i:], len(id))
+           s, ok := sc.getBlock(src[i:], len(t))
            if !ok {
                err = ErrBlock
            }
-           return Token{Pos: p + i, Str: s, Beg: len(id), End: len(id)}, err
+           return Token{Pos: p + i, Str: s, Beg: len(t), End: len(t)}, err
        }
    }
    return Token{}, nil
 }
 
-func (sc *Scanner) getId(src string) (s string, isId bool) {
-   s = sc.nextId(src)
+func (sc *Scanner) getToken(src string) (s string, isDefined bool) {
+   s = sc.nextToken(src)
    if _, match := sc.BlockProp[s]; match {
        return s, false
    }
    return s, true
 }
 
-func (sc *Scanner) nextId(src string) (s string) {
+func (sc *Scanner) nextToken(src string) (s string) {
    for i, r := range src {
        if !sc.isDir(r) {
            break
```
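For readers skimming the change, here is a minimal usage sketch of the scanner API as it reads after this rename (`Token.Id` → `Token.Tok`, `TokenProps[...].TokenId` → `.Token`). It is not part of the commit: the `scanner` import path and the choice of `semiEOF` are assumptions, and how a `*lang.Spec` is obtained is deliberately left to the caller.

```go
package example

import (
	"fmt"

	"github.com/mvertes/parscan/lang"    // spec types used by the scanner (lang.Spec, lang.Token)
	"github.com/mvertes/parscan/scanner" // assumed import path for scanner/scan.go
)

// dumpTokens builds a scanner from an existing language spec, scans src and
// prints each token. Obtaining the *lang.Spec itself is out of scope here.
func dumpTokens(spec *lang.Spec, src string) error {
	sc := scanner.NewScanner(spec)
	tokens, err := sc.Scan(src, false) // semiEOF=false is a hypothetical choice for this sketch
	if err != nil {
		return err
	}
	for _, t := range tokens {
		// Token.String() quotes literals, identifiers, comments and blocks.
		fmt.Printf("%4d %s\n", t.Pos, t.String())
	}
	return nil
}
```

Per the diff, block and string tokens keep their delimiters in `Str`; `t.Block()` returns the content between the delimiters and `t.Prefix()` the opening delimiter.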
