Diffstat (limited to 'lang')
-rw-r--r--  lang/golang/go.go                                             122
-rw-r--r--  lang/spec.go                                                   15
-rw-r--r--  lang/token.go                                                  64
-rw-r--r--  lang/token_string.go (renamed from lang/tokenid_string.go)     14
4 files changed, 120 insertions, 95 deletions
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 5b0ffa6..47baee2 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -1,7 +1,9 @@
+// Package golang provides the lexical specification of the Go language.
package golang
import "github.com/mvertes/parscan/lang"
+// GoSpec contains the lexical specification of Go.
var GoSpec = &lang.Spec{
CharProp: [lang.ASCIILen]uint{
'\t': lang.CharSep,
@@ -53,73 +55,73 @@ var GoSpec = &lang.Spec{
},
TokenProps: map[string]lang.TokenProp{
// Block tokens (can be nested)
- "{..}": {TokenId: lang.BraceBlock},
- "[..]": {TokenId: lang.BracketBlock},
- "(..)": {TokenId: lang.ParenBlock},
+ "{..}": {Token: lang.BraceBlock},
+ "[..]": {Token: lang.BracketBlock},
+ "(..)": {Token: lang.ParenBlock},
// String tokens (not nested)
- "//..": {TokenId: lang.Comment},
- "/*..": {TokenId: lang.Comment},
- `".."`: {TokenId: lang.String},
- "`..`": {TokenId: lang.String},
+ "//..": {Token: lang.Comment},
+ "/*..": {Token: lang.Comment},
+ `".."`: {Token: lang.String},
+ "`..`": {Token: lang.String},
// Separators
- ",": {TokenId: lang.Comma},
- ";": {TokenId: lang.Semicolon},
- ".": {TokenId: lang.Period},
- ":": {TokenId: lang.Colon},
+ ",": {Token: lang.Comma},
+ ";": {Token: lang.Semicolon},
+ ".": {Token: lang.Period},
+ ":": {Token: lang.Colon},
// Operators
- "&": {TokenId: lang.And, Precedence: 1},
- "*": {TokenId: lang.Mul, Precedence: 1},
- "/": {TokenId: lang.Quo, Precedence: 1},
- "%": {TokenId: lang.Rem, Precedence: 1},
- "<<": {TokenId: lang.Shl, Precedence: 1},
- ">>": {TokenId: lang.Shr, Precedence: 1},
- "+": {TokenId: lang.Add, Precedence: 2},
- "-": {TokenId: lang.Sub, Precedence: 2},
- "=": {TokenId: lang.Assign, Precedence: 6},
- "+=": {TokenId: lang.AddAssign, Precedence: 6},
- "<": {TokenId: lang.Less, Precedence: 3},
- ">": {TokenId: lang.Greater, Precedence: 3},
- "^": {TokenId: lang.Xor, Precedence: 2},
- "~": {TokenId: lang.Tilde},
- "&&": {TokenId: lang.Land, Precedence: 4},
- "||": {TokenId: lang.Lor, Precedence: 5},
- ":=": {TokenId: lang.Define, Precedence: 6},
- "==": {TokenId: lang.Equal, Precedence: 3},
- "<=": {TokenId: lang.LessEqual, Precedence: 3},
- ">=": {TokenId: lang.GreaterEqual, Precedence: 3},
- "->": {TokenId: lang.Arrow},
- "!": {TokenId: lang.Not},
- "++": {TokenId: lang.Inc, SkipSemi: true},
- "--": {TokenId: lang.Dec, SkipSemi: true},
+ "&": {Token: lang.And, Precedence: 1},
+ "*": {Token: lang.Mul, Precedence: 1},
+ "/": {Token: lang.Quo, Precedence: 1},
+ "%": {Token: lang.Rem, Precedence: 1},
+ "<<": {Token: lang.Shl, Precedence: 1},
+ ">>": {Token: lang.Shr, Precedence: 1},
+ "+": {Token: lang.Add, Precedence: 2},
+ "-": {Token: lang.Sub, Precedence: 2},
+ "=": {Token: lang.Assign, Precedence: 6},
+ "+=": {Token: lang.AddAssign, Precedence: 6},
+ "<": {Token: lang.Less, Precedence: 3},
+ ">": {Token: lang.Greater, Precedence: 3},
+ "^": {Token: lang.Xor, Precedence: 2},
+ "~": {Token: lang.Tilde},
+ "&&": {Token: lang.Land, Precedence: 4},
+ "||": {Token: lang.Lor, Precedence: 5},
+ ":=": {Token: lang.Define, Precedence: 6},
+ "==": {Token: lang.Equal, Precedence: 3},
+ "<=": {Token: lang.LessEqual, Precedence: 3},
+ ">=": {Token: lang.GreaterEqual, Precedence: 3},
+ "->": {Token: lang.Arrow},
+ "!": {Token: lang.Not},
+ "++": {Token: lang.Inc, SkipSemi: true},
+ "--": {Token: lang.Dec, SkipSemi: true},
// Reserved keywords
- "break": {TokenId: lang.Break},
- "case": {TokenId: lang.Case, SkipSemi: true},
- "chan": {TokenId: lang.Chan, SkipSemi: true},
- "const": {TokenId: lang.Const, SkipSemi: true},
- "continue": {TokenId: lang.Continue},
- "default": {TokenId: lang.Case, SkipSemi: true},
- "defer": {TokenId: lang.Defer, SkipSemi: true},
- "else": {TokenId: lang.Else, SkipSemi: true},
- "fallthrough": {TokenId: lang.Fallthrough},
- "for": {TokenId: lang.For, SkipSemi: true},
- "func": {TokenId: lang.Func, SkipSemi: true},
- "go": {TokenId: lang.Go, SkipSemi: true},
- "goto": {TokenId: lang.Goto, SkipSemi: true},
- "if": {TokenId: lang.If, SkipSemi: true},
- "import": {TokenId: lang.Import, SkipSemi: true},
- "interface": {TokenId: lang.Interface, SkipSemi: true},
- "map": {TokenId: lang.Map, SkipSemi: true},
- "package": {TokenId: lang.Package, SkipSemi: true},
- "range": {TokenId: lang.Range, SkipSemi: true},
- "return": {TokenId: lang.Return},
- "select": {TokenId: lang.Select, SkipSemi: true},
- "struct": {TokenId: lang.Struct, SkipSemi: true},
- "switch": {TokenId: lang.Switch, SkipSemi: true},
- "type": {TokenId: lang.Type, SkipSemi: true},
- "var": {TokenId: lang.Var, SkipSemi: true},
+ "break": {Token: lang.Break},
+ "case": {Token: lang.Case, SkipSemi: true},
+ "chan": {Token: lang.Chan, SkipSemi: true},
+ "const": {Token: lang.Const, SkipSemi: true},
+ "continue": {Token: lang.Continue},
+ "default": {Token: lang.Case, SkipSemi: true},
+ "defer": {Token: lang.Defer, SkipSemi: true},
+ "else": {Token: lang.Else, SkipSemi: true},
+ "fallthrough": {Token: lang.Fallthrough},
+ "for": {Token: lang.For, SkipSemi: true},
+ "func": {Token: lang.Func, SkipSemi: true},
+ "go": {Token: lang.Go, SkipSemi: true},
+ "goto": {Token: lang.Goto, SkipSemi: true},
+ "if": {Token: lang.If, SkipSemi: true},
+ "import": {Token: lang.Import, SkipSemi: true},
+ "interface": {Token: lang.Interface, SkipSemi: true},
+ "map": {Token: lang.Map, SkipSemi: true},
+ "package": {Token: lang.Package, SkipSemi: true},
+ "range": {Token: lang.Range, SkipSemi: true},
+ "return": {Token: lang.Return},
+ "select": {Token: lang.Select, SkipSemi: true},
+ "struct": {Token: lang.Struct, SkipSemi: true},
+ "switch": {Token: lang.Switch, SkipSemi: true},
+ "type": {Token: lang.Type, SkipSemi: true},
+ "var": {Token: lang.Var, SkipSemi: true},
},
}
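
For reference, a minimal sketch of reading the renamed field from GoSpec after this change. The import path of the golang subpackage is assumed from the lang/golang directory above, and the printed values follow from the table in this file:

    package main

    import (
    	"fmt"

    	"github.com/mvertes/parscan/lang"
    	"github.com/mvertes/parscan/lang/golang" // assumed import path for lang/golang
    )

    func main() {
    	// The embedded field of TokenProp is now lang.Token instead of lang.TokenId.
    	p := golang.GoSpec.TokenProps["+"]
    	fmt.Println(p.Token == lang.Add) // true: "+" is registered as the Add token
    	fmt.Println(p.Precedence)        // 2, as declared in GoSpec
    }
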
diff --git a/lang/spec.go b/lang/spec.go
index a910f70..92d90f7 100644
--- a/lang/spec.go
+++ b/lang/spec.go
@@ -1,5 +1,7 @@
+// Package lang provides tokens for possibly multiple languages.
package lang
+// Lexical properties of characters and tokens used for scanning.
const (
CharIllegal = 1 << iota
CharOp
@@ -16,26 +18,29 @@ const (
EosValidEnd // end of input string terminates block or string token
)
+// ASCIILen is the length of the ASCII character set.
const ASCIILen = 1 << 7 // 128
+// TokenProp represents the token properties used for parsing.
type TokenProp struct {
- TokenId
+ Token
SkipSemi bool // automatic semicolon insertion after newline
Precedence int // operator precedence
}
+// Spec represents the token specification for scanning.
type Spec struct {
CharProp [ASCIILen]uint // special Character properties
End map[string]string // end delimiters, indexed by start
BlockProp map[string]uint // block properties
TokenProps map[string]TokenProp // token properties
DotNum bool // true if a number can start with '.'
- IdAscii bool // true if an identifier can be in ASCII only
- Num_ bool // true if a number can contain _ character
+ IdentASCII bool // true if identifiers are restricted to ASCII characters
+ NumUnder bool // true if a number literal can contain the '_' character
}
-// HasInit stores if a statement may contain a simple init statement
-var HasInit = map[TokenId]bool{
+// HasInit indicates whether a statement may contain a simple init statement.
+var HasInit = map[Token]bool{
Case: true,
For: true,
If: true,
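
As an illustration of consulting the retyped HasInit map from a parser, a hedged sketch; hasInitClause and its package are hypothetical helpers, not part of parscan's API:

    package parserdemo // hypothetical package, for illustration only

    import "github.com/mvertes/parscan/lang"

    // hasInitClause reports whether the statement opened by tok may carry a
    // simple init statement (e.g. "if x := f(); x > 0 {"). Keys absent from
    // HasInit yield false.
    func hasInitClause(tok lang.Token) bool {
    	return lang.HasInit[tok]
    }
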
diff --git a/lang/token.go b/lang/token.go
index 613f2c6..7ad7bf1 100644
--- a/lang/token.go
+++ b/lang/token.go
@@ -1,23 +1,25 @@
package lang
-//go:generate stringer -type=TokenId
+//go:generate stringer -type=Token
-type TokenId int
+// Token represents a lexical token.
+type Token int
+// All known tokens for the set of supported languages.
const (
- Illegal TokenId = iota
+ Illegal Token = iota
Comment
Ident
- // Literal values
+ // Literal values.
Char
Float
Imag
Int
String
- // Binary operators (except indicated)
- // Arithmetic and bitwise binary operators
+ // Binary operators (except indicated).
+ // Arithmetic and bitwise binary operators.
Add // +
Sub // -
Mul // *
@@ -31,7 +33,7 @@ const (
AndNot // &^
Period // .
- // Binary operators returning a boolean
+ // Binary operators returning a boolean.
Equal // ==
Greater // >
GreaterEqual // >=
@@ -41,7 +43,7 @@ const (
Lor // ||
NotEqual // !=
- // Assigment operators (arithmetic and bitwise)
+ // Assignment operators (arithmetic and bitwise).
Define // :=
Assign // =
AddAssign // +=
@@ -58,7 +60,7 @@ const (
Inc // ++
Dec // --
- // Unary operations
+ // Unary operations.
Plus // unary +
Minus // unary -
Addr // unary &
@@ -69,17 +71,17 @@ const (
Not // unary !
Tilde // unary ~ (underlying type)
- // Separators (punctuation)
+ // Separators (punctuation).
Comma // ,
Semicolon // ;
Colon // :
- // Block tokens
+ // Block tokens.
ParenBlock // (..)
BracketBlock // [..]
BraceBlock // {..}
- // Reserved keywords
+ // Reserved keywords.
Break
Case
Chan
@@ -106,7 +108,7 @@ const (
Type
Var
- // Internal virtual machine tokens (no corresponding keyword)
+ // Internal virtual machine tokens (no corresponding keyword).
Call
CallX
EqualSet
@@ -119,8 +121,9 @@ const (
New
)
-// TODO: define UnaryOp per language
-var UnaryOp = map[TokenId]TokenId{
+// UnaryOp maps an operator token to its unary counterpart.
+// TODO: define UnaryOp per language.
+var UnaryOp = map[Token]Token{
Add: Plus, // +
And: Addr, // &
Not: Not, // !
@@ -130,11 +133,26 @@ var UnaryOp = map[TokenId]TokenId{
Xor: BitComp, // ^
}
-func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var }
-func (t TokenId) IsLiteral() bool { return t >= Char && t <= String }
-func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde }
-func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock }
-func (t TokenId) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not }
-func (t TokenId) IsBinaryOp() bool { return t >= Add && t <= NotEqual }
-func (t TokenId) IsUnaryOp() bool { return t >= Plus && t <= Tilde }
-func (t TokenId) IsLogicalOp() bool { return t == Land || t == Lor }
+// IsKeyword returns true if t is a keyword.
+func (t Token) IsKeyword() bool { return t >= Break && t <= Var }
+
+// IsLiteral returns true if t is a literal value.
+func (t Token) IsLiteral() bool { return t >= Char && t <= String }
+
+// IsOperator returns true if t is an operator.
+func (t Token) IsOperator() bool { return t >= Add && t <= Tilde }
+
+// IsBlock returns true if t is a block kind of token.
+func (t Token) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock }
+
+// IsBoolOp returns true if t is a boolean operator.
+func (t Token) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not }
+
+// IsBinaryOp returns true if t is a binary operator (takes two operands).
+func (t Token) IsBinaryOp() bool { return t >= Add && t <= NotEqual }
+
+// IsUnaryOp returns true if t is a unary operator (takes one operand).
+func (t Token) IsUnaryOp() bool { return t >= Plus && t <= Tilde }
+
+// IsLogicalOp returns true if t is a logical operator.
+func (t Token) IsLogicalOp() bool { return t == Land || t == Lor }
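
A short sketch exercising the renamed Token type; the printed values follow from the constant ranges and the UnaryOp map in this file:

    package main

    import (
    	"fmt"

    	"github.com/mvertes/parscan/lang"
    )

    func main() {
    	t := lang.Add
    	fmt.Println(t.IsOperator(), t.IsBinaryOp(), t.IsKeyword()) // true true false

    	// UnaryOp maps a binary operator token to its unary counterpart,
    	// e.g. binary "+" (Add) corresponds to unary plus (Plus).
    	fmt.Println(lang.UnaryOp[lang.Add] == lang.Plus) // true
    	fmt.Println(lang.For.IsKeyword())                // true
    }
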
diff --git a/lang/tokenid_string.go b/lang/token_string.go
index 705edc6..6b19fca 100644
--- a/lang/tokenid_string.go
+++ b/lang/token_string.go
@@ -1,4 +1,4 @@
-// Code generated by "stringer -type=TokenId"; DO NOT EDIT.
+// Code generated by "stringer -type=Token"; DO NOT EDIT.
package lang
@@ -103,13 +103,13 @@ func _() {
_ = x[New-92]
}
-const _TokenId_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew"
+const _Token_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew"
-var _TokenId_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 454, 460, 466, 472, 476, 479, 483, 488, 496, 500, 505, 514, 526, 537, 542, 545}
+var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 454, 460, 466, 472, 476, 479, 483, 488, 496, 500, 505, 514, 526, 537, 542, 545}
-func (i TokenId) String() string {
- if i < 0 || i >= TokenId(len(_TokenId_index)-1) {
- return "TokenId(" + strconv.FormatInt(int64(i), 10) + ")"
+func (i Token) String() string {
+ if i < 0 || i >= Token(len(_Token_index)-1) {
+ return "Token(" + strconv.FormatInt(int64(i), 10) + ")"
}
- return _TokenId_name[_TokenId_index[i]:_TokenId_index[i+1]]
+ return _Token_name[_Token_index[i]:_Token_index[i+1]]
}
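
The regenerated stringer output changes the fallback formatting from "TokenId(n)" to "Token(n)"; a brief sketch of the expected behaviour, assuming the stringer tool is installed so that the go:generate directive in token.go can rebuild this file:

    package main

    import (
    	"fmt"

    	"github.com/mvertes/parscan/lang"
    )

    func main() {
    	fmt.Println(lang.Ident)     // "Ident", via the generated String method
    	fmt.Println(lang.Token(-1)) // "Token(-1)" for values outside the table

    	// After adding tokens, regenerate token_string.go with:
    	//   go generate ./lang/...
    	// which runs "stringer -type=Token" as declared in token.go.
    }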