summaryrefslogtreecommitdiff
path: root/lang
diff options
context:
space:
mode:
authorMarc Vertes <marc.vertes@tendermint.com>2023-10-12 10:51:58 +0200
committerGitHub <noreply@github.com>2023-10-12 10:51:58 +0200
commit37b9da32d3b911091deb254f6cba2a137c471287 (patch)
treeb4451de0fa0473a937a77d39fd1f8a4f87c8f60d /lang
parenta21b9b12ad865a19ff687645082f9093c4101039 (diff)
move to a direct byte code compiler (#8)
* chore: refactor to keep only the new parser and bytecode vm * scanner: remove Token.value field * scanner: remove scanner.kind field * chore: move language specification into the lang package This avoids a cyclic dependency in scanner_test, which can now use the golang/GoSpec language specification for Go. * clean code * scanner: export scanner fields Also, the parser now generates function calls, including externals. * chore: fix lint issues * parser: handle strings * wip * parser: implement support for 'if, else, else if' statements Resolving labels in the compiler is still in progress. * parser: support if statements, improve compiler * improve handling of functions * improve support of local variables * scanner: trim leading and trailing spaces * fixes to make fibonacci work * parser: improve README, fix function parameters parsing
Diffstat (limited to 'lang')
-rw-r--r--lang/golang/go.go183
-rw-r--r--lang/spec.go43
-rw-r--r--lang/token.go110
3 files changed, 251 insertions, 85 deletions
diff --git a/lang/golang/go.go b/lang/golang/go.go
index 47ca6db..7f66594 100644
--- a/lang/golang/go.go
+++ b/lang/golang/go.go
@@ -1,37 +1,35 @@
package golang
-import (
- "github.com/gnolang/parscan/parser"
- "github.com/gnolang/parscan/scanner"
-)
+import "github.com/gnolang/parscan/lang"
-var GoScanner = &scanner.Scanner{
- CharProp: [scanner.ASCIILen]uint{
- '\t': scanner.CharSep,
- '\n': scanner.CharLineSep,
- ' ': scanner.CharSep,
- '!': scanner.CharOp,
- '"': scanner.CharStr,
- '%': scanner.CharOp,
- '&': scanner.CharOp,
- '\'': scanner.CharStr,
- '(': scanner.CharBlock,
- '*': scanner.CharOp,
- '+': scanner.CharOp,
- ',': scanner.CharGroupSep,
- '-': scanner.CharOp,
- '.': scanner.CharOp,
- '/': scanner.CharOp,
- ':': scanner.CharOp,
- ';': scanner.CharGroupSep,
- '<': scanner.CharOp,
- '=': scanner.CharOp,
- '>': scanner.CharOp,
- '[': scanner.CharBlock,
- '^': scanner.CharOp,
- '{': scanner.CharBlock,
- '|': scanner.CharOp,
- '~': scanner.CharOp,
+var GoSpec = &lang.Spec{
+ CharProp: [lang.ASCIILen]uint{
+ '\t': lang.CharSep,
+ '\n': lang.CharLineSep,
+ ' ': lang.CharSep,
+ '!': lang.CharOp,
+ '"': lang.CharStr,
+ '%': lang.CharOp,
+ '&': lang.CharOp,
+ '\'': lang.CharStr,
+ '(': lang.CharBlock,
+ '*': lang.CharOp,
+ '+': lang.CharOp,
+ ',': lang.CharGroupSep,
+ '-': lang.CharOp,
+ '`': lang.CharStr,
+ '.': lang.CharOp,
+ '/': lang.CharOp,
+ ':': lang.CharOp,
+ ';': lang.CharGroupSep,
+ '<': lang.CharOp,
+ '=': lang.CharOp,
+ '>': lang.CharOp,
+ '[': lang.CharBlock,
+ '^': lang.CharOp,
+ '{': lang.CharBlock,
+ '|': lang.CharOp,
+ '~': lang.CharOp,
},
End: map[string]string{
"(": ")",
@@ -44,61 +42,76 @@ var GoScanner = &scanner.Scanner{
"//": "\n",
},
BlockProp: map[string]uint{
- "(": scanner.CharBlock,
- "{": scanner.CharBlock,
- "[": scanner.CharBlock,
- `"`: scanner.CharStr | scanner.StrEsc | scanner.StrNonl,
- "`": scanner.CharStr,
- "'": scanner.CharStr | scanner.StrEsc,
- "/*": scanner.CharStr,
- "//": scanner.CharStr | scanner.ExcludeEnd | scanner.EosValidEnd,
+ "(": lang.CharBlock,
+ "{": lang.CharBlock,
+ "[": lang.CharBlock,
+ `"`: lang.CharStr | lang.StrEsc | lang.StrNonl,
+ "`": lang.CharStr,
+ "'": lang.CharStr | lang.StrEsc,
+ "/*": lang.CharStr,
+ "//": lang.CharStr | lang.ExcludeEnd | lang.EosValidEnd,
},
- SkipSemi: map[string]bool{
- "++": true,
- "--": true,
- "case": true,
- "chan": true,
- "const": true,
- "default": true,
- "defer": true,
- "else": true,
- "for": true,
- "func": true,
- "go": true,
- "goto": true,
- "if": true,
- "import": true,
- "interface": true,
- "map": true,
- "package": true,
- "range": true,
- "select": true,
- "struct": true,
- "switch": true,
- "type": true,
- "var": true,
- },
-}
+ TokenProps: map[string]lang.TokenProp{
+ // Block tokens (can be nested)
+ "{..}": {TokenId: lang.BraceBlock},
+ "[..]": {TokenId: lang.BracketBlock},
+ "(..)": {TokenId: lang.ParenBlock},
+
+ // String tokens (not nested)
+ "//..": {TokenId: lang.Comment},
+ "/*..": {TokenId: lang.Comment},
+ `".."`: {TokenId: lang.String},
+ "`..`": {TokenId: lang.String},
-var GoParser = &parser.Parser{
- Scanner: GoScanner,
- Spec: map[string]parser.NodeSpec{
- ".": {Kind: parser.OpDot, Flags: parser.Call, Order: 3},
- "*": {Kind: parser.OpMultiply, Order: 4},
- "+": {Kind: parser.OpAdd, Order: 5},
- "-": {Kind: parser.OpSubtract, Order: 5},
- "<": {Kind: parser.OpInferior, Order: 6},
- ":=": {Kind: parser.OpDefine, Order: 7},
- "=": {Kind: parser.OpAssign, Order: 7},
- "if": {Kind: parser.StmtIf, Flags: parser.Stmt | parser.ExprSep},
- "func": {Kind: parser.DeclFunc, Flags: parser.Decl | parser.Call},
- "return": {Kind: parser.StmtReturn, Flags: parser.Stmt},
- "{..}": {Kind: parser.BlockStmt, Flags: parser.ExprSep},
- "(..)": {Kind: parser.BlockParen, Flags: parser.Call},
- `".."`: {Kind: parser.LiteralString},
- "//..": {Kind: parser.Comment},
- "/*..": {Kind: parser.Comment},
+ // Separators
+ ",": {TokenId: lang.Comma},
+ ";": {TokenId: lang.Semicolon},
+ ".": {TokenId: lang.Period},
+ ":": {TokenId: lang.Colon},
+
+ // Operators
+ "&": {TokenId: lang.And},
+ "*": {TokenId: lang.Mul},
+ "+": {TokenId: lang.Add},
+ "-": {TokenId: lang.Sub},
+ "=": {TokenId: lang.Assign},
+ "<": {TokenId: lang.Less},
+ ">": {TokenId: lang.Greater},
+ "^": {TokenId: lang.Xor},
+ "~": {TokenId: lang.Tilde},
+ ":=": {TokenId: lang.Define},
+ "==": {TokenId: lang.Equal},
+ "<=": {TokenId: lang.LessEqual},
+ ">=": {TokenId: lang.GreaterEqual},
+ "->": {TokenId: lang.Arrow},
+ "++": {TokenId: lang.Inc, SkipSemi: true},
+ "--": {TokenId: lang.Dec, SkipSemi: true},
+
+ // Reserved keywords
+ "break": {TokenId: lang.Break},
+ "case": {TokenId: lang.Case, SkipSemi: true},
+ "chan": {TokenId: lang.Chan, SkipSemi: true},
+ "const": {TokenId: lang.Const, SkipSemi: true},
+ "continue": {TokenId: lang.Continue},
+ "default": {TokenId: lang.Default, SkipSemi: true},
+ "defer": {TokenId: lang.Defer, SkipSemi: true},
+ "else": {TokenId: lang.Else, SkipSemi: true},
+ "fallthrough": {TokenId: lang.Fallthrough},
+ "for": {TokenId: lang.For, SkipSemi: true},
+ "func": {TokenId: lang.Func, SkipSemi: true},
+ "go": {TokenId: lang.Go, SkipSemi: true},
+ "goto": {TokenId: lang.Goto, SkipSemi: true},
+ "if": {TokenId: lang.If, SkipSemi: true},
+ "import": {TokenId: lang.Import, SkipSemi: true},
+ "interface": {TokenId: lang.Interface, SkipSemi: true},
+ "map": {TokenId: lang.Map, SkipSemi: true},
+ "package": {TokenId: lang.Package, SkipSemi: true},
+ "range": {TokenId: lang.Range, SkipSemi: true},
+ "return": {TokenId: lang.Return},
+ "select": {TokenId: lang.Select, SkipSemi: true},
+ "struct": {TokenId: lang.Struct, SkipSemi: true},
+ "switch": {TokenId: lang.Switch, SkipSemi: true},
+ "type": {TokenId: lang.Type, SkipSemi: true},
+ "var": {TokenId: lang.Var, SkipSemi: true},
},
}
-
-func init() { GoParser.Init() }
diff --git a/lang/spec.go b/lang/spec.go
new file mode 100644
index 0000000..c5b50a4
--- /dev/null
+++ b/lang/spec.go
@@ -0,0 +1,43 @@
+package lang
+
+const (
+ CharIllegal = 1 << iota // bit flags: each constant below is the next power of two, so flags can be combined with |
+ CharOp
+ CharNum
+ CharAlpha
+ CharSep
+ CharLineSep
+ CharGroupSep
+ CharStr
+ CharBlock
+ StrEsc // NOTE(review): presumably "escape sequences are interpreted inside the string" — confirm in the scanner
+ StrNonl // NOTE(review): presumably "string may not contain a newline" — confirm in the scanner
+ ExcludeEnd // exclude end delimiter from content
+ EosValidEnd // end of input string terminates block or string token
+)
+
+const ASCIILen = 1 << 7 // 128, the length of the Spec.CharProp table
+
+type TokenProp struct { // TokenProp is the value type of Spec.TokenProps: the properties attached to one token string
+ TokenId // embedded token identifier
+ SkipSemi bool // automatic semicolon insertion after newline
+}
+
+type Spec struct { // Spec holds a language specification (golang.GoSpec is the instance for Go)
+ CharProp [ASCIILen]uint // per-character property flags (Char* constants), indexed by ASCII code
+ End map[string]string // end delimiters, indexed by start
+ BlockProp map[string]uint // block property flags (Char*/Str* constants), indexed by start delimiter
+ TokenProps map[string]TokenProp // token properties, indexed by token string
+ DotNum bool // true if a number can start with '.'
+ IdAscii bool // true if an identifier can be in ASCII only
+ Num_ bool // true if a number can contain _ character
+}
+
+// HasInit records, per token id, whether the statement it introduces may contain a simple init statement.
+var HasInit = map[TokenId]bool{
+ Case: true,
+ For: true,
+ If: true,
+ Select: true,
+ Switch: true,
+}
diff --git a/lang/token.go b/lang/token.go
new file mode 100644
index 0000000..3a980a4
--- /dev/null
+++ b/lang/token.go
@@ -0,0 +1,110 @@
+package lang
+
+type TokenId int // TokenId identifies a kind of token; it is the receiver of IsKeyword, IsOperator and IsBlock
+
+const (
+ Illegal = iota // NOTE(review): no explicit type is given, so every constant in this block is an untyped int rather than a TokenId — confirm intended
+ Comment
+ Ident
+ Int
+ Float
+ Imag
+ Char
+ String
+
+ // Operators
+ Add // +
+ Sub // -
+ Mul // *
+ Quo
+ Rem
+ And // &
+ Or
+ Xor // ^
+ Shl // <<
+ Shr // >>
+ AndNot // &^
+
+ AddAssign
+ SubAssign
+ MulAssign
+ QuoAssign
+ RemAssign
+ AndAssign
+ OrAssign
+ XorAssign
+ ShlAssign
+ ShrAssign
+ AndNotAssign
+
+ Land
+ Lor
+ Arrow
+ Inc // ++
+ Dec // --
+ Equal // ==
+ Less // <
+ Greater // >
+ Assign // =
+ Not
+ Plus // unary +
+ Minus // unary -
+ Address // unary &
+ Deref // unary *
+ NotEqual
+ LessEqual // <=
+ GreaterEqual // >=
+ Define // :=
+ Ellipsis
+ Period // .
+ Tilde // ~
+
+ // Separators
+ Comma // ,
+ Semicolon // ;
+ Colon // :
+
+ // Block tokens
+ ParenBlock // (..)
+ BracketBlock // [..]
+ BraceBlock // {..}
+
+ // Reserved keywords
+ Break
+ Case
+ Chan
+ Const
+ Continue
+ Default
+ Defer
+ Else
+ Fallthrough
+ For
+ Func
+ Go
+ Goto
+ If
+ Import
+ Interface
+ Map
+ Package
+ Range
+ Return
+ Select
+ Struct
+ Switch
+ Type
+ Var
+
+ // Internal tokens (no corresponding keyword)
+ Call
+ CallX
+ Label
+ JumpFalse
+ Enter // entering in function context
+ Exit // exiting from function context
+)
+
+func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var } // IsKeyword reports whether t lies in the reserved-keyword range Break..Var (inclusive).
+func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde } // IsOperator reports whether t lies in Add..Tilde (inclusive); that range also contains Define, Ellipsis and Period.
+func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } // IsBlock reports whether t is one of ParenBlock, BracketBlock or BraceBlock.