summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2023-11-15 11:59:15 +0100
committerMarc Vertes <mvertes@free.fr>2023-11-15 11:59:15 +0100
commita4d7fb2da6a8390b818dae8d07391c7d76e365e9 (patch)
tree166d1ed4bb07337ead19fd4f7ebc8d79885e2cfd
parent2eab5877e1c634db872b595dd2414f4031ae4eb5 (diff)
parser: handle const declarations
Only symbols are produced, no bytecode is emitted. The constant expressions are evaluated at compile time using the stdlib package go/constant. The parser handles implicit repetition of the last non-empty expression list. The iota symbol is reset to 0 and incremented for each line of a const block. To be done in a next commit: type conversions.
-rw-r--r--lang/token.go7
-rw-r--r--parser/README.md4
-rw-r--r--parser/compiler.go59
-rw-r--r--parser/decl.go180
-rw-r--r--parser/interpreter_test.go15
-rw-r--r--parser/parse.go2
-rw-r--r--parser/symbol.go4
-rw-r--r--vm/vm.go10
8 files changed, 238 insertions, 43 deletions
diff --git a/lang/token.go b/lang/token.go
index af2fc26..37ac557 100644
--- a/lang/token.go
+++ b/lang/token.go
@@ -6,10 +6,12 @@ const (
Illegal = iota
Comment
Ident
- Int
+
+ // Literal values
+ Char
Float
Imag
- Char
+ Int
String
// Binary operators (except indicated)
@@ -114,6 +116,7 @@ const (
)
// IsKeyword reports whether t lies in the token range Break..Var, inclusive.
func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var }
// IsLiteral reports whether t lies in the token range Char..String, inclusive.
+func (t TokenId) IsLiteral() bool { return t >= Char && t <= String }
// IsOperator reports whether t lies in the token range Add..Tilde, inclusive.
func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde }
// IsBlock reports whether t lies in the token range ParenBlock..BraceBlock, inclusive.
func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock }
// IsBoolOp reports whether t lies in the token range Equal..NotEqual, inclusive, or is Not.
func (t TokenId) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not }
diff --git a/parser/README.md b/parser/README.md
index 54fead3..6f3b6dd 100644
--- a/parser/README.md
+++ b/parser/README.md
@@ -54,8 +54,8 @@ Go language support:
- [x] var declaration
- [x] type declaration
- [x] func declaration
-- [ ] const declaration
-- [ ] iota expression
+- [x] const declaration
+- [x] iota expression
- [ ] defer statement
- [ ] recover statement
- [ ] go statement
diff --git a/parser/compiler.go b/parser/compiler.go
index bce83d5..ed950cb 100644
--- a/parser/compiler.go
+++ b/parser/compiler.go
@@ -13,9 +13,9 @@ import (
type Compiler struct {
*Parser
- Code [][]int64 // produced code, to fill VM with
- Data []any // produced data, will be at the bottom of VM stack
- Entry int // offset in Code to start execution from (skip function defintions)
+ vm.Code // produced code, to fill VM with
+ Data []any // produced data, will be at the bottom of VM stack
+ Entry int // offset in Code to start execution from (skip function defintions)
strings map[string]int // locations of strings in Data
}
@@ -28,13 +28,6 @@ func NewCompiler(scanner *scanner.Scanner) *Compiler {
}
}
-func (c *Compiler) Emit(op ...int64) int {
- op = append([]int64{}, op...)
- l := len(c.Code)
- c.Code = append(c.Code, op)
- return l
-}
-
func (c *Compiler) AddSym(name string, value any) int {
p := len(c.Data)
c.Data = append(c.Data, value)
@@ -46,6 +39,8 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
fixList := Tokens{}
log.Println("Codegen tokens:", tokens)
+ emit := func(op ...int64) { c.Code = append(c.Code, op) }
+
for i, t := range tokens {
switch t.Id {
case lang.Int:
@@ -53,7 +48,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
if err != nil {
return err
}
- c.Emit(int64(t.Pos), vm.Push, int64(n))
+ emit(int64(t.Pos), vm.Push, int64(n))
case lang.String:
s := t.Block()
@@ -63,31 +58,31 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
c.Data = append(c.Data, s)
c.strings[s] = i
}
- c.Emit(int64(t.Pos), vm.Dup, int64(i))
+ emit(int64(t.Pos), vm.Dup, int64(i))
case lang.Add:
- c.Emit(int64(t.Pos), vm.Add)
+ emit(int64(t.Pos), vm.Add)
case lang.Mul:
- c.Emit(int64(t.Pos), vm.Mul)
+ emit(int64(t.Pos), vm.Mul)
case lang.Sub:
- c.Emit(int64(t.Pos), vm.Sub)
+ emit(int64(t.Pos), vm.Sub)
case lang.Greater:
- c.Emit(int64(t.Pos), vm.Greater)
+ emit(int64(t.Pos), vm.Greater)
case lang.Less:
- c.Emit(int64(t.Pos), vm.Lower)
+ emit(int64(t.Pos), vm.Lower)
case lang.Call:
- c.Emit(int64(t.Pos), vm.Call)
+ emit(int64(t.Pos), vm.Call)
case lang.CallX:
- c.Emit(int64(t.Pos), vm.CallX, int64(t.Beg))
+ emit(int64(t.Pos), vm.CallX, int64(t.Beg))
case lang.Grow:
- c.Emit(int64(t.Pos), vm.Grow, int64(t.Beg))
+ emit(int64(t.Pos), vm.Grow, int64(t.Beg))
case lang.Define:
// TODO: support assignment to local, composite objects
@@ -96,7 +91,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
c.Data = append(c.Data, nil)
// TODO: symbol should be added at parse, not here.
c.addSym(l, st.Str, nil, symVar, nil, false)
- c.Emit(int64(st.Pos), vm.Assign, int64(l))
+ emit(int64(st.Pos), vm.Assign, int64(l))
case lang.Assign:
st := tokens[i-1]
@@ -105,20 +100,20 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
return fmt.Errorf("symbol not found: %s", st.Str)
}
if s.local {
- c.Emit(int64(st.Pos), vm.Fassign, int64(s.index))
+ emit(int64(st.Pos), vm.Fassign, int64(s.index))
} else {
if s.index == unsetAddr {
s.index = len(c.Data)
c.Data = append(c.Data, s.value)
}
- c.Emit(int64(st.Pos), vm.Assign, int64(s.index))
+ emit(int64(st.Pos), vm.Assign, int64(s.index))
}
case lang.Equal:
- c.Emit(int64(t.Pos), vm.Equal)
+ emit(int64(t.Pos), vm.Equal)
case lang.EqualSet:
- c.Emit(int64(t.Pos), vm.EqualSet)
+ emit(int64(t.Pos), vm.EqualSet)
case lang.Ident:
if i < len(tokens)-1 {
@@ -132,13 +127,13 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
return fmt.Errorf("symbol not found: %s", t.Str)
}
if s.local {
- c.Emit(int64(t.Pos), vm.Fdup, int64(s.index))
+ emit(int64(t.Pos), vm.Fdup, int64(s.index))
} else {
if s.index == unsetAddr {
s.index = len(c.Data)
c.Data = append(c.Data, s.value)
}
- c.Emit(int64(t.Pos), vm.Dup, int64(s.index))
+ emit(int64(t.Pos), vm.Dup, int64(s.index))
}
case lang.Label:
@@ -167,7 +162,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
} else {
i = s.value.(int) - len(c.Code)
}
- c.Emit(int64(t.Pos), vm.JumpFalse, int64(i))
+ emit(int64(t.Pos), vm.JumpFalse, int64(i))
case lang.JumpSetFalse:
label := t.Str[13:]
@@ -179,7 +174,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
} else {
i = s.value.(int) - len(c.Code)
}
- c.Emit(int64(t.Pos), vm.JumpSetFalse, int64(i))
+ emit(int64(t.Pos), vm.JumpSetFalse, int64(i))
case lang.JumpSetTrue:
label := t.Str[12:]
@@ -191,7 +186,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
} else {
i = s.value.(int) - len(c.Code)
}
- c.Emit(int64(t.Pos), vm.JumpSetTrue, int64(i))
+ emit(int64(t.Pos), vm.JumpSetTrue, int64(i))
case lang.Goto:
label := t.Str[5:]
@@ -202,10 +197,10 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) {
} else {
i = s.value.(int) - len(c.Code)
}
- c.Emit(int64(t.Pos), vm.Jump, int64(i))
+ emit(int64(t.Pos), vm.Jump, int64(i))
case lang.Return:
- c.Emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End))
+ emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End))
default:
return fmt.Errorf("Codegen: unsupported token %v", t)
diff --git a/parser/decl.go b/parser/decl.go
index 6c10363..ba68da0 100644
--- a/parser/decl.go
+++ b/parser/decl.go
@@ -2,13 +2,190 @@ package parser
import (
"errors"
- "log"
+ "go/constant"
+ "go/token"
"strings"
"github.com/gnolang/parscan/lang"
"github.com/gnolang/parscan/scanner"
)
+func (p *Parser) ParseConst(in Tokens) (out Tokens, err error) {
+ if len(in) < 2 {
+ return out, errors.New("missing expression")
+ }
+ if in[1].Id != lang.ParenBlock {
+ return p.parseConstLine(in[1:])
+ }
+ if in, err = p.Scan(in[1].Block(), false); err != nil {
+ return out, err
+ }
+ var cnt int64
+ p.symbols["iota"].cval = constant.Make(cnt)
+ var prev Tokens
+ for i, lt := range in.Split(lang.Semicolon) {
+ if i > 0 && len(lt) == 1 {
+ lt = append(Tokens{lt[0]}, prev...) // Handle implicit repetition of the previous expression.
+ }
+ ot, err := p.parseConstLine(lt)
+ if err != nil {
+ return out, err
+ }
+ out = append(out, ot...)
+ prev = lt[1:]
+ cnt++
+ p.symbols["iota"].cval = constant.Make(cnt)
+ }
+ return out, err
+}
+
+func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) {
+ decl := in
+ var assign Tokens
+ if i := decl.Index(lang.Assign); i >= 0 {
+ assign = decl[i+1:]
+ decl = decl[:i]
+ }
+ var vars []string
+ if _, vars, err = p.parseParamTypes(decl, parseTypeVar); err != nil {
+ if errors.Is(err, missingTypeError) {
+ for _, lt := range decl.Split(lang.Comma) {
+ vars = append(vars, lt[0].Str)
+ // TODO: compute type from rhs
+ p.addSym(unsetAddr, strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/"), nil, symConst, nil, false)
+ }
+ } else {
+ return out, err
+ }
+ }
+ values := assign.Split(lang.Comma)
+ if len(values) == 1 && len(values[0]) == 0 {
+ values = nil
+ }
+ for i, v := range values {
+ if v, err = p.ParseExpr(v); err != nil {
+ return out, err
+ }
+ cval, _, err := p.evalConstExpr(v)
+ if err != nil {
+ return out, err
+ }
+ name := strings.TrimPrefix(p.scope+"/"+vars[i], "/")
+ p.symbols[name] = &symbol{
+ kind: symConst,
+ index: unsetAddr,
+ cval: cval,
+ value: constValue(cval),
+ local: p.funcScope != "",
+ used: true,
+ }
+ // TODO: type conversion when applicable.
+ }
+ return out, err
+}
+
+func (p *Parser) evalConstExpr(in Tokens) (cval constant.Value, length int, err error) {
+ l := len(in) - 1
+ if l < 0 {
+ return nil, 0, errors.New("missing argument")
+ }
+ t := in[l]
+ id := t.Id
+ switch {
+ case id.IsBinaryOp():
+ op1, l1, err := p.evalConstExpr(in[:l])
+ if err != nil {
+ return nil, 0, err
+ }
+ op2, l2, err := p.evalConstExpr(in[:l-l1])
+ if err != nil {
+ return nil, 0, err
+ }
+ length = 1 + l1 + l2
+ tok := gotok[id]
+ if id.IsBoolOp() {
+ return constant.MakeBool(constant.Compare(op1, tok, op2)), length, err
+ }
+ if id == lang.Shl || id == lang.Shr {
+ s, ok := constant.Uint64Val(op2)
+ if !ok {
+ return nil, 0, errors.New("invalid shift parameter")
+ }
+ return constant.Shift(op1, tok, uint(s)), length, err
+ }
+ if tok == token.QUO && op1.Kind() == constant.Int && op2.Kind() == constant.Int {
+ tok = token.QUO_ASSIGN // Force int result, see https://pkg.go.dev/go/constant#BinaryOp
+ }
+ return constant.BinaryOp(op1, tok, op2), length, err
+ case id.IsUnaryOp():
+ op1, l1, err := p.evalConstExpr(in[:l])
+ if err != nil {
+ return nil, 0, err
+ }
+ return constant.UnaryOp(gotok[id], op1, 0), 1 + l1, err
+ case id.IsLiteral():
+ return constant.MakeFromLiteral(t.Str, gotok[id], 0), 1, err
+ case id == lang.Ident:
+ s, _, ok := p.getSym(t.Str, p.scope)
+ if !ok {
+ return nil, 0, errors.New("symbol not found")
+ }
+ if s.kind != symConst {
+ return nil, 0, errors.New("symbol is not a constant")
+ }
+ return s.cval, 1, err
+ case id == lang.Call:
+ // TODO: implement support for type conversions and builtin calls.
+ panic("not implemented yet")
+ default:
+ return nil, 0, errors.New("invalid constant expression")
+ }
+}
+
// constValue converts a compile-time constant into the native Go value stored
// in a symbol.
//
// Bool, String, Int and Float constants are returned as bool, string, int and
// float64 respectively. A nil constant, or a constant of any other kind,
// yields nil.
func constValue(c constant.Value) any {
	if c == nil {
		// Calling Kind on a nil interface would panic.
		return nil
	}
	switch c.Kind() {
	case constant.Bool:
		return constant.BoolVal(c)
	case constant.String:
		return constant.StringVal(c)
	case constant.Int:
		// The exactness flag is ignored: values which do not fit are truncated.
		v, _ := constant.Int64Val(c)
		return int(v)
	case constant.Float:
		// The exactness flag is ignored: the nearest float64 is returned.
		v, _ := constant.Float64Val(c)
		return v
	}
	return nil
}
+
+var gotok = map[lang.TokenId]token.Token{
+ lang.Char: token.CHAR,
+ lang.Imag: token.IMAG,
+ lang.Int: token.INT,
+ lang.Float: token.FLOAT,
+ lang.Add: token.ADD,
+ lang.Sub: token.SUB,
+ lang.Mul: token.MUL,
+ lang.Quo: token.QUO,
+ lang.Rem: token.REM,
+ lang.And: token.AND,
+ lang.Or: token.OR,
+ lang.Xor: token.XOR,
+ lang.Shl: token.SHL,
+ lang.Shr: token.SHR,
+ lang.AndNot: token.AND_NOT,
+ lang.Equal: token.EQL,
+ lang.Greater: token.GTR,
+ lang.Less: token.LSS,
+ lang.GreaterEqual: token.GEQ,
+ lang.LessEqual: token.LEQ,
+ lang.NotEqual: token.NEQ,
+ lang.Plus: token.ADD,
+ lang.Minus: token.SUB,
+ lang.BitComp: token.XOR,
+ lang.Not: token.NOT,
+}
+
func (p *Parser) ParseType(in Tokens) (out Tokens, err error) {
if len(in) < 2 {
return out, missingTypeError
@@ -91,7 +268,6 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) {
if len(values) == 1 && len(values[0]) == 0 {
values = nil
}
- log.Println("ParseVar:", vars, values, len(values))
for i, v := range values {
if v, err = p.ParseExpr(v); err != nil {
return out, err
diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go
index 3a5598e..7ac1212 100644
--- a/parser/interpreter_test.go
+++ b/parser/interpreter_test.go
@@ -164,6 +164,21 @@ func TestSwitch(t *testing.T) {
})
}
// TestConst runs const declarations through the interpreter: a single
// constant initialized from an expression, a declaration of two names with two
// values, and a const block based on iota where the expression is implicitly
// repeated on the following lines.
+func TestConst(t *testing.T) {
+ src0 := `const (
+ a = iota
+ b
+ c
+)
+`
+ run(t, []etest{
+ {src: "const a = 1+2; a", res: "3"},
+ {src: "const a, b = 1, 2; a+b", res: "3"},
+
+ {src: src0 + "c", res: "2"},
+ })
+}
+
func TestType(t *testing.T) {
run(t, []etest{
{src: "type t int; var a t = 1; a", res: "1"},
diff --git a/parser/parse.go b/parser/parse.go
index 7de73b6..131e9c0 100644
--- a/parser/parse.go
+++ b/parser/parse.go
@@ -75,6 +75,8 @@ func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) {
return p.ParseBreak(in)
case lang.Continue:
return p.ParseContinue(in)
+ case lang.Const:
+ return p.ParseConst(in)
case lang.For:
return p.ParseFor(in)
case lang.Func:
diff --git a/parser/symbol.go b/parser/symbol.go
index d7c05f1..f707feb 100644
--- a/parser/symbol.go
+++ b/parser/symbol.go
@@ -2,6 +2,7 @@ package parser
import (
"fmt"
+ "go/constant"
"reflect"
"strings"
)
@@ -23,6 +24,7 @@ type symbol struct {
kind symKind
index int // address of symbol in frame
value any
+ cval constant.Value
Type reflect.Type
local bool // if true address is relative to local frame, otherwise global
used bool
@@ -62,7 +64,7 @@ func initUniverse() map[string]*symbol {
"string": {kind: symType, index: unsetAddr, Type: reflect.TypeOf((*string)(nil)).Elem()},
"nil": {index: unsetAddr},
- "iota": {index: unsetAddr, value: 0},
+ "iota": {kind: symConst, index: unsetAddr},
"true": {index: unsetAddr, value: true, Type: reflect.TypeOf(true)},
"false": {index: unsetAddr, value: false, Type: reflect.TypeOf(false)},
diff --git a/vm/vm.go b/vm/vm.go
index ba1f97e..f7f4d78 100644
--- a/vm/vm.go
+++ b/vm/vm.go
@@ -71,12 +71,14 @@ var strop = [...]string{ // for VM tracing.
Subi: "Subi",
}
+type Code [][]int64
+
// Machine represents a virtual machine.
type Machine struct {
- code [][]int64 // code to execute
- mem []any // memory, as a stack
- ip, fp int // instruction and frame pointer
- ic uint64 // instruction counter, incremented at each instruction executed
+ code Code // code to execute
+ mem []any // memory, as a stack
+ ip, fp int // instruction and frame pointer
+ ic uint64 // instruction counter, incremented at each instruction executed
// flags uint // to set options such as restrict CallX, etc...
}