From a4d7fb2da6a8390b818dae8d07391c7d76e365e9 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Wed, 15 Nov 2023 11:59:15 +0100 Subject: parser: handle const declarations Only symbols are produced, no bytecode is emitted. The constant expressions are evaluated at compile time using the stdlib package go/constant. The parser handles implicit repetition of the last non-empty expression list. The iota symbol is reset to 0 and incremented for each line of a const block. To be done in a later commit: type conversions. --- lang/token.go | 7 +- parser/README.md | 4 +- parser/compiler.go | 59 +++++++-------- parser/decl.go | 180 ++++++++++++++++++++++++++++++++++++++++++++- parser/interpreter_test.go | 15 ++++ parser/parse.go | 2 + parser/symbol.go | 4 +- vm/vm.go | 10 ++- 8 files changed, 238 insertions(+), 43 deletions(-) diff --git a/lang/token.go b/lang/token.go index af2fc26..37ac557 100644 --- a/lang/token.go +++ b/lang/token.go @@ -6,10 +6,12 @@ const ( Illegal = iota Comment Ident - Int + + // Literal values + Char Float Imag - Char + Int String // Binary operators (except indicated) @@ -114,6 +116,7 @@ const ( ) func (t TokenId) IsKeyword() bool { return t >= Break && t <= Var } +func (t TokenId) IsLiteral() bool { return t >= Char && t <= String } func (t TokenId) IsOperator() bool { return t >= Add && t <= Tilde } func (t TokenId) IsBlock() bool { return t >= ParenBlock && t <= BraceBlock } func (t TokenId) IsBoolOp() bool { return t >= Equal && t <= NotEqual || t == Not } diff --git a/parser/README.md b/parser/README.md index 54fead3..6f3b6dd 100644 --- a/parser/README.md +++ b/parser/README.md @@ -54,8 +54,8 @@ Go language support: - [x] var declaration - [x] type declaration - [x] func declaration -- [ ] const declaration -- [ ] iota expression +- [x] const declaration +- [x] iota expression - [ ] defer statement - [ ] recover statement - [ ] go statement diff --git a/parser/compiler.go b/parser/compiler.go index bce83d5..ed950cb 100644 --- a/parser/compiler.go 
+++ b/parser/compiler.go @@ -13,9 +13,9 @@ import ( type Compiler struct { *Parser - Code [][]int64 // produced code, to fill VM with - Data []any // produced data, will be at the bottom of VM stack - Entry int // offset in Code to start execution from (skip function defintions) + vm.Code // produced code, to fill VM with + Data []any // produced data, will be at the bottom of VM stack + Entry int // offset in Code to start execution from (skip function defintions) strings map[string]int // locations of strings in Data } @@ -28,13 +28,6 @@ func NewCompiler(scanner *scanner.Scanner) *Compiler { } } -func (c *Compiler) Emit(op ...int64) int { - op = append([]int64{}, op...) - l := len(c.Code) - c.Code = append(c.Code, op) - return l -} - func (c *Compiler) AddSym(name string, value any) int { p := len(c.Data) c.Data = append(c.Data, value) @@ -46,6 +39,8 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { fixList := Tokens{} log.Println("Codegen tokens:", tokens) + emit := func(op ...int64) { c.Code = append(c.Code, op) } + for i, t := range tokens { switch t.Id { case lang.Int: @@ -53,7 +48,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { if err != nil { return err } - c.Emit(int64(t.Pos), vm.Push, int64(n)) + emit(int64(t.Pos), vm.Push, int64(n)) case lang.String: s := t.Block() @@ -63,31 +58,31 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { c.Data = append(c.Data, s) c.strings[s] = i } - c.Emit(int64(t.Pos), vm.Dup, int64(i)) + emit(int64(t.Pos), vm.Dup, int64(i)) case lang.Add: - c.Emit(int64(t.Pos), vm.Add) + emit(int64(t.Pos), vm.Add) case lang.Mul: - c.Emit(int64(t.Pos), vm.Mul) + emit(int64(t.Pos), vm.Mul) case lang.Sub: - c.Emit(int64(t.Pos), vm.Sub) + emit(int64(t.Pos), vm.Sub) case lang.Greater: - c.Emit(int64(t.Pos), vm.Greater) + emit(int64(t.Pos), vm.Greater) case lang.Less: - c.Emit(int64(t.Pos), vm.Lower) + emit(int64(t.Pos), vm.Lower) case lang.Call: - c.Emit(int64(t.Pos), vm.Call) + emit(int64(t.Pos), vm.Call) case 
lang.CallX: - c.Emit(int64(t.Pos), vm.CallX, int64(t.Beg)) + emit(int64(t.Pos), vm.CallX, int64(t.Beg)) case lang.Grow: - c.Emit(int64(t.Pos), vm.Grow, int64(t.Beg)) + emit(int64(t.Pos), vm.Grow, int64(t.Beg)) case lang.Define: // TODO: support assignment to local, composite objects @@ -96,7 +91,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { c.Data = append(c.Data, nil) // TODO: symbol should be added at parse, not here. c.addSym(l, st.Str, nil, symVar, nil, false) - c.Emit(int64(st.Pos), vm.Assign, int64(l)) + emit(int64(st.Pos), vm.Assign, int64(l)) case lang.Assign: st := tokens[i-1] @@ -105,20 +100,20 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { return fmt.Errorf("symbol not found: %s", st.Str) } if s.local { - c.Emit(int64(st.Pos), vm.Fassign, int64(s.index)) + emit(int64(st.Pos), vm.Fassign, int64(s.index)) } else { if s.index == unsetAddr { s.index = len(c.Data) c.Data = append(c.Data, s.value) } - c.Emit(int64(st.Pos), vm.Assign, int64(s.index)) + emit(int64(st.Pos), vm.Assign, int64(s.index)) } case lang.Equal: - c.Emit(int64(t.Pos), vm.Equal) + emit(int64(t.Pos), vm.Equal) case lang.EqualSet: - c.Emit(int64(t.Pos), vm.EqualSet) + emit(int64(t.Pos), vm.EqualSet) case lang.Ident: if i < len(tokens)-1 { @@ -132,13 +127,13 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { return fmt.Errorf("symbol not found: %s", t.Str) } if s.local { - c.Emit(int64(t.Pos), vm.Fdup, int64(s.index)) + emit(int64(t.Pos), vm.Fdup, int64(s.index)) } else { if s.index == unsetAddr { s.index = len(c.Data) c.Data = append(c.Data, s.value) } - c.Emit(int64(t.Pos), vm.Dup, int64(s.index)) + emit(int64(t.Pos), vm.Dup, int64(s.index)) } case lang.Label: @@ -167,7 +162,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } else { i = s.value.(int) - len(c.Code) } - c.Emit(int64(t.Pos), vm.JumpFalse, int64(i)) + emit(int64(t.Pos), vm.JumpFalse, int64(i)) case lang.JumpSetFalse: label := t.Str[13:] @@ -179,7 +174,7 @@ func (c *Compiler) 
Codegen(tokens Tokens) (err error) { } else { i = s.value.(int) - len(c.Code) } - c.Emit(int64(t.Pos), vm.JumpSetFalse, int64(i)) + emit(int64(t.Pos), vm.JumpSetFalse, int64(i)) case lang.JumpSetTrue: label := t.Str[12:] @@ -191,7 +186,7 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } else { i = s.value.(int) - len(c.Code) } - c.Emit(int64(t.Pos), vm.JumpSetTrue, int64(i)) + emit(int64(t.Pos), vm.JumpSetTrue, int64(i)) case lang.Goto: label := t.Str[5:] @@ -202,10 +197,10 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } else { i = s.value.(int) - len(c.Code) } - c.Emit(int64(t.Pos), vm.Jump, int64(i)) + emit(int64(t.Pos), vm.Jump, int64(i)) case lang.Return: - c.Emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End)) + emit(int64(t.Pos), vm.Return, int64(t.Beg), int64(t.End)) default: return fmt.Errorf("Codegen: unsupported token %v", t) diff --git a/parser/decl.go b/parser/decl.go index 6c10363..ba68da0 100644 --- a/parser/decl.go +++ b/parser/decl.go @@ -2,13 +2,190 @@ package parser import ( "errors" - "log" + "go/constant" + "go/token" "strings" "github.com/gnolang/parscan/lang" "github.com/gnolang/parscan/scanner" ) +func (p *Parser) ParseConst(in Tokens) (out Tokens, err error) { + if len(in) < 2 { + return out, errors.New("missing expression") + } + if in[1].Id != lang.ParenBlock { + return p.parseConstLine(in[1:]) + } + if in, err = p.Scan(in[1].Block(), false); err != nil { + return out, err + } + var cnt int64 + p.symbols["iota"].cval = constant.Make(cnt) + var prev Tokens + for i, lt := range in.Split(lang.Semicolon) { + if i > 0 && len(lt) == 1 { + lt = append(Tokens{lt[0]}, prev...) // Handle implicit repetition of the previous expression. + } + ot, err := p.parseConstLine(lt) + if err != nil { + return out, err + } + out = append(out, ot...) 
+ prev = lt[1:] + cnt++ + p.symbols["iota"].cval = constant.Make(cnt) + } + return out, err +} + +func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { + decl := in + var assign Tokens + if i := decl.Index(lang.Assign); i >= 0 { + assign = decl[i+1:] + decl = decl[:i] + } + var vars []string + if _, vars, err = p.parseParamTypes(decl, parseTypeVar); err != nil { + if errors.Is(err, missingTypeError) { + for _, lt := range decl.Split(lang.Comma) { + vars = append(vars, lt[0].Str) + // TODO: compute type from rhs + p.addSym(unsetAddr, strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/"), nil, symConst, nil, false) + } + } else { + return out, err + } + } + values := assign.Split(lang.Comma) + if len(values) == 1 && len(values[0]) == 0 { + values = nil + } + for i, v := range values { + if v, err = p.ParseExpr(v); err != nil { + return out, err + } + cval, _, err := p.evalConstExpr(v) + if err != nil { + return out, err + } + name := strings.TrimPrefix(p.scope+"/"+vars[i], "/") + p.symbols[name] = &symbol{ + kind: symConst, + index: unsetAddr, + cval: cval, + value: constValue(cval), + local: p.funcScope != "", + used: true, + } + // TODO: type conversion when applicable. 
+ } + return out, err +} + +func (p *Parser) evalConstExpr(in Tokens) (cval constant.Value, length int, err error) { + l := len(in) - 1 + if l < 0 { + return nil, 0, errors.New("missing argument") + } + t := in[l] + id := t.Id + switch { + case id.IsBinaryOp(): + op1, l1, err := p.evalConstExpr(in[:l]) + if err != nil { + return nil, 0, err + } + op2, l2, err := p.evalConstExpr(in[:l-l1]) + if err != nil { + return nil, 0, err + } + length = 1 + l1 + l2 + tok := gotok[id] + if id.IsBoolOp() { + return constant.MakeBool(constant.Compare(op1, tok, op2)), length, err + } + if id == lang.Shl || id == lang.Shr { + s, ok := constant.Uint64Val(op2) + if !ok { + return nil, 0, errors.New("invalid shift parameter") + } + return constant.Shift(op1, tok, uint(s)), length, err + } + if tok == token.QUO && op1.Kind() == constant.Int && op2.Kind() == constant.Int { + tok = token.QUO_ASSIGN // Force int result, see https://pkg.go.dev/go/constant#BinaryOp + } + return constant.BinaryOp(op1, tok, op2), length, err + case id.IsUnaryOp(): + op1, l1, err := p.evalConstExpr(in[:l]) + if err != nil { + return nil, 0, err + } + return constant.UnaryOp(gotok[id], op1, 0), 1 + l1, err + case id.IsLiteral(): + return constant.MakeFromLiteral(t.Str, gotok[id], 0), 1, err + case id == lang.Ident: + s, _, ok := p.getSym(t.Str, p.scope) + if !ok { + return nil, 0, errors.New("symbol not found") + } + if s.kind != symConst { + return nil, 0, errors.New("symbol is not a constant") + } + return s.cval, 1, err + case id == lang.Call: + // TODO: implement support for type conversions and builtin calls. 
+ panic("not implemented yet") + default: + return nil, 0, errors.New("invalid constant expression") + } +} + +func constValue(c constant.Value) any { + switch c.Kind() { + case constant.Bool: + return constant.BoolVal(c) + case constant.String: + return constant.StringVal(c) + case constant.Int: + v, _ := constant.Int64Val(c) + return int(v) + case constant.Float: + v, _ := constant.Float64Val(c) + return v + } + return nil +} + +var gotok = map[lang.TokenId]token.Token{ + lang.Char: token.CHAR, + lang.Imag: token.IMAG, + lang.Int: token.INT, + lang.Float: token.FLOAT, + lang.Add: token.ADD, + lang.Sub: token.SUB, + lang.Mul: token.MUL, + lang.Quo: token.QUO, + lang.Rem: token.REM, + lang.And: token.AND, + lang.Or: token.OR, + lang.Xor: token.XOR, + lang.Shl: token.SHL, + lang.Shr: token.SHR, + lang.AndNot: token.AND_NOT, + lang.Equal: token.EQL, + lang.Greater: token.GTR, + lang.Less: token.LSS, + lang.GreaterEqual: token.GEQ, + lang.LessEqual: token.LEQ, + lang.NotEqual: token.NEQ, + lang.Plus: token.ADD, + lang.Minus: token.SUB, + lang.BitComp: token.XOR, + lang.Not: token.NOT, +} + func (p *Parser) ParseType(in Tokens) (out Tokens, err error) { if len(in) < 2 { return out, missingTypeError @@ -91,7 +268,6 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) { if len(values) == 1 && len(values[0]) == 0 { values = nil } - log.Println("ParseVar:", vars, values, len(values)) for i, v := range values { if v, err = p.ParseExpr(v); err != nil { return out, err diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go index 3a5598e..7ac1212 100644 --- a/parser/interpreter_test.go +++ b/parser/interpreter_test.go @@ -164,6 +164,21 @@ func TestSwitch(t *testing.T) { }) } +func TestConst(t *testing.T) { + src0 := `const ( + a = iota + b + c +) +` + run(t, []etest{ + {src: "const a = 1+2; a", res: "3"}, + {src: "const a, b = 1, 2; a+b", res: "3"}, + + {src: src0 + "c", res: "2"}, + }) +} + func TestType(t *testing.T) { run(t, []etest{ {src: "type 
t int; var a t = 1; a", res: "1"}, diff --git a/parser/parse.go b/parser/parse.go index 7de73b6..131e9c0 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -75,6 +75,8 @@ func (p *Parser) ParseStmt(in Tokens) (out Tokens, err error) { return p.ParseBreak(in) case lang.Continue: return p.ParseContinue(in) + case lang.Const: + return p.ParseConst(in) case lang.For: return p.ParseFor(in) case lang.Func: diff --git a/parser/symbol.go b/parser/symbol.go index d7c05f1..f707feb 100644 --- a/parser/symbol.go +++ b/parser/symbol.go @@ -2,6 +2,7 @@ package parser import ( "fmt" + "go/constant" "reflect" "strings" ) @@ -23,6 +24,7 @@ type symbol struct { kind symKind index int // address of symbol in frame value any + cval constant.Value Type reflect.Type local bool // if true address is relative to local frame, otherwise global used bool @@ -62,7 +64,7 @@ func initUniverse() map[string]*symbol { "string": {kind: symType, index: unsetAddr, Type: reflect.TypeOf((*string)(nil)).Elem()}, "nil": {index: unsetAddr}, - "iota": {index: unsetAddr, value: 0}, + "iota": {kind: symConst, index: unsetAddr}, "true": {index: unsetAddr, value: true, Type: reflect.TypeOf(true)}, "false": {index: unsetAddr, value: false, Type: reflect.TypeOf(false)}, diff --git a/vm/vm.go b/vm/vm.go index ba1f97e..f7f4d78 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -71,12 +71,14 @@ var strop = [...]string{ // for VM tracing. Subi: "Subi", } +type Code [][]int64 + // Machine represents a virtual machine. type Machine struct { - code [][]int64 // code to execute - mem []any // memory, as a stack - ip, fp int // instruction and frame pointer - ic uint64 // instruction counter, incremented at each instruction executed + code Code // code to execute + mem []any // memory, as a stack + ip, fp int // instruction and frame pointer + ic uint64 // instruction counter, incremented at each instruction executed // flags uint // to set options such as restrict CallX, etc... } -- cgit v1.2.3