From f40a1c23467eef36f53635e525f8b25f591e8a45 Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Sat, 29 Nov 2025 19:46:34 +0100 Subject: chore: shorter name for packages, simpilfy vm values --- .golangci.yaml | 2 + Makefile | 2 +- comp/compiler.go | 522 ++++++++++++++++++++++++++++++++++++++++ compiler/compiler.go | 522 ---------------------------------------- interp/dump_test.go | 49 ++++ interp/interpreter.go | 55 +++++ interp/interpreter_test.go | 263 ++++++++++++++++++++ interp/repl.go | 35 +++ interpreter/dump_test.go | 49 ---- interpreter/interpreter.go | 55 ----- interpreter/interpreter_test.go | 263 -------------------- interpreter/repl.go | 35 --- main.go | 12 +- vm/type.go | 8 +- vm/vm.go | 60 ++--- vm/vm_test.go | 2 +- 16 files changed, 968 insertions(+), 966 deletions(-) create mode 100644 comp/compiler.go delete mode 100644 compiler/compiler.go create mode 100644 interp/dump_test.go create mode 100644 interp/interpreter.go create mode 100644 interp/interpreter_test.go create mode 100644 interp/repl.go delete mode 100644 interpreter/dump_test.go delete mode 100644 interpreter/interpreter.go delete mode 100644 interpreter/interpreter_test.go delete mode 100644 interpreter/repl.go diff --git a/.golangci.yaml b/.golangci.yaml index 34d9d92..ade2ad1 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,9 +1,11 @@ version: "2" linters: enable: + - errcheck - gocritic - godot - gosec + - govet - ineffassign - misspell - modernize diff --git a/Makefile b/Makefile index 70b2f3a..1d4e5c8 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ lint: # Run tests with race detector, measure coverage. test: - go test -race -covermode=atomic -coverpkg=./... -coverprofile=cover.out ./interpreter + go test -race -covermode=atomic -coverpkg=./... -coverprofile=cover.out ./interp # Open coverage info in browser cover: test diff --git a/comp/compiler.go b/comp/compiler.go new file mode 100644 index 0000000..610d4a1 --- /dev/null +++ b/comp/compiler.go @@ -0,0 +1,522 @@ +// Package comp implements a byte code generator targeting the vm. +package comp + +import ( + "fmt" + "log" + "os" + "path" + "reflect" + "runtime" + "strconv" + + "github.com/mvertes/parscan/lang" + "github.com/mvertes/parscan/parser" + "github.com/mvertes/parscan/scanner" + "github.com/mvertes/parscan/vm" +) + +// Compiler represents the state of a compiler. +type Compiler struct { + *parser.Parser + vm.Code // produced code, to fill VM with + Data []vm.Value // produced data, will be at the bottom of VM stack + Entry int // offset in Code to start execution from (skip function defintions) + + strings map[string]int // locations of strings in Data +} + +// NewCompiler returns a new compiler state for a given scanner. +func NewCompiler(spec *lang.Spec) *Compiler { + return &Compiler{ + Parser: parser.NewParser(spec, true), + Entry: -1, + strings: map[string]int{}, + } +} + +// AddSym adds a new named value to the compiler symbol table, and returns its index in memory. +func (c *Compiler) AddSym(name string, value vm.Value) int { + p := len(c.Data) + c.Data = append(c.Data, value) + c.AddSymbol(p, name, value, parser.SymValue, nil, false) + return p +} + +// Generate generates vm code and data from parsed tokens. +func (c *Compiler) Generate(tokens parser.Tokens) (err error) { + log.Println("Codegen tokens:", tokens) + fixList := parser.Tokens{} // list of tokens to fix after all necessary information is gathered + stack := []*parser.Symbol{} // for symbolic evaluation, type checking, etc + + emit := func(t scanner.Token, op vm.Op, arg ...int) { + _, file, line, _ := runtime.Caller(1) + fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg) + c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg}) + } + push := func(s *parser.Symbol) { stack = append(stack, s) } + pop := func() *parser.Symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } + + for i, t := range tokens { + switch t.Tok { + case lang.Int: + n, err := strconv.Atoi(t.Str) + if err != nil { + return err + } + push(&parser.Symbol{Kind: parser.SymConst, Value: vm.ValueOf(n), Type: vm.TypeOf(0)}) + emit(t, vm.Push, n) + + case lang.String: + s := t.Block() + v := vm.Value{Type: vm.TypeOf(s), Value: reflect.ValueOf(s)} + i, ok := c.strings[s] + if !ok { + i = len(c.Data) + c.Data = append(c.Data, v) + c.strings[s] = i + } + push(&parser.Symbol{Kind: parser.SymConst, Value: v}) + emit(t, vm.Dup, i) + + case lang.Add: + push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) + emit(t, vm.Add) + + case lang.Mul: + push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) + emit(t, vm.Mul) + + case lang.Sub: + push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) + emit(t, vm.Sub) + + case lang.Minus: + emit(t, vm.Push, 0) + emit(t, vm.Sub) + + case lang.Not: + emit(t, vm.Not) + + case lang.Plus: + // Unary '+' is idempotent. Nothing to do. + + case lang.Addr: + push(&parser.Symbol{Type: vm.PointerTo(pop().Type)}) + emit(t, vm.Addr) + + case lang.Deref: + push(&parser.Symbol{Type: pop().Type.Elem()}) + emit(t, vm.Deref) + + case lang.Index: + push(&parser.Symbol{Type: pop().Type.Elem()}) + emit(t, vm.Index) + + case lang.Greater: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Greater) + + case lang.Less: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Lower) + + case lang.Call: + s := pop() + if s.Kind != parser.SymValue { + typ := s.Type + // TODO: pop input types (careful with variadic function). + for i := 0; i < typ.Rtype.NumOut(); i++ { + push(&parser.Symbol{Type: typ.Out(i)}) + } + emit(t, vm.Call) + break + } + push(s) + fallthrough // A symValue must be called through callX. + + case lang.CallX: + rtyp := pop().Value.Value.Type() + // TODO: pop input types (careful with variadic function). + for i := 0; i < rtyp.NumOut(); i++ { + push(&parser.Symbol{Type: &vm.Type{Rtype: rtyp.Out(i)}}) + } + emit(t, vm.CallX, t.Beg) + + case lang.Composite: + log.Println("COMPOSITE") + /* + d := pop() + switch d.Type.Rtype.Kind() { + case reflect.Struct: + // nf := d.typ.Rtype.NumField() + // emit(t.Pos, vm.New, d.index, c.typeSym(d.typ).index) + emit(t, vm.Field, 0) + emit(t, vm.Vassign) + emit(t, vm.Fdup, 2) + emit(t, vm.Field, 1) + emit(t, vm.Vassign) + emit(t, vm.Pop, 1) + // emit(t, vm.Fdup, 2) + // Assume an element list with no keys, one per struct field in order + } + */ + + case lang.Grow: + emit(t, vm.Grow, t.Beg) + + case lang.Define: + // TODO: support assignment to local, composite objects. + st := tokens[i-1] + l := len(c.Data) + d := pop() + typ := d.Type + if typ == nil { + typ = d.Value.Type + } + v := vm.NewValue(typ) + c.AddSymbol(l, st.Str, v, parser.SymVar, typ, false) + c.Data = append(c.Data, v) + emit(t, vm.Assign, l) + + case lang.Assign: + st := tokens[i-1] + if st.Tok == lang.Period || st.Tok == lang.Index { + emit(t, vm.Vassign) + break + } + s, ok := c.Symbols[st.Str] + if !ok { + return fmt.Errorf("symbol not found: %s", st.Str) + } + d := pop() + typ := d.Type + if typ == nil { + typ = d.Value.Type + } + if s.Type == nil { + s.Type = typ + s.Value = vm.NewValue(typ) + } + if s.Local { + if !s.Used { + emit(st, vm.New, s.Index, c.typeSym(s.Type).Index) + s.Used = true + } + emit(st, vm.Fassign, s.Index) + break + } + if s.Index == parser.UnsetAddr { + s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } + emit(st, vm.Assign, s.Index) + + case lang.Equal: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Equal) + + case lang.EqualSet: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.EqualSet) + + case lang.Ident: + if i < len(tokens)-1 { + switch t1 := tokens[i+1]; t1.Tok { + case lang.Define, lang.Assign, lang.Colon: + continue + } + } + s, ok := c.Symbols[t.Str] + if !ok { + return fmt.Errorf("symbol not found: %s", t.Str) + } + push(s) + if s.Kind == parser.SymPkg { + break + } + if s.Local { + emit(t, vm.Fdup, s.Index) + } else { + if s.Index == parser.UnsetAddr { + s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } + emit(t, vm.Dup, s.Index) + } + + case lang.Label: + lc := len(c.Code) + s, ok := c.Symbols[t.Str] + if ok { + s.Value = vm.ValueOf(lc) + if s.Kind == parser.SymFunc { + // label is a function entry point, register its code address in data. + s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } else { + c.Data[s.Index] = s.Value + } + } else { + c.Symbols[t.Str] = &parser.Symbol{Kind: parser.SymLabel, Value: vm.ValueOf(lc)} + } + + case lang.JumpFalse: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Int()) - len(c.Code) + } + emit(t, vm.JumpFalse, i) + + case lang.JumpSetFalse: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Int()) - len(c.Code) + } + emit(t, vm.JumpSetFalse, i) + + case lang.JumpSetTrue: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Int()) - len(c.Code) + } + emit(t, vm.JumpSetTrue, i) + + case lang.Goto: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Int()) - len(c.Code) + } + emit(t, vm.Jump, i) + + case lang.Period: + s := pop() + switch s.Kind { + case parser.SymPkg: + p, ok := parser.Packages[s.PkgPath] + if !ok { + return fmt.Errorf("package not found: %s", s.PkgPath) + } + v, ok := p[t.Str[1:]] + if !ok { + return fmt.Errorf("symbol not found in package %s: %s", s.PkgPath, t.Str[1:]) + } + name := s.PkgPath + t.Str + var l int + sym, _, ok := c.GetSym(name, "") + if ok { + l = sym.Index + } else { + l = len(c.Data) + c.Data = append(c.Data, v) + c.AddSymbol(l, name, v, parser.SymValue, v.Type, false) + sym = c.Symbols[name] + } + push(sym) + emit(t, vm.Dup, l) + default: + if f, ok := s.Type.Rtype.FieldByName(t.Str[1:]); ok { + emit(t, vm.Field, f.Index...) + break + } + return fmt.Errorf("field or method not found: %s", t.Str[1:]) + } + + case lang.Return: + emit(t, vm.Return, t.Beg, t.End) + + default: + return fmt.Errorf("generate: unsupported token %v", t) + } + } + + // Finally we fix unresolved labels for jump destinations. + for _, t := range fixList { + s, ok := c.Symbols[t.Str] + if !ok { + return fmt.Errorf("label not found: %q", t.Str) + } + c.Code[t.Beg].Arg[0] = int(s.Value.Int()) - t.Beg + } + return err +} +func arithmeticOpType(s1, _ *parser.Symbol) *vm.Type { return parser.SymbolType(s1) } +func booleanOpType(_, _ *parser.Symbol) *vm.Type { return vm.TypeOf(true) } + +// PrintCode pretty prints the generated code. +func (c *Compiler) PrintCode() { + labels := map[int][]string{} // labels indexed by code location + data := map[int]string{} // data indexed by frame location + + for name, sym := range c.Symbols { + if sym.Kind == parser.SymLabel || sym.Kind == parser.SymFunc { + i := int(sym.Value.Int()) + labels[i] = append(labels[i], name) + } + if sym.Used { + data[sym.Index] = name + } + } + + fmt.Fprintln(os.Stderr, "# Code:") + for i, l := range c.Code { + for _, label := range labels[i] { + fmt.Fprintln(os.Stderr, label+":") + } + extra := "" + switch l.Op { + case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: + if d, ok := labels[i+l.Arg[0]]; ok { + extra = "// " + d[0] + } + case vm.Dup, vm.Assign, vm.Fdup, vm.Fassign: + if d, ok := data[l.Arg[0]]; ok { + extra = "// " + d + } + } + fmt.Fprintf(os.Stderr, "%4d %-14v %v\n", i, l, extra) + } + + for _, label := range labels[len(c.Code)] { + fmt.Fprintln(os.Stderr, label+":") + } + fmt.Fprintln(os.Stderr, "# End code") +} + +type entry struct { + name string + *parser.Symbol +} + +func (e entry) String() string { + if e.Symbol != nil { + return fmt.Sprintf("name: %s,local: %t, i: %d, k: %d, t: %s, v: %v", + e.name, + e.Local, + e.Index, + e.Kind, + e.Type, + e.Value, + ) + } + return e.name +} + +// PrintData pretty prints the generated global data symbols in compiler. +func (c *Compiler) PrintData() { + dict := c.symbolsByIndex() + + fmt.Fprintln(os.Stderr, "# Data:") + for i, d := range c.Data { + fmt.Fprintf(os.Stderr, "%4d %T %v %v\n", i, d.Interface(), d.Value, dict[i]) + } +} + +func (c *Compiler) symbolsByIndex() map[int]entry { + dict := map[int]entry{} + for name, sym := range c.Symbols { + if sym.Index == parser.UnsetAddr { + continue + } + dict[sym.Index] = entry{name, sym} + } + return dict +} + +// Dump represents the state of a data dump. +type Dump struct { + Values []*DumpValue +} + +// DumpValue is a value of a dump state. +type DumpValue struct { + Index int + Name string + Kind int + Type string + Value any +} + +// Dump creates a snapshot of the execution state of global variables. +// This method is specifically implemented in the Compiler to minimize the coupling between +// the dump format and other components. By situating the dump logic in the Compiler, +// it relies solely on the program being executed and the indexing algorithm used for ordering variables +// (currently, this is an integer that corresponds to the order of variables in the program). +// This design choice allows the Virtual Machine (VM) to evolve its memory management strategies +// without compromising backward compatibility with dumps generated by previous versions. +func (c *Compiler) Dump() *Dump { + dict := c.symbolsByIndex() + dv := make([]*DumpValue, len(c.Data)) + for i, d := range c.Data { + e := dict[i] + dv[i] = &DumpValue{ + Index: e.Index, + Name: e.name, + Kind: int(e.Kind), + Type: e.Type.Name, + Value: d.Interface(), + } + } + return &Dump{Values: dv} +} + +// ApplyDump sets previously saved dump, restoring the state of global variables. +func (c *Compiler) ApplyDump(d *Dump) error { + dict := c.symbolsByIndex() + for _, dv := range d.Values { + // do all the checks to be sure we are applying the correct values + e, ok := dict[dv.Index] + if !ok { + return fmt.Errorf("entry not found on index %d", dv.Index) + } + + if dv.Name != e.name || + dv.Type != e.Type.Name || + dv.Kind != int(e.Kind) { + return fmt.Errorf("entry with index %d does not match with provided entry. "+ + "dumpValue: %s, %s, %d. memoryValue: %s, %s, %d", + dv.Index, + dv.Name, dv.Type, dv.Kind, + e.name, e.Type, e.Kind) + } + + if dv.Index >= len(c.Data) { + return fmt.Errorf("index (%d) bigger than memory (%d)", dv.Index, len(c.Data)) + } + + if !c.Data[dv.Index].CanSet() { + return fmt.Errorf("value %v cannot be set", dv.Value) + } + + c.Data[dv.Index].Set(reflect.ValueOf(dv.Value)) + } + return nil +} + +func (c *Compiler) typeSym(t *vm.Type) *parser.Symbol { + tsym, ok := c.Symbols[t.Rtype.String()] + if !ok { + tsym = &parser.Symbol{Index: parser.UnsetAddr, Kind: parser.SymType, Type: t} + c.Symbols[t.Rtype.String()] = tsym + } + if tsym.Index == parser.UnsetAddr { + tsym.Index = len(c.Data) + c.Data = append(c.Data, vm.NewValue(t)) + } + return tsym +} diff --git a/compiler/compiler.go b/compiler/compiler.go deleted file mode 100644 index 679a886..0000000 --- a/compiler/compiler.go +++ /dev/null @@ -1,522 +0,0 @@ -// Package compiler implements a compiler targeting the vm. -package compiler - -import ( - "fmt" - "log" - "os" - "path" - "reflect" - "runtime" - "strconv" - - "github.com/mvertes/parscan/lang" - "github.com/mvertes/parscan/parser" - "github.com/mvertes/parscan/scanner" - "github.com/mvertes/parscan/vm" -) - -// Compiler represents the state of a compiler. -type Compiler struct { - *parser.Parser - vm.Code // produced code, to fill VM with - Data []vm.Value // produced data, will be at the bottom of VM stack - Entry int // offset in Code to start execution from (skip function defintions) - - strings map[string]int // locations of strings in Data -} - -// NewCompiler returns a new compiler state for a given scanner. -func NewCompiler(spec *lang.Spec) *Compiler { - return &Compiler{ - Parser: parser.NewParser(spec, true), - Entry: -1, - strings: map[string]int{}, - } -} - -// AddSym adds a new named value to the compiler symbol table, and returns its index in memory. -func (c *Compiler) AddSym(name string, value vm.Value) int { - p := len(c.Data) - c.Data = append(c.Data, value) - c.AddSymbol(p, name, value, parser.SymValue, nil, false) - return p -} - -// Codegen generates vm code from parsed tokens. -func (c *Compiler) Codegen(tokens parser.Tokens) (err error) { - log.Println("Codegen tokens:", tokens) - fixList := parser.Tokens{} // list of tokens to fix after all necessary information is gathered - stack := []*parser.Symbol{} // for symbolic evaluation, type checking, etc - - emit := func(t scanner.Token, op vm.Op, arg ...int) { - _, file, line, _ := runtime.Caller(1) - fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg) - c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg}) - } - push := func(s *parser.Symbol) { stack = append(stack, s) } - pop := func() *parser.Symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } - - for i, t := range tokens { - switch t.Tok { - case lang.Int: - n, err := strconv.Atoi(t.Str) - if err != nil { - return err - } - push(&parser.Symbol{Kind: parser.SymConst, Value: vm.ValueOf(n), Type: vm.TypeOf(0)}) - emit(t, vm.Push, n) - - case lang.String: - s := t.Block() - v := vm.Value{Data: reflect.ValueOf(s), Type: vm.TypeOf(s)} - i, ok := c.strings[s] - if !ok { - i = len(c.Data) - c.Data = append(c.Data, v) - c.strings[s] = i - } - push(&parser.Symbol{Kind: parser.SymConst, Value: v}) - emit(t, vm.Dup, i) - - case lang.Add: - push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) - emit(t, vm.Add) - - case lang.Mul: - push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) - emit(t, vm.Mul) - - case lang.Sub: - push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) - emit(t, vm.Sub) - - case lang.Minus: - emit(t, vm.Push, 0) - emit(t, vm.Sub) - - case lang.Not: - emit(t, vm.Not) - - case lang.Plus: - // Unary '+' is idempotent. Nothing to do. - - case lang.Addr: - push(&parser.Symbol{Type: vm.PointerTo(pop().Type)}) - emit(t, vm.Addr) - - case lang.Deref: - push(&parser.Symbol{Type: pop().Type.Elem()}) - emit(t, vm.Deref) - - case lang.Index: - push(&parser.Symbol{Type: pop().Type.Elem()}) - emit(t, vm.Index) - - case lang.Greater: - push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) - emit(t, vm.Greater) - - case lang.Less: - push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) - emit(t, vm.Lower) - - case lang.Call: - s := pop() - if s.Kind != parser.SymValue { - typ := s.Type - // TODO: pop input types (careful with variadic function). - for i := 0; i < typ.Rtype.NumOut(); i++ { - push(&parser.Symbol{Type: typ.Out(i)}) - } - emit(t, vm.Call) - break - } - push(s) - fallthrough // A symValue must be called through callX. - - case lang.CallX: - rtyp := pop().Value.Data.Type() - // TODO: pop input types (careful with variadic function). - for i := 0; i < rtyp.NumOut(); i++ { - push(&parser.Symbol{Type: &vm.Type{Rtype: rtyp.Out(i)}}) - } - emit(t, vm.CallX, t.Beg) - - case lang.Composite: - log.Println("COMPOSITE") - /* - d := pop() - switch d.typ.Rtype.Kind() { - case reflect.Struct: - // nf := d.typ.Rtype.NumField() - // emit(t.Pos, vm.New, d.index, c.typeSym(d.typ).index) - emit(t, vm.Field, 0) - emit(t, vm.Vassign) - emit(t, vm.Fdup, 2) - emit(t, vm.Field, 1) - emit(t, vm.Vassign) - emit(t, vm.Pop, 1) - // emit(t, vm.Fdup, 2) - // Assume an element list with no keys, one per struct field in order - } - */ - - case lang.Grow: - emit(t, vm.Grow, t.Beg) - - case lang.Define: - // TODO: support assignment to local, composite objects. - st := tokens[i-1] - l := len(c.Data) - d := pop() - typ := d.Type - if typ == nil { - typ = d.Value.Type - } - v := vm.NewValue(typ) - c.AddSymbol(l, st.Str, v, parser.SymVar, typ, false) - c.Data = append(c.Data, v) - emit(t, vm.Assign, l) - - case lang.Assign: - st := tokens[i-1] - if st.Tok == lang.Period || st.Tok == lang.Index { - emit(t, vm.Vassign) - break - } - s, ok := c.Symbols[st.Str] - if !ok { - return fmt.Errorf("symbol not found: %s", st.Str) - } - d := pop() - typ := d.Type - if typ == nil { - typ = d.Value.Type - } - if s.Type == nil { - s.Type = typ - s.Value = vm.NewValue(typ) - } - if s.Local { - if !s.Used { - emit(st, vm.New, s.Index, c.typeSym(s.Type).Index) - s.Used = true - } - emit(st, vm.Fassign, s.Index) - break - } - if s.Index == parser.UnsetAddr { - s.Index = len(c.Data) - c.Data = append(c.Data, s.Value) - } - emit(st, vm.Assign, s.Index) - - case lang.Equal: - push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) - emit(t, vm.Equal) - - case lang.EqualSet: - push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) - emit(t, vm.EqualSet) - - case lang.Ident: - if i < len(tokens)-1 { - switch t1 := tokens[i+1]; t1.Tok { - case lang.Define, lang.Assign, lang.Colon: - continue - } - } - s, ok := c.Symbols[t.Str] - if !ok { - return fmt.Errorf("symbol not found: %s", t.Str) - } - push(s) - if s.Kind == parser.SymPkg { - break - } - if s.Local { - emit(t, vm.Fdup, s.Index) - } else { - if s.Index == parser.UnsetAddr { - s.Index = len(c.Data) - c.Data = append(c.Data, s.Value) - } - emit(t, vm.Dup, s.Index) - } - - case lang.Label: - lc := len(c.Code) - s, ok := c.Symbols[t.Str] - if ok { - s.Value = vm.ValueOf(lc) - if s.Kind == parser.SymFunc { - // label is a function entry point, register its code address in data. - s.Index = len(c.Data) - c.Data = append(c.Data, s.Value) - } else { - c.Data[s.Index] = s.Value - } - } else { - c.Symbols[t.Str] = &parser.Symbol{Kind: parser.SymLabel, Value: vm.ValueOf(lc)} - } - - case lang.JumpFalse: - var i int - if s, ok := c.Symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.Value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpFalse, i) - - case lang.JumpSetFalse: - var i int - if s, ok := c.Symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.Value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpSetFalse, i) - - case lang.JumpSetTrue: - var i int - if s, ok := c.Symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.Value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpSetTrue, i) - - case lang.Goto: - var i int - if s, ok := c.Symbols[t.Str]; !ok { - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.Value.Data.Int()) - len(c.Code) - } - emit(t, vm.Jump, i) - - case lang.Period: - s := pop() - switch s.Kind { - case parser.SymPkg: - p, ok := parser.Packages[s.PkgPath] - if !ok { - return fmt.Errorf("package not found: %s", s.PkgPath) - } - v, ok := p[t.Str[1:]] - if !ok { - return fmt.Errorf("symbol not found in package %s: %s", s.PkgPath, t.Str[1:]) - } - name := s.PkgPath + t.Str - var l int - sym, _, ok := c.GetSym(name, "") - if ok { - l = sym.Index - } else { - l = len(c.Data) - c.Data = append(c.Data, v) - c.AddSymbol(l, name, v, parser.SymValue, v.Type, false) - sym = c.Symbols[name] - } - push(sym) - emit(t, vm.Dup, l) - default: - if f, ok := s.Type.Rtype.FieldByName(t.Str[1:]); ok { - emit(t, vm.Field, f.Index...) - break - } - return fmt.Errorf("field or method not found: %s", t.Str[1:]) - } - - case lang.Return: - emit(t, vm.Return, t.Beg, t.End) - - default: - return fmt.Errorf("Codegen: unsupported token %v", t) - } - } - - // Finally we fix unresolved labels for jump destinations. - for _, t := range fixList { - s, ok := c.Symbols[t.Str] - if !ok { - return fmt.Errorf("label not found: %q", t.Str) - } - c.Code[t.Beg].Arg[0] = int(s.Value.Data.Int()) - t.Beg - } - return err -} -func arithmeticOpType(s1, _ *parser.Symbol) *vm.Type { return parser.SymbolType(s1) } -func booleanOpType(_, _ *parser.Symbol) *vm.Type { return vm.TypeOf(true) } - -// PrintCode pretty prints the generated code. -func (c *Compiler) PrintCode() { - labels := map[int][]string{} // labels indexed by code location - data := map[int]string{} // data indexed by frame location - - for name, sym := range c.Symbols { - if sym.Kind == parser.SymLabel || sym.Kind == parser.SymFunc { - i := int(sym.Value.Data.Int()) - labels[i] = append(labels[i], name) - } - if sym.Used { - data[sym.Index] = name - } - } - - fmt.Fprintln(os.Stderr, "# Code:") - for i, l := range c.Code { - for _, label := range labels[i] { - fmt.Fprintln(os.Stderr, label+":") - } - extra := "" - switch l.Op { - case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: - if d, ok := labels[i+l.Arg[0]]; ok { - extra = "// " + d[0] - } - case vm.Dup, vm.Assign, vm.Fdup, vm.Fassign: - if d, ok := data[l.Arg[0]]; ok { - extra = "// " + d - } - } - fmt.Fprintf(os.Stderr, "%4d %-14v %v\n", i, l, extra) - } - - for _, label := range labels[len(c.Code)] { - fmt.Fprintln(os.Stderr, label+":") - } - fmt.Fprintln(os.Stderr, "# End code") -} - -type entry struct { - name string - *parser.Symbol -} - -func (e entry) String() string { - if e.Symbol != nil { - return fmt.Sprintf("name: %s,local: %t, i: %d, k: %d, t: %s, v: %v", - e.name, - e.Local, - e.Index, - e.Kind, - e.Type, - e.Value, - ) - } - return e.name -} - -// PrintData pretty prints the generated global data symbols in compiler. -func (c *Compiler) PrintData() { - dict := c.symbolsByIndex() - - fmt.Fprintln(os.Stderr, "# Data:") - for i, d := range c.Data { - fmt.Fprintf(os.Stderr, "%4d %T %v %v\n", i, d.Data.Interface(), d.Data, dict[i]) - } -} - -func (c *Compiler) symbolsByIndex() map[int]entry { - dict := map[int]entry{} - for name, sym := range c.Symbols { - if sym.Index == parser.UnsetAddr { - continue - } - dict[sym.Index] = entry{name, sym} - } - return dict -} - -// Dump represents the state of a data dump. -type Dump struct { - Values []*DumpValue -} - -// DumpValue is a value of a dump state. -type DumpValue struct { - Index int - Name string - Kind int - Type string - Value any -} - -// Dump creates a snapshot of the execution state of global variables. -// This method is specifically implemented in the Compiler to minimize the coupling between -// the dump format and other components. By situating the dump logic in the Compiler, -// it relies solely on the program being executed and the indexing algorithm used for ordering variables -// (currently, this is an integer that corresponds to the order of variables in the program). -// This design choice allows the Virtual Machine (VM) to evolve its memory management strategies -// without compromising backward compatibility with dumps generated by previous versions. -func (c *Compiler) Dump() *Dump { - dict := c.symbolsByIndex() - dv := make([]*DumpValue, len(c.Data)) - for i, d := range c.Data { - e := dict[i] - dv[i] = &DumpValue{ - Index: e.Index, - Name: e.name, - Kind: int(e.Kind), - Type: e.Type.Name, - Value: d.Data.Interface(), - } - } - return &Dump{Values: dv} -} - -// ApplyDump sets previously saved dump, restoring the state of global variables. -func (c *Compiler) ApplyDump(d *Dump) error { - dict := c.symbolsByIndex() - for _, dv := range d.Values { - // do all the checks to be sure we are applying the correct values - e, ok := dict[dv.Index] - if !ok { - return fmt.Errorf("entry not found on index %d", dv.Index) - } - - if dv.Name != e.name || - dv.Type != e.Type.Name || - dv.Kind != int(e.Kind) { - return fmt.Errorf("entry with index %d does not match with provided entry. "+ - "dumpValue: %s, %s, %d. memoryValue: %s, %s, %d", - dv.Index, - dv.Name, dv.Type, dv.Kind, - e.name, e.Type, e.Kind) - } - - if dv.Index >= len(c.Data) { - return fmt.Errorf("index (%d) bigger than memory (%d)", dv.Index, len(c.Data)) - } - - if !c.Data[dv.Index].Data.CanSet() { - return fmt.Errorf("value %v cannot be set", dv.Value) - } - - c.Data[dv.Index].Data.Set(reflect.ValueOf(dv.Value)) - } - return nil -} - -func (c *Compiler) typeSym(t *vm.Type) *parser.Symbol { - tsym, ok := c.Symbols[t.Rtype.String()] - if !ok { - tsym = &parser.Symbol{Index: parser.UnsetAddr, Kind: parser.SymType, Type: t} - c.Symbols[t.Rtype.String()] = tsym - } - if tsym.Index == parser.UnsetAddr { - tsym.Index = len(c.Data) - c.Data = append(c.Data, vm.NewValue(t)) - } - return tsym -} diff --git a/interp/dump_test.go b/interp/dump_test.go new file mode 100644 index 0000000..fbaa778 --- /dev/null +++ b/interp/dump_test.go @@ -0,0 +1,49 @@ +package interp_test + +import ( + "testing" + + "github.com/mvertes/parscan/interp" + "github.com/mvertes/parscan/lang/golang" +) + +func TestDump(t *testing.T) { + initProgram := "var a int = 2+1; a" + intp := interp.NewInterpreter(golang.GoSpec) + r, e := intp.Eval(initProgram) + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + r, e = intp.Eval("a = 100") + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + d := intp.Dump() + t.Log(d) + + intp = interp.NewInterpreter(golang.GoSpec) + r, e = intp.Eval(initProgram) + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + e = intp.ApplyDump(d) + if e != nil { + t.Fatal(e) + } + + r, e = intp.Eval("a = a + 1;a") + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + if r.Interface() != int(101) { + t.Fatalf("unexpected result: %v", r) + } +} diff --git a/interp/interpreter.go b/interp/interpreter.go new file mode 100644 index 0000000..8d372c3 --- /dev/null +++ b/interp/interpreter.go @@ -0,0 +1,55 @@ +// Package interp implements an interpreter. +package interp + +import ( + "reflect" + + "github.com/mvertes/parscan/comp" + "github.com/mvertes/parscan/lang" + "github.com/mvertes/parscan/vm" +) + +const debug = true + +// Interp represents the state of an interpreter. +type Interp struct { + *comp.Compiler + *vm.Machine +} + +// NewInterpreter returns a new interpreter. +func NewInterpreter(s *lang.Spec) *Interp { + return &Interp{comp.NewCompiler(s), &vm.Machine{}} +} + +// Eval evaluates code string and return the last produced value if any, or an error. +func (i *Interp) Eval(src string) (res reflect.Value, err error) { + codeOffset := len(i.Code) + dataOffset := 0 + if codeOffset > 0 { + // All data must be copied to the VM the first time only (re-entrance). + dataOffset = len(i.Data) + } + i.PopExit() // Remove last exit from previous run (re-entrance). + + t, err := i.Parse(src) + if err != nil { + return res, err + } + if err = i.Generate(t); err != nil { + return res, err + } + i.Push(i.Data[dataOffset:]...) + i.PushCode(i.Code[codeOffset:]...) + if s, ok := i.Symbols["main"]; ok { + i.PushCode(vm.Instruction{Op: vm.Calli, Arg: []int{int(i.Data[s.Index].Int())}}) + } + i.PushCode(vm.Instruction{Op: vm.Exit}) + i.SetIP(max(codeOffset, i.Entry)) + if debug { + i.PrintData() + i.PrintCode() + } + err = i.Run() + return i.Top().Value, err +} diff --git a/interp/interpreter_test.go b/interp/interpreter_test.go new file mode 100644 index 0000000..21e6274 --- /dev/null +++ b/interp/interpreter_test.go @@ -0,0 +1,263 @@ +package interp_test + +import ( + "fmt" + "log" + "testing" + + "github.com/mvertes/parscan/interp" + "github.com/mvertes/parscan/lang/golang" +) + +type etest struct { + src, res, err string + skip bool +} + +func init() { + log.SetFlags(log.Lshortfile) +} + +func gen(test etest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + if test.skip { + t.Skip() + } + intp := interp.NewInterpreter(golang.GoSpec) + errStr := "" + r, e := intp.Eval(test.src) + t.Log(r, e) + if e != nil { + errStr = e.Error() + } + if errStr != test.err { + t.Errorf("got error %#v, want error %#v", errStr, test.err) + } + if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res { + t.Errorf("got %#v, want %#v", res, test.res) + } + } +} + +func run(t *testing.T, tests []etest) { + for _, test := range tests { + t.Run("", gen(test)) + } +} + +func TestExpr(t *testing.T) { + run(t, []etest{ + {src: "", res: ""}, + {src: "1+2", res: "3"}, + {src: "1+", err: "block not terminated"}, + {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, + {src: "1+(2+3)", res: "6"}, + {src: "(1+2)+3", res: "6"}, + {src: "(6+(1+2)+3)+5", res: "17"}, + {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, + {src: "a := 2; a = 3; a", res: "3"}, + {src: "2 * 3 + 1 == 7", res: "true"}, + {src: "7 == 2 * 3 + 1", res: "true"}, + {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, + {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, + {src: "-2", res: "-2"}, + {src: "-2 + 5", res: "3"}, + {src: "5 + -2", res: "3"}, + {src: "!false", res: "true"}, + {src: `a := "hello"`, res: "hello"}, + }) +} + +func TestLogical(t *testing.T) { + run(t, []etest{ + {src: "true && false", res: "false"}, + {src: "true && true", res: "true"}, + {src: "true && true && false", res: "false"}, + {src: "false || true && true", res: "true"}, + {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, + {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, + {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, + {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, + {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, + {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, + }) +} + +func TestFunc(t *testing.T) { + run(t, []etest{ + {src: "func f() int {return 2}; a := f(); a", res: "2"}, + {src: "func f() int {return 2}; f()", res: "2"}, + {src: "func f(a int) int {return a+2}; f(3)", res: "5"}, + {src: "func f(a int) int {if a < 4 {a = 5}; return a}; f(3)", res: "5"}, + {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"}, + {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"}, + {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"}, + {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, + {src: "var a int; func f() {a = a+2}; f(); a", res: "2"}, + {src: "var f = func(a int) int {return a+3}; f(2)", res: "5"}, + }) +} + +func TestIf(t *testing.T) { + run(t, []etest{ + {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"}, + {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"}, + {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"}, + {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"}, + {src: "a := 1; if a > 0 && a < 2 { a = 3 }; a", res: "3"}, + {src: "a := 1; if a < 0 || a < 2 { a = 3 }; a", res: "3"}, + }) +} + +func TestFor(t *testing.T) { + run(t, []etest{ + {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, + {src: "func f() int {a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; return a}; f()", res: "3"}, + {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, + {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, + {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, + }) +} + +func TestGoto(t *testing.T) { + run(t, []etest{ + {src: ` +func f(a int) int { + a = a+1 + goto end + a = a+1 +end: + return a +} +f(3)`, res: "4"}, + }) +} + +func TestSwitch(t *testing.T) { + src0 := `func f(a int) int { + switch a { + default: a = 0 + case 1,2: a = a+1 + case 3: a = a+2; break; a = 3 + case 4: a = 10 + } + return a +} +` + src1 := `func f(a int) int { + switch { + case a < 3: return 2 + case a < 5: return 5 + default: a = 0 + } + return a +} +` + run(t, []etest{ + {src: src0 + "f(1)", res: "2"}, + {src: src0 + "f(2)", res: "3"}, + {src: src0 + "f(3)", res: "5"}, + {src: src0 + "f(4)", res: "10"}, + {src: src0 + "f(5)", res: "0"}, + + {src: src1 + "f(1)", res: "2"}, + {src: src1 + "f(4)", res: "5"}, + {src: src1 + "f(6)", res: "0"}, + }) +} + +func TestConst(t *testing.T) { + src0 := `const ( + a = iota + b + c +) +` + run(t, []etest{ + {src: "const a = 1+2; a", res: "3"}, + {src: "const a, b = 1, 2; a+b", res: "3"}, + {src: "const huge = 1 << 100; const four = huge >> 98; four", res: "4"}, + + {src: src0 + "c", res: "2"}, + }) +} + +func TestArray(t *testing.T) { + run(t, []etest{ + {src: "type T []int; var t T; t", res: "[]"}, + {src: "type T [3]int; var t T; t", res: "[0 0 0]"}, + {src: "type T [3]int; var t T; t[1] = 2; t", res: "[0 2 0]"}, + }) +} + +func TestPointer(t *testing.T) { + run(t, []etest{ + {src: "var a *int; a", res: ""}, + {src: "var a int; var b *int = &a; *b", res: "0"}, + {src: "var a int = 2; var b *int = &a; *b", res: "2"}, + }) +} + +func TestStruct(t *testing.T) { + run(t, []etest{ + {src: "type T struct {a string; b, c int}; var t T; t", res: "{ 0 0}"}, + {src: "type T struct {a int}; var t T; t.a", res: "0"}, + {src: "type T struct {a int}; var t T; t.a = 1; t.a", res: "1"}, + }) +} + +func TestType(t *testing.T) { + src0 := `type ( + I int + S string +) +` + run(t, []etest{ + {src: "type t int; var a t = 1; a", res: "1"}, + {src: "type t = int; var a t = 1; a", res: "1"}, + {src: src0 + `var s S = "xx"; s`, res: "xx"}, + }) +} + +func TestVar(t *testing.T) { + run(t, []etest{ + {src: "var a int; a", res: "0"}, + {src: "var a, b, c int; a", res: "0"}, + {src: "var a, b, c int; a + b", res: "0"}, + {src: "var a, b, c int; a + b + c", res: "0"}, + {src: "var a int = 2+1; a", res: "3"}, + {src: "var a, b int = 2, 5; a+b", res: "7"}, + {src: "var x = 5; x", res: "5"}, + {src: "var a = 1; func f() int { var a, b int = 3, 4; return a+b}; a+f()", res: "8"}, + {src: `var a = "hello"; a`, res: "hello"}, + {src: `var ( + a, b int = 4+1, 3 + c = 8 +); a+b+c`, res: "16"}, + }) +} + +func TestImport(t *testing.T) { + src0 := `import ( + "fmt" +) +` + run(t, []etest{ + {src: "fmt.Println(4)", err: "symbol not found: fmt"}, + {src: `import "xxx"`, err: "package not found: xxx"}, + {src: `import "fmt"; fmt.Println(4)`, res: ""}, + {src: src0 + "fmt.Println(4)", res: ""}, + {src: `func main() {import "fmt"; fmt.Println("hello")}`, err: "unexpected import"}, + {src: `import m "fmt"; m.Println(4)`, res: ""}, + {src: `import . "fmt"; Println(4)`, res: ""}, + }) +} + +func TestComposite(t *testing.T) { + run(t, []etest{ + {src: "type T struct{}; t := T{}; t", res: "{}"}, + {src: "t := struct{}{}; t", res: "{}"}, + // {src: `type T struct{N int; S string}; t := T{2, "foo"}`, res: `{2 foo}`}, + }) +} diff --git a/interp/repl.go b/interp/repl.go new file mode 100644 index 0000000..b8a5338 --- /dev/null +++ b/interp/repl.go @@ -0,0 +1,35 @@ +package interp + +import ( + "bufio" + "errors" + "fmt" + "io" + + "github.com/mvertes/parscan/scanner" +) + +// Repl executes an interactive line oriented Read Eval Print Loop (REPL). +func (i *Interp) Repl(in io.Reader) (err error) { + liner := bufio.NewScanner(in) + text, prompt := "", "> " + fmt.Print(prompt) + for liner.Scan() { + text += liner.Text() + res, err := i.Eval(text + "\n") + switch { + case err == nil: + if res.IsValid() { + fmt.Println(": ", res) + } + text, prompt = "", "> " + case errors.Is(err, scanner.ErrBlock): + prompt = ">> " + default: + fmt.Println("Error:", err) + text, prompt = "", "> " + } + fmt.Print(prompt) + } + return err +} diff --git a/interpreter/dump_test.go b/interpreter/dump_test.go deleted file mode 100644 index d8fcc55..0000000 --- a/interpreter/dump_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package interpreter_test - -import ( - "testing" - - "github.com/mvertes/parscan/interpreter" - "github.com/mvertes/parscan/lang/golang" -) - -func TestDump(t *testing.T) { - initProgram := "var a int = 2+1; a" - interp := interpreter.NewInterpreter(golang.GoSpec) - r, e := interp.Eval(initProgram) - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - r, e = interp.Eval("a = 100") - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - d := interp.Dump() - t.Log(d) - - interp = interpreter.NewInterpreter(golang.GoSpec) - r, e = interp.Eval(initProgram) - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - e = interp.ApplyDump(d) - if e != nil { - t.Fatal(e) - } - - r, e = interp.Eval("a = a + 1;a") - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - if r.Interface() != int(101) { - t.Fatalf("unexpected result: %v", r) - } -} diff --git a/interpreter/interpreter.go b/interpreter/interpreter.go deleted file mode 100644 index 032a66f..0000000 --- a/interpreter/interpreter.go +++ /dev/null @@ -1,55 +0,0 @@ -// Package interpreter implements an interpreter. -package interpreter - -import ( - "reflect" - - "github.com/mvertes/parscan/compiler" - "github.com/mvertes/parscan/lang" - "github.com/mvertes/parscan/vm" -) - -const debug = true - -// Interp represents the state of an interpreter. -type Interp struct { - *compiler.Compiler - *vm.Machine -} - -// NewInterpreter returns a new interpreter. -func NewInterpreter(s *lang.Spec) *Interp { - return &Interp{compiler.NewCompiler(s), &vm.Machine{}} -} - -// Eval evaluates code string and return the last produced value if any, or an error. -func (i *Interp) Eval(src string) (res reflect.Value, err error) { - codeOffset := len(i.Code) - dataOffset := 0 - if codeOffset > 0 { - // All data must be copied to the VM the first time only (re-entrance). - dataOffset = len(i.Data) - } - i.PopExit() // Remove last exit from previous run (re-entrance). - - t, err := i.Parse(src) - if err != nil { - return res, err - } - if err = i.Codegen(t); err != nil { - return res, err - } - i.Push(i.Data[dataOffset:]...) - i.PushCode(i.Code[codeOffset:]...) - if s, ok := i.Symbols["main"]; ok { - i.PushCode(vm.Instruction{Op: vm.Calli, Arg: []int{int(i.Data[s.Index].Data.Int())}}) - } - i.PushCode(vm.Instruction{Op: vm.Exit}) - i.SetIP(max(codeOffset, i.Entry)) - if debug { - i.PrintData() - i.PrintCode() - } - err = i.Run() - return i.Top().Data, err -} diff --git a/interpreter/interpreter_test.go b/interpreter/interpreter_test.go deleted file mode 100644 index e48efdb..0000000 --- a/interpreter/interpreter_test.go +++ /dev/null @@ -1,263 +0,0 @@ -package interpreter_test - -import ( - "fmt" - "log" - "testing" - - "github.com/mvertes/parscan/interpreter" - "github.com/mvertes/parscan/lang/golang" -) - -type etest struct { - src, res, err string - skip bool -} - -func init() { - log.SetFlags(log.Lshortfile) -} - -func gen(test etest) func(*testing.T) { - return func(t *testing.T) { - t.Parallel() - if test.skip { - t.Skip() - } - interp := interpreter.NewInterpreter(golang.GoSpec) - errStr := "" - r, e := interp.Eval(test.src) - t.Log(r, e) - if e != nil { - errStr = e.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res { - t.Errorf("got %#v, want %#v", res, test.res) - } - } -} - -func run(t *testing.T, tests []etest) { - for _, test := range tests { - t.Run("", gen(test)) - } -} - -func TestExpr(t *testing.T) { - run(t, []etest{ - {src: "", res: ""}, - {src: "1+2", res: "3"}, - {src: "1+", err: "block not terminated"}, - {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, - {src: "1+(2+3)", res: "6"}, - {src: "(1+2)+3", res: "6"}, - {src: "(6+(1+2)+3)+5", res: "17"}, - {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, - {src: "a := 2; a = 3; a", res: "3"}, - {src: "2 * 3 + 1 == 7", res: "true"}, - {src: "7 == 2 * 3 + 1", res: "true"}, - {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, - {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, - {src: "-2", res: "-2"}, - {src: "-2 + 5", res: "3"}, - {src: "5 + -2", res: "3"}, - {src: "!false", res: "true"}, - {src: `a := "hello"`, res: "hello"}, - }) -} - -func TestLogical(t *testing.T) { - run(t, []etest{ - {src: "true && false", res: "false"}, - {src: "true && true", res: "true"}, - {src: "true && true && false", res: "false"}, - {src: "false || true && true", res: "true"}, - {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, - {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, - {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, - }) -} - -func TestFunc(t *testing.T) { - run(t, []etest{ - {src: "func f() int {return 2}; a := f(); a", res: "2"}, - {src: "func f() int {return 2}; f()", res: "2"}, - {src: "func f(a int) int {return a+2}; f(3)", res: "5"}, - {src: "func f(a int) int {if a < 4 {a = 5}; return a}; f(3)", res: "5"}, - {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"}, - {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"}, - {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"}, - {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, - {src: "var a int; func f() {a = a+2}; f(); a", res: "2"}, - {src: "var f = func(a int) int {return a+3}; f(2)", res: "5"}, - }) -} - -func TestIf(t *testing.T) { - run(t, []etest{ - {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"}, - {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"}, - {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"}, - {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"}, - {src: "a := 1; if a > 0 && a < 2 { a = 3 }; a", res: "3"}, - {src: "a := 1; if a < 0 || a < 2 { a = 3 }; a", res: "3"}, - }) -} - -func TestFor(t *testing.T) { - run(t, []etest{ - {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, - {src: "func f() int {a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; return a}; f()", res: "3"}, - {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, - {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, - {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, - }) -} - -func TestGoto(t *testing.T) { - run(t, []etest{ - {src: ` -func f(a int) int { - a = a+1 - goto end - a = a+1 -end: - return a -} -f(3)`, res: "4"}, - }) -} - -func TestSwitch(t *testing.T) { - src0 := `func f(a int) int { - switch a { - default: a = 0 - case 1,2: a = a+1 - case 3: a = a+2; break; a = 3 - case 4: a = 10 - } - return a -} -` - src1 := `func f(a int) int { - switch { - case a < 3: return 2 - case a < 5: return 5 - default: a = 0 - } - return a -} -` - run(t, []etest{ - {src: src0 + "f(1)", res: "2"}, - {src: src0 + "f(2)", res: "3"}, - {src: src0 + "f(3)", res: "5"}, - {src: src0 + "f(4)", res: "10"}, - {src: src0 + "f(5)", res: "0"}, - - {src: src1 + "f(1)", res: "2"}, - {src: src1 + "f(4)", res: "5"}, - {src: src1 + "f(6)", res: "0"}, - }) -} - -func TestConst(t *testing.T) { - src0 := `const ( - a = iota - b - c -) -` - run(t, []etest{ - {src: "const a = 1+2; a", res: "3"}, - {src: "const a, b = 1, 2; a+b", res: "3"}, - {src: "const huge = 1 << 100; const four = huge >> 98; four", res: "4"}, - - {src: src0 + "c", res: "2"}, - }) -} - -func TestArray(t *testing.T) { - run(t, []etest{ - {src: "type T []int; var t T; t", res: "[]"}, - {src: "type T [3]int; var t T; t", res: "[0 0 0]"}, - {src: "type T [3]int; var t T; t[1] = 2; t", res: "[0 2 0]"}, - }) -} - -func TestPointer(t *testing.T) { - run(t, []etest{ - {src: "var a *int; a", res: ""}, - {src: "var a int; var b *int = &a; *b", res: "0"}, - {src: "var a int = 2; var b *int = &a; *b", res: "2"}, - }) -} - -func TestStruct(t *testing.T) { - run(t, []etest{ - {src: "type T struct {a string; b, c int}; var t T; t", res: "{ 0 0}"}, - {src: "type T struct {a int}; var t T; t.a", res: "0"}, - {src: "type T struct {a int}; var t T; t.a = 1; t.a", res: "1"}, - }) -} - -func TestType(t *testing.T) { - src0 := `type ( - I int - S string -) -` - run(t, []etest{ - {src: "type t int; var a t = 1; a", res: "1"}, - {src: "type t = int; var a t = 1; a", res: "1"}, - {src: src0 + `var s S = "xx"; s`, res: "xx"}, - }) -} - -func TestVar(t *testing.T) { - run(t, []etest{ - {src: "var a int; a", res: "0"}, - {src: "var a, b, c int; a", res: "0"}, - {src: "var a, b, c int; a + b", res: "0"}, - {src: "var a, b, c int; a + b + c", res: "0"}, - {src: "var a int = 2+1; a", res: "3"}, - {src: "var a, b int = 2, 5; a+b", res: "7"}, - {src: "var x = 5; x", res: "5"}, - {src: "var a = 1; func f() int { var a, b int = 3, 4; return a+b}; a+f()", res: "8"}, - {src: `var a = "hello"; a`, res: "hello"}, - {src: `var ( - a, b int = 4+1, 3 - c = 8 -); a+b+c`, res: "16"}, - }) -} - -func TestImport(t *testing.T) { - src0 := `import ( - "fmt" -) -` - run(t, []etest{ - {src: "fmt.Println(4)", err: "symbol not found: fmt"}, - {src: `import "xxx"`, err: "package not found: xxx"}, - {src: `import "fmt"; fmt.Println(4)`, res: ""}, - {src: src0 + "fmt.Println(4)", res: ""}, - {src: `func main() {import "fmt"; fmt.Println("hello")}`, err: "unexpected import"}, - {src: `import m "fmt"; m.Println(4)`, res: ""}, - {src: `import . "fmt"; Println(4)`, res: ""}, - }) -} - -func TestComposite(t *testing.T) { - run(t, []etest{ - {src: "type T struct{}; t := T{}; t", res: "{}"}, - {src: "t := struct{}{}; t", res: "{}"}, - // {src: `type T struct{N int; S string}; t := T{2, "foo"}`, res: `{2 foo}`}, - }) -} diff --git a/interpreter/repl.go b/interpreter/repl.go deleted file mode 100644 index 73af8a7..0000000 --- a/interpreter/repl.go +++ /dev/null @@ -1,35 +0,0 @@ -package interpreter - -import ( - "bufio" - "errors" - "fmt" - "io" - - "github.com/mvertes/parscan/scanner" -) - -// Repl executes an interactive line oriented Read Eval Print Loop (REPL). -func (i *Interp) Repl(in io.Reader) (err error) { - liner := bufio.NewScanner(in) - text, prompt := "", "> " - fmt.Print(prompt) - for liner.Scan() { - text += liner.Text() - res, err := i.Eval(text + "\n") - switch { - case err == nil: - if res.IsValid() { - fmt.Println(": ", res) - } - text, prompt = "", "> " - case errors.Is(err, scanner.ErrBlock): - prompt = ">> " - default: - fmt.Println("Error:", err) - text, prompt = "", "> " - } - fmt.Print(prompt) - } - return err -} diff --git a/main.go b/main.go index 0e95bd4..634d053 100644 --- a/main.go +++ b/main.go @@ -7,7 +7,7 @@ import ( "log" "os" - "github.com/mvertes/parscan/interpreter" + "github.com/mvertes/parscan/interp" "github.com/mvertes/parscan/lang/golang" ) @@ -32,21 +32,21 @@ func run(arg []string) error { } args := rflag.Args() - interp := interpreter.NewInterpreter(golang.GoSpec) + i := interp.NewInterpreter(golang.GoSpec) if str != "" { - return evalStr(interp, str) + return evalStr(i, str) } if len(args) == 0 { - return interp.Repl(os.Stdin) + return i.Repl(os.Stdin) } buf, err := os.ReadFile(arg[0]) if err != nil { return err } - return evalStr(interp, string(buf)) + return evalStr(i, string(buf)) } -func evalStr(i *interpreter.Interp, s string) error { +func evalStr(i *interp.Interp, s string) error { _, err := i.Eval(s) return err } diff --git a/vm/type.go b/vm/type.go index 7c33cd2..5f91eee 100644 --- a/vm/type.go +++ b/vm/type.go @@ -33,8 +33,8 @@ func (t *Type) Out(i int) *Type { // Value is the representation of a runtime value. type Value struct { - Type *Type - Data reflect.Value + *Type + reflect.Value } // NewValue returns an addressable zero value for the specified type. @@ -42,7 +42,7 @@ func NewValue(typ *Type) Value { if typ.Rtype.Kind() == reflect.Func { typ = TypeOf(0) // Function value is its index in the code segment. } - return Value{Type: typ, Data: reflect.New(typ.Rtype).Elem()} + return Value{Type: typ, Value: reflect.New(typ.Rtype).Elem()} } // TypeOf returns the runtime type of v. @@ -53,7 +53,7 @@ func TypeOf(v any) *Type { // ValueOf returns the runtime value of v. func ValueOf(v any) Value { - return Value{Data: reflect.ValueOf(v)} + return Value{Value: reflect.ValueOf(v)} } // PointerTo returns the pointer type with element t. diff --git a/vm/vm.go b/vm/vm.go index 18207f7..7afe374 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -103,21 +103,21 @@ func (m *Machine) Run() (err error) { ic++ switch c.Op { case Add: - mem[sp-2] = ValueOf(int(mem[sp-2].Data.Int() + mem[sp-1].Data.Int())) + mem[sp-2] = ValueOf(int(mem[sp-2].Int() + mem[sp-1].Int())) mem = mem[:sp-1] case Mul: - mem[sp-2] = ValueOf(int(mem[sp-2].Data.Int() * mem[sp-1].Data.Int())) + mem[sp-2] = ValueOf(int(mem[sp-2].Int() * mem[sp-1].Int())) mem = mem[:sp-1] case Addr: - mem[sp-1].Data = mem[sp-1].Data.Addr() + mem[sp-1].Value = mem[sp-1].Addr() case Assign: - mem[c.Arg[0]].Data.Set(mem[sp-1].Data) + mem[c.Arg[0]].Set(mem[sp-1].Value) mem = mem[:sp-1] case Fassign: - mem[fp+c.Arg[0]-1].Data.Set(mem[sp-1].Data) + mem[fp+c.Arg[0]-1].Set(mem[sp-1].Value) mem = mem[:sp-1] case Call: - nip := int(mem[sp-1].Data.Int()) + nip := int(mem[sp-1].Int()) mem = append(mem[:sp-1], ValueOf(ip+1), ValueOf(fp)) ip = nip fp = sp + 1 @@ -130,24 +130,24 @@ func (m *Machine) Run() (err error) { case CallX: // Should be made optional. in := make([]reflect.Value, c.Arg[0]) for i := range in { - in[i] = mem[sp-2-i].Data + in[i] = mem[sp-2-i].Value } - f := mem[sp-1].Data + f := mem[sp-1].Value mem = mem[:sp-c.Arg[0]-1] for _, v := range f.Call(in) { - mem = append(mem, Value{Data: v}) + mem = append(mem, Value{Value: v}) } case Deref: - mem[sp-1].Data = mem[sp-1].Data.Elem() + mem[sp-1].Value = mem[sp-1].Value.Elem() case Dup: mem = append(mem, mem[c.Arg[0]]) case New: mem[c.Arg[0]+fp-1] = NewValue(mem[c.Arg[1]].Type) case Equal: - mem[sp-2] = ValueOf(mem[sp-2].Data.Equal(mem[sp-1].Data)) + mem[sp-2] = ValueOf(mem[sp-2].Equal(mem[sp-1].Value)) mem = mem[:sp-1] case EqualSet: - if mem[sp-2].Data.Equal(mem[sp-1].Data) { + if mem[sp-2].Equal(mem[sp-1].Value) { // If equal then lhs and rhs are popped, replaced by test result, as in Equal. mem[sp-2] = ValueOf(true) mem = mem[:sp-1] @@ -161,31 +161,31 @@ func (m *Machine) Run() (err error) { case Fdup: mem = append(mem, mem[c.Arg[0]+fp-1]) case Field: - fv := mem[sp-1].Data.FieldByIndex(c.Arg) + fv := mem[sp-1].FieldByIndex(c.Arg) if !fv.CanSet() { // Normally private fields can not bet set via reflect. Override this limitation. fv = reflect.NewAt(fv.Type(), unsafe.Pointer(fv.UnsafeAddr())).Elem() } - mem[sp-1].Data = fv + mem[sp-1].Value = fv case Jump: ip += c.Arg[0] continue case JumpTrue: - cond := mem[sp-1].Data.Bool() + cond := mem[sp-1].Bool() mem = mem[:sp-1] if cond { ip += c.Arg[0] continue } case JumpFalse: - cond := mem[sp-1].Data.Bool() + cond := mem[sp-1].Bool() mem = mem[:sp-1] if !cond { ip += c.Arg[0] continue } case JumpSetTrue: - cond := mem[sp-1].Data.Bool() + cond := mem[sp-1].Bool() if cond { ip += c.Arg[0] // Note that the stack is not modified if cond is true. @@ -193,7 +193,7 @@ func (m *Machine) Run() (err error) { } mem = mem[:sp-1] case JumpSetFalse: - cond := mem[sp-1].Data.Bool() + cond := mem[sp-1].Bool() if !cond { ip += c.Arg[0] // Note that the stack is not modified if cond is false. @@ -201,40 +201,40 @@ func (m *Machine) Run() (err error) { } mem = mem[:sp-1] case Greater: - mem[sp-2] = ValueOf(mem[sp-1].Data.Int() > mem[sp-2].Data.Int()) + mem[sp-2] = ValueOf(mem[sp-1].Int() > mem[sp-2].Int()) mem = mem[:sp-1] case Lower: - mem[sp-2] = ValueOf(mem[sp-1].Data.Int() < mem[sp-2].Data.Int()) + mem[sp-2] = ValueOf(mem[sp-1].Int() < mem[sp-2].Int()) mem = mem[:sp-1] case Loweri: - mem[sp-1] = ValueOf(mem[sp-1].Data.Int() < int64(c.Arg[0])) + mem[sp-1] = ValueOf(mem[sp-1].Int() < int64(c.Arg[0])) case Not: - mem[sp-1] = ValueOf(!mem[sp-1].Data.Bool()) + mem[sp-1] = ValueOf(!mem[sp-1].Bool()) case Pop: mem = mem[:sp-c.Arg[0]] case Push: mem = append(mem, NewValue(TypeOf(0))) - mem[sp].Data.SetInt(int64(c.Arg[0])) + mem[sp].SetInt(int64(c.Arg[0])) case Grow: mem = append(mem, make([]Value, c.Arg[0])...) case Return: - ip = int(mem[fp-2].Data.Int()) + ip = int(mem[fp-2].Int()) ofp := fp - fp = int(mem[fp-1].Data.Int()) + fp = int(mem[fp-1].Int()) mem = append(mem[:ofp-c.Arg[0]-c.Arg[1]-1], mem[sp-c.Arg[0]:]...) continue case Sub: - mem[sp-2] = ValueOf(int(mem[sp-1].Data.Int() - mem[sp-2].Data.Int())) + mem[sp-2] = ValueOf(int(mem[sp-1].Int() - mem[sp-2].Int())) mem = mem[:sp-1] case Subi: - mem[sp-1] = ValueOf(int(mem[sp-1].Data.Int()) - c.Arg[0]) + mem[sp-1] = ValueOf(int(mem[sp-1].Int()) - c.Arg[0]) case Swap: mem[sp-2], mem[sp-1] = mem[sp-1], mem[sp-2] case Index: - mem[sp-2].Data = mem[sp-1].Data.Index(int(mem[sp-2].Data.Int())) + mem[sp-2].Value = mem[sp-1].Index(int(mem[sp-2].Int())) mem = mem[:sp-1] case Vassign: - mem[sp-1].Data.Set(mem[sp-2].Data) + mem[sp-1].Set(mem[sp-2].Value) mem = mem[:sp-2] } ip++ @@ -288,7 +288,7 @@ func Vstring(lv []Value) string { if s != "[" { s += " " } - s += fmt.Sprintf("%v", v.Data) + s += fmt.Sprintf("%v", v.Value) } return s + "]" } diff --git a/vm/vm_test.go b/vm/vm_test.go index f7cbfbf..a968281 100644 --- a/vm/vm_test.go +++ b/vm/vm_test.go @@ -136,7 +136,7 @@ var tests = []struct { }, start: 0, end: 1, mem: "[3]", }, { // #10 -- Assign a variable. - sym: []Value{{Type: TypeOf(0), Data: reflect.ValueOf(0)}}, + sym: []Value{{Type: TypeOf(0), Value: reflect.ValueOf(0)}}, code: []Instruction{ {Op: Grow, Arg: []int{1}}, {Op: New, Arg: []int{2, 0}}, -- cgit v1.2.3