From d99d69391eeae129cad2d5c2c90ce700db01b11c Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Thu, 27 Nov 2025 12:40:35 +0100 Subject: chore: move compiler and interpreter in their own packages --- compiler/compiler.go | 521 +++++++++++++++++++++++++++++++++++++++ interpreter/dump_test.go | 48 ++++ interpreter/interpreter_test.go | 267 ++++++++++++++++++++ main.go | 8 +- parser/compiler.go | 523 ---------------------------------------- parser/decl.go | 38 +-- parser/dump_test.go | 48 ---- parser/expr.go | 4 +- parser/interpreter.go | 11 +- parser/interpreter_test.go | 267 -------------------- parser/package.go | 2 +- parser/parse.go | 20 +- parser/symbol.go | 88 +++---- parser/symkind_string.go | 28 +-- parser/type.go | 18 +- 15 files changed, 945 insertions(+), 946 deletions(-) create mode 100644 compiler/compiler.go create mode 100644 interpreter/dump_test.go create mode 100644 interpreter/interpreter_test.go delete mode 100644 parser/compiler.go delete mode 100644 parser/dump_test.go delete mode 100644 parser/interpreter_test.go diff --git a/compiler/compiler.go b/compiler/compiler.go new file mode 100644 index 0000000..d63dd14 --- /dev/null +++ b/compiler/compiler.go @@ -0,0 +1,521 @@ +package compiler + +import ( + "fmt" + "log" + "os" + "path" + "reflect" + "runtime" + "strconv" + + "github.com/mvertes/parscan/lang" + "github.com/mvertes/parscan/parser" + "github.com/mvertes/parscan/scanner" + "github.com/mvertes/parscan/vm" +) + +// Compiler represents the state of a compiler. +type Compiler struct { + *parser.Parser + vm.Code // produced code, to fill VM with + Data []vm.Value // produced data, will be at the bottom of VM stack + Entry int // offset in Code to start execution from (skip function definitions) + + strings map[string]int // locations of strings in Data +} + +// NewCompiler returns a new compiler state for a given scanner. 
+func NewCompiler(scanner *scanner.Scanner) *Compiler { + return &Compiler{ + Parser: parser.NewParser(scanner, true), + Entry: -1, + strings: map[string]int{}, + } +} + +// AddSym adds a new named value to the compiler symbol table, and returns its index in memory. +func (c *Compiler) AddSym(name string, value vm.Value) int { + p := len(c.Data) + c.Data = append(c.Data, value) + c.AddSymbol(p, name, value, parser.SymValue, nil, false) + return p +} + +// Codegen generates vm code from parsed tokens. +func (c *Compiler) Codegen(tokens parser.Tokens) (err error) { + log.Println("Codegen tokens:", tokens) + fixList := parser.Tokens{} // list of tokens to fix after all necessary information is gathered + stack := []*parser.Symbol{} // for symbolic evaluation, type checking, etc + + emit := func(t scanner.Token, op vm.Op, arg ...int) { + _, file, line, _ := runtime.Caller(1) + fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg) + c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg}) + } + push := func(s *parser.Symbol) { stack = append(stack, s) } + pop := func() *parser.Symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } + + for i, t := range tokens { + switch t.Tok { + case lang.Int: + n, err := strconv.Atoi(t.Str) + if err != nil { + return err + } + push(&parser.Symbol{Kind: parser.SymConst, Value: vm.ValueOf(n), Type: vm.TypeOf(0)}) + emit(t, vm.Push, n) + + case lang.String: + s := t.Block() + v := vm.Value{Data: reflect.ValueOf(s), Type: vm.TypeOf(s)} + i, ok := c.strings[s] + if !ok { + i = len(c.Data) + c.Data = append(c.Data, v) + c.strings[s] = i + } + push(&parser.Symbol{Kind: parser.SymConst, Value: v}) + emit(t, vm.Dup, i) + + case lang.Add: + push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) + emit(t, vm.Add) + + case lang.Mul: + push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())}) + emit(t, vm.Mul) + + case lang.Sub: + push(&parser.Symbol{Type: 
arithmeticOpType(pop(), pop())}) + emit(t, vm.Sub) + + case lang.Minus: + emit(t, vm.Push, 0) + emit(t, vm.Sub) + + case lang.Not: + emit(t, vm.Not) + + case lang.Plus: + // Unary '+' is idempotent. Nothing to do. + + case lang.Addr: + push(&parser.Symbol{Type: vm.PointerTo(pop().Type)}) + emit(t, vm.Addr) + + case lang.Deref: + push(&parser.Symbol{Type: pop().Type.Elem()}) + emit(t, vm.Deref) + + case lang.Index: + push(&parser.Symbol{Type: pop().Type.Elem()}) + emit(t, vm.Index) + + case lang.Greater: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Greater) + + case lang.Less: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Lower) + + case lang.Call: + s := pop() + if s.Kind != parser.SymValue { + typ := s.Type + // TODO: pop input types (careful with variadic function). + for i := 0; i < typ.Rtype.NumOut(); i++ { + push(&parser.Symbol{Type: typ.Out(i)}) + } + emit(t, vm.Call) + break + } + push(s) + fallthrough // A symValue must be called through callX. + + case lang.CallX: + rtyp := pop().Value.Data.Type() + // TODO: pop input types (careful with variadic function). + for i := 0; i < rtyp.NumOut(); i++ { + push(&parser.Symbol{Type: &vm.Type{Rtype: rtyp.Out(i)}}) + } + emit(t, vm.CallX, t.Beg) + + case lang.Composite: + log.Println("COMPOSITE") + /* + d := pop() + switch d.typ.Rtype.Kind() { + case reflect.Struct: + // nf := d.typ.Rtype.NumField() + // emit(t.Pos, vm.New, d.index, c.typeSym(d.typ).index) + emit(t, vm.Field, 0) + emit(t, vm.Vassign) + emit(t, vm.Fdup, 2) + emit(t, vm.Field, 1) + emit(t, vm.Vassign) + emit(t, vm.Pop, 1) + // emit(t, vm.Fdup, 2) + // Assume an element list with no keys, one per struct field in order + } + */ + + case lang.Grow: + emit(t, vm.Grow, t.Beg) + + case lang.Define: + // TODO: support assignment to local, composite objects. 
+ st := tokens[i-1] + l := len(c.Data) + d := pop() + typ := d.Type + if typ == nil { + typ = d.Value.Type + } + v := vm.NewValue(typ) + c.AddSymbol(l, st.Str, v, parser.SymVar, typ, false) + c.Data = append(c.Data, v) + emit(t, vm.Assign, l) + + case lang.Assign: + st := tokens[i-1] + if st.Tok == lang.Period || st.Tok == lang.Index { + emit(t, vm.Vassign) + break + } + s, ok := c.Symbols[st.Str] + if !ok { + return fmt.Errorf("symbol not found: %s", st.Str) + } + d := pop() + typ := d.Type + if typ == nil { + typ = d.Value.Type + } + if s.Type == nil { + s.Type = typ + s.Value = vm.NewValue(typ) + } + if s.Local { + if !s.Used { + emit(st, vm.New, s.Index, c.typeSym(s.Type).Index) + s.Used = true + } + emit(st, vm.Fassign, s.Index) + break + } + if s.Index == parser.UnsetAddr { + s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } + emit(st, vm.Assign, s.Index) + + case lang.Equal: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.Equal) + + case lang.EqualSet: + push(&parser.Symbol{Type: booleanOpType(pop(), pop())}) + emit(t, vm.EqualSet) + + case lang.Ident: + if i < len(tokens)-1 { + switch t1 := tokens[i+1]; t1.Tok { + case lang.Define, lang.Assign, lang.Colon: + continue + } + } + s, ok := c.Symbols[t.Str] + if !ok { + return fmt.Errorf("symbol not found: %s", t.Str) + } + push(s) + if s.Kind == parser.SymPkg { + break + } + if s.Local { + emit(t, vm.Fdup, s.Index) + } else { + if s.Index == parser.UnsetAddr { + s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } + emit(t, vm.Dup, s.Index) + } + + case lang.Label: + lc := len(c.Code) + s, ok := c.Symbols[t.Str] + if ok { + s.Value = vm.ValueOf(lc) + if s.Kind == parser.SymFunc { + // label is a function entry point, register its code address in data. 
+ s.Index = len(c.Data) + c.Data = append(c.Data, s.Value) + } else { + c.Data[s.Index] = s.Value + } + } else { + c.Symbols[t.Str] = &parser.Symbol{Kind: parser.SymLabel, Value: vm.ValueOf(lc)} + } + + case lang.JumpFalse: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Data.Int()) - len(c.Code) + } + emit(t, vm.JumpFalse, i) + + case lang.JumpSetFalse: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Data.Int()) - len(c.Code) + } + emit(t, vm.JumpSetFalse, i) + + case lang.JumpSetTrue: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + // t.Beg contains the position in code which needs to be fixed. + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Data.Int()) - len(c.Code) + } + emit(t, vm.JumpSetTrue, i) + + case lang.Goto: + var i int + if s, ok := c.Symbols[t.Str]; !ok { + t.Beg = len(c.Code) + fixList = append(fixList, t) + } else { + i = int(s.Value.Data.Int()) - len(c.Code) + } + emit(t, vm.Jump, i) + + case lang.Period: + s := pop() + switch s.Kind { + case parser.SymPkg: + p, ok := parser.Packages[s.PkgPath] + if !ok { + return fmt.Errorf("package not found: %s", s.PkgPath) + } + v, ok := p[t.Str[1:]] + if !ok { + return fmt.Errorf("symbol not found in package %s: %s", s.PkgPath, t.Str[1:]) + } + name := s.PkgPath + t.Str + var l int + sym, _, ok := c.GetSym(name, "") + if ok { + l = sym.Index + } else { + l = len(c.Data) + c.Data = append(c.Data, v) + c.AddSymbol(l, name, v, parser.SymValue, v.Type, false) + sym = c.Symbols[name] + } + push(sym) + emit(t, vm.Dup, l) + default: + if f, ok := s.Type.Rtype.FieldByName(t.Str[1:]); ok { + emit(t, vm.Field, f.Index...) 
+ break + } + return fmt.Errorf("field or method not found: %s", t.Str[1:]) + } + + case lang.Return: + emit(t, vm.Return, t.Beg, t.End) + + default: + return fmt.Errorf("Codegen: unsupported token %v", t) + } + } + + // Finally we fix unresolved labels for jump destinations. + for _, t := range fixList { + s, ok := c.Symbols[t.Str] + if !ok { + return fmt.Errorf("label not found: %q", t.Str) + } + c.Code[t.Beg].Arg[0] = int(s.Value.Data.Int()) - t.Beg + } + return err +} +func arithmeticOpType(s1, _ *parser.Symbol) *vm.Type { return parser.SymbolType(s1) } +func booleanOpType(_, _ *parser.Symbol) *vm.Type { return vm.TypeOf(true) } + +// PrintCode pretty prints the generated code. +func (c *Compiler) PrintCode() { + labels := map[int][]string{} // labels indexed by code location + data := map[int]string{} // data indexed by frame location + + for name, sym := range c.Symbols { + if sym.Kind == parser.SymLabel || sym.Kind == parser.SymFunc { + i := int(sym.Value.Data.Int()) + labels[i] = append(labels[i], name) + } + if sym.Used { + data[sym.Index] = name + } + } + + fmt.Fprintln(os.Stderr, "# Code:") + for i, l := range c.Code { + for _, label := range labels[i] { + fmt.Fprintln(os.Stderr, label+":") + } + extra := "" + switch l.Op { + case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: + if d, ok := labels[i+l.Arg[0]]; ok { + extra = "// " + d[0] + } + case vm.Dup, vm.Assign, vm.Fdup, vm.Fassign: + if d, ok := data[l.Arg[0]]; ok { + extra = "// " + d + } + } + fmt.Fprintf(os.Stderr, "%4d %-14v %v\n", i, l, extra) + } + + for _, label := range labels[len(c.Code)] { + fmt.Fprintln(os.Stderr, label+":") + } + fmt.Fprintln(os.Stderr, "# End code") +} + +type entry struct { + name string + *parser.Symbol +} + +func (e entry) String() string { + if e.Symbol != nil { + return fmt.Sprintf("name: %s,local: %t, i: %d, k: %d, t: %s, v: %v", + e.name, + e.Local, + e.Index, + e.Kind, + e.Type, + e.Value, + ) + } + return e.name +} + +// 
PrintData pretty prints the generated global data symbols in compiler. +func (c *Compiler) PrintData() { + dict := c.symbolsByIndex() + + fmt.Fprintln(os.Stderr, "# Data:") + for i, d := range c.Data { + fmt.Fprintf(os.Stderr, "%4d %T %v %v\n", i, d.Data.Interface(), d.Data, dict[i]) + } +} + +func (c *Compiler) symbolsByIndex() map[int]entry { + dict := map[int]entry{} + for name, sym := range c.Symbols { + if sym.Index == parser.UnsetAddr { + continue + } + dict[sym.Index] = entry{name, sym} + } + return dict +} + +// Dump represents the state of a data dump. +type Dump struct { + Values []*DumpValue +} + +// DumpValue is a value of a dump state. +type DumpValue struct { + Index int + Name string + Kind int + Type string + Value any +} + +// Dump creates a snapshot of the execution state of global variables. +// This method is specifically implemented in the Compiler to minimize the coupling between +// the dump format and other components. By situating the dump logic in the Compiler, +// it relies solely on the program being executed and the indexing algorithm used for ordering variables +// (currently, this is an integer that corresponds to the order of variables in the program). +// This design choice allows the Virtual Machine (VM) to evolve its memory management strategies +// without compromising backward compatibility with dumps generated by previous versions. +func (c *Compiler) Dump() *Dump { + dict := c.symbolsByIndex() + dv := make([]*DumpValue, len(c.Data)) + for i, d := range c.Data { + e := dict[i] + dv[i] = &DumpValue{ + Index: e.Index, + Name: e.name, + Kind: int(e.Kind), + Type: e.Type.Name, + Value: d.Data.Interface(), + } + } + return &Dump{Values: dv} +} + +// ApplyDump sets previously saved dump, restoring the state of global variables. 
+func (c *Compiler) ApplyDump(d *Dump) error { + dict := c.symbolsByIndex() + for _, dv := range d.Values { + // do all the checks to be sure we are applying the correct values + e, ok := dict[dv.Index] + if !ok { + return fmt.Errorf("entry not found on index %d", dv.Index) + } + + if dv.Name != e.name || + dv.Type != e.Type.Name || + dv.Kind != int(e.Kind) { + return fmt.Errorf("entry with index %d does not match with provided entry. "+ + "dumpValue: %s, %s, %d. memoryValue: %s, %s, %d", + dv.Index, + dv.Name, dv.Type, dv.Kind, + e.name, e.Type, e.Kind) + } + + if dv.Index >= len(c.Data) { + return fmt.Errorf("index (%d) bigger than memory (%d)", dv.Index, len(c.Data)) + } + + if !c.Data[dv.Index].Data.CanSet() { + return fmt.Errorf("value %v cannot be set", dv.Value) + } + + c.Data[dv.Index].Data.Set(reflect.ValueOf(dv.Value)) + } + return nil +} + +func (c *Compiler) typeSym(t *vm.Type) *parser.Symbol { + tsym, ok := c.Symbols[t.Rtype.String()] + if !ok { + tsym = &parser.Symbol{Index: parser.UnsetAddr, Kind: parser.SymType, Type: t} + c.Symbols[t.Rtype.String()] = tsym + } + if tsym.Index == parser.UnsetAddr { + tsym.Index = len(c.Data) + c.Data = append(c.Data, vm.NewValue(t)) + } + return tsym +} diff --git a/interpreter/dump_test.go b/interpreter/dump_test.go new file mode 100644 index 0000000..dd172b9 --- /dev/null +++ b/interpreter/dump_test.go @@ -0,0 +1,48 @@ +package interpreter_test + +import ( + "testing" + + "github.com/mvertes/parscan/interpreter" +) + +func TestDump(t *testing.T) { + initProgram := "var a int = 2+1; a" + interp := interpreter.NewInterpreter(GoScanner) + r, e := interp.Eval(initProgram) + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + r, e = interp.Eval("a = 100") + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + d := interp.Dump() + t.Log(d) + + interp = interpreter.NewInterpreter(GoScanner) + r, e = interp.Eval(initProgram) + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + e = interp.ApplyDump(d) + if e != nil { + t.Fatal(e) + 
} + + r, e = interp.Eval("a = a + 1;a") + t.Log(r, e) + if e != nil { + t.Fatal(e) + } + + if r.Interface() != int(101) { + t.Fatalf("unexpected result: %v", r) + } +} diff --git a/interpreter/interpreter_test.go b/interpreter/interpreter_test.go new file mode 100644 index 0000000..800ace5 --- /dev/null +++ b/interpreter/interpreter_test.go @@ -0,0 +1,267 @@ +package interpreter_test + +import ( + "fmt" + "log" + "testing" + + "github.com/mvertes/parscan/interpreter" + "github.com/mvertes/parscan/lang/golang" + "github.com/mvertes/parscan/scanner" +) + +type etest struct { + src, res, err string + skip bool +} + +var GoScanner *scanner.Scanner + +func init() { + log.SetFlags(log.Lshortfile) + GoScanner = scanner.NewScanner(golang.GoSpec) +} + +func gen(test etest) func(*testing.T) { + return func(t *testing.T) { + if test.skip { + t.Skip() + } + interp := interpreter.NewInterpreter(GoScanner) + errStr := "" + r, e := interp.Eval(test.src) + t.Log(r, e) + if e != nil { + errStr = e.Error() + } + if errStr != test.err { + t.Errorf("got error %#v, want error %#v", errStr, test.err) + } + if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res { + t.Errorf("got %#v, want %#v", res, test.res) + } + } +} + +func run(t *testing.T, tests []etest) { + for _, test := range tests { + test := test + t.Run("", gen(test)) + } +} + +func TestExpr(t *testing.T) { + run(t, []etest{ + {src: "", res: ""}, + {src: "1+2", res: "3"}, + {src: "1+", err: "block not terminated"}, + {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, + {src: "1+(2+3)", res: "6"}, + {src: "(1+2)+3", res: "6"}, + {src: "(6+(1+2)+3)+5", res: "17"}, + {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, + {src: "a := 2; a = 3; a", res: "3"}, + {src: "2 * 3 + 1 == 7", res: "true"}, + {src: "7 == 2 * 3 + 1", res: "true"}, + {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, + {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, + {src: "-2", res: "-2"}, + {src: "-2 + 5", res: "3"}, + {src: "5 + -2", res: 
"3"}, + {src: "!false", res: "true"}, + {src: `a := "hello"`, res: "hello"}, + }) +} + +func TestLogical(t *testing.T) { + run(t, []etest{ + {src: "true && false", res: "false"}, + {src: "true && true", res: "true"}, + {src: "true && true && false", res: "false"}, + {src: "false || true && true", res: "true"}, + {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, + {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, + {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, + {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, + {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, + {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, + }) +} + +func TestFunc(t *testing.T) { + run(t, []etest{ + {src: "func f() int {return 2}; a := f(); a", res: "2"}, + {src: "func f() int {return 2}; f()", res: "2"}, + {src: "func f(a int) int {return a+2}; f(3)", res: "5"}, + {src: "func f(a int) int {if a < 4 {a = 5}; return a}; f(3)", res: "5"}, + {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"}, + {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"}, + {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"}, + {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, + {src: "var a int; func f() {a = a+2}; f(); a", res: "2"}, + {src: "var f = func(a int) int {return a+3}; f(2)", res: "5"}, + }) +} + +func TestIf(t *testing.T) { + run(t, []etest{ + {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"}, + {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"}, + {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"}, + {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"}, + {src: "a := 1; if a > 0 && a < 2 { a = 3 }; a", res: "3"}, + {src: "a := 1; if a < 0 || a < 2 { a = 3 }; a", res: "3"}, + }) +} + +func TestFor(t *testing.T) { + run(t, []etest{ + {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, + {src: "func f() int {a := 0; for i := 0; i < 3; i 
= i+1 {a = a+i}; return a}; f()", res: "3"}, + {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, + {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, + {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, + }) +} + +func TestGoto(t *testing.T) { + run(t, []etest{ + {src: ` +func f(a int) int { + a = a+1 + goto end + a = a+1 +end: + return a +} +f(3)`, res: "4"}, + }) +} + +func TestSwitch(t *testing.T) { + src0 := `func f(a int) int { + switch a { + default: a = 0 + case 1,2: a = a+1 + case 3: a = a+2; break; a = 3 + case 4: a = 10 + } + return a +} +` + src1 := `func f(a int) int { + switch { + case a < 3: return 2 + case a < 5: return 5 + default: a = 0 + } + return a +} +` + run(t, []etest{ + {src: src0 + "f(1)", res: "2"}, + {src: src0 + "f(2)", res: "3"}, + {src: src0 + "f(3)", res: "5"}, + {src: src0 + "f(4)", res: "10"}, + {src: src0 + "f(5)", res: "0"}, + + {src: src1 + "f(1)", res: "2"}, + {src: src1 + "f(4)", res: "5"}, + {src: src1 + "f(6)", res: "0"}, + }) +} + +func TestConst(t *testing.T) { + src0 := `const ( + a = iota + b + c +) +` + run(t, []etest{ + {src: "const a = 1+2; a", res: "3"}, + {src: "const a, b = 1, 2; a+b", res: "3"}, + {src: "const huge = 1 << 100; const four = huge >> 98; four", res: "4"}, + + {src: src0 + "c", res: "2"}, + }) +} + +func TestArray(t *testing.T) { + run(t, []etest{ + {src: "type T []int; var t T; t", res: "[]"}, + {src: "type T [3]int; var t T; t", res: "[0 0 0]"}, + {src: "type T [3]int; var t T; t[1] = 2; t", res: "[0 2 0]"}, + }) +} + +func TestPointer(t *testing.T) { + run(t, []etest{ + {src: "var a *int; a", res: ""}, + {src: "var a int; var b *int = &a; *b", res: "0"}, + {src: "var a int = 2; var b *int = &a; *b", res: "2"}, + }) +} + +func TestStruct(t *testing.T) { + run(t, []etest{ + {src: "type T struct {a string; b, c int}; var t T; t", res: "{ 0 0}"}, + {src: "type T struct {a int}; var t T; t.a", res: 
"0"}, + {src: "type T struct {a int}; var t T; t.a = 1; t.a", res: "1"}, + }) +} + +func TestType(t *testing.T) { + src0 := `type ( + I int + S string +) +` + run(t, []etest{ + {src: "type t int; var a t = 1; a", res: "1"}, + {src: "type t = int; var a t = 1; a", res: "1"}, + {src: src0 + `var s S = "xx"; s`, res: "xx"}, + }) +} + +func TestVar(t *testing.T) { + run(t, []etest{ + {src: "var a int; a", res: "0"}, + {src: "var a, b, c int; a", res: "0"}, + {src: "var a, b, c int; a + b", res: "0"}, + {src: "var a, b, c int; a + b + c", res: "0"}, + {src: "var a int = 2+1; a", res: "3"}, + {src: "var a, b int = 2, 5; a+b", res: "7"}, + {src: "var x = 5; x", res: "5"}, + {src: "var a = 1; func f() int { var a, b int = 3, 4; return a+b}; a+f()", res: "8"}, + {src: `var a = "hello"; a`, res: "hello"}, + {src: `var ( + a, b int = 4+1, 3 + c = 8 +); a+b+c`, res: "16"}, + }) +} + +func TestImport(t *testing.T) { + src0 := `import ( + "fmt" +) +` + run(t, []etest{ + {src: "fmt.Println(4)", err: "symbol not found: fmt"}, + {src: `import "xxx"`, err: "package not found: xxx"}, + {src: `import "fmt"; fmt.Println(4)`, res: ""}, + {src: src0 + "fmt.Println(4)", res: ""}, + {src: `func main() {import "fmt"; fmt.Println("hello")}`, err: "unexpected import"}, + {src: `import m "fmt"; m.Println(4)`, res: ""}, + {src: `import . 
"fmt"; Println(4)`, res: ""}, + }) +} + +func TestComposite(t *testing.T) { + run(t, []etest{ + {src: "type T struct{}; t := T{}; t", res: "{}"}, + {src: "t := struct{}{}; t", res: "{}"}, + // {src: `type T struct{N int; S string}; t := T{2, "foo"}`, res: `{2 foo}`}, + }) +} diff --git a/main.go b/main.go index 1293bd6..99652ad 100644 --- a/main.go +++ b/main.go @@ -11,8 +11,8 @@ import ( "reflect" "strings" + "github.com/mvertes/parscan/interpreter" "github.com/mvertes/parscan/lang/golang" - "github.com/mvertes/parscan/parser" "github.com/mvertes/parscan/scanner" ) @@ -76,7 +76,7 @@ func run(arg []string) (err error) { } args := rflag.Args() - interp := parser.NewInterpreter(scanner.NewScanner(golang.GoSpec)) + intpr := interpreter.NewInterpreter(scanner.NewScanner(golang.GoSpec)) var in io.Reader if str != "" { @@ -94,13 +94,13 @@ func run(arg []string) (err error) { } if isatty(in) { - return repl(interp, in) + return repl(intpr, in) } buf, err := io.ReadAll(in) if err != nil { return err } - _, err = interp.Eval(string(buf)) + _, err = intpr.Eval(string(buf)) return err } diff --git a/parser/compiler.go b/parser/compiler.go deleted file mode 100644 index 501fb25..0000000 --- a/parser/compiler.go +++ /dev/null @@ -1,523 +0,0 @@ -package parser - -import ( - "fmt" - "log" - "os" - "path" - "reflect" - "runtime" - "strconv" - - "github.com/mvertes/parscan/lang" - "github.com/mvertes/parscan/scanner" - "github.com/mvertes/parscan/vm" -) - -// Compiler represents the state of a compiler. -type Compiler struct { - *Parser - vm.Code // produced code, to fill VM with - Data []vm.Value // produced data, will be at the bottom of VM stack - Entry int // offset in Code to start execution from (skip function defintions) - - strings map[string]int // locations of strings in Data -} - -// NewCompiler returns a new compiler state for a given scanner. 
-func NewCompiler(scanner *scanner.Scanner) *Compiler { - return &Compiler{ - Parser: NewParser(scanner, true), - Entry: -1, - strings: map[string]int{}, - } -} - -// AddSym adds a new named value to the compiler symbol table, and returns its index in memory. -func (c *Compiler) AddSym(name string, value vm.Value) int { - p := len(c.Data) - c.Data = append(c.Data, value) - c.Parser.AddSym(p, name, value) - return p -} - -// Codegen generates vm code from parsed tokens. -func (c *Compiler) Codegen(tokens Tokens) (err error) { - log.Println("Codegen tokens:", tokens) - fixList := Tokens{} // list of tokens to fix after we gathered all necessary information - stack := []*symbol{} // for symbolic evaluation, type checking, etc - - emit := func(t scanner.Token, op vm.Op, arg ...int) { - _, file, line, _ := runtime.Caller(1) - fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg) - c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg}) - } - push := func(s *symbol) { stack = append(stack, s) } - pop := func() *symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } - - for i, t := range tokens { - switch t.Tok { - case lang.Int: - n, err := strconv.Atoi(t.Str) - if err != nil { - return err - } - push(&symbol{kind: symConst, value: vm.ValueOf(n), typ: vm.TypeOf(0)}) - emit(t, vm.Push, n) - - case lang.String: - s := t.Block() - v := vm.Value{Data: reflect.ValueOf(s), Type: vm.TypeOf(s)} - i, ok := c.strings[s] - if !ok { - i = len(c.Data) - c.Data = append(c.Data, v) - c.strings[s] = i - } - push(&symbol{kind: symConst, value: v}) - emit(t, vm.Dup, i) - - case lang.Add: - push(&symbol{typ: arithmeticOpType(pop(), pop())}) - emit(t, vm.Add) - - case lang.Mul: - push(&symbol{typ: arithmeticOpType(pop(), pop())}) - emit(t, vm.Mul) - - case lang.Sub: - push(&symbol{typ: arithmeticOpType(pop(), pop())}) - emit(t, vm.Sub) - - case lang.Minus: - emit(t, vm.Push, 0) - emit(t, vm.Sub) - - case lang.Not: - 
emit(t, vm.Not) - - case lang.Plus: - // Unary '+' is idempotent. Nothing to do. - - case lang.Addr: - push(&symbol{typ: vm.PointerTo(pop().typ)}) - emit(t, vm.Addr) - - case lang.Deref: - push(&symbol{typ: pop().typ.Elem()}) - emit(t, vm.Deref) - - case lang.Index: - push(&symbol{typ: pop().typ.Elem()}) - emit(t, vm.Index) - - case lang.Greater: - push(&symbol{typ: booleanOpType(pop(), pop())}) - emit(t, vm.Greater) - - case lang.Less: - push(&symbol{typ: booleanOpType(pop(), pop())}) - emit(t, vm.Lower) - - case lang.Call: - s := pop() - if s.kind != symValue { - typ := s.typ - // TODO: pop input types (careful with variadic function). - for i := 0; i < typ.Rtype.NumOut(); i++ { - push(&symbol{typ: typ.Out(i)}) - } - emit(t, vm.Call) - break - } - push(s) - fallthrough // A symValue must be called through callX. - - case lang.CallX: - rtyp := pop().value.Data.Type() - // TODO: pop input types (careful with variadic function). - for i := 0; i < rtyp.NumOut(); i++ { - push(&symbol{typ: &vm.Type{Rtype: rtyp.Out(i)}}) - } - emit(t, vm.CallX, t.Beg) - - case lang.Composite: - log.Println("COMPOSITE") - /* - d := pop() - switch d.typ.Rtype.Kind() { - case reflect.Struct: - // nf := d.typ.Rtype.NumField() - // emit(t.Pos, vm.New, d.index, c.typeSym(d.typ).index) - emit(t, vm.Field, 0) - emit(t, vm.Vassign) - emit(t, vm.Fdup, 2) - emit(t, vm.Field, 1) - emit(t, vm.Vassign) - emit(t, vm.Pop, 1) - // emit(t, vm.Fdup, 2) - // Assume an element list with no keys, one per struct field in order - } - */ - - case lang.Grow: - emit(t, vm.Grow, t.Beg) - - case lang.Define: - // TODO: support assignment to local, composite objects. 
- st := tokens[i-1] - l := len(c.Data) - d := pop() - typ := d.typ - if typ == nil { - typ = d.value.Type - } - v := vm.NewValue(typ) - c.addSym(l, st.Str, v, symVar, typ, false) - c.Data = append(c.Data, v) - emit(t, vm.Assign, l) - - case lang.Assign: - st := tokens[i-1] - if st.Tok == lang.Period || st.Tok == lang.Index { - emit(t, vm.Vassign) - break - } - s, ok := c.symbols[st.Str] - if !ok { - return fmt.Errorf("symbol not found: %s", st.Str) - } - d := pop() - typ := d.typ - if typ == nil { - typ = d.value.Type - } - if s.typ == nil { - s.typ = typ - s.value = vm.NewValue(typ) - } - if s.local { - if !s.used { - emit(st, vm.New, s.index, c.typeSym(s.typ).index) - s.used = true - } - emit(st, vm.Fassign, s.index) - break - } - if s.index == unsetAddr { - s.index = len(c.Data) - c.Data = append(c.Data, s.value) - } - emit(st, vm.Assign, s.index) - - case lang.Equal: - push(&symbol{typ: booleanOpType(pop(), pop())}) - emit(t, vm.Equal) - - case lang.EqualSet: - push(&symbol{typ: booleanOpType(pop(), pop())}) - emit(t, vm.EqualSet) - - case lang.Ident: - if i < len(tokens)-1 { - switch t1 := tokens[i+1]; t1.Tok { - case lang.Define, lang.Assign, lang.Colon: - continue - } - } - s, ok := c.symbols[t.Str] - if !ok { - return fmt.Errorf("symbol not found: %s", t.Str) - } - push(s) - if s.kind == symPkg { - break - } - if s.local { - emit(t, vm.Fdup, s.index) - } else { - if s.index == unsetAddr { - s.index = len(c.Data) - c.Data = append(c.Data, s.value) - } - log.Println(t, ": emit(", t.Pos, vm.Dup, s.index, ")") - emit(t, vm.Dup, s.index) - } - - case lang.Label: - lc := len(c.Code) - s, ok := c.symbols[t.Str] - if ok { - s.value = vm.ValueOf(lc) - if s.kind == symFunc { - // label is a function entry point, register its code address in data. 
- s.index = len(c.Data) - c.Data = append(c.Data, s.value) - } else { - c.Data[s.index] = s.value - } - } else { - c.symbols[t.Str] = &symbol{kind: symLabel, value: vm.ValueOf(lc)} - } - - case lang.JumpFalse: - var i int - if s, ok := c.symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpFalse, i) - - case lang.JumpSetFalse: - var i int - if s, ok := c.symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpSetFalse, i) - - case lang.JumpSetTrue: - var i int - if s, ok := c.symbols[t.Str]; !ok { - // t.Beg contains the position in code which needs to be fixed. - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.value.Data.Int()) - len(c.Code) - } - emit(t, vm.JumpSetTrue, i) - - case lang.Goto: - var i int - if s, ok := c.symbols[t.Str]; !ok { - t.Beg = len(c.Code) - fixList = append(fixList, t) - } else { - i = int(s.value.Data.Int()) - len(c.Code) - } - emit(t, vm.Jump, i) - - case lang.Period: - s := pop() - switch s.kind { - case symPkg: - p, ok := packages[s.pkgPath] - if !ok { - return fmt.Errorf("package not found: %s", s.pkgPath) - } - v, ok := p[t.Str[1:]] - if !ok { - return fmt.Errorf("symbol not found in package %s: %s", s.pkgPath, t.Str[1:]) - } - name := s.pkgPath + t.Str - var l int - sym, _, ok := c.getSym(name, "") - if ok { - l = sym.index - } else { - l = len(c.Data) - c.Data = append(c.Data, v) - c.addSym(l, name, v, symValue, v.Type, false) - sym = c.symbols[name] - } - push(sym) - emit(t, vm.Dup, l) - default: - if f, ok := s.typ.Rtype.FieldByName(t.Str[1:]); ok { - emit(t, vm.Field, f.Index...) 
- break - } - return fmt.Errorf("field or method not found: %s", t.Str[1:]) - } - - case lang.Return: - emit(t, vm.Return, t.Beg, t.End) - - default: - return fmt.Errorf("Codegen: unsupported token %v", t) - } - } - - // Finally we fix unresolved labels for jump destinations. - for _, t := range fixList { - s, ok := c.symbols[t.Str] - if !ok { - return fmt.Errorf("label not found: %q", t.Str) - } - c.Code[t.Beg].Arg[0] = int(s.value.Data.Int()) - t.Beg - - } - return err -} - -func arithmeticOpType(s1, _ *symbol) *vm.Type { return symtype(s1) } -func booleanOpType(_, _ *symbol) *vm.Type { return vm.TypeOf(true) } - -// PrintCode pretty prints the generated code in compiler. -func (c *Compiler) PrintCode() { - labels := map[int][]string{} // labels indexed by code location - data := map[int]string{} // data indexed by frame location - - for name, sym := range c.symbols { - if sym.kind == symLabel || sym.kind == symFunc { - i := int(sym.value.Data.Int()) - labels[i] = append(labels[i], name) - } - if sym.used { - data[sym.index] = name - } - } - - fmt.Fprintln(os.Stderr, "# Code:") - for i, l := range c.Code { - for _, label := range labels[i] { - fmt.Fprintln(os.Stderr, label+":") - } - extra := "" - switch l.Op { - case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli: - if d, ok := labels[i+l.Arg[0]]; ok { - extra = "// " + d[0] - } - case vm.Dup, vm.Assign, vm.Fdup, vm.Fassign: - if d, ok := data[l.Arg[0]]; ok { - extra = "// " + d - } - } - fmt.Fprintf(os.Stderr, "%4d %-14v %v\n", i, l, extra) - } - - for _, label := range labels[len(c.Code)] { - fmt.Fprintln(os.Stderr, label+":") - } - fmt.Fprintln(os.Stderr, "# End code") -} - -type entry struct { - name string - *symbol -} - -func (e entry) String() string { - if e.symbol != nil { - return fmt.Sprintf("name: %s,local: %t, i: %d, k: %d, t: %s, v: %v", - e.name, - e.local, - e.index, - e.kind, - e.typ, - e.value, - ) - } - return e.name -} - -// PrintData pretty prints the generated 
global data symbols in compiler. -func (c *Compiler) PrintData() { - dict := c.symbolsByIndex() - - fmt.Fprintln(os.Stderr, "# Data:") - for i, d := range c.Data { - fmt.Fprintf(os.Stderr, "%4d %T %v %v\n", i, d.Data.Interface(), d.Data, dict[i]) - } -} - -func (c *Compiler) symbolsByIndex() map[int]entry { - dict := map[int]entry{} - for name, sym := range c.symbols { - if sym.index == unsetAddr { - continue - } - dict[sym.index] = entry{name, sym} - } - return dict -} - -// Dump represents the state of a data dump. -type Dump struct { - Values []*DumpValue -} - -// DumpValue is a value of a dump state. -type DumpValue struct { - Index int - Name string - Kind int - Type string - Value any -} - -// Dump creates a snapshot of the execution state of global variables. -// This method is specifically implemented in the Compiler to minimize the coupling between -// the dump format and other components. By situating the dump logic in the Compiler, -// it relies solely on the program being executed and the indexing algorithm used for ordering variables -// (currently, this is an integer that corresponds to the order of variables in the program). -// This design choice allows the Virtual Machine (VM) to evolve its memory management strategies -// without compromising backward compatibility with dumps generated by previous versions. -func (c *Compiler) Dump() *Dump { - dict := c.symbolsByIndex() - dv := make([]*DumpValue, len(c.Data)) - for i, d := range c.Data { - e := dict[i] - dv[i] = &DumpValue{ - Index: e.index, - Name: e.name, - Kind: int(e.kind), - Type: e.typ.Name, - Value: d.Data.Interface(), - } - } - return &Dump{Values: dv} -} - -// ApplyDump sets previously saved dump, restoring the state of global variables. 
-func (c *Compiler) ApplyDump(d *Dump) error { - dict := c.symbolsByIndex() - for _, dv := range d.Values { - // do all the checks to be sure we are applying the correct values - e, ok := dict[dv.Index] - if !ok { - return fmt.Errorf("entry not found on index %d", dv.Index) - } - - if dv.Name != e.name || - dv.Type != e.typ.Name || - dv.Kind != int(e.kind) { - return fmt.Errorf("entry with index %d does not match with provided entry. "+ - "dumpValue: %s, %s, %d. memoryValue: %s, %s, %d", - dv.Index, - dv.Name, dv.Type, dv.Kind, - e.name, e.typ, e.kind) - } - - if dv.Index >= len(c.Data) { - return fmt.Errorf("index (%d) bigger than memory (%d)", dv.Index, len(c.Data)) - } - - if !c.Data[dv.Index].Data.CanSet() { - return fmt.Errorf("value %v cannot be set", dv.Value) - } - - c.Data[dv.Index].Data.Set(reflect.ValueOf(dv.Value)) - } - return nil -} - -func (c *Compiler) typeSym(t *vm.Type) *symbol { - tsym, ok := c.symbols[t.Rtype.String()] - if !ok { - tsym = &symbol{index: unsetAddr, kind: symType, typ: t} - c.symbols[t.Rtype.String()] = tsym - } - if tsym.index == unsetAddr { - tsym.index = len(c.Data) - c.Data = append(c.Data, vm.NewValue(t)) - } - return tsym -} diff --git a/parser/decl.go b/parser/decl.go index 094fa9f..876b2ca 100644 --- a/parser/decl.go +++ b/parser/decl.go @@ -26,7 +26,7 @@ func (p *Parser) parseConst(in Tokens) (out Tokens, err error) { return out, err } var cnt int64 - p.symbols["iota"].cval = constant.Make(cnt) + p.Symbols["iota"].Cval = constant.Make(cnt) var prev Tokens for i, lt := range in.Split(lang.Semicolon) { if i > 0 && len(lt) == 1 { @@ -39,7 +39,7 @@ func (p *Parser) parseConst(in Tokens) (out Tokens, err error) { out = append(out, ot...) 
prev = lt[1:] cnt++ - p.symbols["iota"].cval = constant.Make(cnt) + p.Symbols["iota"].Cval = constant.Make(cnt) } return out, err } @@ -57,7 +57,7 @@ func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { for _, lt := range decl.Split(lang.Comma) { vars = append(vars, lt[0].Str) name := strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/") - p.addSym(unsetAddr, name, nilValue, symConst, nil, false) + p.AddSymbol(UnsetAddr, name, nilValue, SymConst, nil, false) } } else { return out, err @@ -76,13 +76,13 @@ func (p *Parser) parseConstLine(in Tokens) (out Tokens, err error) { return out, err } name := strings.TrimPrefix(p.scope+"/"+vars[i], "/") - p.symbols[name] = &symbol{ - kind: symConst, - index: unsetAddr, - cval: cval, - value: vm.ValueOf(constValue(cval)), - local: p.funcScope != "", - used: true, + p.Symbols[name] = &Symbol{ + Kind: SymConst, + Index: UnsetAddr, + Cval: cval, + Value: vm.ValueOf(constValue(cval)), + Local: p.funcScope != "", + Used: true, } // TODO: type conversion when applicable. } @@ -131,14 +131,14 @@ func (p *Parser) evalConstExpr(in Tokens) (cval constant.Value, length int, err case id.IsLiteral(): return constant.MakeFromLiteral(t.Str, gotok[id], 0), 1, err case id == lang.Ident: - s, _, ok := p.getSym(t.Str, p.scope) + s, _, ok := p.GetSym(t.Str, p.scope) if !ok { return nil, 0, errors.New("symbol not found") } - if s.kind != symConst { + if s.Kind != SymConst { return nil, 0, errors.New("symbol is not a constant") } - return s.cval, 1, err + return s.Cval, 1, err case id == lang.Call: // TODO: implement support for type conversions and builtin calls. panic("not implemented yet") @@ -223,7 +223,7 @@ func (p *Parser) parseImportLine(in Tokens) (out Tokens, err error) { return out, fmt.Errorf("invalid argument %v", in[0]) } pp := in[l-1].Block() - pkg, ok := packages[pp] + pkg, ok := Packages[pp] if !ok { // TODO: try to import source package from here. 
return out, fmt.Errorf("package not found: %s", pp) @@ -240,10 +240,10 @@ func (p *Parser) parseImportLine(in Tokens) (out Tokens, err error) { if n == "." { // Import package symbols in the current scope. for k, v := range pkg { - p.symbols[k] = &symbol{index: unsetAddr, pkgPath: pp, value: v} + p.Symbols[k] = &Symbol{Index: UnsetAddr, PkgPath: pp, Value: v} } } else { - p.symbols[n] = &symbol{kind: symPkg, pkgPath: pp, index: unsetAddr} + p.Symbols[n] = &Symbol{Kind: SymPkg, PkgPath: pp, Index: UnsetAddr} } return out, err } @@ -299,7 +299,7 @@ func (p *Parser) parseTypeLine(in Tokens) (out Tokens, err error) { return out, err } typ.Name = in[0].Str - p.addSym(unsetAddr, in[0].Str, vm.NewValue(typ), symType, typ, p.funcScope != "") + p.AddSymbol(UnsetAddr, in[0].Str, vm.NewValue(typ), SymType, typ, p.funcScope != "") return out, err } @@ -336,10 +336,10 @@ func (p *Parser) parseVarLine(in Tokens) (out Tokens, err error) { vars = append(vars, lt[0].Str) name := strings.TrimPrefix(p.scope+"/"+lt[0].Str, "/") if p.funcScope == "" { - p.addSym(unsetAddr, name, nilValue, symVar, nil, false) + p.AddSymbol(UnsetAddr, name, nilValue, SymVar, nil, false) continue } - p.addSym(p.framelen[p.funcScope], name, nilValue, symVar, nil, false) + p.AddSymbol(p.framelen[p.funcScope], name, nilValue, SymVar, nil, false) p.framelen[p.funcScope]++ } } else { diff --git a/parser/dump_test.go b/parser/dump_test.go deleted file mode 100644 index db86c48..0000000 --- a/parser/dump_test.go +++ /dev/null @@ -1,48 +0,0 @@ -package parser_test - -import ( - "testing" - - "github.com/mvertes/parscan/parser" -) - -func TestDump(t *testing.T) { - initProgram := "var a int = 2+1; a" - interp := parser.NewInterpreter(GoScanner) - r, e := interp.Eval(initProgram) - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - r, e = interp.Eval("a = 100") - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - d := interp.Dump() - t.Log(d) - - interp = parser.NewInterpreter(GoScanner) - r, e = 
interp.Eval(initProgram) - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - e = interp.ApplyDump(d) - if e != nil { - t.Fatal(e) - } - - r, e = interp.Eval("a = a + 1;a") - t.Log(r, e) - if e != nil { - t.Fatal(e) - } - - if r.Interface() != int(101) { - t.Fatalf("unexpected result: %v", r) - } -} diff --git a/parser/expr.go b/parser/expr.go index 23534e5..f2dbce3 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -40,7 +40,7 @@ func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { continue } // resolve symbol if not a selector rhs. - _, sc, ok := p.getSym(t.Str, p.scope) + _, sc, ok := p.GetSym(t.Str, p.scope) if ok { if sc != "" { t.Str = sc + "/" + t.Str @@ -98,7 +98,7 @@ func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { if err != nil { return out, ErrInvalidType } - p.addSym(unsetAddr, typ.String(), vm.NewValue(typ), symType, typ, p.funcScope != "") + p.AddSymbol(UnsetAddr, typ.String(), vm.NewValue(typ), SymType, typ, p.funcScope != "") out = append(out, t, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: typ.String()}) i = ti vl += 2 diff --git a/parser/interpreter.go b/parser/interpreter.go index 6f354d9..06a4bc1 100644 --- a/parser/interpreter.go +++ b/parser/interpreter.go @@ -1,8 +1,9 @@ -package parser +package interpreter import ( "reflect" + "github.com/mvertes/parscan/compiler" "github.com/mvertes/parscan/scanner" "github.com/mvertes/parscan/vm" ) @@ -11,13 +12,13 @@ const debug = true // Interpreter represents the state of an interpreter. type Interpreter struct { - *Compiler + *compiler.Compiler *vm.Machine } // NewInterpreter returns a new interpreter state. func NewInterpreter(s *scanner.Scanner) *Interpreter { - return &Interpreter{NewCompiler(s), &vm.Machine{}} + return &Interpreter{compiler.NewCompiler(s), &vm.Machine{}} } // Eval interprets a src program and return the last produced value if any, or an error. 
@@ -39,8 +40,8 @@ func (i *Interpreter) Eval(src string) (res reflect.Value, err error) { } i.Push(i.Data[dataOffset:]...) i.PushCode(i.Code[codeOffset:]...) - if s, ok := i.symbols["main"]; ok { - i.PushCode(vm.Instruction{Op: vm.Calli, Arg: []int{int(i.Data[s.index].Data.Int())}}) + if s, ok := i.Symbols["main"]; ok { + i.PushCode(vm.Instruction{Op: vm.Calli, Arg: []int{int(i.Data[s.Index].Data.Int())}}) } i.PushCode(vm.Instruction{Op: vm.Exit}) i.SetIP(max(codeOffset, i.Entry)) diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go deleted file mode 100644 index bbe0e2c..0000000 --- a/parser/interpreter_test.go +++ /dev/null @@ -1,267 +0,0 @@ -package parser_test - -import ( - "fmt" - "log" - "testing" - - "github.com/mvertes/parscan/lang/golang" - "github.com/mvertes/parscan/parser" - "github.com/mvertes/parscan/scanner" -) - -type etest struct { - src, res, err string - skip bool -} - -var GoScanner *scanner.Scanner - -func init() { - log.SetFlags(log.Lshortfile) - GoScanner = scanner.NewScanner(golang.GoSpec) -} - -func gen(test etest) func(*testing.T) { - return func(t *testing.T) { - if test.skip { - t.Skip() - } - interp := parser.NewInterpreter(GoScanner) - errStr := "" - r, e := interp.Eval(test.src) - t.Log(r, e) - if e != nil { - errStr = e.Error() - } - if errStr != test.err { - t.Errorf("got error %#v, want error %#v", errStr, test.err) - } - if res := fmt.Sprintf("%v", r); test.err == "" && res != test.res { - t.Errorf("got %#v, want %#v", res, test.res) - } - } -} - -func run(t *testing.T, tests []etest) { - for _, test := range tests { - test := test - t.Run("", gen(test)) - } -} - -func TestExpr(t *testing.T) { - run(t, []etest{ - {src: "", res: ""}, - {src: "1+2", res: "3"}, - {src: "1+", err: "block not terminated"}, - {src: "a := 1 + 2; b := 0; a + 1", res: "4"}, - {src: "1+(2+3)", res: "6"}, - {src: "(1+2)+3", res: "6"}, - {src: "(6+(1+2)+3)+5", res: "17"}, - {src: "(6+(1+2+3)+5", err: "1:1: block not terminated"}, - {src: "a := 
2; a = 3; a", res: "3"}, - {src: "2 * 3 + 1 == 7", res: "true"}, - {src: "7 == 2 * 3 + 1", res: "true"}, - {src: "1 + 3 * 2 == 2 * 3 + 1", res: "true"}, - {src: "a := 1 + 3 * 2 == 2 * 3 + 1; a", res: "true"}, - {src: "-2", res: "-2"}, - {src: "-2 + 5", res: "3"}, - {src: "5 + -2", res: "3"}, - {src: "!false", res: "true"}, - {src: `a := "hello"`, res: "hello"}, - }) -} - -func TestLogical(t *testing.T) { - run(t, []etest{ - {src: "true && false", res: "false"}, - {src: "true && true", res: "true"}, - {src: "true && true && false", res: "false"}, - {src: "false || true && true", res: "true"}, - {src: "2 < 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 && 1 > 2 || 3 == 3", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>0", res: "true"}, - {src: "2 > 3 || 2 == 1+1 && 3>4 || 1<2", res: "true"}, - {src: "a := 1+1 < 3 && 4 == 2+2; a", res: "true"}, - {src: "a := 1+1 < 3 || 3 == 2+2; a", res: "true"}, - }) -} - -func TestFunc(t *testing.T) { - run(t, []etest{ - {src: "func f() int {return 2}; a := f(); a", res: "2"}, - {src: "func f() int {return 2}; f()", res: "2"}, - {src: "func f(a int) int {return a+2}; f(3)", res: "5"}, - {src: "func f(a int) int {if a < 4 {a = 5}; return a}; f(3)", res: "5"}, - {src: "func f(a int) int {return a+2}; 7 - f(3)", res: "2"}, - {src: "func f(a int) int {return a+2}; f(5) - f(3)", res: "2"}, - {src: "func f(a int) int {return a+2}; f(3) - 2", res: "3"}, - {src: "func f(a, b, c int) int {return a+b-c} ; f(7, 1, 3)", res: "5"}, - {src: "var a int; func f() {a = a+2}; f(); a", res: "2"}, - {src: "var f = func(a int) int {return a+3}; f(2)", res: "5"}, - }) -} - -func TestIf(t *testing.T) { - run(t, []etest{ - {src: "a := 0; if a == 0 { a = 2 } else { a = 1 }; a", res: "2"}, - {src: "a := 0; if a == 1 { a = 2 } else { a = 1 }; a", res: "1"}, - {src: "a := 0; if a == 1 { a = 2 } else if a == 0 { a = 3 } else { a = 1 }; a", res: "3"}, - {src: "a := 0; if a == 1 { a = 2 } else if a == 2 { a = 3 } else { a = 1 }; a", res: "1"}, - {src: "a := 
1; if a > 0 && a < 2 { a = 3 }; a", res: "3"}, - {src: "a := 1; if a < 0 || a < 2 { a = 3 }; a", res: "3"}, - }) -} - -func TestFor(t *testing.T) { - run(t, []etest{ - {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, - {src: "func f() int {a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; return a}; f()", res: "3"}, - {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, - {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, - {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, - }) -} - -func TestGoto(t *testing.T) { - run(t, []etest{ - {src: ` -func f(a int) int { - a = a+1 - goto end - a = a+1 -end: - return a -} -f(3)`, res: "4"}, - }) -} - -func TestSwitch(t *testing.T) { - src0 := `func f(a int) int { - switch a { - default: a = 0 - case 1,2: a = a+1 - case 3: a = a+2; break; a = 3 - case 4: a = 10 - } - return a -} -` - src1 := `func f(a int) int { - switch { - case a < 3: return 2 - case a < 5: return 5 - default: a = 0 - } - return a -} -` - run(t, []etest{ - {src: src0 + "f(1)", res: "2"}, - {src: src0 + "f(2)", res: "3"}, - {src: src0 + "f(3)", res: "5"}, - {src: src0 + "f(4)", res: "10"}, - {src: src0 + "f(5)", res: "0"}, - - {src: src1 + "f(1)", res: "2"}, - {src: src1 + "f(4)", res: "5"}, - {src: src1 + "f(6)", res: "0"}, - }) -} - -func TestConst(t *testing.T) { - src0 := `const ( - a = iota - b - c -) -` - run(t, []etest{ - {src: "const a = 1+2; a", res: "3"}, - {src: "const a, b = 1, 2; a+b", res: "3"}, - {src: "const huge = 1 << 100; const four = huge >> 98; four", res: "4"}, - - {src: src0 + "c", res: "2"}, - }) -} - -func TestArray(t *testing.T) { - run(t, []etest{ - {src: "type T []int; var t T; t", res: "[]"}, - {src: "type T [3]int; var t T; t", res: "[0 0 0]"}, - {src: "type T [3]int; var t T; t[1] = 2; t", res: "[0 2 0]"}, - }) -} - -func TestPointer(t *testing.T) { - run(t, []etest{ - {src: "var a *int; a", res: ""}, - {src: "var 
a int; var b *int = &a; *b", res: "0"}, - {src: "var a int = 2; var b *int = &a; *b", res: "2"}, - }) -} - -func TestStruct(t *testing.T) { - run(t, []etest{ - {src: "type T struct {a string; b, c int}; var t T; t", res: "{ 0 0}"}, - {src: "type T struct {a int}; var t T; t.a", res: "0"}, - {src: "type T struct {a int}; var t T; t.a = 1; t.a", res: "1"}, - }) -} - -func TestType(t *testing.T) { - src0 := `type ( - I int - S string -) -` - run(t, []etest{ - {src: "type t int; var a t = 1; a", res: "1"}, - {src: "type t = int; var a t = 1; a", res: "1"}, - {src: src0 + `var s S = "xx"; s`, res: "xx"}, - }) -} - -func TestVar(t *testing.T) { - run(t, []etest{ - {src: "var a int; a", res: "0"}, - {src: "var a, b, c int; a", res: "0"}, - {src: "var a, b, c int; a + b", res: "0"}, - {src: "var a, b, c int; a + b + c", res: "0"}, - {src: "var a int = 2+1; a", res: "3"}, - {src: "var a, b int = 2, 5; a+b", res: "7"}, - {src: "var x = 5; x", res: "5"}, - {src: "var a = 1; func f() int { var a, b int = 3, 4; return a+b}; a+f()", res: "8"}, - {src: `var a = "hello"; a`, res: "hello"}, - {src: `var ( - a, b int = 4+1, 3 - c = 8 -); a+b+c`, res: "16"}, - }) -} - -func TestImport(t *testing.T) { - src0 := `import ( - "fmt" -) -` - run(t, []etest{ - {src: "fmt.Println(4)", err: "symbol not found: fmt"}, - {src: `import "xxx"`, err: "package not found: xxx"}, - {src: `import "fmt"; fmt.Println(4)`, res: ""}, - {src: src0 + "fmt.Println(4)", res: ""}, - {src: `func main() {import "fmt"; fmt.Println("hello")}`, err: "unexpected import"}, - {src: `import m "fmt"; m.Println(4)`, res: ""}, - {src: `import . 
"fmt"; Println(4)`, res: ""}, - }) -} - -func TestComposite(t *testing.T) { - run(t, []etest{ - {src: "type T struct{}; t := T{}; t", res: "{}"}, - {src: "t := struct{}{}; t", res: "{}"}, - // {src: `type T struct{N int; S string}; t := T{2, "foo"}`, res: `{2 foo}`}, - }) -} diff --git a/parser/package.go b/parser/package.go index f03c59f..67e2867 100644 --- a/parser/package.go +++ b/parser/package.go @@ -6,7 +6,7 @@ import ( "github.com/mvertes/parscan/vm" ) -var packages = map[string]map[string]vm.Value{ +var Packages = map[string]map[string]vm.Value{ "fmt": fmtPkg, } diff --git a/parser/parse.go b/parser/parse.go index 266ce8b..3a7ce5e 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -16,8 +16,8 @@ import ( type Parser struct { *scanner.Scanner - symbols map[string]*symbol - function *symbol + Symbols map[string]*Symbol + function *Symbol scope string fname string pkgName string // current package name @@ -45,7 +45,7 @@ func NewParser(scanner *scanner.Scanner, noPkg bool) *Parser { return &Parser{ Scanner: scanner, noPkg: noPkg, - symbols: initUniverse(), + Symbols: initUniverse(), framelen: map[string]int{}, labelCount: map[string]int{}, } @@ -255,10 +255,10 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) { p.fname = fname ofunc := p.function funcScope := p.funcScope - s, _, ok := p.getSym(fname, p.scope) + s, _, ok := p.GetSym(fname, p.scope) if !ok { - s = &symbol{used: true} - p.symbols[p.scope+fname] = s + s = &Symbol{Used: true} + p.Symbols[p.scope+fname] = s } p.pushScope(fname) p.funcScope = p.scope @@ -282,8 +282,8 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) { if err != nil { return out, err } - s.kind = symFunc - s.typ = typ + s.Kind = SymFunc + s.Type = typ p.function = s toks, err := p.Parse(in[len(in)-1].Block()) @@ -479,8 +479,8 @@ func (p *Parser) parseReturn(in Tokens) (out Tokens, err error) { // TODO: the function symbol should be already present in the parser context. 
// otherwise no way to handle anonymous func. s := p.function - in[0].Beg = s.typ.Rtype.NumOut() - in[0].End = s.typ.Rtype.NumIn() + in[0].Beg = s.Type.Rtype.NumOut() + in[0].End = s.Type.Rtype.NumIn() out = append(out, in[0]) return out, err } diff --git a/parser/symbol.go b/parser/symbol.go index 606752c..3aba4e5 100644 --- a/parser/symbol.go +++ b/parser/symbol.go @@ -8,54 +8,54 @@ import ( "github.com/mvertes/parscan/vm" ) -type symKind int +type SymKind int const ( - symValue symKind = iota // a Go value defined in the runtime - symType // a Go type - symLabel // a label indication a position in the VM code - symConst // a Go constant - symVar // a Go variable, located in the VM memory - symFunc // a Go function, located in the VM code - symPkg // a Go package + SymValue SymKind = iota // a value defined in the runtime + SymType // a type + SymLabel // a label indication a position in the VM code + SymConst // a constant + SymVar // a variable, located in the VM memory + SymFunc // a function, located in the VM code + SymPkg // a package ) -//go:generate stringer -type=symKind +//go:generate stringer -type=SymKind -const unsetAddr = -65535 +const UnsetAddr = -65535 -type symbol struct { - kind symKind - index int // address of symbol in frame - pkgPath string // - typ *vm.Type // - value vm.Value // - cval constant.Value // - local bool // if true address is relative to local frame, otherwise global - used bool // +type Symbol struct { + Kind SymKind + Index int // address of symbol in frame + PkgPath string // + Type *vm.Type // + Value vm.Value // + Cval constant.Value // + Local bool // if true address is relative to local frame, otherwise global + Used bool // } -func symtype(s *symbol) *vm.Type { - if s.typ != nil { - return s.typ +func SymbolType(s *Symbol) *vm.Type { + if s.Type != nil { + return s.Type } - return vm.TypeOf(s.value) + return vm.TypeOf(s.Value) } // AddSym add a new named value at memory position i in the parser symbol table. 
-func (p *Parser) AddSym(i int, name string, v vm.Value) { - p.addSym(i, name, v, symValue, nil, false) -} +// func (p *Parser) AddSym(i int, name string, v vm.Value) { +// p.addSym(i, name, v, SymValue, nil, false) +// } -func (p *Parser) addSym(i int, name string, v vm.Value, k symKind, t *vm.Type, local bool) { +func (p *Parser) AddSymbol(i int, name string, v vm.Value, k SymKind, t *vm.Type, local bool) { name = strings.TrimPrefix(name, "/") - p.symbols[name] = &symbol{kind: k, index: i, local: local, value: v, typ: t} + p.Symbols[name] = &Symbol{Kind: k, Index: i, Local: local, Value: v, Type: t} } -// getSym searches for an existing symbol starting from the deepest scope. -func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) { +// GetSym searches for an existing symbol starting from the deepest scope. +func (p *Parser) GetSym(name, scope string) (sym *Symbol, sc string, ok bool) { for { - if sym, ok = p.symbols[scope+"/"+name]; ok { + if sym, ok = p.Symbols[scope+"/"+name]; ok { return sym, scope, ok } i := strings.LastIndex(scope, "/") @@ -66,23 +66,23 @@ func (p *Parser) getSym(name, scope string) (sym *symbol, sc string, ok bool) { break } } - sym, ok = p.symbols[name] + sym, ok = p.Symbols[name] return sym, scope, ok } -func initUniverse() map[string]*symbol { - return map[string]*symbol{ - "any": {kind: symType, index: unsetAddr, typ: vm.TypeOf((*any)(nil)).Elem()}, - "bool": {kind: symType, index: unsetAddr, typ: vm.TypeOf((*bool)(nil)).Elem()}, - "error": {kind: symType, index: unsetAddr, typ: vm.TypeOf((*error)(nil)).Elem()}, - "int": {kind: symType, index: unsetAddr, typ: vm.TypeOf((*int)(nil)).Elem()}, - "string": {kind: symType, index: unsetAddr, typ: vm.TypeOf((*string)(nil)).Elem()}, +func initUniverse() map[string]*Symbol { + return map[string]*Symbol{ + "any": {Kind: SymType, Index: UnsetAddr, Type: vm.TypeOf((*any)(nil)).Elem()}, + "bool": {Kind: SymType, Index: UnsetAddr, Type: vm.TypeOf((*bool)(nil)).Elem()}, + 
"error": {Kind: SymType, Index: UnsetAddr, Type: vm.TypeOf((*error)(nil)).Elem()}, + "int": {Kind: SymType, Index: UnsetAddr, Type: vm.TypeOf((*int)(nil)).Elem()}, + "string": {Kind: SymType, Index: UnsetAddr, Type: vm.TypeOf((*string)(nil)).Elem()}, - "nil": {index: unsetAddr}, - "iota": {kind: symConst, index: unsetAddr}, - "true": {index: unsetAddr, value: vm.ValueOf(true), typ: vm.TypeOf(true)}, - "false": {index: unsetAddr, value: vm.ValueOf(false), typ: vm.TypeOf(false)}, + "nil": {Index: UnsetAddr}, + "iota": {Kind: SymConst, Index: UnsetAddr}, + "true": {Index: UnsetAddr, Value: vm.ValueOf(true), Type: vm.TypeOf(true)}, + "false": {Index: UnsetAddr, Value: vm.ValueOf(false), Type: vm.TypeOf(false)}, - "println": {index: unsetAddr, value: vm.ValueOf(func(v ...any) { fmt.Println(v...) })}, + "println": {Index: UnsetAddr, Value: vm.ValueOf(func(v ...any) { fmt.Println(v...) })}, } } diff --git a/parser/symkind_string.go b/parser/symkind_string.go index a22c994..dc6a33d 100644 --- a/parser/symkind_string.go +++ b/parser/symkind_string.go @@ -1,4 +1,4 @@ -// Code generated by "stringer -type=symKind"; DO NOT EDIT. +// Code generated by "stringer -type=SymKind"; DO NOT EDIT. package parser @@ -8,23 +8,23 @@ func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} - _ = x[symValue-0] - _ = x[symType-1] - _ = x[symLabel-2] - _ = x[symConst-3] - _ = x[symVar-4] - _ = x[symFunc-5] - _ = x[symPkg-6] + _ = x[SymValue-0] + _ = x[SymType-1] + _ = x[SymLabel-2] + _ = x[SymConst-3] + _ = x[SymVar-4] + _ = x[SymFunc-5] + _ = x[SymPkg-6] } -const _symKind_name = "symValuesymTypesymLabelsymConstsymVarsymFuncsymPkg" +const _SymKind_name = "SymValueSymTypeSymLabelSymConstSymVarSymFuncSymPkg" -var _symKind_index = [...]uint8{0, 8, 15, 23, 31, 37, 44, 50} +var _SymKind_index = [...]uint8{0, 8, 15, 23, 31, 37, 44, 50} -func (i symKind) String() string { +func (i SymKind) String() string { idx := int(i) - 0 - if i < 0 || idx >= len(_symKind_index)-1 { - return "symKind(" + strconv.FormatInt(int64(i), 10) + ")" + if i < 0 || idx >= len(_SymKind_index)-1 { + return "SymKind(" + strconv.FormatInt(int64(i), 10) + ")" } - return _symKind_name[_symKind_index[idx]:_symKind_index[idx+1]] + return _SymKind_name[_SymKind_index[idx]:_SymKind_index[idx+1]] } diff --git a/parser/type.go b/parser/type.go index b1b25dc..6f75f6f 100644 --- a/parser/type.go +++ b/parser/type.go @@ -100,11 +100,11 @@ func (p *Parser) parseTypeExpr(in Tokens) (typ *vm.Type, err error) { case lang.Ident: // TODO: selector expression (pkg.type) - s, _, ok := p.getSym(in[0].Str, p.scope) - if !ok || s.kind != symType { + s, _, ok := p.GetSym(in[0].Str, p.scope) + if !ok || s.Kind != SymType { return nil, fmt.Errorf("%w: %s", ErrInvalidType, in[0].Str) } - return s.typ, nil + return s.Type, nil case lang.Struct: if len(in) != 2 || in[1].Tok != lang.BraceBlock { @@ -174,16 +174,16 @@ func (p *Parser) addSymVar(index int, name string, typ *vm.Type, flag typeFlag, zv := vm.NewValue(typ) switch flag { case parseTypeIn: - p.addSym(-index-2, name, zv, symVar, typ, true) + p.AddSymbol(-index-2, name, zv, SymVar, typ, true) case parseTypeOut: - p.addSym(p.framelen[p.funcScope], name, zv, symVar, typ, true) + p.AddSymbol(p.framelen[p.funcScope], name, zv, SymVar, typ, 
true) p.framelen[p.funcScope]++ case parseTypeVar: if !local { - p.addSym(unsetAddr, name, zv, symVar, typ, local) + p.AddSymbol(UnsetAddr, name, zv, SymVar, typ, local) break } - p.addSym(p.framelen[p.funcScope], name, zv, symVar, typ, local) + p.AddSymbol(p.framelen[p.funcScope], name, zv, SymVar, typ, local) p.framelen[p.funcScope]++ } } @@ -193,8 +193,8 @@ func (p *Parser) hasFirstParam(in Tokens) bool { if in[0].Tok != lang.Ident { return false } - s, _, ok := p.getSym(in[0].Str, p.scope) - return !ok || s.kind != symType + s, _, ok := p.GetSym(in[0].Str, p.scope) + return !ok || s.Kind != SymType } // typeStartIndex returns the index of the start of type expression in tokens, or -1. -- cgit v1.2.3