summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2025-11-29 19:46:34 +0100
committerMarc Vertes <mvertes@free.fr>2025-11-29 19:46:34 +0100
commitf40a1c23467eef36f53635e525f8b25f591e8a45 (patch)
treebeae2442ecee1d734b9a676f7b176376a1d80af5 /compiler
parentfbc73922e9853d7e344e388f3fdfedb8fa323682 (diff)
chore: shorter name for packages, simpilfy vm values
Diffstat (limited to 'compiler')
-rw-r--r--compiler/compiler.go522
1 files changed, 0 insertions, 522 deletions
diff --git a/compiler/compiler.go b/compiler/compiler.go
deleted file mode 100644
index 679a886..0000000
--- a/compiler/compiler.go
+++ /dev/null
@@ -1,522 +0,0 @@
-// Package compiler implements a compiler targeting the vm.
-package compiler
-
-import (
- "fmt"
- "log"
- "os"
- "path"
- "reflect"
- "runtime"
- "strconv"
-
- "github.com/mvertes/parscan/lang"
- "github.com/mvertes/parscan/parser"
- "github.com/mvertes/parscan/scanner"
- "github.com/mvertes/parscan/vm"
-)
-
-// Compiler represents the state of a compiler.
-type Compiler struct {
- *parser.Parser
- vm.Code // produced code, to fill VM with
- Data []vm.Value // produced data, will be at the bottom of VM stack
- Entry int // offset in Code to start execution from (skip function defintions)
-
- strings map[string]int // locations of strings in Data
-}
-
-// NewCompiler returns a new compiler state for a given scanner.
-func NewCompiler(spec *lang.Spec) *Compiler {
- return &Compiler{
- Parser: parser.NewParser(spec, true),
- Entry: -1,
- strings: map[string]int{},
- }
-}
-
-// AddSym adds a new named value to the compiler symbol table, and returns its index in memory.
-func (c *Compiler) AddSym(name string, value vm.Value) int {
- p := len(c.Data)
- c.Data = append(c.Data, value)
- c.AddSymbol(p, name, value, parser.SymValue, nil, false)
- return p
-}
-
-// Codegen generates vm code from parsed tokens.
-func (c *Compiler) Codegen(tokens parser.Tokens) (err error) {
- log.Println("Codegen tokens:", tokens)
- fixList := parser.Tokens{} // list of tokens to fix after all necessary information is gathered
- stack := []*parser.Symbol{} // for symbolic evaluation, type checking, etc
-
- emit := func(t scanner.Token, op vm.Op, arg ...int) {
- _, file, line, _ := runtime.Caller(1)
- fmt.Fprintf(os.Stderr, "%s:%d: %v emit %v %v\n", path.Base(file), line, t, op, arg)
- c.Code = append(c.Code, vm.Instruction{Pos: vm.Pos(t.Pos), Op: op, Arg: arg})
- }
- push := func(s *parser.Symbol) { stack = append(stack, s) }
- pop := func() *parser.Symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s }
-
- for i, t := range tokens {
- switch t.Tok {
- case lang.Int:
- n, err := strconv.Atoi(t.Str)
- if err != nil {
- return err
- }
- push(&parser.Symbol{Kind: parser.SymConst, Value: vm.ValueOf(n), Type: vm.TypeOf(0)})
- emit(t, vm.Push, n)
-
- case lang.String:
- s := t.Block()
- v := vm.Value{Data: reflect.ValueOf(s), Type: vm.TypeOf(s)}
- i, ok := c.strings[s]
- if !ok {
- i = len(c.Data)
- c.Data = append(c.Data, v)
- c.strings[s] = i
- }
- push(&parser.Symbol{Kind: parser.SymConst, Value: v})
- emit(t, vm.Dup, i)
-
- case lang.Add:
- push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())})
- emit(t, vm.Add)
-
- case lang.Mul:
- push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())})
- emit(t, vm.Mul)
-
- case lang.Sub:
- push(&parser.Symbol{Type: arithmeticOpType(pop(), pop())})
- emit(t, vm.Sub)
-
- case lang.Minus:
- emit(t, vm.Push, 0)
- emit(t, vm.Sub)
-
- case lang.Not:
- emit(t, vm.Not)
-
- case lang.Plus:
- // Unary '+' is idempotent. Nothing to do.
-
- case lang.Addr:
- push(&parser.Symbol{Type: vm.PointerTo(pop().Type)})
- emit(t, vm.Addr)
-
- case lang.Deref:
- push(&parser.Symbol{Type: pop().Type.Elem()})
- emit(t, vm.Deref)
-
- case lang.Index:
- push(&parser.Symbol{Type: pop().Type.Elem()})
- emit(t, vm.Index)
-
- case lang.Greater:
- push(&parser.Symbol{Type: booleanOpType(pop(), pop())})
- emit(t, vm.Greater)
-
- case lang.Less:
- push(&parser.Symbol{Type: booleanOpType(pop(), pop())})
- emit(t, vm.Lower)
-
- case lang.Call:
- s := pop()
- if s.Kind != parser.SymValue {
- typ := s.Type
- // TODO: pop input types (careful with variadic function).
- for i := 0; i < typ.Rtype.NumOut(); i++ {
- push(&parser.Symbol{Type: typ.Out(i)})
- }
- emit(t, vm.Call)
- break
- }
- push(s)
- fallthrough // A symValue must be called through callX.
-
- case lang.CallX:
- rtyp := pop().Value.Data.Type()
- // TODO: pop input types (careful with variadic function).
- for i := 0; i < rtyp.NumOut(); i++ {
- push(&parser.Symbol{Type: &vm.Type{Rtype: rtyp.Out(i)}})
- }
- emit(t, vm.CallX, t.Beg)
-
- case lang.Composite:
- log.Println("COMPOSITE")
- /*
- d := pop()
- switch d.typ.Rtype.Kind() {
- case reflect.Struct:
- // nf := d.typ.Rtype.NumField()
- // emit(t.Pos, vm.New, d.index, c.typeSym(d.typ).index)
- emit(t, vm.Field, 0)
- emit(t, vm.Vassign)
- emit(t, vm.Fdup, 2)
- emit(t, vm.Field, 1)
- emit(t, vm.Vassign)
- emit(t, vm.Pop, 1)
- // emit(t, vm.Fdup, 2)
- // Assume an element list with no keys, one per struct field in order
- }
- */
-
- case lang.Grow:
- emit(t, vm.Grow, t.Beg)
-
- case lang.Define:
- // TODO: support assignment to local, composite objects.
- st := tokens[i-1]
- l := len(c.Data)
- d := pop()
- typ := d.Type
- if typ == nil {
- typ = d.Value.Type
- }
- v := vm.NewValue(typ)
- c.AddSymbol(l, st.Str, v, parser.SymVar, typ, false)
- c.Data = append(c.Data, v)
- emit(t, vm.Assign, l)
-
- case lang.Assign:
- st := tokens[i-1]
- if st.Tok == lang.Period || st.Tok == lang.Index {
- emit(t, vm.Vassign)
- break
- }
- s, ok := c.Symbols[st.Str]
- if !ok {
- return fmt.Errorf("symbol not found: %s", st.Str)
- }
- d := pop()
- typ := d.Type
- if typ == nil {
- typ = d.Value.Type
- }
- if s.Type == nil {
- s.Type = typ
- s.Value = vm.NewValue(typ)
- }
- if s.Local {
- if !s.Used {
- emit(st, vm.New, s.Index, c.typeSym(s.Type).Index)
- s.Used = true
- }
- emit(st, vm.Fassign, s.Index)
- break
- }
- if s.Index == parser.UnsetAddr {
- s.Index = len(c.Data)
- c.Data = append(c.Data, s.Value)
- }
- emit(st, vm.Assign, s.Index)
-
- case lang.Equal:
- push(&parser.Symbol{Type: booleanOpType(pop(), pop())})
- emit(t, vm.Equal)
-
- case lang.EqualSet:
- push(&parser.Symbol{Type: booleanOpType(pop(), pop())})
- emit(t, vm.EqualSet)
-
- case lang.Ident:
- if i < len(tokens)-1 {
- switch t1 := tokens[i+1]; t1.Tok {
- case lang.Define, lang.Assign, lang.Colon:
- continue
- }
- }
- s, ok := c.Symbols[t.Str]
- if !ok {
- return fmt.Errorf("symbol not found: %s", t.Str)
- }
- push(s)
- if s.Kind == parser.SymPkg {
- break
- }
- if s.Local {
- emit(t, vm.Fdup, s.Index)
- } else {
- if s.Index == parser.UnsetAddr {
- s.Index = len(c.Data)
- c.Data = append(c.Data, s.Value)
- }
- emit(t, vm.Dup, s.Index)
- }
-
- case lang.Label:
- lc := len(c.Code)
- s, ok := c.Symbols[t.Str]
- if ok {
- s.Value = vm.ValueOf(lc)
- if s.Kind == parser.SymFunc {
- // label is a function entry point, register its code address in data.
- s.Index = len(c.Data)
- c.Data = append(c.Data, s.Value)
- } else {
- c.Data[s.Index] = s.Value
- }
- } else {
- c.Symbols[t.Str] = &parser.Symbol{Kind: parser.SymLabel, Value: vm.ValueOf(lc)}
- }
-
- case lang.JumpFalse:
- var i int
- if s, ok := c.Symbols[t.Str]; !ok {
- // t.Beg contains the position in code which needs to be fixed.
- t.Beg = len(c.Code)
- fixList = append(fixList, t)
- } else {
- i = int(s.Value.Data.Int()) - len(c.Code)
- }
- emit(t, vm.JumpFalse, i)
-
- case lang.JumpSetFalse:
- var i int
- if s, ok := c.Symbols[t.Str]; !ok {
- // t.Beg contains the position in code which needs to be fixed.
- t.Beg = len(c.Code)
- fixList = append(fixList, t)
- } else {
- i = int(s.Value.Data.Int()) - len(c.Code)
- }
- emit(t, vm.JumpSetFalse, i)
-
- case lang.JumpSetTrue:
- var i int
- if s, ok := c.Symbols[t.Str]; !ok {
- // t.Beg contains the position in code which needs to be fixed.
- t.Beg = len(c.Code)
- fixList = append(fixList, t)
- } else {
- i = int(s.Value.Data.Int()) - len(c.Code)
- }
- emit(t, vm.JumpSetTrue, i)
-
- case lang.Goto:
- var i int
- if s, ok := c.Symbols[t.Str]; !ok {
- t.Beg = len(c.Code)
- fixList = append(fixList, t)
- } else {
- i = int(s.Value.Data.Int()) - len(c.Code)
- }
- emit(t, vm.Jump, i)
-
- case lang.Period:
- s := pop()
- switch s.Kind {
- case parser.SymPkg:
- p, ok := parser.Packages[s.PkgPath]
- if !ok {
- return fmt.Errorf("package not found: %s", s.PkgPath)
- }
- v, ok := p[t.Str[1:]]
- if !ok {
- return fmt.Errorf("symbol not found in package %s: %s", s.PkgPath, t.Str[1:])
- }
- name := s.PkgPath + t.Str
- var l int
- sym, _, ok := c.GetSym(name, "")
- if ok {
- l = sym.Index
- } else {
- l = len(c.Data)
- c.Data = append(c.Data, v)
- c.AddSymbol(l, name, v, parser.SymValue, v.Type, false)
- sym = c.Symbols[name]
- }
- push(sym)
- emit(t, vm.Dup, l)
- default:
- if f, ok := s.Type.Rtype.FieldByName(t.Str[1:]); ok {
- emit(t, vm.Field, f.Index...)
- break
- }
- return fmt.Errorf("field or method not found: %s", t.Str[1:])
- }
-
- case lang.Return:
- emit(t, vm.Return, t.Beg, t.End)
-
- default:
- return fmt.Errorf("Codegen: unsupported token %v", t)
- }
- }
-
- // Finally we fix unresolved labels for jump destinations.
- for _, t := range fixList {
- s, ok := c.Symbols[t.Str]
- if !ok {
- return fmt.Errorf("label not found: %q", t.Str)
- }
- c.Code[t.Beg].Arg[0] = int(s.Value.Data.Int()) - t.Beg
- }
- return err
-}
-func arithmeticOpType(s1, _ *parser.Symbol) *vm.Type { return parser.SymbolType(s1) }
-func booleanOpType(_, _ *parser.Symbol) *vm.Type { return vm.TypeOf(true) }
-
-// PrintCode pretty prints the generated code.
-func (c *Compiler) PrintCode() {
- labels := map[int][]string{} // labels indexed by code location
- data := map[int]string{} // data indexed by frame location
-
- for name, sym := range c.Symbols {
- if sym.Kind == parser.SymLabel || sym.Kind == parser.SymFunc {
- i := int(sym.Value.Data.Int())
- labels[i] = append(labels[i], name)
- }
- if sym.Used {
- data[sym.Index] = name
- }
- }
-
- fmt.Fprintln(os.Stderr, "# Code:")
- for i, l := range c.Code {
- for _, label := range labels[i] {
- fmt.Fprintln(os.Stderr, label+":")
- }
- extra := ""
- switch l.Op {
- case vm.Jump, vm.JumpFalse, vm.JumpTrue, vm.JumpSetFalse, vm.JumpSetTrue, vm.Calli:
- if d, ok := labels[i+l.Arg[0]]; ok {
- extra = "// " + d[0]
- }
- case vm.Dup, vm.Assign, vm.Fdup, vm.Fassign:
- if d, ok := data[l.Arg[0]]; ok {
- extra = "// " + d
- }
- }
- fmt.Fprintf(os.Stderr, "%4d %-14v %v\n", i, l, extra)
- }
-
- for _, label := range labels[len(c.Code)] {
- fmt.Fprintln(os.Stderr, label+":")
- }
- fmt.Fprintln(os.Stderr, "# End code")
-}
-
-type entry struct {
- name string
- *parser.Symbol
-}
-
-func (e entry) String() string {
- if e.Symbol != nil {
- return fmt.Sprintf("name: %s,local: %t, i: %d, k: %d, t: %s, v: %v",
- e.name,
- e.Local,
- e.Index,
- e.Kind,
- e.Type,
- e.Value,
- )
- }
- return e.name
-}
-
-// PrintData pretty prints the generated global data symbols in compiler.
-func (c *Compiler) PrintData() {
- dict := c.symbolsByIndex()
-
- fmt.Fprintln(os.Stderr, "# Data:")
- for i, d := range c.Data {
- fmt.Fprintf(os.Stderr, "%4d %T %v %v\n", i, d.Data.Interface(), d.Data, dict[i])
- }
-}
-
-func (c *Compiler) symbolsByIndex() map[int]entry {
- dict := map[int]entry{}
- for name, sym := range c.Symbols {
- if sym.Index == parser.UnsetAddr {
- continue
- }
- dict[sym.Index] = entry{name, sym}
- }
- return dict
-}
-
-// Dump represents the state of a data dump.
-type Dump struct {
- Values []*DumpValue
-}
-
-// DumpValue is a value of a dump state.
-type DumpValue struct {
- Index int
- Name string
- Kind int
- Type string
- Value any
-}
-
-// Dump creates a snapshot of the execution state of global variables.
-// This method is specifically implemented in the Compiler to minimize the coupling between
-// the dump format and other components. By situating the dump logic in the Compiler,
-// it relies solely on the program being executed and the indexing algorithm used for ordering variables
-// (currently, this is an integer that corresponds to the order of variables in the program).
-// This design choice allows the Virtual Machine (VM) to evolve its memory management strategies
-// without compromising backward compatibility with dumps generated by previous versions.
-func (c *Compiler) Dump() *Dump {
- dict := c.symbolsByIndex()
- dv := make([]*DumpValue, len(c.Data))
- for i, d := range c.Data {
- e := dict[i]
- dv[i] = &DumpValue{
- Index: e.Index,
- Name: e.name,
- Kind: int(e.Kind),
- Type: e.Type.Name,
- Value: d.Data.Interface(),
- }
- }
- return &Dump{Values: dv}
-}
-
-// ApplyDump sets previously saved dump, restoring the state of global variables.
-func (c *Compiler) ApplyDump(d *Dump) error {
- dict := c.symbolsByIndex()
- for _, dv := range d.Values {
- // do all the checks to be sure we are applying the correct values
- e, ok := dict[dv.Index]
- if !ok {
- return fmt.Errorf("entry not found on index %d", dv.Index)
- }
-
- if dv.Name != e.name ||
- dv.Type != e.Type.Name ||
- dv.Kind != int(e.Kind) {
- return fmt.Errorf("entry with index %d does not match with provided entry. "+
- "dumpValue: %s, %s, %d. memoryValue: %s, %s, %d",
- dv.Index,
- dv.Name, dv.Type, dv.Kind,
- e.name, e.Type, e.Kind)
- }
-
- if dv.Index >= len(c.Data) {
- return fmt.Errorf("index (%d) bigger than memory (%d)", dv.Index, len(c.Data))
- }
-
- if !c.Data[dv.Index].Data.CanSet() {
- return fmt.Errorf("value %v cannot be set", dv.Value)
- }
-
- c.Data[dv.Index].Data.Set(reflect.ValueOf(dv.Value))
- }
- return nil
-}
-
-func (c *Compiler) typeSym(t *vm.Type) *parser.Symbol {
- tsym, ok := c.Symbols[t.Rtype.String()]
- if !ok {
- tsym = &parser.Symbol{Index: parser.UnsetAddr, Kind: parser.SymType, Type: t}
- c.Symbols[t.Rtype.String()] = tsym
- }
- if tsym.Index == parser.UnsetAddr {
- tsym.Index = len(c.Data)
- c.Data = append(c.Data, vm.NewValue(t))
- }
- return tsym
-}