diff options
| author | Marc Vertes <mvertes@free.fr> | 2026-01-21 19:26:42 +0100 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2026-01-21 19:26:42 +0100 |
| commit | c922c797204069f42a7abf88500c5708f68a8e43 (patch) | |
| tree | a0379dc6f6992f0ba077b028dfd4b031dd674d98 | |
| parent | ee9397bc031dc33e4f735b3331643bbf60a0d17a (diff) | |
feat: add support for range clause and iterators
- vm: add Pull, Next and Stop instructions to implement iterators
- lang: add Next and Stop tokens, and give the existing Range token a precedence
- parser: handle range clause. Still naive and incomplete.
- comp: generate iterator instructions from range clause.
Work in progress: only initial support for slices so far. Many more tests
and combinations are needed, but the main pattern is now in place.
| -rw-r--r-- | comp/compiler.go | 48 | ||||
| -rw-r--r-- | go.mod | 2 | ||||
| -rw-r--r-- | interp/interpreter_test.go | 12 | ||||
| -rw-r--r-- | lang/golang/go.go | 2 | ||||
| -rw-r--r-- | lang/token.go | 2 | ||||
| -rw-r--r-- | lang/token_string.go | 10 | ||||
| -rw-r--r-- | parser/README.md | 4 | ||||
| -rw-r--r-- | parser/expr.go | 52 | ||||
| -rw-r--r-- | parser/parse.go | 27 | ||||
| -rw-r--r-- | vm/op_string.go | 23 | ||||
| -rw-r--r-- | vm/vm.go | 18 |
11 files changed, 127 insertions, 73 deletions
diff --git a/comp/compiler.go b/comp/compiler.go index 4c691b2..55a3166 100644 --- a/comp/compiler.go +++ b/comp/compiler.go @@ -51,6 +51,14 @@ func errorf(format string, v ...any) error { return fmt.Errorf(loc+format, v...) } +func showStack(stack []*symbol.Symbol) { + _, file, line, _ := runtime.Caller(1) + fmt.Fprintf(os.Stderr, "%s%d: showstack: %d\n", path.Base(file), line, len(stack)) + for i, s := range stack { + fmt.Fprintf(os.Stderr, " stack[%d]: %v\n", i, s) + } +} + // Generate generates vm code and data from parsed tokens. func (c *Compiler) Generate(tokens parser.Tokens) (err error) { log.Println("Codegen tokens:", tokens) @@ -68,14 +76,6 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { pop := func() *symbol.Symbol { l := len(stack) - 1; s := stack[l]; stack = stack[:l]; return s } popflen := func() int { le := len(flen) - 1; l := flen[le]; flen = flen[:le]; return l } - showStack := func() { - _, file, line, _ := runtime.Caller(1) - fmt.Fprintf(os.Stderr, "%s%d: showstack: %d\n", path.Base(file), line, len(stack)) - for i, s := range stack { - fmt.Fprintf(os.Stderr, " stack[%d]: %v\n", i, s) - } - } - for _, t := range tokens { switch t.Tok { case lang.Int: @@ -128,7 +128,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { emit(t, vm.Deref) case lang.Index: - showStack() + showStack(stack) pop() s := pop() if s.Type.Rtype.Kind() == reflect.Map { @@ -147,7 +147,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { emit(t, vm.Lower) case lang.Call: - showStack() + showStack(stack) narg := t.Beg // FIXME: t.Beg is hijacked to store the number of function parameters. s := stack[len(stack)-1-narg] if s.Kind != symbol.Value { @@ -163,7 +163,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { } emit(t, vm.Call, narg) - showStack() + showStack(stack) break } fallthrough // A symValue must be called through callX. 
@@ -211,7 +211,6 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { emit(t, vm.Grow, t.Beg) case lang.Define: - showStack() rhs := pop() typ := rhs.Type if typ == nil { @@ -223,7 +222,6 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { emit(t, vm.Vassign) case lang.Assign: - showStack() rhs := pop() lhs := pop() if lhs.Local { @@ -264,7 +262,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { case lang.Ident: s, ok := c.Symbols[t.Str] if !ok { - // it could be either an undefined symbol or a key ident in a literal composite expr. + // It could be either an undefined symbol or a key ident in a literal composite expr. s = &symbol.Symbol{Name: t.Str} } log.Println("Ident symbol", t.Str, s.Local, s.Index, s.Type) @@ -368,7 +366,7 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { if len(stack) < 1 { return errorf("missing symbol") } - showStack() + showStack(stack) s := pop() switch s.Kind { case symbol.Pkg: @@ -414,6 +412,26 @@ func (c *Compiler) Generate(tokens parser.Tokens) (err error) { return fmt.Errorf("field or method not found: %s", t.Str[1:]) } + case lang.Next: + k := stack[len(stack)-2] + emit(t, vm.Next, k.Index) + + case lang.Range: + // FIXME: handle all iterator types. + // set the correct type to the iterator variables. 
+ switch t := top().Type; t.Rtype.Kind() { + case reflect.Slice: + k := stack[len(stack)-2] + k.Type = c.Symbols["int"].Type + c.Data[k.Index] = vm.NewValue(k.Type) + case reflect.Map: + // FIXME: handle map + } + emit(t, vm.Pull) + + case lang.Stop: + emit(t, vm.Stop) + case lang.Return: emit(t, vm.Return, t.Beg, t.End) @@ -1,3 +1,3 @@ module github.com/mvertes/parscan -go 1.21 +go 1.24 diff --git a/interp/interpreter_test.go b/interp/interpreter_test.go index 7a974d1..f5c347b 100644 --- a/interp/interpreter_test.go +++ b/interp/interpreter_test.go @@ -121,11 +121,13 @@ func TestIf(t *testing.T) { func TestFor(t *testing.T) { run(t, []etest{ - {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, // #00 - {src: "func f() int {a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; return a}; f()", res: "3"}, // #01 - {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, // #02 - {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, // #03 - {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, // #04 + {src: "a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; a", res: "3"}, // #00 + {src: "func f() int {a := 0; for i := 0; i < 3; i = i+1 {a = a+i}; return a}; f()", res: "3"}, // #01 + {src: "a := 0; for {a = a+1; if a == 3 {break}}; a", res: "3"}, // #02 + {src: "func f() int {a := 0; for {a = a+1; if a == 3 {break}}; return a}; f()", res: "3"}, // #03 + {src: "func f() int {a := 0; for {a = a+1; if a < 3 {continue}; break}; return a}; f()", res: "3"}, // #04 + {src: "a := []int{1,2,3,4}; b := 0; for i := range a {b = b+i}; b", res: "6"}, // #05 + {src: "func f() int {a := []int{1,2,3,4}; b := 0; for i := range a {b = b+i}; return b}; f()", res: "6"}, // #06 }) } diff --git a/lang/golang/go.go b/lang/golang/go.go index 0d60e8b..cba6944 100644 --- a/lang/golang/go.go +++ b/lang/golang/go.go @@ -156,7 +156,7 @@ var GoSpec = &lang.Spec{ lang.Interface: {SkipSemi: true}, 
lang.Map: {SkipSemi: true}, lang.Package: {SkipSemi: true}, - lang.Range: {SkipSemi: true}, + lang.Range: {SkipSemi: true, Precedence: 9}, lang.Select: {SkipSemi: true, HasInit: true}, lang.Struct: {SkipSemi: true}, lang.Switch: {SkipSemi: true, HasInit: true}, diff --git a/lang/token.go b/lang/token.go index 9800599..00f7366 100644 --- a/lang/token.go +++ b/lang/token.go @@ -122,7 +122,9 @@ const ( Label Len New + Next Slice + Stop // This must be the last token value. MaxTok diff --git a/lang/token_string.go b/lang/token_string.go index cf17584..69222e4 100644 --- a/lang/token_string.go +++ b/lang/token_string.go @@ -104,13 +104,15 @@ func _() { _ = x[Label-93] _ = x[Len-94] _ = x[New-95] - _ = x[Slice-96] - _ = x[MaxTok-97] + _ = x[Next-96] + _ = x[Slice-97] + _ = x[Stop-98] + _ = x[MaxTok-99] } -const _Token_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecIndexAssignPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXCompositeEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelLenNewSliceMaxTok" +const _Token_name = 
"IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecIndexAssignPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXCompositeEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelLenNewNextSliceStopMaxTok" -var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 264, 268, 273, 277, 282, 289, 294, 302, 305, 310, 315, 324, 329, 339, 351, 361, 366, 370, 374, 379, 387, 394, 399, 403, 414, 417, 421, 423, 427, 429, 435, 444, 447, 454, 459, 465, 471, 477, 483, 487, 490, 494, 499, 508, 516, 520, 525, 534, 546, 557, 562, 565, 568, 573, 579} +var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 264, 268, 273, 277, 282, 289, 294, 302, 305, 310, 315, 324, 329, 339, 351, 361, 366, 370, 374, 379, 387, 394, 399, 403, 414, 417, 421, 423, 427, 429, 435, 444, 447, 454, 459, 465, 471, 477, 483, 487, 490, 494, 499, 508, 516, 520, 525, 534, 546, 557, 562, 565, 568, 572, 577, 581, 587} func (i Token) String() string { idx := int(i) - 0 diff --git a/parser/README.md b/parser/README.md index ccf580d..dda3f25 100644 --- a/parser/README.md +++ b/parser/README.md @@ -59,7 +59,7 @@ Go language support: - [x] iota expression - [ ] defer statement - [ ] recover statement -- [ ] range clause +- [x] range clause - [ ] go statement - [x] if statement (including else and else if) - [x] 
for statement @@ -80,7 +80,7 @@ Go language support: - [x] call expressions - [x] index expressions - [x] selector expressions -- [ ] slice expressions +- [x] slice expressions - [ ] type convertions - [ ] type assertions - [ ] parametric types (generic) diff --git a/parser/expr.go b/parser/expr.go index 1944827..149b5e2 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -17,9 +17,9 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { var ops Tokens var ctype string - popop := func() (t scanner.Token) { + popop := func() scanner.Token { l := len(ops) - 1 - t = ops[l] + t := ops[l] ops = ops[:l] if t.Tok.IsLogicalOp() { t.Tok = lang.Label // Implement conditional branching directly. @@ -27,6 +27,15 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { return t } + // addop adds an operator to the operator stack. + addop := func(t scanner.Token) { + // Operators on stack with a lower precedence are poped out and output first. + for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { + out = append(out, popop()) + } + ops = append(ops, t) + } + lin := len(in) for i := 0; i < lin; i++ { switch t := in[i]; t.Tok { @@ -47,18 +56,19 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { case lang.Period: // TODO: fail if next is not an ident. t.Str += in[i+1].Str // Hardwire selector argument. - for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { - out = append(out, popop()) - } - ops = append(ops, t) + addop(t) i++ // Skip over next ident. + case lang.Next: + out = append(out, t) + + case lang.Range: + ops = ops[:len(ops)-1] // Suppress previous assign or define. 
+ addop(t) + case lang.Colon: t.Str = typeStr - for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { - out = append(out, popop()) - } - ops = append(ops, t) + addop(t) case lang.Add, lang.And, lang.Assign, lang.Define, lang.Equal, lang.Greater, lang.Less, lang.Mul, lang.Not, lang.Sub, lang.Shl, lang.Shr: if i == 0 || in[i-1].Tok.IsOperator() { @@ -66,30 +76,21 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { t.Tok = lang.UnaryOp[t.Tok] // FIXME: parsetype for composite if & or * } - for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { - out = append(out, popop()) - } - ops = append(ops, t) + addop(t) case lang.Land: - for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { - out = append(out, popop()) - } + addop(t) xp := strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ out = append(out, scanner.Token{Tok: lang.JumpSetFalse, Str: p.scope + "x" + xp}) - t.Str = p.scope + "x" + xp - ops = append(ops, t) + ops[len(ops)-1].Str = p.scope + "x" + xp case lang.Lor: - for len(ops) > 0 && p.precedence(t) < p.precedence(ops[len(ops)-1]) { - out = append(out, popop()) - } + addop(t) xp := strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ out = append(out, scanner.Token{Tok: lang.JumpSetTrue, Str: p.scope + "x" + xp}) - t.Str = p.scope + "x" + xp - ops = append(ops, t) + ops[len(ops)-1].Str = p.scope + "x" + xp case lang.Ident: s, sc, ok := p.Symbols.Get(t.Str, p.scope) @@ -145,7 +146,8 @@ func (p *Parser) parseExpr(in Tokens, typeStr string) (out Tokens, err error) { return out, err } ctype = typ.String() - p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "") + // p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, p.funcScope != "") + p.Symbols.Add(symbol.UnsetAddr, ctype, vm.NewValue(typ), symbol.Type, typ, false) out = append(out, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: ctype}) i += n - 1 break diff 
--git a/parser/parse.go b/parser/parse.go index 46b1724..67d8657 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -193,25 +193,33 @@ func (p *Parser) parseGoto(in Tokens) (out Tokens, err error) { func (p *Parser) parseFor(in Tokens) (out Tokens, err error) { // TODO: detect invalid code. + var init, cond, post, body, final Tokens fc := strconv.Itoa(p.labelCount[p.scope]) p.labelCount[p.scope]++ - var init, cond, post, body Tokens + breakLabel, continueLabel := p.breakLabel, p.continueLabel + p.pushScope("for" + fc) + p.breakLabel, p.continueLabel = p.scope+"e", p.scope+"b" + defer func() { + p.breakLabel, p.continueLabel = breakLabel, continueLabel + p.popScope() + }() pre := in[1 : len(in)-1].Split(lang.Semicolon) switch len(pre) { case 1: - cond = pre[0] + if in.Index(lang.Range) >= 0 { + init = pre[0] + // cond = Tokens{{Tok: lang.Next, Str: p.scope + "c"}} + // final = Tokens{{Tok: lang.Stop, Str: p.scope + "f"}} + cond = Tokens{{Tok: lang.Next}} + final = Tokens{{Tok: lang.Stop}} + } else { + cond = pre[0] + } case 3: init, cond, post = pre[0], pre[1], pre[2] default: return nil, ErrFor } - breakLabel, continueLabel := p.breakLabel, p.continueLabel - p.pushScope("for" + fc) - p.breakLabel, p.continueLabel = p.scope+"e", p.scope+"b" - defer func() { - p.breakLabel, p.continueLabel = breakLabel, continueLabel - p.popScope() - }() if len(init) > 0 { if init, err = p.parseStmt(init); err != nil { return nil, err @@ -239,6 +247,7 @@ func (p *Parser) parseFor(in Tokens) (out Tokens, err error) { out = append(out, scanner.Token{Tok: lang.Goto, Str: p.scope + "b"}, scanner.Token{Tok: lang.Label, Str: p.scope + "e"}) + out = append(out, final...) 
return out, err } diff --git a/vm/op_string.go b/vm/op_string.go index f53ce5e..594bc3b 100644 --- a/vm/op_string.go +++ b/vm/op_string.go @@ -44,19 +44,22 @@ func _() { _ = x[Mul-33] _ = x[New-34] _ = x[Negate-35] - _ = x[Not-36] - _ = x[Pop-37] - _ = x[Push-38] - _ = x[Return-39] - _ = x[Slice-40] - _ = x[Slice3-41] - _ = x[Sub-42] - _ = x[Swap-43] + _ = x[Next-36] + _ = x[Not-37] + _ = x[Pop-38] + _ = x[Push-39] + _ = x[Pull-40] + _ = x[Return-41] + _ = x[Slice-42] + _ = x[Slice3-43] + _ = x[Stop-44] + _ = x[Sub-45] + _ = x[Swap-46] } -const _Op_name = "NopAddAddrAssignFassignVassignCallCallXDerefDupFdupFnewFnewEEqualEqualSetExitFieldFieldEFieldSetFieldFsetGreaterGrowIndexIndexSetJumpJumpTrueJumpFalseJumpSetTrueJumpSetFalseLenLowerMapIndexMapSetMulNewNegateNotPopPushReturnSliceSlice3SubSwap" +const _Op_name = "NopAddAddrAssignFassignVassignCallCallXDerefDupFdupFnewFnewEEqualEqualSetExitFieldFieldEFieldSetFieldFsetGreaterGrowIndexIndexSetJumpJumpTrueJumpFalseJumpSetTrueJumpSetFalseLenLowerMapIndexMapSetMulNewNegateNextNotPopPushPullReturnSliceSlice3StopSubSwap" -var _Op_index = [...]uint8{0, 3, 6, 10, 16, 23, 30, 34, 39, 44, 47, 51, 55, 60, 65, 73, 77, 82, 88, 96, 105, 112, 116, 121, 129, 133, 141, 150, 161, 173, 176, 181, 189, 195, 198, 201, 207, 210, 213, 217, 223, 228, 234, 237, 241} +var _Op_index = [...]uint8{0, 3, 6, 10, 16, 23, 30, 34, 39, 44, 47, 51, 55, 60, 65, 73, 77, 82, 88, 96, 105, 112, 116, 121, 129, 133, 141, 150, 161, 173, 176, 181, 189, 195, 198, 201, 207, 211, 214, 217, 221, 225, 231, 236, 242, 246, 249, 253} func (i Op) String() string { idx := int(i) - 0 @@ -2,7 +2,8 @@ package vm import ( - "fmt" // for tracing only + "fmt" // for tracing only + "iter" "log" // for tracing only "reflect" // for optional CallX only "strings" @@ -55,12 +56,15 @@ const ( Mul // n1 n2 -- prod ; prod = n1*n2 New // -- x; mem[fp+$1] = new mem[$2] Negate // -- ; - mem[fp] + Next // -- ; iterator next Not // c -- r ; r = !c Pop // v -- Push // -- v + Pull // a -- a s 
n; pull iterator next and stop function Return // [r1 .. ri] -- ; exit frame: sp = fp, fp = pop Slice // a l h -- a; a = a [l:h] Slice3 // a l h m -- a; a = a[l:h:m] + Stop // -- iterator stop Sub // n1 n2 -- diff ; diff = n1 - n2 Swap // -- ) @@ -242,6 +246,12 @@ func (m *Machine) Run() (err error) { mem = append(mem, ValueOf(mem[sp-1-c.Arg[0]].Len())) case Negate: mem[sp-1] = ValueOf(-mem[sp-1].Int()) + case Next: + v, ok := mem[sp-2].Interface().(func() (reflect.Value, bool))() + if ok { + mem[c.Arg[0]].Set(v) + } + mem = append(mem, ValueOf(ok)) case Not: mem[sp-1] = ValueOf(!mem[sp-1].Bool()) case Pop: @@ -249,6 +259,9 @@ func (m *Machine) Run() (err error) { case Push: mem = append(mem, NewValue(TypeOf(0))) mem[sp].SetInt(int64(c.Arg[0])) + case Pull: + next, stop := iter.Pull(mem[sp-1].Seq()) + mem = append(mem, ValueOf(next), ValueOf(stop)) case Grow: mem = append(mem, make([]Value, c.Arg[0])...) case Return: @@ -263,6 +276,9 @@ func (m *Machine) Run() (err error) { case Slice3: mem[sp-4].Value = mem[sp-4].Slice3(int(mem[sp-3].Int()), int(mem[sp-2].Int()), int(mem[sp-1].Int())) mem = mem[:sp-3] + case Stop: + mem[sp-1].Interface().(func())() + mem = mem[:sp-4] case Sub: mem[sp-2] = ValueOf(int(mem[sp-2].Int() - mem[sp-1].Int())) mem = mem[:sp-1] |
