diff options
| author | Marc Vertes <mvertes@free.fr> | 2025-08-16 19:07:59 +0200 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2025-08-16 19:07:59 +0200 |
| commit | d2ecacf2479c9fb8da55a632899ec6eb23337532 (patch) | |
| tree | 7a64a8c2851a899a168c5866f5b91a966735c0df | |
| parent | b757984b8a8a865eb5cbd72008b91e66545f2c56 (diff) | |
| -rwxr-xr-x | ev | 73 | ||||
| -rwxr-xr-x | ev1 | 115 |
2 files changed, 169 insertions, 19 deletions
@@ -12,7 +12,12 @@ BEGIN { # The rigth array contains binary operators which are right associative. right["="] = right["+="] = right["-="] = right["*="] = right["/="] = right["%="] = right["^="] = 1 right["^"] = 1 + start[")"] = "("; end["("] = ")" + start["}"] = "{"; end["{"] = "}" + start["]"] = "["; end["["] = "]" + + cl = 0 sp = pc = 0 # sp: stack pointer, pc: program counter. split("", code) split("", stack) @@ -21,19 +26,18 @@ BEGIN { } { line = $0 - parse() - run() + while (parse()) run() printf "> " } -function scan() { +function scan(peek) { ptok = tok sub(/^[ \t]+/, "", line) if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number tok = "d" substr(line, 1, RLENGTH) } else if (match(line, /^([-=+|&]{2}|![=~]|[<>+-*\/^%]=)/)) { # Operator (2 chars) tok = substr(line, 1, RLENGTH) - } else if (match(line, /^([][}{)(,+-=?:]|[*\/÷\^])/)) { # Operator (1 char) + } else if (match(line, /^([][}{)(,+-=?:;]|[*\/÷\^])/)) { # Operator (1 char) tok = substr(line, 1, RLENGTH) } else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier tok = "v" substr(line, 1, RLENGTH) @@ -42,39 +46,66 @@ function scan() { } else { # Bad token tok = "b" substr(line, 1, 1) } - line = substr(line, RLENGTH+1) + if (!peek) line = substr(line, RLENGTH+1) return RLENGTH } -# TODO: unary operators, flow contror -function parse( ops, sl, i, b) { - b = i = length(code) +# TODO: unary operators, flow control, functions +# Recursive descent parser +function parse(want, got, ops, sl, i, b) { + b = cl while (scan() > 0) { - if (tok == "(") { + if (want && ! got) { + if ((got = tok) != want && want == "(") { print "missing " want; return 0 } + ops[++sl] = "done" + } + if (tok == ";") { + break + } else if (tok == "vif") { + if (! parse("(")) break + code[cl++] = "jzLif" ++lif + if (! parse("{")) break + if (scan(1) && tok == "velse") { + code[cl++] = "jLif" lif+1 + code[cl++] = "Lif" lif + sym["Lif" lif] = cl + if (! parse("{")) break + sym["Lif" ++lif] = cl + } else { + code[cl++] = "Lif" lif + sym["Lif" lif] = cl + } + } else if (tok in end) { ops[++sl] = tok - } else if (tok == ")") { - while (sl && ops[sl] != "(") code[i++] = ops[sl--] - sl-- + } else if (tok in start) { + while (sl && ops[sl] != start[tok]) code[cl++] = ops[sl--] + if (ops[--sl] == "done") { sl--; break } } else if (tok in prec) { # print "tok:" tok " ptok:" ptok - if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[i-1]) # mutate var to pointer for assign + if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[cl-1]) # Test precedence against ops, or associativity if same precedence. - while (sl && (prec[tok] < prec[ops[sl]] || prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) { - code[i++] = ops[sl--] + while (sl && (prec[tok] < prec[ops[sl]] || + prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) { + code[cl++] = ops[sl--] } ops[++sl] = tok } else { - code[i++] = tok + code[cl++] = tok } } - while (sl) code[i++] = ops[sl--] - for (j = b; j < i; j++) printf("%s ", code[j]); print "" + while (sl) { + # if (ops[sl] in end) { print "missing " end[ops[sl]]; return 0 } + if (want && ops[sl--] == "done") continue + code[cl++] = ops[sl--] + } + return cl-b } # TODO: if, while, function # Feel like being a stack machine. function run( c, i, l, t) { cl = length(code) + for (i = pc; i < cl; i++) printf("%s ", code[i]); print "" while (pc < cl) { c = code[pc] if (c == "+") { @@ -97,11 +128,15 @@ function run( c, i, l, t) { stack[++sp] = sym[substr(c, 2)] } else if (c ~ /^[d]/) { stack[++sp] = 0 + substr(c, 2) + } else if (c ~ /^jz/) { + if (!stack[sp--]) { pc = sym[substr(c, 3)]; continue } + } else if (c ~ /^L/) { + # nop on label } else { print "run: invalid instruction " c return } - # printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print "" + printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print "" # for (i in sym) print "sym[" i "]=" sym[i] pc++ } @@ -0,0 +1,115 @@ +#!/usr/bin/env awk -f + +# an evaluator of code: scanner, parser and bytecode vm. + +BEGIN { + # The prec array contains the precedence level of binary operators. + prec["="] = prec["=="] = prec["!="] = prec["<"] = prec["<="] = prec[">"] = prec[">="] = 1 + prec["+="] = prec["-="] = prec["*="] = prec["/="] = prec["%="] = prec["^="] = 1 + prec["+"] = prec["-"] = 2 + prec["*"] = prec["/"] = prec["%"] = 3 + prec["^"] = 4 + # The rigth array contains binary operators which are right associative. + right["="] = right["+="] = right["-="] = right["*="] = right["/="] = right["%="] = right["^="] = 1 + right["^"] = 1 + start[")"] = "(" + start["}"] = "{" + start["]"] = "[" + + + sp = pc = 0 # sp: stack pointer, pc: program counter. + split("", code) + split("", stack) + split("", sym) + printf "> " +} +{ + line = $0 + parse() + run() + printf "> " +} + +function scan() { + ptok = tok + sub(/^[ \t]+/, "", line) + if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number + tok = "d" substr(line, 1, RLENGTH) + } else if (match(line, /^([-=+|&]{2}|![=~]|[<>+-*\/^%]=)/)) { # Operator (2 chars) + tok = substr(line, 1, RLENGTH) + } else if (match(line, /^([][}{)(,+-=?:]|[*\/÷\^])/)) { # Operator (1 char) + tok = substr(line, 1, RLENGTH) + } else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier + tok = "v" substr(line, 1, RLENGTH) + } else if (match(line, /^"(\\.|[^\\"])*"/)) { # Literal string + tok = "s" substr(line, 2, RLENGTH-2) + } else { # Bad token + tok = "b" substr(line, 1, 1) + } + line = substr(line, RLENGTH+1) + return RLENGTH +} + +# TODO: unary operators, flow contror +function parse( ops, sl, i, b) { + b = i = length(code) + while (scan() > 0) { + if (tok ~ /[({[]/) { + ops[++sl] = tok + } else if (tok in start) { + while (sl && ops[sl] != start[tok]) code[i++] = ops[sl--] + sl-- + } else if (tok in prec) { + # print "tok:" tok " ptok:" ptok + if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[i-1]) # mutate var to pointer for assign + # Test precedence against ops, or associativity if same precedence. + while (sl && (prec[tok] < prec[ops[sl]] || prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) { + code[i++] = ops[sl--] + } + ops[++sl] = tok + } else { + code[i++] = tok + } + } + while (sl) code[i++] = ops[sl--] + for (j = b; j < i; j++) printf("%s ", code[j]); print "" +} + +# TODO: if, while, function +# Feel like being a stack machine. +function run( c, i, l, t) { + cl = length(code) + while (pc < cl) { + c = code[pc] + if (c == "+") { + stack[--sp] = stack[sp-1] + stack[sp] + } else if (c == "-") { + stack[--sp] = stack[sp-1] - stack[sp] + } else if (c == "*") { + stack[--sp] = stack[sp-1] * stack[sp] + } else if (c == "/") { + stack[--sp] = stack[sp-1] / stack[sp] + } else if (c == "^") { + stack[--sp] = stack[sp-1] ^ stack[sp] + } else if (c == "=") { + stack[--sp] = sym[substr(stack[sp-1], 2)] = stack[sp] + } else if (c == "+=") { + stack[--sp] = sym[substr(stack[sp-1], 2)] += stack[sp] + } else if (c ~ /^p/) { + stack[++sp] = c + } else if (c ~ /^v/) { + stack[++sp] = sym[substr(c, 2)] + } else if (c ~ /^[d]/) { + stack[++sp] = 0 + substr(c, 2) + } else { + print "run: invalid instruction " c + return + } + # printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print "" + # for (i in sym) print "sym[" i "]=" sym[i] + pc++ + } + print sym["_"] = stack[sp--] + print "sp: " sp ", len(stack): " length(stack) + # for (i = sp; j in stack; i++) delete stack[i] +} |
