summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2025-08-16 19:07:59 +0200
committerMarc Vertes <mvertes@free.fr>2025-08-16 19:07:59 +0200
commitd2ecacf2479c9fb8da55a632899ec6eb23337532 (patch)
tree7a64a8c2851a899a168c5866f5b91a966735c0df
parentb757984b8a8a865eb5cbd72008b91e66545f2c56 (diff)
updateHEADmain
-rwxr-xr-xev73
-rwxr-xr-xev1115
2 files changed, 169 insertions, 19 deletions
diff --git a/ev b/ev
index 7fcf207..534368a 100755
--- a/ev
+++ b/ev
@@ -12,7 +12,12 @@ BEGIN {
# The right array contains binary operators which are right associative.
right["="] = right["+="] = right["-="] = right["*="] = right["/="] = right["%="] = right["^="] = 1
right["^"] = 1
+ start[")"] = "("; end["("] = ")"
+ start["}"] = "{"; end["{"] = "}"
+ start["]"] = "["; end["["] = "]"
+
+ cl = 0
sp = pc = 0 # sp: stack pointer, pc: program counter.
split("", code)
split("", stack)
@@ -21,19 +26,18 @@ BEGIN {
}
{
line = $0
- parse()
- run()
+ while (parse()) run()
printf "> "
}
-function scan() {
+function scan(peek) {
ptok = tok
sub(/^[ \t]+/, "", line)
if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number
tok = "d" substr(line, 1, RLENGTH)
} else if (match(line, /^([-=+|&]{2}|![=~]|[<>+-*\/^%]=)/)) { # Operator (2 chars)
tok = substr(line, 1, RLENGTH)
- } else if (match(line, /^([][}{)(,+-=?:]|[*\/÷\^])/)) { # Operator (1 char)
+ } else if (match(line, /^([][}{)(,+-=?:;]|[*\/÷\^])/)) { # Operator (1 char)
tok = substr(line, 1, RLENGTH)
} else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier
tok = "v" substr(line, 1, RLENGTH)
@@ -42,39 +46,66 @@ function scan() {
} else { # Bad token
tok = "b" substr(line, 1, 1)
}
- line = substr(line, RLENGTH+1)
+ if (!peek) line = substr(line, RLENGTH+1)
return RLENGTH
}
-# TODO: unary operators, flow contror
-function parse( ops, sl, i, b) {
- b = i = length(code)
+# TODO: unary operators, flow control, functions
+# Recursive descent parser
+function parse(want, got, ops, sl, i, b) {
+ b = cl
while (scan() > 0) {
- if (tok == "(") {
+ if (want && ! got) {
+ if ((got = tok) != want && want == "(") { print "missing " want; return 0 }
+ ops[++sl] = "done"
+ }
+ if (tok == ";") {
+ break
+ } else if (tok == "vif") {
+ if (! parse("(")) break
+ code[cl++] = "jzLif" ++lif
+ if (! parse("{")) break
+ if (scan(1) && tok == "velse") {
+ code[cl++] = "jLif" lif+1
+ code[cl++] = "Lif" lif
+ sym["Lif" lif] = cl
+ if (! parse("{")) break
+ sym["Lif" ++lif] = cl
+ } else {
+ code[cl++] = "Lif" lif
+ sym["Lif" lif] = cl
+ }
+ } else if (tok in end) {
ops[++sl] = tok
- } else if (tok == ")") {
- while (sl && ops[sl] != "(") code[i++] = ops[sl--]
- sl--
+ } else if (tok in start) {
+ while (sl && ops[sl] != start[tok]) code[cl++] = ops[sl--]
+ if (ops[--sl] == "done") { sl--; break }
} else if (tok in prec) {
# print "tok:" tok " ptok:" ptok
- if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[i-1]) # mutate var to pointer for assign
+ if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[cl-1])
# Test precedence against ops, or associativity if same precedence.
- while (sl && (prec[tok] < prec[ops[sl]] || prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) {
- code[i++] = ops[sl--]
+ while (sl && (prec[tok] < prec[ops[sl]] ||
+ prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) {
+ code[cl++] = ops[sl--]
}
ops[++sl] = tok
} else {
- code[i++] = tok
+ code[cl++] = tok
}
}
- while (sl) code[i++] = ops[sl--]
- for (j = b; j < i; j++) printf("%s ", code[j]); print ""
+ while (sl) {
+ # if (ops[sl] in end) { print "missing " end[ops[sl]]; return 0 }
+ if (want && ops[sl--] == "done") continue
+ code[cl++] = ops[sl--]
+ }
+ return cl-b
}
# TODO: if, while, function
# Feel like being a stack machine.
function run( c, i, l, t) {
cl = length(code)
+ for (i = pc; i < cl; i++) printf("%s ", code[i]); print ""
while (pc < cl) {
c = code[pc]
if (c == "+") {
@@ -97,11 +128,15 @@ function run( c, i, l, t) {
stack[++sp] = sym[substr(c, 2)]
} else if (c ~ /^[d]/) {
stack[++sp] = 0 + substr(c, 2)
+ } else if (c ~ /^jz/) {
+ if (!stack[sp--]) { pc = sym[substr(c, 3)]; continue }
+ } else if (c ~ /^L/) {
+ # nop on label
} else {
print "run: invalid instruction " c
return
}
- # printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print ""
+ printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print ""
# for (i in sym) print "sym[" i "]=" sym[i]
pc++
}
diff --git a/ev1 b/ev1
new file mode 100755
index 0000000..2ead3b7
--- /dev/null
+++ b/ev1
@@ -0,0 +1,115 @@
+#!/usr/bin/env awk -f
+
+# an evaluator of code: scanner, parser and bytecode vm.
+
+BEGIN {
+    # prec[op]: precedence level of each binary operator (higher binds tighter).
+    # Level 1: plain assignment and comparisons.
+    prec["="] = 1
+    prec["=="] = 1; prec["!="] = 1
+    prec["<"] = 1; prec["<="] = 1
+    prec[">"] = 1; prec[">="] = 1
+    # Level 1: compound assignments.
+    prec["+="] = 1; prec["-="] = 1
+    prec["*="] = 1; prec["/="] = 1
+    prec["%="] = 1; prec["^="] = 1
+    # Level 2: additive operators.
+    prec["+"] = 2; prec["-"] = 2
+    # Level 3: multiplicative operators.
+    prec["*"] = 3; prec["/"] = 3; prec["%"] = 3
+    # Level 4: exponentiation.
+    prec["^"] = 4
+
+    # right[op]: membership marks a binary operator as right associative.
+    right["="] = 1
+    right["+="] = 1; right["-="] = 1
+    right["*="] = 1; right["/="] = 1
+    right["%="] = 1; right["^="] = 1
+    right["^"] = 1
+
+    # start[closer]: the opening token that each closing token pairs with.
+    start[")"] = "("
+    start["}"] = "{"
+    start["]"] = "["
+
+    # VM state: pc is the program counter into code[], sp the stack pointer.
+    pc = 0
+    sp = 0
+    # split("", a) leaves a as an empty array.
+    split("", sym)
+    split("", stack)
+    split("", code)
+    printf "> "
+}
+
+# For every input line: compile it, execute it, then prompt again.
+{ line = $0; parse(); run(); printf "> " }
+
+# scan: read the next token from the global `line`.
+#
+# Globals written:
+#   ptok  the token returned by the previous call.
+#   tok   the new token. Numbers are prefixed with "d", identifiers with "v",
+#         string literals (quotes stripped) with "s", unrecognized input with
+#         "b"; operators and punctuation are stored verbatim.
+#   line  the matched text is removed from its front.
+#
+# Returns the length of the matched text. When nothing matches (including an
+# empty line) every match() has failed, so RLENGTH is -1: tok is "b" plus the
+# offending character, `line` is left untouched and -1 is returned.
+function scan() {
+    ptok = tok
+    sub(/^[ \t]+/, "", line)
+    if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number
+        tok = "d" substr(line, 1, RLENGTH)
+    } else if (match(line, /^([-=+|&]{2}|![=~]|[-<>+*\/^%]=)/)) { # Operator (2 chars)
+        # "-" is placed first in the bracket expression so that it is a literal.
+        # Written as "+-*" it forms a reversed range, whose meaning is
+        # unspecified (an implementation may reject it or match nothing).
+        tok = substr(line, 1, RLENGTH)
+    } else if (match(line, /^([][}{)(,+=?:<>%-]|[*\/÷\^])/)) { # Operator (1 char)
+        # "-" is placed last for the same reason: "+-=" would be the range
+        # "+".."=" which accepts ".", ";" and "<" by accident while leaving
+        # out ">" and "%", both of which have an entry in prec[].
+        tok = substr(line, 1, RLENGTH)
+    } else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier
+        tok = "v" substr(line, 1, RLENGTH)
+    } else if (match(line, /^"(\\.|[^\\"])*"/)) { # Literal string
+        tok = "s" substr(line, 2, RLENGTH-2)
+    } else { # Bad token
+        tok = "b" substr(line, 1, 1)
+    }
+    # With RLENGTH == -1 this is substr(line, 0), i.e. the whole line.
+    line = substr(line, RLENGTH+1)
+    return RLENGTH
+}
+
+# TODO: unary operators, flow control
+# parse: compile the tokens of the global `line` to postfix code.
+#
+# Tokens are read with scan() until it stops returning a positive length.
+# Operands are appended to the global code[] array directly; operators wait on
+# the local ops[] stack and are appended according to prec[] (precedence) and
+# right[] (right associativity). The instructions added by this call are
+# printed on one line as a trace.
+#
+# Locals: ops/sl  pending operator stack and its depth
+#         i       next free index in code[]
+#         b       index of the first instruction emitted by this call
+#         j, op   scratch
+function parse(    ops, sl, i, b, j, op) {
+    b = i = length(code)
+    while (scan() > 0) {
+        # Exact comparison: a regex test would also accept any other token
+        # that merely contains a bracket, such as a string literal.
+        if (tok == "(" || tok == "{" || tok == "[") {
+            ops[++sl] = tok
+        } else if (tok in start) {
+            # Flush operators down to the matching opener.
+            while (sl > 0 && ops[sl] != start[tok]) {
+                op = ops[sl--]
+                if (op in prec) code[i++] = op
+                else print "missing " closer_of(op)   # mismatched opener
+            }
+            # Pop the opener only if there is one. Letting sl go negative
+            # would make the final flush loop below run forever.
+            if (sl > 0) sl--
+            else print "missing " start[tok]
+        } else if (tok in prec) {
+            # Mutate var to pointer for assign. The pattern is anchored so that
+            # comparisons such as "==" or "<=" do not count as assignments.
+            if (tok ~ /^[-+*\/%^]?=$/ && ptok ~ /^v/) sub(/^v/, "p", code[i-1])
+            # Test precedence against ops, or associativity if same precedence.
+            # An opener on top of ops[] is not in prec and stops the flush.
+            while (sl > 0 && (ops[sl] in prec) &&
+                   (prec[tok] < prec[ops[sl]] ||
+                    prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) {
+                code[i++] = ops[sl--]
+            }
+            ops[++sl] = tok
+        } else {
+            code[i++] = tok
+        }
+    }
+    # Flush what is left. An opener still pending was never closed: report it
+    # instead of emitting it as an instruction.
+    while (sl > 0) {
+        op = ops[sl--]
+        if (op in prec) code[i++] = op
+        else print "missing " closer_of(op)
+    }
+    for (j = b; j < i; j++) printf("%s ", code[j]); print ""
+}
+
+# closer_of: return the closing token paired with `open` in start[], or ""
+# when `open` is not an opening token.
+function closer_of(open,    c) {
+    for (c in start) if (start[c] == open) return c
+    return ""
+}
+
+# TODO: if, while, function
+# Feel like being a stack machine.
+# run: execute code[] from the global program counter pc up to its end.
+#
+# Instructions:
+#   + - * / % ^   pop two values, push the result
+#   = +=          pop a value and a pointer, update sym[], push the new value
+#   pNAME         push the pointer token itself (assignment target)
+#   vNAME         push sym[NAME]
+#   dNUM          push the number NUM
+#
+# When execution completes with a value on the stack, that value is popped,
+# stored in sym["_"] and printed; a stack debug line follows in all cases.
+#
+# On an error (unknown instruction, stack underflow, division by zero) a
+# message is printed, the rest of the program is skipped by moving pc to the
+# end of code[], and the stack is reset. Without that, pc would stay on the
+# faulty instruction and every later input line would fail on it again.
+#
+# Locals: c     current instruction
+#         a, b  left and right operand (a is the variable name for = and +=)
+#         bad   set when execution must be abandoned
+#         i     used only by the commented-out trace
+function run(    c, i, a, b, bad) {
+    cl = length(code)
+    while (pc < cl) {
+        c = code[pc]
+        if (c == "+" || c == "-" || c == "*" || c == "/" || c == "%" || c == "^") {
+            if (sp < 2) { print "run: stack underflow at " c; bad = 1; break }
+            # Operands are fetched into locals first: an expression of the form
+            # stack[--sp] = stack[sp-1] + stack[sp] depends on whether the
+            # subscript or the right-hand side is evaluated first.
+            b = stack[sp--]
+            a = stack[sp]
+            if ((c == "/" || c == "%") && b + 0 == 0) {
+                # awk treats division by zero as a fatal error.
+                print "run: division by zero"
+                bad = 1
+                break
+            }
+            if (c == "+") stack[sp] = a + b
+            else if (c == "-") stack[sp] = a - b
+            else if (c == "*") stack[sp] = a * b
+            else if (c == "/") stack[sp] = a / b
+            else if (c == "%") stack[sp] = a % b
+            else stack[sp] = a ^ b
+        } else if (c == "=" || c == "+=") {
+            if (sp < 2) { print "run: stack underflow at " c; bad = 1; break }
+            b = stack[sp--]
+            a = substr(stack[sp], 2)    # strip the "p" of the pointer token
+            if (c == "=") sym[a] = b
+            else sym[a] += b
+            stack[sp] = sym[a]
+        } else if (c ~ /^p/) {
+            stack[++sp] = c
+        } else if (c ~ /^v/) {
+            stack[++sp] = sym[substr(c, 2)]
+        } else if (c ~ /^[d]/) {
+            stack[++sp] = 0 + substr(c, 2)
+        } else {
+            print "run: invalid instruction " c
+            bad = 1
+            break
+        }
+        # printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print ""
+        pc++
+    }
+    if (bad) {
+        pc = cl
+        sp = 0
+        return
+    }
+    # The stack is 1-based: sp == 0 means there is no result to report
+    # (for instance after a blank input line).
+    if (sp > 0) print sym["_"] = stack[sp--]
+    print "sp: " sp ", len(stack): " length(stack)
+}