#!/usr/bin/env awk -f # an evaluator of code: scanner, parser and bytecode vm. BEGIN { # The prec array contains the precedence level of binary operators. prec["="] = prec["=="] = prec["!="] = prec["<"] = prec["<="] = prec[">"] = prec[">="] = 1 prec["+="] = prec["-="] = prec["*="] = prec["/="] = prec["%="] = prec["^="] = 1 prec["+"] = prec["-"] = 2 prec["*"] = prec["/"] = prec["%"] = 3 prec["^"] = 4 # The rigth array contains binary operators which are right associative. right["="] = right["+="] = right["-="] = right["*="] = right["/="] = right["%="] = right["^="] = 1 right["^"] = 1 start[")"] = "("; end["("] = ")" start["}"] = "{"; end["{"] = "}" start["]"] = "["; end["["] = "]" cl = 0 sp = pc = 0 # sp: stack pointer, pc: program counter. split("", code) split("", stack) split("", sym) printf "> " } { line = $0 while (parse()) run() printf "> " } function scan(peek) { ptok = tok sub(/^[ \t]+/, "", line) if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number tok = "d" substr(line, 1, RLENGTH) } else if (match(line, /^([-=+|&]{2}|![=~]|[<>+-*\/^%]=)/)) { # Operator (2 chars) tok = substr(line, 1, RLENGTH) } else if (match(line, /^([][}{)(,+-=?:;]|[*\/รท\^])/)) { # Operator (1 char) tok = substr(line, 1, RLENGTH) } else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier tok = "v" substr(line, 1, RLENGTH) } else if (match(line, /^"(\\.|[^\\"])*"/)) { # Literal string tok = "s" substr(line, 2, RLENGTH-2) } else { # Bad token tok = "b" substr(line, 1, 1) } if (!peek) line = substr(line, RLENGTH+1) return RLENGTH } # TODO: unary operators, flow control, functions # Recursive descent parser function parse(want, got, ops, sl, i, b) { b = cl while (scan() > 0) { if (want && ! got) { if ((got = tok) != want && want == "(") { print "missing " want; return 0 } ops[++sl] = "done" } if (tok == ";") { break } else if (tok == "vif") { if (! parse("(")) break code[cl++] = "jzLif" ++lif if (! parse("{")) break if (scan(1) && tok == "velse") { code[cl++] = "jLif" lif+1 code[cl++] = "Lif" lif sym["Lif" lif] = cl if (! parse("{")) break sym["Lif" ++lif] = cl } else { code[cl++] = "Lif" lif sym["Lif" lif] = cl } } else if (tok in end) { ops[++sl] = tok } else if (tok in start) { while (sl && ops[sl] != start[tok]) code[cl++] = ops[sl--] if (ops[--sl] == "done") { sl--; break } } else if (tok in prec) { # print "tok:" tok " ptok:" ptok if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[cl-1]) # Test precedence against ops, or associativity if same precedence. while (sl && (prec[tok] < prec[ops[sl]] || prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) { code[cl++] = ops[sl--] } ops[++sl] = tok } else { code[cl++] = tok } } while (sl) { # if (ops[sl] in end) { print "missing " end[ops[sl]]; return 0 } if (want && ops[sl--] == "done") continue code[cl++] = ops[sl--] } return cl-b } # TODO: if, while, function # Feel like being a stack machine. function run( c, i, l, t) { cl = length(code) for (i = pc; i < cl; i++) printf("%s ", code[i]); print "" while (pc < cl) { c = code[pc] if (c == "+") { stack[--sp] = stack[sp-1] + stack[sp] } else if (c == "-") { stack[--sp] = stack[sp-1] - stack[sp] } else if (c == "*") { stack[--sp] = stack[sp-1] * stack[sp] } else if (c == "/") { stack[--sp] = stack[sp-1] / stack[sp] } else if (c == "^") { stack[--sp] = stack[sp-1] ^ stack[sp] } else if (c == "=") { stack[--sp] = sym[substr(stack[sp-1], 2)] = stack[sp] } else if (c == "+=") { stack[--sp] = sym[substr(stack[sp-1], 2)] += stack[sp] } else if (c ~ /^p/) { stack[++sp] = c } else if (c ~ /^v/) { stack[++sp] = sym[substr(c, 2)] } else if (c ~ /^[d]/) { stack[++sp] = 0 + substr(c, 2) } else if (c ~ /^jz/) { if (!stack[sp--]) { pc = sym[substr(c, 3)]; continue } } else if (c ~ /^L/) { # nop on label } else { print "run: invalid instruction " c return } printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print "" # for (i in sym) print "sym[" i "]=" sym[i] pc++ } print sym["_"] = stack[sp--] print "sp: " sp ", len(stack): " length(stack) # for (i = sp; j in stack; i++) delete stack[i] }