#!/usr/bin/env awk -f # an evaluator of code: scanner, parser and bytecode vm. BEGIN { # The prec array contains the precedence level of binary operators. prec["="] = prec["=="] = prec["!="] = prec["<"] = prec["<="] = prec[">"] = prec[">="] = 1 prec["+="] = prec["-="] = prec["*="] = prec["/="] = prec["%="] = prec["^="] = 1 prec["+"] = prec["-"] = 2 prec["*"] = prec["/"] = prec["%"] = 3 prec["^"] = 4 # The rigth array contains binary operators which are right associative. right["="] = right["+="] = right["-="] = right["*="] = right["/="] = right["%="] = right["^="] = 1 right["^"] = 1 start[")"] = "(" start["}"] = "{" start["]"] = "[" sp = pc = 0 # sp: stack pointer, pc: program counter. split("", code) split("", stack) split("", sym) printf "> " } { line = $0 parse() run() printf "> " } function scan() { ptok = tok sub(/^[ \t]+/, "", line) if (match(line, /^[0-9]([.0-9]*(e[+-]*)*[0-9])*/)) { # Literal number tok = "d" substr(line, 1, RLENGTH) } else if (match(line, /^([-=+|&]{2}|![=~]|[<>+-*\/^%]=)/)) { # Operator (2 chars) tok = substr(line, 1, RLENGTH) } else if (match(line, /^([][}{)(,+-=?:]|[*\/รท\^])/)) { # Operator (1 char) tok = substr(line, 1, RLENGTH) } else if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/)) { # Identifier tok = "v" substr(line, 1, RLENGTH) } else if (match(line, /^"(\\.|[^\\"])*"/)) { # Literal string tok = "s" substr(line, 2, RLENGTH-2) } else { # Bad token tok = "b" substr(line, 1, 1) } line = substr(line, RLENGTH+1) return RLENGTH } # TODO: unary operators, flow contror function parse( ops, sl, i, b) { b = i = length(code) while (scan() > 0) { if (tok ~ /[({[]/) { ops[++sl] = tok } else if (tok in start) { while (sl && ops[sl] != start[tok]) code[i++] = ops[sl--] sl-- } else if (tok in prec) { # print "tok:" tok " ptok:" ptok if (tok ~ /[+-*\/%^]?=/ && ptok ~ /^v/) sub(/^v/, "p", code[i-1]) # mutate var to pointer for assign # Test precedence against ops, or associativity if same precedence. while (sl && (prec[tok] < prec[ops[sl]] || prec[tok] == prec[ops[sl]] && !(ops[sl] in right))) { code[i++] = ops[sl--] } ops[++sl] = tok } else { code[i++] = tok } } while (sl) code[i++] = ops[sl--] for (j = b; j < i; j++) printf("%s ", code[j]); print "" } # TODO: if, while, function # Feel like being a stack machine. function run( c, i, l, t) { cl = length(code) while (pc < cl) { c = code[pc] if (c == "+") { stack[--sp] = stack[sp-1] + stack[sp] } else if (c == "-") { stack[--sp] = stack[sp-1] - stack[sp] } else if (c == "*") { stack[--sp] = stack[sp-1] * stack[sp] } else if (c == "/") { stack[--sp] = stack[sp-1] / stack[sp] } else if (c == "^") { stack[--sp] = stack[sp-1] ^ stack[sp] } else if (c == "=") { stack[--sp] = sym[substr(stack[sp-1], 2)] = stack[sp] } else if (c == "+=") { stack[--sp] = sym[substr(stack[sp-1], 2)] += stack[sp] } else if (c ~ /^p/) { stack[++sp] = c } else if (c ~ /^v/) { stack[++sp] = sym[substr(c, 2)] } else if (c ~ /^[d]/) { stack[++sp] = 0 + substr(c, 2) } else { print "run: invalid instruction " c return } # printf "c" pc ":%s", c; for (i = 1; i <= sp; i++) printf " " i ":" stack[i]; print "" # for (i in sym) print "sym[" i "]=" sym[i] pc++ } print sym["_"] = stack[sp--] print "sp: " sp ", len(stack): " length(stack) # for (i = sp; j in stack; i++) delete stack[i] }