diff options
| author | Marc Vertes <mvertes@free.fr> | 2025-07-14 16:40:09 +0200 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2025-07-14 16:40:09 +0200 |
| commit | da2a57d9209f36c5439c1051a97f2af4a5faa0dc (patch) | |
| tree | 1f94393168351d06d4cdde56ee96e2b5193d0034 | |
| parent | a1bd922ef46440a2bf99d7ee4e44749e10d9e66d (diff) | |
parse and scan work in stream mode
| -rwxr-xr-x | mp | 151 | ||||
| -rw-r--r-- | readme.md | 35 | ||||
| -rwxr-xr-x | tests | 23 |
3 files changed, 115 insertions, 94 deletions
@@ -10,35 +10,31 @@ BEGIN { " Prints cmd help or general help text.\n" \ " Example: help help\n" -SUBSEP="." + SUBSEP="." filename = ARGV[1] ARGV[1] = "/dev/stdin" tty = 1 - system("test -t 0") - if (filename) src = getfile(filename) + if (filename) parse(filename, v) prompt("mp-" version " meta parser. Try \"help\".\n") } { ERROR = "" if ($1 == "help") { - print help[$2] - } else if ($1 == "src") { - printf "%s", src + printf "%s", help[$2] } else if ($1 == "format") { - print format_json(v, "", $2) + print json_format(v, "", $2) } else if ($1 == "key") { key(v, $2) - } else if ($1 == "keys") { - for (k in v) print k - } else if ($1 == "parse") { + } else if ($1 == "dump") { + for (k in v) print k " '" v[k] "'" + } else if ($1 == "parse_string") { delete v sub(/^[[:space:]]+/, "") - $0 = substr($0, 7) - if ($0) src = $0 - parse_json(src, v) - } else if ($1 == "read") { - sub(/^[[:space:]]+/, "") - $0 = substr($0, 6) - src = getfile($0) + $0 = substr($0, length($1)+2) + parse("", v, "", $0) + } else if ($1 == "parse") { + delete v + parse($2, v) } else if (NF) { error("invalid command: " $1) } @@ -50,102 +46,95 @@ function error(s, n) { ERROR = s; return n } function prompt(s) { if (ERROR) print ERROR > "/dev/stderr" - if (tty) printf("%s> ", s) + if (tty) printf "%s> ", s > "/dev/stderr" } -function getfile(name, l, r, o) { - while (r = getline l < name) { - if (r == -1) return error("getfile " name ": getline error", o) - o = o l "\n" - } - return o -} - -function next_json_token(str, n) { - n = 0 +function scan(name, line, r) { TOKEN = TOKSTR = "" - if (match(str, /^[[:space:]]+/)) { - n = RLENGTH - str = substr(str, RLENGTH+1) + sub(/^[[:space:]]+/, "", line) + if (name) { + while (line == "") { + if ((r = getline line < name) == 0) return + if (r == -1) return error("error read " name) + sub(/^[[:space:]]+/, "", line) + } } - if (match(str, /^(null|true|false|[][}{,:])/)) { - TOKEN = substr(str, 1, RLENGTH) - } else if (match(str, /^[.0-9Ee+-]+/)) { + if 
(match(line, /^(null|true|false|[][}{,:])/)) { + TOKEN = substr(line, 1, RLENGTH) + } else if (match(line, /^[.0-9Ee+-]+/)) { + TOKSTR = substr(line, 1, RLENGTH) TOKEN = "number" - TOKSTR = substr(str, 1, RLENGTH) - } else if (match(str, /^"(\\.|[^\\"])*"/)) { + } else if (match(line, /^"(\\.|[^\\"])*"/)) { + TOKSTR = substr(line, 2, RLENGTH-2) TOKEN = "string" - TOKSTR = substr(str, 2, RLENGTH-2) } else { - TOKSTR = substr(str, 1, 1) - TOKEN = TOKSTR ? "invalid" : "" + TOKSTR = substr(line, 1, 1) + if (TOKSTR) TOKEN = "invalid" } - return n + RLENGTH + return substr(line, RLENGTH+1) } -function parse_json(str, arr, pk, i, n) { - n = next_json_token(str) +function parse(name, mem, key, line, i) { + line = scan(name, line) if (TOKEN == "[") { - arr[pk, "type"] = "array" + mem[key, "type"] = "array" for (i = 0; TOKEN != "]"; i++) { - if (i > 0 && TOKEN != ",") return error("not a ','", n) - n += parse_json(substr(str, n+1), arr, pk SUBSEP "value" SUBSEP i) + if (i > 0 && TOKEN != ",") return error("not a ','", line) + line = parse(name, mem, key SUBSEP "value" SUBSEP i, line) if (ERROR) { if (i == 0 && TOKEN == "]") ERROR = "" break } - n += next_json_token(substr(str, n+1)) + line = scan(name, line) } - arr[pk, "size"] = i } else if (TOKEN == "{") { - arr[pk, "type"] = "object" + mem[key, "type"] = "object" for (i = 0; TOKEN != "}"; i++) { - if (i > 0 && TOKEN != ",") return error("not a ','", n) - n += next_json_token(substr(str, n+1)) + if (i > 0 && TOKEN != ",") return error("not a ','", line) + line = scan(name, line) if (i == 0 && TOKEN == "}") break - if (TOKEN != "string") return error("not a string", n) - arr[pk, "key", i] = TOKSTR - n += next_json_token(substr(str, n+1)) - if (TOKEN != ":") return error("not a ':'", n) - n += parse_json(substr(str, n+1), arr, pk SUBSEP "value" SUBSEP i) - if (ERROR) return n - n += next_json_token(substr(str, n+1)) + if (TOKEN != "string") return error("not a string", line) + mem[key, "key", i] = TOKSTR + line = scan(name, 
line) + if (TOKEN != ":") return error("not a ':'", line) + line = parse(name, mem, key SUBSEP "value" SUBSEP i, line) + if (ERROR) return line + line = scan(name, line) } - arr[pk, "size"] = i } else if (TOKEN ~ /^(null|true|false|string|number)$/) { - arr[pk, "type"] = TOKEN - if (TOKSTR) arr[pk, "string"] = TOKSTR + mem[key, "type"] = TOKEN + if (TOKSTR) mem[key, "string"] = TOKSTR } else if (TOKEN) ERROR = "invalid token '" TOKEN "'" - return n + return line } -function format_json(arr, pk, il, i, l, t, s, bl, al, ps) { - if (il) { - bl = space(il * 2) - al = "\n" - ps = " " - } - t = arr[pk, "type"] +function json_format(mem, key, indent, i, k, t, s, pre, post, ksep) { + pre = indent ? space(indent * 2) : "" + post = indent ? "\n" : "" + ksep = indent ? ": " : ":" + t = mem[key, "type"] if (t == "null" || t == "true" || t == "false") return t - if (t == "number") return arr[pk, "string"] - if (t == "string") return "\"" arr[pk, "string"] "\"" + if (t == "number") return mem[key, "string"] + if (t == "string") return "\"" mem[key, "string"] "\"" if (t == "object") { - s = "{" al - l = arr[pk, "size"] - for (i = 0; i < l; i++) { - s = s bl "\"" arr[pk, "key", i] "\":" ps format_json(arr, pk SUBSEP "value" SUBSEP i, il ? il+1 : 0) - if (i < l-1) s = s "," al + s = "{" post + for (i = 0; ; i++) { + k = key SUBSEP "value" SUBSEP i + if (! (k SUBSEP "type" in mem)) break + s = s pre "\"" mem[key, "key", i] "\"" ksep json_format(mem, k, indent ? indent+1 : 0) "," post } - return s al substr(bl, 3) "}" + if (i) s = substr(s, 1, length(s) - length(post) - 1) + return s post substr(pre, 3) "}" } if (t == "array") { - s = "[" al - l = arr[pk, "size"] - for (i = 0; i < arr[pk, "size"]; i++) { - s = s bl format_json(arr, pk SUBSEP "value" SUBSEP i, il ? il+1 : 0) - if (i < l-1) s = s "," al + s = "[" post + for (i = 0; ; i++) { + k = key SUBSEP "value" SUBSEP i + if (! (k SUBSEP "type" in mem)) break + s = s pre json_format(mem, k, indent ? 
indent+1 : 0) "," post } - return s al substr(bl, 3) "]" + if (i) s = substr(s, 1, length(s) - length(post) - 1) + return s post substr(pre, 3) "]" } } @@ -1,3 +1,36 @@ # mp -parsing and repl in awk. +Parsing and repl in awk. + +## Representation of JSON in awk + +Rely on multi-dimensional sparse arrays. In fine this is just +an array of strings indexed by strings. + +The JSON types are: true, false, + +Key elements: +- T: true type +- F: false type +- N: null type +- s: string type +- n: number type +- a: array type +- o: object type +- k: key (object only) +- v: value (array and object) +- t: type (one of T, F, N, s, n, a, o) +- l: length (number of array values, or object key-value pairs). +- [0-9]+: index of key or value + +## I/O + +Use string encoded, line oriented protocols. Each command or response fits on a +single line. Newlines in string values must be escaped. Use an optional session +id at begin of command and response to allow to multiplex several peer sessions +on the same stream. + +- Command: + [number] command_name args... +- Response: + [number] data ... 
@@ -12,16 +12,15 @@ run() { [ "$FAILFAST" ] && set -e pass=0 fail=0 filter="$1" -trap 'echo "$pass passed, $fail failed"; exit $((fail))' EXIT - -run basic1 "parse\nformat" '' -run basic2 'parse null\nformat' 'null' -run basic3 'parse true\nformat' 'true' -run basic4 'parse false\nformat' 'false' -run num1 'parse 12\nformat' '12' -run str1 'parse "hello"\nformat' '"hello"' -run arr1 'parse []\nformat' '[]' -run arr2 'parse [null]\nformat' '[null]' -run arr3 'parse [true,false]\nformat' '[true,false]' -run obj1 'parse {}\nformat' '{}' +trap 'echo "$pass passed, $fail failed"; exit $((fail != 0))' EXIT +run basic1 'parse_string\nformat' '' +run basic2 'parse_string null\nformat' 'null' +run basic3 'parse_string true\nformat' 'true' +run basic4 'parse_string false\nformat' 'false' +run num1 'parse_string 12\nformat' '12' +run str1 'parse_string "hello"\nformat' '"hello"' +run arr1 'parse_string []\nformat' '[]' +run arr2 'parse_string [null]\nformat' '[null]' +run arr3 'parse_string [true,false]\nformat' '[true,false]' +run obj1 'parse_string {}\nformat' '{}' |
