summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2025-07-14 16:40:09 +0200
committerMarc Vertes <mvertes@free.fr>2025-07-14 16:40:09 +0200
commitda2a57d9209f36c5439c1051a97f2af4a5faa0dc (patch)
tree1f94393168351d06d4cdde56ee96e2b5193d0034
parenta1bd922ef46440a2bf99d7ee4e44749e10d9e66d (diff)
parse and scan work in stream mode
-rwxr-xr-xmp151
-rw-r--r--readme.md35
-rwxr-xr-xtests23
3 files changed, 115 insertions, 94 deletions
diff --git a/mp b/mp
index 816994c..bd07162 100755
--- a/mp
+++ b/mp
@@ -10,35 +10,31 @@ BEGIN {
" Prints cmd help or general help text.\n" \
" Example: help help\n"
-SUBSEP="."
+ SUBSEP="."
filename = ARGV[1]
ARGV[1] = "/dev/stdin"
tty = 1 - system("test -t 0")
- if (filename) src = getfile(filename)
+ if (filename) parse(filename, v)
prompt("mp-" version " meta parser. Try \"help\".\n")
}
{
ERROR = ""
if ($1 == "help") {
- print help[$2]
- } else if ($1 == "src") {
- printf "%s", src
+ printf "%s", help[$2]
} else if ($1 == "format") {
- print format_json(v, "", $2)
+ print json_format(v, "", $2)
} else if ($1 == "key") {
key(v, $2)
- } else if ($1 == "keys") {
- for (k in v) print k
- } else if ($1 == "parse") {
+ } else if ($1 == "dump") {
+ for (k in v) print k " '" v[k] "'"
+ } else if ($1 == "parse_string") {
delete v
sub(/^[[:space:]]+/, "")
- $0 = substr($0, 7)
- if ($0) src = $0
- parse_json(src, v)
- } else if ($1 == "read") {
- sub(/^[[:space:]]+/, "")
- $0 = substr($0, 6)
- src = getfile($0)
+ $0 = substr($0, length($1)+2)
+ parse("", v, "", $0)
+ } else if ($1 == "parse") {
+ delete v
+ parse($2, v)
} else if (NF) {
error("invalid command: " $1)
}
@@ -50,102 +46,95 @@ function error(s, n) { ERROR = s; return n }
function prompt(s) {
if (ERROR) print ERROR > "/dev/stderr"
- if (tty) printf("%s> ", s)
+ if (tty) printf "%s> ", s > "/dev/stderr"
}
-function getfile(name, l, r, o) {
- while (r = getline l < name) {
- if (r == -1) return error("getfile " name ": getline error", o)
- o = o l "\n"
- }
- return o
-}
-
-function next_json_token(str, n) {
- n = 0
+function scan(name, line, r) {
TOKEN = TOKSTR = ""
- if (match(str, /^[[:space:]]+/)) {
- n = RLENGTH
- str = substr(str, RLENGTH+1)
+ sub(/^[[:space:]]+/, "", line)
+ if (name) {
+ while (line == "") {
+ if ((r = getline line < name) == 0) return
+ if (r == -1) return error("error read " name)
+ sub(/^[[:space:]]+/, "", line)
+ }
}
- if (match(str, /^(null|true|false|[][}{,:])/)) {
- TOKEN = substr(str, 1, RLENGTH)
- } else if (match(str, /^[.0-9Ee+-]+/)) {
+ if (match(line, /^(null|true|false|[][}{,:])/)) {
+ TOKEN = substr(line, 1, RLENGTH)
+ } else if (match(line, /^[.0-9Ee+-]+/)) {
+ TOKSTR = substr(line, 1, RLENGTH)
TOKEN = "number"
- TOKSTR = substr(str, 1, RLENGTH)
- } else if (match(str, /^"(\\.|[^\\"])*"/)) {
+ } else if (match(line, /^"(\\.|[^\\"])*"/)) {
+ TOKSTR = substr(line, 2, RLENGTH-2)
TOKEN = "string"
- TOKSTR = substr(str, 2, RLENGTH-2)
} else {
- TOKSTR = substr(str, 1, 1)
- TOKEN = TOKSTR ? "invalid" : ""
+ TOKSTR = substr(line, 1, 1)
+ if (TOKSTR) TOKEN = "invalid"
}
- return n + RLENGTH
+ return substr(line, RLENGTH+1)
}
-function parse_json(str, arr, pk, i, n) {
- n = next_json_token(str)
+function parse(name, mem, key, line, i) {
+ line = scan(name, line)
if (TOKEN == "[") {
- arr[pk, "type"] = "array"
+ mem[key, "type"] = "array"
for (i = 0; TOKEN != "]"; i++) {
- if (i > 0 && TOKEN != ",") return error("not a ','", n)
- n += parse_json(substr(str, n+1), arr, pk SUBSEP "value" SUBSEP i)
+ if (i > 0 && TOKEN != ",") return error("not a ','", line)
+ line = parse(name, mem, key SUBSEP "value" SUBSEP i, line)
if (ERROR) {
if (i == 0 && TOKEN == "]") ERROR = ""
break
}
- n += next_json_token(substr(str, n+1))
+ line = scan(name, line)
}
- arr[pk, "size"] = i
} else if (TOKEN == "{") {
- arr[pk, "type"] = "object"
+ mem[key, "type"] = "object"
for (i = 0; TOKEN != "}"; i++) {
- if (i > 0 && TOKEN != ",") return error("not a ','", n)
- n += next_json_token(substr(str, n+1))
+ if (i > 0 && TOKEN != ",") return error("not a ','", line)
+ line = scan(name, line)
if (i == 0 && TOKEN == "}") break
- if (TOKEN != "string") return error("not a string", n)
- arr[pk, "key", i] = TOKSTR
- n += next_json_token(substr(str, n+1))
- if (TOKEN != ":") return error("not a ':'", n)
- n += parse_json(substr(str, n+1), arr, pk SUBSEP "value" SUBSEP i)
- if (ERROR) return n
- n += next_json_token(substr(str, n+1))
+ if (TOKEN != "string") return error("not a string", line)
+ mem[key, "key", i] = TOKSTR
+ line = scan(name, line)
+ if (TOKEN != ":") return error("not a ':'", line)
+ line = parse(name, mem, key SUBSEP "value" SUBSEP i, line)
+ if (ERROR) return line
+ line = scan(name, line)
}
- arr[pk, "size"] = i
} else if (TOKEN ~ /^(null|true|false|string|number)$/) {
- arr[pk, "type"] = TOKEN
- if (TOKSTR) arr[pk, "string"] = TOKSTR
+ mem[key, "type"] = TOKEN
+ if (TOKSTR) mem[key, "string"] = TOKSTR
} else if (TOKEN) ERROR = "invalid token '" TOKEN "'"
- return n
+ return line
}
-function format_json(arr, pk, il, i, l, t, s, bl, al, ps) {
- if (il) {
- bl = space(il * 2)
- al = "\n"
- ps = " "
- }
- t = arr[pk, "type"]
+function json_format(mem, key, indent, i, k, t, s, pre, post, ksep) {
+ pre = indent ? space(indent * 2) : ""
+ post = indent ? "\n" : ""
+ ksep = indent ? ": " : ":"
+ t = mem[key, "type"]
if (t == "null" || t == "true" || t == "false") return t
- if (t == "number") return arr[pk, "string"]
- if (t == "string") return "\"" arr[pk, "string"] "\""
+ if (t == "number") return mem[key, "string"]
+ if (t == "string") return "\"" mem[key, "string"] "\""
if (t == "object") {
- s = "{" al
- l = arr[pk, "size"]
- for (i = 0; i < l; i++) {
- s = s bl "\"" arr[pk, "key", i] "\":" ps format_json(arr, pk SUBSEP "value" SUBSEP i, il ? il+1 : 0)
- if (i < l-1) s = s "," al
+ s = "{" post
+ for (i = 0; ; i++) {
+ k = key SUBSEP "value" SUBSEP i
+ if (! (k SUBSEP "type" in mem)) break
+ s = s pre "\"" mem[key, "key", i] "\"" ksep json_format(mem, k, indent ? indent+1 : 0) "," post
}
- return s al substr(bl, 3) "}"
+ if (i) s = substr(s, 1, length(s) - length(post) - 1)
+ return s post substr(pre, 3) "}"
}
if (t == "array") {
- s = "[" al
- l = arr[pk, "size"]
- for (i = 0; i < arr[pk, "size"]; i++) {
- s = s bl format_json(arr, pk SUBSEP "value" SUBSEP i, il ? il+1 : 0)
- if (i < l-1) s = s "," al
+ s = "[" post
+ for (i = 0; ; i++) {
+ k = key SUBSEP "value" SUBSEP i
+ if (! (k SUBSEP "type" in mem)) break
+ s = s pre json_format(mem, k, indent ? indent+1 : 0) "," post
}
- return s al substr(bl, 3) "]"
+ if (i) s = substr(s, 1, length(s) - length(post) - 1)
+ return s post substr(pre, 3) "]"
}
}
diff --git a/readme.md b/readme.md
index 83650cf..69268e4 100644
--- a/readme.md
+++ b/readme.md
@@ -1,3 +1,36 @@
# mp
-parsing and repl in awk.
+Parsing and repl in awk.
+
+## Representation of JSON in awk
+
+The representation relies on multi-dimensional sparse arrays. Ultimately, this
+is just an array of strings indexed by strings.
+
+The JSON types are: true, false, null, string, number, array and object.
+
+Key elements:
+- T: true type
+- F: false type
+- N: null type
+- s: string type
+- n: number type
+- a: array type
+- o: object type
+- k: key (object only)
+- v: value (array and object)
+- t: type (one of T, F, N, s, n, a, o)
+- l: length (number of array values, or object key-value pairs).
+- [0-9]+: index of key or value
+
+## I/O
+
+Use string-encoded, line-oriented protocols. Each command or response fits on a
+single line. Newlines in string values must be escaped. An optional session id
+at the beginning of each command and response allows several peer sessions to
+be multiplexed on the same stream.
+
+- Command:
+ [number] command_name args...
+- Response:
+ [number] data ...
diff --git a/tests b/tests
index a13cfdb..8e5d20b 100755
--- a/tests
+++ b/tests
@@ -12,16 +12,15 @@ run() {
[ "$FAILFAST" ] && set -e
pass=0 fail=0 filter="$1"
-trap 'echo "$pass passed, $fail failed"; exit $((fail))' EXIT
-
-run basic1 "parse\nformat" ''
-run basic2 'parse null\nformat' 'null'
-run basic3 'parse true\nformat' 'true'
-run basic4 'parse false\nformat' 'false'
-run num1 'parse 12\nformat' '12'
-run str1 'parse "hello"\nformat' '"hello"'
-run arr1 'parse []\nformat' '[]'
-run arr2 'parse [null]\nformat' '[null]'
-run arr3 'parse [true,false]\nformat' '[true,false]'
-run obj1 'parse {}\nformat' '{}'
+trap 'echo "$pass passed, $fail failed"; exit $((fail != 0))' EXIT
+run basic1 'parse_string\nformat' ''
+run basic2 'parse_string null\nformat' 'null'
+run basic3 'parse_string true\nformat' 'true'
+run basic4 'parse_string false\nformat' 'false'
+run num1 'parse_string 12\nformat' '12'
+run str1 'parse_string "hello"\nformat' '"hello"'
+run arr1 'parse_string []\nformat' '[]'
+run arr2 'parse_string [null]\nformat' '[null]'
+run arr3 'parse_string [true,false]\nformat' '[true,false]'
+run obj1 'parse_string {}\nformat' '{}'