From eb3e34cc63e56dbe9752a3abeddcc0639d7a542e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 7 Sep 2020 00:14:07 -0400 Subject: [PATCH] make JSON printer nesting-aware --- go/src/miller/lib/mlrmap_json.go | 51 +++++++- go/src/miller/lib/mlrval_accessors.go | 3 + go/src/miller/lib/mlrval_json.go | 93 ++++++++++++-- go/todo.txt | 1 - go/u/try-cst | 15 +++ go/u/try-cst.out | 169 ++++++++++++++++++++++++-- 6 files changed, 309 insertions(+), 23 deletions(-) diff --git a/go/src/miller/lib/mlrmap_json.go b/go/src/miller/lib/mlrmap_json.go index b1ab15ac9..af43827dd 100644 --- a/go/src/miller/lib/mlrmap_json.go +++ b/go/src/miller/lib/mlrmap_json.go @@ -10,19 +10,52 @@ import ( // ---------------------------------------------------------------- func (this *Mlrmap) MarshalJSON() ([]byte, error) { var buffer bytes.Buffer + mapBytes, err := this.marshalJSONAux(1) + if err != nil { + return nil, err + } + buffer.Write(mapBytes) + buffer.WriteString("\n") + return buffer.Bytes(), nil +} - // TODO: how to handle indentation for the nested-object case. - buffer.WriteString("{\n") +// For a map we only write from opening curly brace to closing curly brace. In +// nested-map contexts, this particular map might be written with a comma +// immediately after its closing curly brace, or a newline, and only the caller +// can know that. +// +// element nesting depth is how deeply our element should be indented. Our +// closing curly brace is indented one less than that. For example, a +// root-level record '{"a":1,"b":2}' should be formatted as +// +// { +// "a": 1, <-- element nesting depth is 1 for root-level map +// "b": 2 <-- element nesting depth is 1 for root-level map +// } <-- closing curly brace nesting depth is 0 for root-level map + +func (this *Mlrmap) marshalJSONAux(elementNestingDepth int) ([]byte, error) { + var buffer bytes.Buffer + + buffer.WriteString("{") + // Write empty map as '{}'. 
For anything else, opening curly brace on a + // line of its own, one key-value pair per line, closing curly brace on a + // line of its own. + if this.Head != nil { + buffer.WriteString("\n") + } for pe := this.Head; pe != nil; pe = pe.Next { // Write the key which is necessarily string-valued in Miller, and in // JSON for that matter :) - buffer.WriteString(" \"") + for i := 0; i < elementNestingDepth; i++ { + buffer.WriteString(MLRVAL_JSON_INDENT_STRING) + } + buffer.WriteString("\"") buffer.WriteString(*pe.Key) buffer.WriteString("\": ") // Write the value which is a mlrval - valueBytes, err := pe.Value.MarshalJSON() + valueBytes, err := pe.Value.marshalJSONAux(elementNestingDepth + 1) if err != nil { return nil, err } @@ -36,6 +69,14 @@ func (this *Mlrmap) MarshalJSON() ([]byte, error) { } buffer.WriteString("\n") } - buffer.WriteString("}\n") + + // Write empty map as '{}'. + if this.Head != nil { + for i := 0; i < elementNestingDepth - 1; i++ { + buffer.WriteString(MLRVAL_JSON_INDENT_STRING) + } + } + buffer.WriteString("}") + return buffer.Bytes(), nil } diff --git a/go/src/miller/lib/mlrval_accessors.go b/go/src/miller/lib/mlrval_accessors.go index dfd1b95b2..1290c4d5f 100644 --- a/go/src/miller/lib/mlrval_accessors.go +++ b/go/src/miller/lib/mlrval_accessors.go @@ -49,6 +49,9 @@ func (this *Mlrval) IsArray() bool { func (this *Mlrval) IsMap() bool { return this.mvtype == MT_MAP } +func (this *Mlrval) IsArrayOrMap() bool { + return this.mvtype == MT_ARRAY || this.mvtype == MT_MAP +} func (this *Mlrval) GetArray() []Mlrval { if this.mvtype == MT_ARRAY { diff --git a/go/src/miller/lib/mlrval_json.go b/go/src/miller/lib/mlrval_json.go index 8069a2269..783a46070 100644 --- a/go/src/miller/lib/mlrval_json.go +++ b/go/src/miller/lib/mlrval_json.go @@ -16,6 +16,8 @@ import ( "strings" ) +const MLRVAL_JSON_INDENT_STRING string = " " + // ================================================================ // The JSON decoder 
(https://golang.org/pkg/encoding/json/#Decoder) is quite // nice. What we can have is: @@ -143,7 +145,7 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) (mlrval *Mlrval, eof bool, err } return nil, false, errors.New( - "Miller JSON reader: internal coding error: non-delimiter token unhandled", + "Miller JSON reader internal coding error: non-delimiter token unhandled", ) } else { @@ -247,6 +249,10 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) (mlrval *Mlrval, eof bool, err // ================================================================ func (this *Mlrval) MarshalJSON() ([]byte, error) { + return this.marshalJSONAux(1) +} + +func (this *Mlrval) marshalJSONAux(elementNestingDepth int) ([]byte, error) { switch this.mvtype { case MT_PENDING: return this.marshalJSONPending() @@ -270,10 +276,10 @@ func (this *Mlrval) MarshalJSON() ([]byte, error) { return this.marshalJSONBool() break case MT_ARRAY: - return this.marshalJSONArray() + return this.marshalJSONArray(elementNestingDepth) break case MT_MAP: - return this.marshalJSONMap() + return this.marshalJSONMap(elementNestingDepth) break case MT_DIM: // MT_DIM is one past the last valid type return nil, errors.New("internal coding error detected") @@ -288,7 +294,7 @@ func (this *Mlrval) MarshalJSON() ([]byte, error) { func (this *Mlrval) marshalJSONPending() ([]byte, error) { InternalCodingErrorIf(this.mvtype != MT_PENDING) return nil, errors.New( - "Miller: internal coding error: pending-values should not have been produced", + "Miller internal coding error: pending-values should not have been produced", ) } @@ -296,7 +302,7 @@ func (this *Mlrval) marshalJSONPending() ([]byte, error) { func (this *Mlrval) marshalJSONAbsent() ([]byte, error) { InternalCodingErrorIf(this.mvtype != MT_ABSENT) return nil, errors.New( - "Miller: internal coding error: absent-values should not have been assigned", + "Miller internal coding error: absent-values should not have been assigned", ) } @@ -335,14 +341,31 @@ func (this 
*Mlrval) marshalJSONBool() ([]byte, error) { } // ---------------------------------------------------------------- -// TODO: find out how to handle indentation in the nested-array/nested-map case ... -func (this *Mlrval) marshalJSONArray() ([]byte, error) { +func (this *Mlrval) marshalJSONArray(elementNestingDepth int) ([]byte, error) { InternalCodingErrorIf(this.mvtype != MT_ARRAY) + + // Put an array of all-terminal nodes all on one line, like '[1,2,3,4,5]'. + allTerminal := true + for _, element := range this.arrayval { + if element.IsArrayOrMap() { + allTerminal = false + break + } + } + if allTerminal { + return this.marshalJSONArraySingleLine(elementNestingDepth) + } else { + return this.marshalJSONArrayMultipleLines(elementNestingDepth) + } +} + +func (this *Mlrval) marshalJSONArraySingleLine(elementNestingDepth int) ([]byte, error) { n := len(this.arrayval) var buffer bytes.Buffer buffer.WriteByte('[') + for i, element := range this.arrayval { - elementBytes, err := element.MarshalJSON() + elementBytes, err := element.marshalJSONAux(elementNestingDepth + 1) if err != nil { return nil, err } @@ -355,10 +378,60 @@ func (this *Mlrval) marshalJSONArray() ([]byte, error) { return buffer.Bytes(), nil } +// The element nesting depth is how deeply our element should be indented. Our +// closing bracket is indented one less than that. 
For example, a +// record '{"a":1,"b":[3,[4,5],6],"c":7}' should be formatted as +// +// { +// "a": 1, +// "b": [ <-- root-level map element nesting depth is 1 +// 3, <-- this array's element nesting depth is 2 +// [4, 5], +// 6 +// ], <-- this array's closing-bracket depth is 1, one less than its element nesting depth +// "c": 7 +// } + +func (this *Mlrval) marshalJSONArrayMultipleLines(elementNestingDepth int) ([]byte, error) { + n := len(this.arrayval) + var buffer bytes.Buffer + + // Write empty array as '[]' + buffer.WriteByte('[') + if n > 0 { + buffer.WriteByte('\n') + } + + for i, element := range this.arrayval { + elementBytes, err := element.marshalJSONAux(elementNestingDepth + 1) + if err != nil { + return nil, err + } + for i := 0; i < elementNestingDepth; i++ { + buffer.WriteString(MLRVAL_JSON_INDENT_STRING) + } + buffer.Write(elementBytes) + if i < n-1 { + buffer.WriteString(",") + } + buffer.WriteString("\n") + } + + // Write empty array as '[]' + if n > 0 { + for i := 0; i < elementNestingDepth-1; i++ { + buffer.WriteString(MLRVAL_JSON_INDENT_STRING) + } + } + + buffer.WriteByte(']') + return buffer.Bytes(), nil +} + // ---------------------------------------------------------------- -func (this *Mlrval) marshalJSONMap() ([]byte, error) { +func (this *Mlrval) marshalJSONMap(elementNestingDepth int) ([]byte, error) { InternalCodingErrorIf(this.mvtype != MT_MAP) - bytes, err := this.mapval.MarshalJSON() + bytes, err := this.mapval.marshalJSONAux(elementNestingDepth) if err != nil { return nil, err } diff --git a/go/todo.txt b/go/todo.txt index 168e417fb..d827a2273 100644 --- a/go/todo.txt +++ b/go/todo.txt @@ -3,7 +3,6 @@ TOP OF LIST: * json: o thorough UT for json mlrval-parser - o need nesting-aware json printer * doc re no jlistwrap on input if they want get streaming input o UT JSON-to-JSON cat-mapping should be identical diff --git a/go/u/try-cst b/go/u/try-cst index bef0b888e..cfc3adc7c 100755 --- a/go/u/try-cst +++ b/go/u/try-cst @@ -104,3 +104,18 @@ 
echo; run_mlr --from u/s.dkvp --idkvp --opprint put '$z = {"a":$a,"b":$b,"i":$i, echo; run_mlr --from u/s.dkvp --from u/t.dkvp --ojson put '$z=[1,2,[NR,[FILENAME,5],$x*$y]]' + + + +echo '{"x":1}' | run_mlr --json cat +echo '{"x":[1,2,3]}' | run_mlr --json cat +echo '{"x":[1,[2,3,4],5]}' | run_mlr --json cat +echo '{"x":[1,[2,[3,4,5],6],7]}' | run_mlr --json cat + +echo '{"x":{}}' | run_mlr --json cat +echo '{"x":{"a":1,"b":2,"c":3}}' | run_mlr --json cat +echo '{"x":{"a":1,"b":{"c":3,"d":4,"e":5},"f":6}}' | run_mlr --json cat + +echo '{"x":{},"y":1}' | run_mlr --json cat +echo '{"x":{"a":1,"b":2,"c":3},"y":4}' | run_mlr --json cat +echo '{"x":{"a":1,"b":{"c":3,"d":4,"e":5},"f":6},"y":7}' | run_mlr --json cat diff --git a/go/u/try-cst.out b/go/u/try-cst.out index b9d5a796d..aa417f434 100644 --- a/go/u/try-cst.out +++ b/go/u/try-cst.out @@ -818,7 +818,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 1, "x": 0.3467901443380824, "y": 0.7268028627434533, - "z": [1, 2, [1, ["u/s.dkvp", 5], 0.2520480696761337]] + "z": [ + 1, + 2, + [ + 1, + ["u/s.dkvp", 5], + 0.2520480696761337 + ] + ] } { "a": "eks", @@ -826,7 +834,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 2, "x": 0.7586799647899636, "y": 0.5221511083334797, - "z": [1, 2, [2, ["u/s.dkvp", 5], 0.3961455844854848]] + "z": [ + 1, + 2, + [ + 2, + ["u/s.dkvp", 5], + 0.3961455844854848 + ] + ] } { "a": "wye", @@ -834,7 +850,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 3, "x": 0.20460330576630303, "y": 0.33831852551664776, - "z": [1, 2, [3, ["u/s.dkvp", 5], 0.06922108872268748]] + "z": [ + 1, + 2, + [ + 3, + ["u/s.dkvp", 5], + 0.06922108872268748 + ] + ] } { "a": "eks", @@ -842,7 +866,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 4, "x": 0.38139939387114097, "y": 0.13418874328430463, - "z": [1, 2, [4, ["u/s.dkvp", 5], 0.05117950535296393]] + 
"z": [ + 1, + 2, + [ + 4, + ["u/s.dkvp", 5], + 0.05117950535296393 + ] + ] } { "a": "wye", @@ -850,7 +882,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 5, "x": 0.5732889198020006, "y": 0.8636244699032729, - "z": [1, 2, [5, ["u/t.dkvp", 5], 0.4951063394654227]] + "z": [ + 1, + 2, + [ + 5, + ["u/t.dkvp", 5], + 0.4951063394654227 + ] + ] } { "a": "zee", @@ -858,7 +898,15 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 6, "x": 0.5271261600918548, "y": 0.49322128674835697, - "z": [1, 2, [6, ["u/t.dkvp", 5], 0.259989842959225]] + "z": [ + 1, + 2, + [ + 6, + ["u/t.dkvp", 5], + 0.259989842959225 + ] + ] } { "a": "eks", @@ -866,5 +914,112 @@ mlr --from u/s.dkvp --from u/t.dkvp --ojson put $z=[1,2,[NR,[FILENAME,5],$x*$y]] "i": 7, "x": 0.6117840605678454, "y": 0.1878849191181694, - "z": [1, 2, [7, ["u/t.dkvp", 5], 0.11494499873757488]] + "z": [ + 1, + 2, + [ + 7, + ["u/t.dkvp", 5], + 0.11494499873757488 + ] + ] +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": 1 +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": [1, 2, 3] +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": [ + 1, + [2, 3, 4], + 5 + ] +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": [ + 1, + [ + 2, + [3, 4, 5], + 6 + ], + 7 + ] +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": {} +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": { + "a": 1, + "b": 2, + "c": 3 + } +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": { + "a": 1, + "b": { + "c": 3, + "d": 4, + "e": 5 + }, + "f": 6 + } +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": {}, + "y": 1 +} + 
+---------------------------------------------------------------- +mlr --json cat +{ + "x": { + "a": 1, + "b": 2, + "c": 3 + }, + "y": 4 +} + +---------------------------------------------------------------- +mlr --json cat +{ + "x": { + "a": 1, + "b": { + "c": 3, + "d": 4, + "e": 5 + }, + "f": 6 + }, + "y": 7 }