From ee86189f12bc5e53921f732c5afb76d25cd3dabc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Feb 2021 00:49:02 -0500 Subject: [PATCH] flatten/unflatten code-dedupe --- go/reg-test/cases/case-repl.sh | 21 ++++ go/reg-test/expected/case-repl.sh.out | 96 +++++++++++++++ go/src/miller/auxents/repl/entry.go | 6 + go/src/miller/auxents/repl/verbs.go | 75 ++---------- go/src/miller/cli/mlrcli_parse.go | 166 ++++++++++++++------------ go/todo.txt | 19 +-- 6 files changed, 227 insertions(+), 156 deletions(-) diff --git a/go/reg-test/cases/case-repl.sh b/go/reg-test/cases/case-repl.sh index 57c26953c..ef9fcd920 100644 --- a/go/reg-test/cases/case-repl.sh +++ b/go/reg-test/cases/case-repl.sh @@ -66,3 +66,24 @@ run_mlr repl < non-JSON. -// -// TODO: centralize a function/data between here & mlrcli_parse.go & refer to it. -// TODO: centralize the narrative comments as well. -// -// ---------------------------------------------------------------- -// PROBLEM TO BE SOLVED: -// -// JSON has nested structures and CSV et al. do not. For example: -// { -// "req" : { -// "method": "GET", -// "path": "api/check", -// } -// } -// -// For CSV we flatten this down to -// -// { -// "req.method": "GET", -// "req.path": "api/check" -// } -// -// ---------------------------------------------------------------- -// APPROACH: -// -// Use the Principle of Least Surprise (POLS). -// -// * If input is JSON and output is JSON: -// o Records can be nested from record-read -// o They remain that way through the Miller record-processing stream -// o They are nested on record-write -// o No action needs to be taken -// * If input is JSON and output is non-JSON: -// o Records can be nested from record-read -// o They remain that way through the Miller record-processing stream -// o On record-write, nested structures will be converted to string (carriage -// returns and all) using json_stringify. People *might* want this but -// (using POLS) we will (by default) AUTO-FLATTEN for them. 
There is a -// --no-auto-unflatten CLI flag for those who want it. -// * If input is non-JSON and output is non-JSON: -// o Leave records as-is. -// o Example, if there is a "req.method" field, people should be able to do -// 'mlr sort -f req.method' with no surprises. (Again, POLS.) -// o People can insert an unflatten verb into their verb chain if they really -// want unflatten for non-JSON files. -// * If input is non-JSON and output is JSON: -// o Default is to auto-unflatten at output. -// o There is a --no-auto-unflatten for those who want it. -// ================================================================ - func writeRecord(this *Repl, outrec *types.Mlrmap) { - ropt := &this.options.ReaderOptions - wopt := &this.options.WriterOptions - ifmt := ropt.InputFileFormat - ofmt := wopt.OutputFileFormat - - if wopt.AutoFlatten { - if ifmt == "json" && ofmt != "json" { - outrec.Flatten(wopt.OFLATSEP) + if outrec != nil { + // E.g. '{"req": {"method": "GET", "path": "/api/check"}}' becomes + // req.method=GET,req.path=/api/check. + if this.options.WriterOptions.AutoFlatten { + outrec.Flatten(this.options.WriterOptions.OFLATSEP) } - } - - if wopt.AutoUnflatten { - if ifmt != "json" && ofmt == "json" { - outrec.Unflatten(wopt.OFLATSEP) + // E.g. req.method=GET,req.path=/api/check becomes + // '{"req": {"method": "GET", "path": "/api/check"}}' + if this.options.WriterOptions.AutoUnflatten { + outrec.Unflatten(this.options.WriterOptions.OFLATSEP) } } this.recordWriter.Write(outrec, this.outputStream) diff --git a/go/src/miller/cli/mlrcli_parse.go b/go/src/miller/cli/mlrcli_parse.go index 1c3ae24db..895529deb 100644 --- a/go/src/miller/cli/mlrcli_parse.go +++ b/go/src/miller/cli/mlrcli_parse.go @@ -110,82 +110,22 @@ func ParseCommandLine(args []string) ( options.NoInput = true // e.g. then-chain begins with seqgen } - // ================================================================ - // TODO: centralize a function/data between here & repl/verbs.go & refer to it. 
- // TODO: centralize the narrative comments as well. - // - // ---------------------------------------------------------------- - // PROBLEM TO BE SOLVED: - // - // JSON has nested structures and CSV et al. do not. For example: - // { - // "req" : { - // "method": "GET", - // "path": "api/check", - // } - // } - // - // For CSV we flatten this down to - // - // { - // "req.method": "GET", - // "req.path": "api/check" - // } - // - // ---------------------------------------------------------------- - // APPROACH: - // - // Use the Principle of Least Surprise (POLS). - // - // * If input is JSON and output is JSON: - // o Records can be nested from record-read - // o They remain that way through the Miller record-processing stream - // o They are nested on record-write - // o No action needs to be taken - // - // * If input is JSON and output is non-JSON: - // o Records can be nested from record-read - // o They remain that way through the Miller record-processing stream - // o On record-write, nested structures will be converted to string (carriage - // returns and all) using json_stringify. People *might* want this but - // (using POLS) we will (by default) AUTO-FLATTEN for them. There is a - // --no-auto-unflatten CLI flag for those who want it. - // - // * If input is non-JSON and output is non-JSON: - // o If there is a "req.method" field, people should be able to do - // 'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore - // no auto-unflatten on input. People can insert an unflatten verb - // into their verb chain if they really want unflatten for non-JSON - // files. - // o The DSL can make nested data, so AUTO-FLATTEN at output. - // - // * If input is non-JSON and output is JSON: - // o Default is to auto-unflatten at output. - // o There is a --no-auto-unflatten for those who want it. 
- // ================================================================ - - ifmt := options.ReaderOptions.InputFileFormat - ofmt := options.WriterOptions.OutputFileFormat - oflatsep := options.WriterOptions.OFLATSEP - - if options.WriterOptions.AutoFlatten { - if ofmt != "json" { - transformer, err := transformers.NewTransformerFlatten(oflatsep, nil) - lib.InternalCodingErrorIf(err != nil) - lib.InternalCodingErrorIf(transformer == nil) - recordTransformers = append(recordTransformers, transformer) - } + if DecideFinalFlatten(&options) { + // E.g. '{"req": {"method": "GET", "path": "/api/check"}}' becomes + // req.method=GET,req.path=/api/check. + transformer, err := transformers.NewTransformerFlatten(options.WriterOptions.OFLATSEP, nil) + lib.InternalCodingErrorIf(err != nil) + lib.InternalCodingErrorIf(transformer == nil) + recordTransformers = append(recordTransformers, transformer) } - if options.WriterOptions.AutoUnflatten { - if ifmt != "json" { - if ofmt == "json" { - transformer, err := transformers.NewTransformerUnflatten(oflatsep, nil) - lib.InternalCodingErrorIf(err != nil) - lib.InternalCodingErrorIf(transformer == nil) - recordTransformers = append(recordTransformers, transformer) - } - } + if DecideFinalUnflatten(&options) { + // E.g. req.method=GET,req.path=/api/check becomes + // '{"req": {"method": "GET", "path": "/api/check"}}' + transformer, err := transformers.NewTransformerUnflatten(options.WriterOptions.OFLATSEP, nil) + lib.InternalCodingErrorIf(err != nil) + lib.InternalCodingErrorIf(transformer == nil) + recordTransformers = append(recordTransformers, transformer) } // There may already be one or more because of --from on the command line, @@ -211,6 +151,84 @@ func ParseCommandLine(args []string) ( return options, recordTransformers, nil } +// ================================================================ +// Decide whether to insert a flatten or unflatten verb at the end of the +// chain. 
See also repl/verbs.go which handles the same issue in the REPL. +// +// ---------------------------------------------------------------- +// PROBLEM TO BE SOLVED: +// +// JSON has nested structures and CSV et al. do not. For example: +// { +// "req" : { +// "method": "GET", +// "path": "api/check" +// } +// } +// +// For CSV we flatten this down to +// +// { +// "req.method": "GET", +// "req.path": "api/check" +// } +// +// ---------------------------------------------------------------- +// APPROACH: +// +// Use the Principle of Least Surprise (POLS). +// +// * If input is JSON and output is JSON: +// o Records can be nested from record-read +// o They remain that way through the Miller record-processing stream +// o They are nested on record-write +// o No action needs to be taken +// +// * If input is JSON and output is non-JSON: +// o Records can be nested from record-read +// o They remain that way through the Miller record-processing stream +// o On record-write, nested structures will be converted to string (carriage +// returns and all) using json_stringify. People *might* want this but +// (using POLS) we will (by default) AUTO-FLATTEN for them. There is a +// --no-auto-flatten CLI flag for those who want it. +// +// * If input is non-JSON and output is non-JSON: +// o If there is a "req.method" field, people should be able to do +// 'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore +// no auto-unflatten on input. People can insert an unflatten verb +// into their verb chain if they really want unflatten for non-JSON +// files. +// o The DSL can make nested data, so AUTO-FLATTEN at output. +// +// * If input is non-JSON and output is JSON: +// o Default is to auto-unflatten at output. +// o There is a --no-auto-unflatten for those who want it. 
+// ================================================================ + +func DecideFinalFlatten(options *cliutil.TOptions) bool { + ofmt := options.WriterOptions.OutputFileFormat + if options.WriterOptions.AutoFlatten { + if ofmt != "json" { + return true + } + } + return false +} + +func DecideFinalUnflatten(options *cliutil.TOptions) bool { + ifmt := options.ReaderOptions.InputFileFormat + ofmt := options.WriterOptions.OutputFileFormat + + if options.WriterOptions.AutoUnflatten { + if ifmt != "json" { + if ofmt == "json" { + return true + } + } + } + return false +} + // ---------------------------------------------------------------- // Returns a list of transformers, from the starting point in args given by *pargi. // Bumps *pargi to point to remaining post-transformer-setup args, i.e. filenames. diff --git a/go/todo.txt b/go/todo.txt index f7f9aba3f..0063f56b1 100644 --- a/go/todo.txt +++ b/go/todo.txt @@ -1,4 +1,5 @@ ----------------------------------------------------------------- TOP OF LIST: +---------------------------------------------------------------- +TOP OF LIST: ! issues ! ! rmd ex1 even simpler -- commarect @@ -29,25 +30,10 @@ mlrtut links: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -* revisit flatten/flatten: - ! mlr --csv --from x.csv put -q '@a["b"]=1;@a["c"]=2; emitp @a'` - mlr --opprint put '$f=asserting_map($*)' ./reg-test/input/nullvals.dkvp - mlr --opprint put '$f=asserting_map($*)' then flatten ./reg-test/input/nullvals.dkvp - - - centralize POLS comment/method from $repl/entry.go - - put into the go/README.md - - schedule for doc6 - - refactor/rename args in mlrmain - - avoid if flatten/unflatten verbs are anywhere in the chain? - * repl fu: - * :rw -- doc & UT - * :reopen verb - * auto-unflatten / auto-flatten UT - o tilde-expand for load/open ... - if '~' is in the string, run it though sh -c echo ... 
@@ -510,6 +496,7 @@ i https://en.wikipedia.org/wiki/Delimiter#Delimiter_collision o the former is not necessarily in sync with the output record stream * dev-note on why `int` not `int64` -- processor-arch & those who most need it get it * document tee -p +* doc auto-flatten/auto-unflatten -- incl narrative from mlrcli_parse.go * doc6: default flatsep is now "." not ":" in keeping with JSON culture ? allow [[...]] / [[[...]]] at assignment LHS