From ee86189f12bc5e53921f732c5afb76d25cd3dabc Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Thu, 11 Feb 2021 00:49:02 -0500
Subject: [PATCH] flatten/unflatten code-dedupe

---
 go/reg-test/cases/case-repl.sh        |  21 ++++
 go/reg-test/expected/case-repl.sh.out |  96 +++++++++++++++
 go/src/miller/auxents/repl/entry.go   |   6 +
 go/src/miller/auxents/repl/verbs.go   |  75 ++----------
 go/src/miller/cli/mlrcli_parse.go     | 166 ++++++++++++++------------
 go/todo.txt                           |  19 +--
 6 files changed, 227 insertions(+), 156 deletions(-)

diff --git a/go/reg-test/cases/case-repl.sh b/go/reg-test/cases/case-repl.sh
index 57c26953c..ef9fcd920 100644
--- a/go/reg-test/cases/case-repl.sh
+++ b/go/reg-test/cases/case-repl.sh
@@ -66,3 +66,24 @@ run_mlr repl <<EOF
 :context
 \$*
 EOF
+
+# ----------------------------------------------------------------
+run_mlr repl --j2x $indir/flatten-input-2.json <<EOF
+:rw
+:rw
+EOF
+
+run_mlr repl --x2j $indir/unflatten-input.xtab <<EOF
+:rw
+:rw
+EOF
+
+run_mlr repl --xtab $indir/unflatten-input.xtab <<EOF
+:rw
+:rw
+EOF
+
+run_mlr repl --json $indir/flatten-input-2.json <<EOF
+:rw
+:rw
+EOF
diff --git a/go/reg-test/expected/case-repl.sh.out b/go/reg-test/expected/case-repl.sh.out
index 8a8ad53d9..e4d026add 100644
--- a/go/reg-test/expected/case-repl.sh.out
+++ b/go/reg-test/expected/case-repl.sh.out
@@ -78,3 +78,99 @@ FILENAME="./reg-test/input/medium.dkvp",FILENUM=1,NR=40,FNR=40
   "y": 0.2884018711352886
 }
 
+mlr repl --j2x ./reg-test/input/flatten-input-2.json
+hostname                    localhost
+pid                         12345
+req.id                      6789
+req.method                  GET
+req.path                    api/check
+req.host                    foo.bar
+req.headers.host            bar.baz
+req.headers.user-agent      browser
+res.status_code             200
+res.header.content-type     text
+res.header.content-encoding plain
+empty1                      {}
+empty2                      []
+wrapper.empty3              {}
+wrapper.emtpy4              []
+End of record stream
+
+mlr repl --x2j ./reg-test/input/unflatten-input.xtab
+{
+  "hostname": "localhost",
+  "pid": 12345,
+  "req": {
+    "id": 6789,
+    "method": "GET",
+    "path": "api/check",
+    "host": "foo.bar",
+    "headers": {
+      "host": "bar.baz",
+      "user-agent": "browser"
+    }
+  },
+  "res": {
+    "status_code": 200,
+    "header": {
+      "content-type": "text",
+      "content-encoding": "plain"
+    }
+  },
+  "empty1": {},
+  "empty2": [],
+  "wrapper": {
+    "empty3": {},
+    "emtpy4": []
+  }
+}
+End of record stream
+
+mlr repl --xtab ./reg-test/input/unflatten-input.xtab
+hostname                    localhost
+pid                         12345
+req.id                      6789
+req.method                  GET
+req.path                    api/check
+req.host                    foo.bar
+req.headers.host            bar.baz
+req.headers.user-agent      browser
+res.status_code             200
+res.header.content-type     text
+res.header.content-encoding plain
+empty1                      {}
+empty2                      []
+wrapper.empty3              {}
+wrapper.emtpy4              []
+End of record stream
+
+mlr repl --json ./reg-test/input/flatten-input-2.json
+{
+  "hostname": "localhost",
+  "pid": 12345,
+  "req": {
+    "id": 6789,
+    "method": "GET",
+    "path": "api/check",
+    "host": "foo.bar",
+    "headers": {
+      "host": "bar.baz",
+      "user-agent": "browser"
+    }
+  },
+  "res": {
+    "status_code": 200,
+    "header": {
+      "content-type": "text",
+      "content-encoding": "plain"
+    }
+  },
+  "empty1": {},
+  "empty2": [],
+  "wrapper": {
+    "empty3": {},
+    "emtpy4": []
+  }
+}
+End of record stream
+
diff --git a/go/src/miller/auxents/repl/entry.go b/go/src/miller/auxents/repl/entry.go
index c39de1656..fb3547b87 100644
--- a/go/src/miller/auxents/repl/entry.go
+++ b/go/src/miller/auxents/repl/entry.go
@@ -27,6 +27,7 @@ import (
 	"path"
 	"strings"
 
+	"miller/cli"
 	"miller/cliutil"
 )
 
@@ -106,6 +107,11 @@ func ReplMain(args []string) int {
 		}
 	}
 
+	// Auto-flatten and auto-unflatten are on by default, but whether they actually apply
+	// depends on the input and output formats (e.g. JSON to JSON needs neither). See mlrcli_parse.go.
+	options.WriterOptions.AutoFlatten = cli.DecideFinalFlatten(&options)
+	options.WriterOptions.AutoUnflatten = cli.DecideFinalUnflatten(&options)
+
 	repl, err := NewRepl(
 		exeName,
 		replName,
diff --git a/go/src/miller/auxents/repl/verbs.go b/go/src/miller/auxents/repl/verbs.go
index a3a86576e..bd182d467 100644
--- a/go/src/miller/auxents/repl/verbs.go
+++ b/go/src/miller/auxents/repl/verbs.go
@@ -540,74 +540,17 @@ func handleWrite(this *Repl, args []string) bool {
 	return true
 }
 
-// ================================================================
-// Takes care of flattening nested JSON data structures to multiple fields for
-// JSON -> non-JSON.
-//
-// TODO: centralize a function/data between here & mlrcli_parse.go & refer to it.
-// TODO: centralize the narrative comments as well.
-//
-// ----------------------------------------------------------------
-// PROBLEM TO BE SOLVED:
-//
-// JSON has nested structures and CSV et al. do not. For example:
-// {
-//   "req" : {
-//     "method": "GET",
-//     "path":   "api/check",
-//   }
-// }
-//
-// For CSV we flatten this down to
-//
-// {
-//   "req.method": "GET",
-//   "req.path":   "api/check"
-// }
-//
-// ----------------------------------------------------------------
-// APPROACH:
-//
-// Use the Principle of Least Surprise (POLS).
-//
-// * If input is JSON and output is JSON:
-//   o Records can be nested from record-read
-//   o They remain that way through the Miller record-processing stream
-//   o They are nested on record-write
-//   o No action needs to be taken
-// * If input is JSON and output is non-JSON:
-//   o Records can be nested from record-read
-//   o They remain that way through the Miller record-processing stream
-//   o On record-write, nested structures will be converted to string (carriage
-//     returns and all) using json_stringify. People *might* want this but
-//     (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
-//     --no-auto-unflatten CLI flag for those who want it.
-// * If input is non-JSON and output is non-JSON:
-//   o Leave records as-is.
-//   o Example, if there is a "req.method" field, people should be able to do
-//     'mlr sort -f req.method' with no surprises. (Again, POLS.)
-//   o People can insert an unflatten verb into their verb chain if they really
-//     want unflatten for non-JSON files.
-// * If input is non-JSON and output is JSON:
-//   o Default is to auto-unflatten at output.
-//   o There is a --no-auto-unflatten for those who want it.
-// ================================================================
-
 func writeRecord(this *Repl, outrec *types.Mlrmap) {
-	ropt := &this.options.ReaderOptions
-	wopt := &this.options.WriterOptions
-	ifmt := ropt.InputFileFormat
-	ofmt := wopt.OutputFileFormat
-
-	if wopt.AutoFlatten {
-		if ifmt == "json" && ofmt != "json" {
-			outrec.Flatten(wopt.OFLATSEP)
+	if outrec != nil {
+		// E.g. '{"req": {"method": "GET", "path": "/api/check"}}' becomes
+		// req.method=GET,req.path=/api/check.
+		if this.options.WriterOptions.AutoFlatten {
+			outrec.Flatten(this.options.WriterOptions.OFLATSEP)
 		}
-	}
-
-	if wopt.AutoUnflatten {
-		if ifmt != "json" && ofmt == "json" {
-			outrec.Unflatten(wopt.OFLATSEP)
+		// E.g.  req.method=GET,req.path=/api/check becomes
+		// '{"req": {"method": "GET", "path": "/api/check"}}'
+		if this.options.WriterOptions.AutoUnflatten {
+			outrec.Unflatten(this.options.WriterOptions.OFLATSEP)
 		}
 	}
 	this.recordWriter.Write(outrec, this.outputStream)
diff --git a/go/src/miller/cli/mlrcli_parse.go b/go/src/miller/cli/mlrcli_parse.go
index 1c3ae24db..895529deb 100644
--- a/go/src/miller/cli/mlrcli_parse.go
+++ b/go/src/miller/cli/mlrcli_parse.go
@@ -110,82 +110,22 @@ func ParseCommandLine(args []string) (
 		options.NoInput = true // e.g. then-chain begins with seqgen
 	}
 
-	// ================================================================
-	// TODO: centralize a function/data between here & repl/verbs.go & refer to it.
-	// TODO: centralize the narrative comments as well.
-	//
-	// ----------------------------------------------------------------
-	// PROBLEM TO BE SOLVED:
-	//
-	// JSON has nested structures and CSV et al. do not. For example:
-	// {
-	//   "req" : {
-	//     "method": "GET",
-	//     "path":   "api/check",
-	//   }
-	// }
-	//
-	// For CSV we flatten this down to
-	//
-	// {
-	//   "req.method": "GET",
-	//   "req.path":   "api/check"
-	// }
-	//
-	// ----------------------------------------------------------------
-	// APPROACH:
-	//
-	// Use the Principle of Least Surprise (POLS).
-	//
-	// * If input is JSON and output is JSON:
-	//   o Records can be nested from record-read
-	//   o They remain that way through the Miller record-processing stream
-	//   o They are nested on record-write
-	//   o No action needs to be taken
-	//
-	// * If input is JSON and output is non-JSON:
-	//   o Records can be nested from record-read
-	//   o They remain that way through the Miller record-processing stream
-	//   o On record-write, nested structures will be converted to string (carriage
-	//     returns and all) using json_stringify. People *might* want this but
-	//     (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
-	//     --no-auto-unflatten CLI flag for those who want it.
-	//
-	// * If input is non-JSON and output is non-JSON:
-	//   o If there is a "req.method" field, people should be able to do
-	//     'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
-	//     no auto-unflatten on input.  People can insert an unflatten verb
-	//     into their verb chain if they really want unflatten for non-JSON
-	//     files.
-	//   o The DSL can make nested data, so AUTO-FLATTEN at output.
-	//
-	// * If input is non-JSON and output is JSON:
-	//   o Default is to auto-unflatten at output.
-	//   o There is a --no-auto-unflatten for those who want it.
-	// ================================================================
-
-	ifmt := options.ReaderOptions.InputFileFormat
-	ofmt := options.WriterOptions.OutputFileFormat
-	oflatsep := options.WriterOptions.OFLATSEP
-
-	if options.WriterOptions.AutoFlatten {
-		if ofmt != "json" {
-			transformer, err := transformers.NewTransformerFlatten(oflatsep, nil)
-			lib.InternalCodingErrorIf(err != nil)
-			lib.InternalCodingErrorIf(transformer == nil)
-			recordTransformers = append(recordTransformers, transformer)
-		}
+	if DecideFinalFlatten(&options) {
+		// E.g. '{"req": {"method": "GET", "path": "/api/check"}}' becomes
+		// req.method=GET,req.path=/api/check.
+		transformer, err := transformers.NewTransformerFlatten(options.WriterOptions.OFLATSEP, nil)
+		lib.InternalCodingErrorIf(err != nil)
+		lib.InternalCodingErrorIf(transformer == nil)
+		recordTransformers = append(recordTransformers, transformer)
 	}
 
-	if options.WriterOptions.AutoUnflatten {
-		if ifmt != "json" {
-			if ofmt == "json" {
-				transformer, err := transformers.NewTransformerUnflatten(oflatsep, nil)
-				lib.InternalCodingErrorIf(err != nil)
-				lib.InternalCodingErrorIf(transformer == nil)
-				recordTransformers = append(recordTransformers, transformer)
-			}
-		}
+	if DecideFinalUnflatten(&options) {
+		// E.g.  req.method=GET,req.path=/api/check becomes
+		// '{"req": {"method": "GET", "path": "/api/check"}}'
+		transformer, err := transformers.NewTransformerUnflatten(options.WriterOptions.OFLATSEP, nil)
+		lib.InternalCodingErrorIf(err != nil)
+		lib.InternalCodingErrorIf(transformer == nil)
+		recordTransformers = append(recordTransformers, transformer)
 	}
 
 	// There may already be one or more because of --from on the command line,
@@ -211,6 +151,84 @@ func ParseCommandLine(args []string) (
 	return options, recordTransformers, nil
 }
 
+// ================================================================
+// Decide whether to insert a flatten or unflatten verb at the end of the
+// chain.  See also repl/entry.go and repl/verbs.go, which reuse this in the REPL.
+//
+// ----------------------------------------------------------------
+// PROBLEM TO BE SOLVED:
+//
+// JSON has nested structures and CSV et al. do not. For example:
+// {
+//   "req" : {
+//     "method": "GET",
+//     "path":   "api/check"
+//   }
+// }
+//
+// For CSV we flatten this down to
+//
+// {
+//   "req.method": "GET",
+//   "req.path":   "api/check"
+// }
+//
+// ----------------------------------------------------------------
+// APPROACH:
+//
+// Use the Principle of Least Surprise (POLS).
+//
+// * If input is JSON and output is JSON:
+//   o Records can be nested from record-read
+//   o They remain that way through the Miller record-processing stream
+//   o They are nested on record-write
+//   o No action needs to be taken
+//
+// * If input is JSON and output is non-JSON:
+//   o Records can be nested from record-read
+//   o They remain that way through the Miller record-processing stream
+//   o On record-write, nested structures will be converted to string (carriage
+//     returns and all) using json_stringify. People *might* want this but
+//     (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
+//     --no-auto-flatten CLI flag for those who want it.
+//
+// * If input is non-JSON and output is non-JSON:
+//   o If there is a "req.method" field, people should be able to do
+//     'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
+//     no auto-unflatten on input.  People can insert an unflatten verb
+//     into their verb chain if they really want unflatten for non-JSON
+//     files.
+//   o The DSL can make nested data, so AUTO-FLATTEN at output.
+//
+// * If input is non-JSON and output is JSON:
+//   o Default is to auto-unflatten at output.
+//   o There is a --no-auto-unflatten CLI flag for those who want flat keys kept.
+// ================================================================
+
+func DecideFinalFlatten(options *cliutil.TOptions) bool {
+	ofmt := options.WriterOptions.OutputFileFormat
+	if options.WriterOptions.AutoFlatten {
+		if ofmt != "json" {
+			return true
+		}
+	}
+	return false
+}
+
+func DecideFinalUnflatten(options *cliutil.TOptions) bool {
+	ifmt := options.ReaderOptions.InputFileFormat
+	ofmt := options.WriterOptions.OutputFileFormat
+
+	if options.WriterOptions.AutoUnflatten {
+		if ifmt != "json" {
+			if ofmt == "json" {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // ----------------------------------------------------------------
 // Returns a list of transformers, from the starting point in args given by *pargi.
 // Bumps *pargi to point to remaining post-transformer-setup args, i.e. filenames.
diff --git a/go/todo.txt b/go/todo.txt
index f7f9aba3f..0063f56b1 100644
--- a/go/todo.txt
+++ b/go/todo.txt
@@ -1,4 +1,5 @@
----------------------------------------------------------------- TOP OF LIST:
+----------------------------------------------------------------
+TOP OF LIST:
 
 ! issues !
 ! rmd ex1 even simpler -- commarect
@@ -29,25 +30,10 @@ mlrtut links:
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-* revisit flatten/flatten:
-  ! mlr --csv --from x.csv put -q '@a["b"]=1;@a["c"]=2; emitp @a'`
-    mlr --opprint put '$f=asserting_map($*)' ./reg-test/input/nullvals.dkvp
-    mlr --opprint put '$f=asserting_map($*)' then flatten ./reg-test/input/nullvals.dkvp
-
-  - centralize POLS comment/method from $repl/entry.go
-  - put into the go/README.md
-  - schedule for doc6
-  - refactor/rename args in mlrmain
-  - avoid if flatten/unflatten verbs are anywhere in the chain?
-
 * repl fu:
 
-  * :rw -- doc & UT
-
   * :reopen verb
 
-  * auto-unflatten / auto-flatten UT
-
   o tilde-expand for load/open ...
     - if '~' is in the string, run it though sh -c echo ...
 
@@ -510,6 +496,7 @@ i https://en.wikipedia.org/wiki/Delimiter#Delimiter_collision
   o the former is not necessarily in sync with the output record stream
 * dev-note on why `int` not `int64` -- processor-arch & those who most need it get it
 * document tee -p
+* doc auto-flatten/auto-unflatten -- incl narrative from mlrcli_parse.go
 * doc6: default flatsep is now "." not ":" in keeping with JSON culture
 ? allow [[...]] / [[[...]]] at assignment LHS