From b518bf0fe5992a3204bdc603c049be73abdd94bf Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 1 Jan 2023 16:44:06 -0500 Subject: [PATCH] mlr unspace verb (#1167) * mlr unspace verb * unit tests * unit tests * lint --- docs/src/data/spaces.csv | 7 +- docs/src/reference-verbs.md | 66 ++++++ docs/src/reference-verbs.md.in | 28 +++ .../pkg/transformers/aaa_transformer_table.go | 1 + internal/pkg/transformers/unspace.go | 190 ++++++++++++++++++ test/cases/cli-help/0001/expout | 10 + test/cases/verb-unspace/0001/cmd | 1 + test/cases/verb-unspace/0001/experr | 0 test/cases/verb-unspace/0001/expout | 4 + test/cases/verb-unspace/0002/cmd | 1 + test/cases/verb-unspace/0002/experr | 0 test/cases/verb-unspace/0002/expout | 4 + test/cases/verb-unspace/0003/cmd | 1 + test/cases/verb-unspace/0003/experr | 0 test/cases/verb-unspace/0003/expout | 4 + test/input/spaces.csv | 4 + 16 files changed, 317 insertions(+), 4 deletions(-) create mode 100644 internal/pkg/transformers/unspace.go create mode 100644 test/cases/verb-unspace/0001/cmd create mode 100644 test/cases/verb-unspace/0001/experr create mode 100644 test/cases/verb-unspace/0001/expout create mode 100644 test/cases/verb-unspace/0002/cmd create mode 100644 test/cases/verb-unspace/0002/experr create mode 100644 test/cases/verb-unspace/0002/expout create mode 100644 test/cases/verb-unspace/0003/cmd create mode 100644 test/cases/verb-unspace/0003/experr create mode 100644 test/cases/verb-unspace/0003/expout create mode 100644 test/input/spaces.csv diff --git a/docs/src/data/spaces.csv b/docs/src/data/spaces.csv index b2838bcf1..5868e0960 100644 --- a/docs/src/data/spaces.csv +++ b/docs/src/data/spaces.csv @@ -1,4 +1,3 @@ -a b c,def,g h i -123,4567,890 -2468,1357,3579 -9987,3312,4543 +column 1,column 2,column 3 +apple,ball,cat +dale egg,fish,gale diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 666203d8b..1bbeb2e70 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ 
-4078,6 +4078,72 @@ count color shape flag 2 yellow triangle 1 +## unspace + +
+mlr unspace --help
+
+
+Usage: mlr unspace [options]
+Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
+Options:
+-f {x}    Replace spaces with specified filler character.
+-k        Unspace only keys, not keys and values.
+-v        Unspace only values, not keys and values.
+-h|--help Show this message.
+
+ +The primary use-case is for PPRINT output, which is space-delimited. For example: + +
+cat data/spaces.csv
+
+
+column 1, column 2, column 3
+apple,ball,cat
+dale egg,fish,gale
+
+ +
+mlr --icsv --opprint cat data/spaces.csv
+
+
+column 1  column 2  column 3
+apple    ball      cat
+dale egg fish      gale
+
+ +
+mlr --icsv --opprint cat data/spaces.csv
+
+
+column 1  column 2  column 3
+apple    ball      cat
+dale egg fish      gale
+
+ +
+mlr --icsv --opprint unspace data/spaces.csv
+
+
+column_1 _column_2 _column_3
+apple    ball      cat
+dale_egg fish      gale
+
+ +
+mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat
+
+
+column_1  apple
+_column_2 ball
+_column_3 cat
+
+column_1  dale_egg
+_column_2 fish
+_column_3 gale
+
+ ## unsparsify
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index ff2776e41..40fbbe4f1 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -1229,6 +1229,34 @@ GENMD-RUN-COMMAND
 mlr --opprint uniq -a -c data/repeats.dkvp
 GENMD-EOF
 
+## unspace
+
+GENMD-RUN-COMMAND
+mlr unspace --help
+GENMD-EOF
+
+The primary use-case is for PPRINT output, which is space-delimited. For example:
+
+GENMD-RUN-COMMAND
+cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint unspace data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat
+GENMD-EOF
+
 ## unsparsify
 
 GENMD-RUN-COMMAND
diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go
index fc2a4bc54..50f8d0a63 100644
--- a/internal/pkg/transformers/aaa_transformer_table.go
+++ b/internal/pkg/transformers/aaa_transformer_table.go
@@ -73,6 +73,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	UTF8ToLatin1Setup,
 	UnflattenSetup,
 	UniqSetup,
+	UnspaceSetup,
 	UnsparsifySetup,
 }
 
diff --git a/internal/pkg/transformers/unspace.go b/internal/pkg/transformers/unspace.go
new file mode 100644
index 000000000..20e2b3365
--- /dev/null
+++ b/internal/pkg/transformers/unspace.go
@@ -0,0 +1,190 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameUnspace = "unspace" // verb name as typed after "mlr"; also printed in the usage text
+
+var UnspaceSetup = TransformerSetup{ // setup entry for this verb; listed in TRANSFORMER_LOOKUP_TABLE
+	Verb:         verbNameUnspace,
+	UsageFunc:    transformerUnspaceUsage,
+	ParseCLIFunc: transformerUnspaceParseCLI,
+	IgnoresInput: false,
+}
+
+func transformerUnspaceUsage( // writes the unspace verb's help text to o
+	o *os.File,
+) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameUnspace)
+	fmt.Fprintf(o, "Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.\n")
+	fmt.Fprintf(o, "Options:\n")
+	fmt.Fprintf(o, "-f {x}    Replace spaces with specified filler character.\n")
+	fmt.Fprintf(o, "-k        Unspace only keys, not keys and values.\n")
+	fmt.Fprintf(o, "-v        Unspace only values, not keys and values.\n")
+	fmt.Fprintf(o, "-h|--help Show this message.\n")
+}
+
+func transformerUnspaceParseCLI( // parses the verb's flags; builds the transformer only when doConstruct is true
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+
+	// Skip the verb name from the current spot in the mlr command line
+	argi := *pargi
+	verb := args[argi]
+	argi++
+
+	filler := "_"              // replacement for each space unless -f overrides it
+	which := "keys_and_values" // mode; -k switches to "keys_only", -v to "values_only"
+
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		if !strings.HasPrefix(opt, "-") {
+			break // No more flag options to process
+		}
+		if args[argi] == "--" {
+			break // All transformers must do this so main-flags can follow verb-flags
+		}
+		argi++ // step past the flag itself
+
+		if opt == "-h" || opt == "--help" {
+			transformerUnspaceUsage(os.Stdout)
+			os.Exit(0)
+
+		} else if opt == "-f" {
+			filler = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) // presumably reads the value after -f and advances argi — confirm in cli
+
+		} else if opt == "-k" {
+			which = "keys_only"
+
+		} else if opt == "-v" {
+			which = "values_only"
+
+		} else { // unrecognized flag: usage goes to stderr, then exit with status 1
+			transformerUnspaceUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerUnspace(filler, which)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// ----------------------------------------------------------------
+type TransformerUnspace struct { // record transformer that replaces spaces with a filler string
+	filler                string                // text substituted for each " " by the unspace method
+	recordTransformerFunc RecordTransformerFunc // per-record handler chosen in NewTransformerUnspace
+}
+
+func NewTransformerUnspace( // builds an unspace transformer for the given filler and mode
+	filler string,
+	which string, // "keys_only", "values_only", or anything else for keys and values
+) (*TransformerUnspace, error) {
+	tr := &TransformerUnspace{filler: filler}
+	if which == "keys_only" {
+		tr.recordTransformerFunc = tr.transformKeysOnly
+	} else if which == "values_only" {
+		tr.recordTransformerFunc = tr.transformValuesOnly
+	} else { // every other value of which, including "keys_and_values"
+		tr.recordTransformerFunc = tr.transformKeysAndValues
+	}
+	return tr, nil // the error result is always nil in this constructor
+}
+
+func (tr *TransformerUnspace) Transform( // runs the selected handler on each record; forwards end-of-stream as-is
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+	if !inrecAndContext.EndOfStream {
+		tr.recordTransformerFunc( // one of transformKeysOnly, transformValuesOnly, transformKeysAndValues
+			inrecAndContext,
+			outputRecordsAndContexts,
+			inputDownstreamDoneChannel,
+			outputDownstreamDoneChannel,
+		)
+	} else { // end of record stream
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	}
+}
+
+func (tr *TransformerUnspace) transformKeysOnly( // emits a new record with unspaced keys and the original values
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	inrec := inrecAndContext.Record
+	newrec := mlrval.NewMlrmapAsRecord()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		newkey := tr.unspace(pe.Key) // NOTE(review): distinct keys may become equal here — confirm how PutReference treats duplicates
+		// Reference not copy since this is ownership transfer of the value from the now-abandoned inrec
+		newrec.PutReference(newkey, pe.Value)
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
+}
+
+func (tr *TransformerUnspace) transformValuesOnly( // unspaces string values on the input record itself; keys are untouched
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	inrec := inrecAndContext.Record
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		stringval, ok := pe.Value.GetStringValue() // presumably ok is false for non-string values — confirm in mlrval
+		if ok {
+			pe.Value = mlrval.FromString(tr.unspace(stringval))
+		}
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(inrec, &inrecAndContext.Context))
+}
+
+func (tr *TransformerUnspace) transformKeysAndValues( // emits a new record with unspaced keys and unspaced string values
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	inrec := inrecAndContext.Record
+	newrec := mlrval.NewMlrmapAsRecord()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		newkey := tr.unspace(pe.Key)
+		stringval, ok := pe.Value.GetStringValue() // presumably ok is false for non-string values — confirm in mlrval
+		if ok {
+			stringval = tr.unspace(stringval)
+			newrec.PutReference(newkey, mlrval.FromString(stringval))
+		} else { // value did not report as a string: carry the original value over unchanged
+			newrec.PutReference(newkey, pe.Value)
+		}
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
+}
+
+func (tr *TransformerUnspace) unspace(input string) string { // returns input with every " " replaced by tr.filler
+	return strings.ReplaceAll(input, " ", tr.filler) // matches only the ASCII space, not tabs or other whitespace
+}
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 45eb18823..1a276f7a4 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -1271,6 +1271,16 @@ Options:
               With -n, produces only one record which is the unique-record count.
               With neither -c nor -n, produces unique records.
 
+================================================================
+unspace
+Usage: mlr unspace [options]
+Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
+Options:
+-f {x}    Replace spaces with specified filler character.
+-k        Unspace only keys, not keys and values.
+-v        Unspace only values, not keys and values.
+-h|--help Show this message.
+
 ================================================================
 unsparsify
 Usage: mlr unsparsify [options]
diff --git a/test/cases/verb-unspace/0001/cmd b/test/cases/verb-unspace/0001/cmd
new file mode 100644
index 000000000..48be1de23
--- /dev/null
+++ b/test/cases/verb-unspace/0001/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0001/experr b/test/cases/verb-unspace/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0001/expout b/test/cases/verb-unspace/0001/expout
new file mode 100644
index 000000000..a99916fc6
--- /dev/null
+++ b/test/cases/verb-unspace/0001/expout
@@ -0,0 +1,4 @@
+a_b c  _d_e
+1   -  3
+4_5 6  _7__8
+9   10 11
diff --git a/test/cases/verb-unspace/0002/cmd b/test/cases/verb-unspace/0002/cmd
new file mode 100644
index 000000000..9d58b9d84
--- /dev/null
+++ b/test/cases/verb-unspace/0002/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace -k test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0002/experr b/test/cases/verb-unspace/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0002/expout b/test/cases/verb-unspace/0002/expout
new file mode 100644
index 000000000..a71910cff
--- /dev/null
+++ b/test/cases/verb-unspace/0002/expout
@@ -0,0 +1,4 @@
+a_b c  _d_e
+1   -  3
+4 5 6   7  8
+9   10 11
diff --git a/test/cases/verb-unspace/0003/cmd b/test/cases/verb-unspace/0003/cmd
new file mode 100644
index 000000000..2eb62d0d4
--- /dev/null
+++ b/test/cases/verb-unspace/0003/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace -v test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0003/experr b/test/cases/verb-unspace/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0003/expout b/test/cases/verb-unspace/0003/expout
new file mode 100644
index 000000000..c47dcd4df
--- /dev/null
+++ b/test/cases/verb-unspace/0003/expout
@@ -0,0 +1,4 @@
+a b c   d e
+1   -  3
+4_5 6  _7__8
+9   10 11
diff --git a/test/input/spaces.csv b/test/input/spaces.csv
new file mode 100644
index 000000000..3e52728f3
--- /dev/null
+++ b/test/input/spaces.csv
@@ -0,0 +1,4 @@
+a b,c, d e
+1,,3
+4 5,6, 7  8
+9,10,11