diff --git a/docs/src/data/spaces.csv b/docs/src/data/spaces.csv index b2838bcf1..5868e0960 100644 --- a/docs/src/data/spaces.csv +++ b/docs/src/data/spaces.csv @@ -1,4 +1,3 @@ -a b c,def,g h i -123,4567,890 -2468,1357,3579 -9987,3312,4543 +column 1,column 2,column 3 +apple,ball,cat +dale egg,fish,gale diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 666203d8b..1bbeb2e70 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -4078,6 +4078,72 @@ count color shape flag 2 yellow triangle 1 +## unspace + +
+mlr unspace --help ++
+Usage: mlr unspace [options]
+Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
+Options:
+-f {x} Replace spaces with specified filler character.
+-k Unspace only keys, not keys and values.
+-v Unspace only values, not keys and values.
+-h|--help Show this message.
+
+
+The primary use-case is for PPRINT output, which is space-delimited. For example:
+
++cat data/spaces.csv ++
+column 1, column 2, column 3 +apple,ball,cat +dale egg,fish,gale ++ +
+mlr --icsv --opprint cat data/spaces.csv ++
+column 1 column 2 column 3 +apple ball cat +dale egg fish gale ++ +
+mlr --icsv --opprint cat data/spaces.csv ++
+column 1 column 2 column 3 +apple ball cat +dale egg fish gale ++ +
+mlr --icsv --opprint unspace data/spaces.csv ++
+column_1 _column_2 _column_3 +apple ball cat +dale_egg fish gale ++ +
+mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat ++
+column_1 apple +_column_2 ball +_column_3 cat + +column_1 dale_egg +_column_2 fish +_column_3 gale ++ ## unsparsify
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index ff2776e41..40fbbe4f1 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -1229,6 +1229,34 @@ GENMD-RUN-COMMAND
mlr --opprint uniq -a -c data/repeats.dkvp
GENMD-EOF
+## unspace
+
+GENMD-RUN-COMMAND
+mlr unspace --help
+GENMD-EOF
+
+The primary use-case is for PPRINT output, which is space-delimited. For example:
+
+GENMD-RUN-COMMAND
+cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint cat data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint unspace data/spaces.csv
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat
+GENMD-EOF
+
## unsparsify
GENMD-RUN-COMMAND
diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go
index fc2a4bc54..50f8d0a63 100644
--- a/internal/pkg/transformers/aaa_transformer_table.go
+++ b/internal/pkg/transformers/aaa_transformer_table.go
@@ -73,6 +73,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
UTF8ToLatin1Setup,
UnflattenSetup,
UniqSetup,
+ UnspaceSetup,
UnsparsifySetup,
}
diff --git a/internal/pkg/transformers/unspace.go b/internal/pkg/transformers/unspace.go
new file mode 100644
index 000000000..20e2b3365
--- /dev/null
+++ b/internal/pkg/transformers/unspace.go
@@ -0,0 +1,190 @@
+package transformers
+
+import (
+ "container/list"
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/johnkerl/miller/internal/pkg/cli"
+ "github.com/johnkerl/miller/internal/pkg/mlrval"
+ "github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameUnspace = "unspace" // verb name used in the setup entry and in the usage text
+// UnspaceSetup bundles the unspace verb's name, usage printer, and CLI parser.
+var UnspaceSetup = TransformerSetup{
+	Verb:         verbNameUnspace,
+	UsageFunc:    transformerUnspaceUsage,
+	ParseCLIFunc: transformerUnspaceParseCLI,
+	IgnoresInput: false,
+}
+
+// transformerUnspaceUsage writes the help text for the unspace verb to o.
+// Only the first line is formatted; the rest are emitted verbatim.
+func transformerUnspaceUsage(o *os.File) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameUnspace)
+	fmt.Fprint(o, "Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.\n")
+	fmt.Fprint(o, "Options:\n")
+	fmt.Fprint(o, "-f {x} Replace spaces with specified filler character.\n")
+	fmt.Fprint(o, "-k Unspace only keys, not keys and values.\n")
+	fmt.Fprint(o, "-v Unspace only values, not keys and values.\n")
+	fmt.Fprint(o, "-h|--help Show this message.\n")
+}
+
+// transformerUnspaceParseCLI parses the unspace verb's flags out of args,
+// starting at *pargi (which indexes the verb name itself), and advances *pargi
+// past everything it consumed. -h/--help prints usage and exits 0; an
+// unrecognized flag prints usage to stderr and exits 1. It returns nil unless
+// doConstruct is true, in which case it returns the configured transformer.
+func transformerUnspaceParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+
+	// args[*pargi] is the verb name; flag parsing starts just after it.
+	verb := args[*pargi]
+	argi := *pargi + 1
+
+	// Defaults: underscore filler, applied to both keys and values.
+	filler, which := "_", "keys_and_values"
+
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		// Stop at the first non-flag, or at "--": all transformers must do
+		// the latter so main-flags can follow verb-flags.
+		if !strings.HasPrefix(opt, "-") || opt == "--" {
+			break
+		}
+		argi++
+
+		switch opt {
+		case "-h", "--help":
+			transformerUnspaceUsage(os.Stdout)
+			os.Exit(0)
+		case "-f":
+			filler = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
+		case "-k":
+			which = "keys_only"
+		case "-v":
+			which = "values_only"
+		default:
+			transformerUnspaceUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerUnspace(filler, which)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// TransformerUnspace replaces spaces in record keys and/or values with a filler string.
+type TransformerUnspace struct {
+	filler                string                // text substituted for each space
+	recordTransformerFunc RecordTransformerFunc // per-record handler chosen by NewTransformerUnspace
+}
+
+// NewTransformerUnspace builds an unspace transformer using filler. which is "keys_only" or
+// "values_only"; any other value means keys and values. The returned error is always nil.
+func NewTransformerUnspace(filler string, which string) (*TransformerUnspace, error) {
+	tr := &TransformerUnspace{filler: filler}
+	switch which {
+	case "keys_only":
+		tr.recordTransformerFunc = tr.transformKeysOnly
+	case "values_only":
+		tr.recordTransformerFunc = tr.transformValuesOnly
+	default:
+		tr.recordTransformerFunc = tr.transformKeysAndValues
+	}
+	return tr, nil
+}
+
+// Transform is the per-record entry point. It first calls
+// HandleDefaultDownstreamDone on the two channels, then passes the
+// end-of-stream marker through unchanged or dispatches a data record to the
+// handler selected at construction time.
+func (tr *TransformerUnspace) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+	if inrecAndContext.EndOfStream { // end of record stream
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+		return
+	}
+	tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts,
+		inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+}
+
+// transformKeysOnly emits a new record whose keys have spaces replaced by the
+// filler; values are carried over untouched. The channel arguments are unused.
+func (tr *TransformerUnspace) transformKeysOnly(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	outrec := mlrval.NewMlrmapAsRecord()
+	for entry := inrecAndContext.Record.Head; entry != nil; entry = entry.Next {
+		// Reference not copy since this is ownership transfer of the value from the now-abandoned inrec
+		outrec.PutReference(tr.unspace(entry.Key), entry.Value)
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
+}
+
+// transformValuesOnly rewrites, in place, every value for which GetStringValue
+// reports ok, replacing spaces with the filler; keys and other values are kept.
+func (tr *TransformerUnspace) transformValuesOnly(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	for entry := inrecAndContext.Record.Head; entry != nil; entry = entry.Next {
+		if s, isString := entry.Value.GetStringValue(); isString {
+			entry.Value = mlrval.FromString(tr.unspace(s))
+		}
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(inrecAndContext.Record, &inrecAndContext.Context))
+}
+
+// transformKeysAndValues emits a new record in which spaces are replaced by
+// the filler in every key and in every value for which GetStringValue reports
+// ok; other values are carried over by reference.
+func (tr *TransformerUnspace) transformKeysAndValues(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	_ <-chan bool,
+	__ chan<- bool,
+) {
+	outrec := mlrval.NewMlrmapAsRecord()
+	for entry := inrecAndContext.Record.Head; entry != nil; entry = entry.Next {
+		key, value := tr.unspace(entry.Key), entry.Value
+		if s, isString := value.GetStringValue(); isString {
+			value = mlrval.FromString(tr.unspace(s))
+		}
+		// Stored by reference: either a freshly built value or the input record's own.
+		outrec.PutReference(key, value)
+	}
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
+}
+
+// unspace returns input with every space character replaced by tr.filler.
+// It is the single substitution rule shared by the key and value handlers.
+func (tr *TransformerUnspace) unspace(input string) string { return strings.ReplaceAll(input, " ", tr.filler) }
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 45eb18823..1a276f7a4 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -1271,6 +1271,16 @@ Options:
With -n, produces only one record which is the unique-record count.
With neither -c nor -n, produces unique records.
+================================================================
+unspace
+Usage: mlr unspace [options]
+Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
+Options:
+-f {x} Replace spaces with specified filler character.
+-k Unspace only keys, not keys and values.
+-v Unspace only values, not keys and values.
+-h|--help Show this message.
+
================================================================
unsparsify
Usage: mlr unsparsify [options]
diff --git a/test/cases/verb-unspace/0001/cmd b/test/cases/verb-unspace/0001/cmd
new file mode 100644
index 000000000..48be1de23
--- /dev/null
+++ b/test/cases/verb-unspace/0001/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0001/experr b/test/cases/verb-unspace/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0001/expout b/test/cases/verb-unspace/0001/expout
new file mode 100644
index 000000000..a99916fc6
--- /dev/null
+++ b/test/cases/verb-unspace/0001/expout
@@ -0,0 +1,4 @@
+a_b c _d_e
+1 - 3
+4_5 6 _7__8
+9 10 11
diff --git a/test/cases/verb-unspace/0002/cmd b/test/cases/verb-unspace/0002/cmd
new file mode 100644
index 000000000..9d58b9d84
--- /dev/null
+++ b/test/cases/verb-unspace/0002/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace -k test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0002/experr b/test/cases/verb-unspace/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0002/expout b/test/cases/verb-unspace/0002/expout
new file mode 100644
index 000000000..a71910cff
--- /dev/null
+++ b/test/cases/verb-unspace/0002/expout
@@ -0,0 +1,4 @@
+a_b c _d_e
+1 - 3
+4 5 6 7 8
+9 10 11
diff --git a/test/cases/verb-unspace/0003/cmd b/test/cases/verb-unspace/0003/cmd
new file mode 100644
index 000000000..2eb62d0d4
--- /dev/null
+++ b/test/cases/verb-unspace/0003/cmd
@@ -0,0 +1 @@
+mlr --c2p unspace -v test/input/spaces.csv
diff --git a/test/cases/verb-unspace/0003/experr b/test/cases/verb-unspace/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-unspace/0003/expout b/test/cases/verb-unspace/0003/expout
new file mode 100644
index 000000000..c47dcd4df
--- /dev/null
+++ b/test/cases/verb-unspace/0003/expout
@@ -0,0 +1,4 @@
+a b c d e
+1 - 3
+4_5 6 _7__8
+9 10 11
diff --git a/test/input/spaces.csv b/test/input/spaces.csv
new file mode 100644
index 000000000..3e52728f3
--- /dev/null
+++ b/test/input/spaces.csv
@@ -0,0 +1,4 @@
+a b,c, d e
+1,,3
+4 5,6, 7 8
+9,10,11