mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
mlr unspace verb (#1167)
* mlr unspace verb * unit tests * unit tests * lint
This commit is contained in:
parent
b89371d6f6
commit
b518bf0fe5
16 changed files with 317 additions and 4 deletions
|
|
@ -1,4 +1,3 @@
|
|||
a b c,def,g h i
|
||||
123,4567,890
|
||||
2468,1357,3579
|
||||
9987,3312,4543
|
||||
column 1,column 2,column 3
|
||||
apple,ball,cat
|
||||
dale egg,fish,gale
|
||||
|
|
|
|||
|
|
|
@ -4078,6 +4078,72 @@ count color shape flag
|
|||
2 yellow triangle 1
|
||||
</pre>
|
||||
|
||||
## unspace
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr unspace --help</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
Usage: mlr unspace [options]
|
||||
Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
|
||||
Options:
|
||||
-f {x} Replace spaces with specified filler character.
|
||||
-k Unspace only keys, not keys and values.
|
||||
-v Unspace only values, not keys and values.
|
||||
-h|--help Show this message.
|
||||
</pre>
|
||||
|
||||
The primary use-case is for PPRINT output, which is space-delimited. For example:
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>cat data/spaces.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
column 1, column 2, column 3
|
||||
apple,ball,cat
|
||||
dale egg,fish,gale
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --icsv --opprint cat data/spaces.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
column 1 column 2 column 3
|
||||
apple ball cat
|
||||
dale egg fish gale
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --icsv --opprint cat data/spaces.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
column 1 column 2 column 3
|
||||
apple ball cat
|
||||
dale egg fish gale
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --icsv --opprint unspace data/spaces.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
column_1 _column_2 _column_3
|
||||
apple ball cat
|
||||
dale_egg fish gale
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
column_1 apple
|
||||
_column_2 ball
|
||||
_column_3 cat
|
||||
|
||||
column_1 dale_egg
|
||||
_column_2 fish
|
||||
_column_3 gale
|
||||
</pre>
|
||||
|
||||
## unsparsify
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
|
|
|
|||
|
|
@ -1229,6 +1229,34 @@ GENMD-RUN-COMMAND
|
|||
mlr --opprint uniq -a -c data/repeats.dkvp
|
||||
GENMD-EOF
|
||||
|
||||
## unspace
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr unspace --help
|
||||
GENMD-EOF
|
||||
|
||||
The primary use-case is for PPRINT output, which is space-delimited. For example:
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
cat data/spaces.csv
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --icsv --opprint cat data/spaces.csv
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --icsv --opprint cat data/spaces.csv
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --icsv --opprint unspace data/spaces.csv
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat
|
||||
GENMD-EOF
|
||||
|
||||
## unsparsify
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
|
|||
UTF8ToLatin1Setup,
|
||||
UnflattenSetup,
|
||||
UniqSetup,
|
||||
UnspaceSetup,
|
||||
UnsparsifySetup,
|
||||
}
|
||||
|
||||
|
|
|
|||
190
internal/pkg/transformers/unspace.go
Normal file
190
internal/pkg/transformers/unspace.go
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
package transformers
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/johnkerl/miller/internal/pkg/cli"
|
||||
"github.com/johnkerl/miller/internal/pkg/mlrval"
|
||||
"github.com/johnkerl/miller/internal/pkg/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameUnspace = "unspace"
|
||||
|
||||
var UnspaceSetup = TransformerSetup{
|
||||
Verb: verbNameUnspace,
|
||||
UsageFunc: transformerUnspaceUsage,
|
||||
ParseCLIFunc: transformerUnspaceParseCLI,
|
||||
IgnoresInput: false,
|
||||
}
|
||||
|
||||
func transformerUnspaceUsage(
|
||||
o *os.File,
|
||||
) {
|
||||
fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameUnspace)
|
||||
fmt.Fprintf(o, "Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.\n")
|
||||
fmt.Fprintf(o, "Options:\n")
|
||||
fmt.Fprintf(o, "-f {x} Replace spaces with specified filler character.\n")
|
||||
fmt.Fprintf(o, "-k Unspace only keys, not keys and values.\n")
|
||||
fmt.Fprintf(o, "-v Unspace only values, not keys and values.\n")
|
||||
fmt.Fprintf(o, "-h|--help Show this message.\n")
|
||||
}
|
||||
|
||||
func transformerUnspaceParseCLI(
|
||||
pargi *int,
|
||||
argc int,
|
||||
args []string,
|
||||
_ *cli.TOptions,
|
||||
doConstruct bool, // false for first pass of CLI-parse, true for second pass
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
verb := args[argi]
|
||||
argi++
|
||||
|
||||
filler := "_"
|
||||
which := "keys_and_values"
|
||||
|
||||
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
||||
opt := args[argi]
|
||||
if !strings.HasPrefix(opt, "-") {
|
||||
break // No more flag options to process
|
||||
}
|
||||
if args[argi] == "--" {
|
||||
break // All transformers must do this so main-flags can follow verb-flags
|
||||
}
|
||||
argi++
|
||||
|
||||
if opt == "-h" || opt == "--help" {
|
||||
transformerUnspaceUsage(os.Stdout)
|
||||
os.Exit(0)
|
||||
|
||||
} else if opt == "-f" {
|
||||
filler = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "-k" {
|
||||
which = "keys_only"
|
||||
|
||||
} else if opt == "-v" {
|
||||
which = "values_only"
|
||||
|
||||
} else {
|
||||
transformerUnspaceUsage(os.Stderr)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
*pargi = argi
|
||||
if !doConstruct { // All transformers must do this for main command-line parsing
|
||||
return nil
|
||||
}
|
||||
|
||||
transformer, err := NewTransformerUnspace(filler, which)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
return transformer
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
type TransformerUnspace struct {
|
||||
filler string
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerUnspace(
|
||||
filler string,
|
||||
which string,
|
||||
) (*TransformerUnspace, error) {
|
||||
tr := &TransformerUnspace{filler: filler}
|
||||
if which == "keys_only" {
|
||||
tr.recordTransformerFunc = tr.transformKeysOnly
|
||||
} else if which == "values_only" {
|
||||
tr.recordTransformerFunc = tr.transformValuesOnly
|
||||
} else {
|
||||
tr.recordTransformerFunc = tr.transformKeysAndValues
|
||||
}
|
||||
return tr, nil
|
||||
}
|
||||
|
||||
func (tr *TransformerUnspace) Transform(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
||||
inputDownstreamDoneChannel <-chan bool,
|
||||
outputDownstreamDoneChannel chan<- bool,
|
||||
) {
|
||||
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
||||
if !inrecAndContext.EndOfStream {
|
||||
tr.recordTransformerFunc(
|
||||
inrecAndContext,
|
||||
outputRecordsAndContexts,
|
||||
inputDownstreamDoneChannel,
|
||||
outputDownstreamDoneChannel,
|
||||
)
|
||||
} else { // end of record stream
|
||||
outputRecordsAndContexts.PushBack(inrecAndContext)
|
||||
}
|
||||
}
|
||||
|
||||
func (tr *TransformerUnspace) transformKeysOnly(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
||||
_ <-chan bool,
|
||||
__ chan<- bool,
|
||||
) {
|
||||
inrec := inrecAndContext.Record
|
||||
newrec := mlrval.NewMlrmapAsRecord()
|
||||
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
||||
newkey := tr.unspace(pe.Key)
|
||||
// Reference not copy since this is ownership transfer of the value from the now-abandoned inrec
|
||||
newrec.PutReference(newkey, pe.Value)
|
||||
}
|
||||
outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
|
||||
}
|
||||
|
||||
func (tr *TransformerUnspace) transformValuesOnly(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
||||
_ <-chan bool,
|
||||
__ chan<- bool,
|
||||
) {
|
||||
inrec := inrecAndContext.Record
|
||||
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
||||
stringval, ok := pe.Value.GetStringValue()
|
||||
if ok {
|
||||
pe.Value = mlrval.FromString(tr.unspace(stringval))
|
||||
}
|
||||
}
|
||||
outputRecordsAndContexts.PushBack(types.NewRecordAndContext(inrec, &inrecAndContext.Context))
|
||||
}
|
||||
|
||||
func (tr *TransformerUnspace) transformKeysAndValues(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
||||
_ <-chan bool,
|
||||
__ chan<- bool,
|
||||
) {
|
||||
inrec := inrecAndContext.Record
|
||||
newrec := mlrval.NewMlrmapAsRecord()
|
||||
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
||||
newkey := tr.unspace(pe.Key)
|
||||
stringval, ok := pe.Value.GetStringValue()
|
||||
if ok {
|
||||
stringval = tr.unspace(stringval)
|
||||
newrec.PutReference(newkey, mlrval.FromString(stringval))
|
||||
} else {
|
||||
newrec.PutReference(newkey, pe.Value)
|
||||
}
|
||||
}
|
||||
outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
|
||||
}
|
||||
|
||||
func (tr *TransformerUnspace) unspace(input string) string {
|
||||
return strings.ReplaceAll(input, " ", tr.filler)
|
||||
}
|
||||
|
|
@ -1271,6 +1271,16 @@ Options:
|
|||
With -n, produces only one record which is the unique-record count.
|
||||
With neither -c nor -n, produces unique records.
|
||||
|
||||
================================================================
|
||||
unspace
|
||||
Usage: mlr unspace [options]
|
||||
Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
|
||||
Options:
|
||||
-f {x} Replace spaces with specified filler character.
|
||||
-k Unspace only keys, not keys and values.
|
||||
-v Unspace only values, not keys and values.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
unsparsify
|
||||
Usage: mlr unsparsify [options]
|
||||
|
|
|
|||
1
test/cases/verb-unspace/0001/cmd
Normal file
1
test/cases/verb-unspace/0001/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --c2p unspace test/input/spaces.csv
|
||||
0
test/cases/verb-unspace/0001/experr
Normal file
0
test/cases/verb-unspace/0001/experr
Normal file
4
test/cases/verb-unspace/0001/expout
Normal file
4
test/cases/verb-unspace/0001/expout
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a_b c _d_e
|
||||
1 - 3
|
||||
4_5 6 _7__8
|
||||
9 10 11
|
||||
1
test/cases/verb-unspace/0002/cmd
Normal file
1
test/cases/verb-unspace/0002/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --c2p unspace -k test/input/spaces.csv
|
||||
0
test/cases/verb-unspace/0002/experr
Normal file
0
test/cases/verb-unspace/0002/experr
Normal file
4
test/cases/verb-unspace/0002/expout
Normal file
4
test/cases/verb-unspace/0002/expout
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a_b c _d_e
|
||||
1 - 3
|
||||
4 5 6 7 8
|
||||
9 10 11
|
||||
1
test/cases/verb-unspace/0003/cmd
Normal file
1
test/cases/verb-unspace/0003/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --c2p unspace -v test/input/spaces.csv
|
||||
0
test/cases/verb-unspace/0003/experr
Normal file
0
test/cases/verb-unspace/0003/experr
Normal file
4
test/cases/verb-unspace/0003/expout
Normal file
4
test/cases/verb-unspace/0003/expout
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a b c d e
|
||||
1 - 3
|
||||
4_5 6 _7__8
|
||||
9 10 11
|
||||
4
test/input/spaces.csv
Normal file
4
test/input/spaces.csv
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a b,c, d e
|
||||
1,,3
|
||||
4 5,6, 7 8
|
||||
9,10,11
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue