DKVP --incr-key option (#1839)

* Code support for --incr-key

* Add source code for online help for new flag

* Run `make dev`
This commit is contained in:
John Kerl 2025-07-20 17:05:24 -04:00 committed by GitHub
parent d264f562dc
commit fccdf215e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 119 additions and 13 deletions

View file

@ -145,6 +145,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv/tsv-only-flags
mlr help dkvp-only-flags
mlr help file-format-flags
mlr help flatten-unflatten-flags
mlr help format-conversion-keystroke-saver-flags
@ -356,6 +357,16 @@ This is simply a copy of what you should see on running `man mlr` at a command p
-N Keystroke-saver for `--implicit-csv-header
--headerless-csv-output`.
1mDKVP-ONLY FLAGS0m
These are flags which are applicable to DKVP format.
--incr-key Without this option, keyless DKVP fields are keyed by
field number. For example: `a=10,b=20,30,d=40,50` is
ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
this option, they're keyed by a running counter of
keyless fields. For example: `a=10,b=20,30,d=40,50`
is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
1mFILE-FORMAT FLAGS0m
See the File formats doc page, and or `mlr help file-formats`, for more
about file formats Miller supports.

View file

@ -124,6 +124,7 @@
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv/tsv-only-flags
mlr help dkvp-only-flags
mlr help file-format-flags
mlr help flatten-unflatten-flags
mlr help format-conversion-keystroke-saver-flags
@ -335,6 +336,16 @@
-N Keystroke-saver for `--implicit-csv-header
--headerless-csv-output`.
1mDKVP-ONLY FLAGS0m
These are flags which are applicable to DKVP format.
--incr-key Without this option, keyless DKVP fields are keyed by
field number. For example: `a=10,b=20,30,d=40,50` is
ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
this option, they're keyed by a running counter of
keyless fields. For example: `a=10,b=20,30,d=40,50`
is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
1mFILE-FORMAT FLAGS0m
See the File formats doc page, and or `mlr help file-formats`, for more
about file formats Miller supports.

View file

@ -55,6 +55,7 @@ Flags:
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv/tsv-only-flags
mlr help dkvp-only-flags
mlr help file-format-flags
mlr help flatten-unflatten-flags
mlr help format-conversion-keystroke-saver-flags

View file

@ -128,6 +128,15 @@ These are flags which are applicable to CSV format.
* `--quote-all`: Force double-quoting of CSV fields.
* `-N`: Keystroke-saver for `--implicit-csv-header --headerless-csv-output`.
## DKVP-only flags
These are flags which are applicable to DKVP format.
**Flags:**
* `--incr-key`: Without this option, keyless DKVP fields are keyed by field number. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With this option, they're keyed by a running counter of keyless fields. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
## File-format flags
See the File formats doc page, and or `mlr help file-formats`, for more

View file

@ -124,6 +124,7 @@
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv/tsv-only-flags
mlr help dkvp-only-flags
mlr help file-format-flags
mlr help flatten-unflatten-flags
mlr help format-conversion-keystroke-saver-flags
@ -335,6 +336,16 @@
-N Keystroke-saver for `--implicit-csv-header
--headerless-csv-output`.
1mDKVP-ONLY FLAGS0m
These are flags which are applicable to DKVP format.
--incr-key Without this option, keyless DKVP fields are keyed by
field number. For example: `a=10,b=20,30,d=40,50` is
ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
this option, they're keyed by a running counter of
keyless fields. For example: `a=10,b=20,30,d=40,50`
is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
1mFILE-FORMAT FLAGS0m
See the File formats doc page, and or `mlr help file-formats`, for more
about file formats Miller supports.

View file

@ -161,6 +161,7 @@ Flags:
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv/tsv-only-flags
mlr help dkvp-only-flags
mlr help file-format-flags
mlr help flatten-unflatten-flags
mlr help format-conversion-keystroke-saver-flags
@ -410,6 +411,24 @@ These are flags which are applicable to CSV format.
.fi
.if n \{\
.RE
.SH "DKVP-ONLY FLAGS"
.sp
.if n \{\
.RS 0
.\}
.nf
These are flags which are applicable to DKVP format.
--incr-key Without this option, keyless DKVP fields are keyed by
field number. For example: `a=10,b=20,30,d=40,50` is
ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
this option, they're keyed by a running counter of
keyless fields. For example: `a=10,b=20,30,d=40,50`
is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
.fi
.if n \{\
.RE
.SH "FILE-FORMAT FLAGS"
.sp

View file

@ -104,6 +104,7 @@ var FLAG_TABLE = FlagTable{
&CSVTSVOnlyFlagSection,
&JSONOnlyFlagSection,
&PPRINTOnlyFlagSection,
&DKVPOnlyFlagSection,
&CompressedDataFlagSection,
&CommentsInDataFlagSection,
&OutputColorizationFlagSection,
@ -523,6 +524,31 @@ var PPRINTOnlyFlagSection = FlagSection{
},
}
// ================================================================
// DKVP-ONLY FLAGS
// DKVPOnlyPrintInfo writes the one-line introduction for the DKVP-only flags
// section to standard output, followed by a newline. It is installed as the
// infoPrinter of DKVPOnlyFlagSection.
func DKVPOnlyPrintInfo() {
	const intro = "These are flags which are applicable to DKVP format."
	fmt.Println(intro)
}
// At package-initialization time, invoke Sort on the DKVP-only flag section.
func init() {
	DKVPOnlyFlagSection.Sort()
}
// DKVPOnlyFlagSection is the flag-table section for command-line flags which
// apply only to the DKVP file format. Its introductory help text is printed by
// DKVPOnlyPrintInfo.
var DKVPOnlyFlagSection = FlagSection{
	name:        "DKVP-only flags",
	infoPrinter: DKVPOnlyPrintInfo,
	flags: []Flag{
		{
			name: "--incr-key",
			help: "Without this option, keyless DKVP fields are keyed by field number. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With this option, they're keyed by a running counter of keyless fields. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.",
			// --incr-key changes how DKVP *input* is keyed, so it must set the
			// reader option IncrementImplicitKey, which the DKVP record reader
			// consults when it meets a field with no explicit key. It must not
			// touch any writer option.
			parser: func(args []string, argc int, pargi *int, options *TOptions) {
				options.ReaderOptions.IncrementImplicitKey = true
				// The flag takes no argument: consume only the flag itself.
				*pargi += 1
			},
		},
	},
}
// ================================================================
// LEGACY FLAGS

View file

@ -53,11 +53,12 @@ type TReaderOptions struct {
irsWasSpecified bool
allowRepeatIFSWasSpecified bool
UseImplicitHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
BarredPprintInput bool
UseImplicitHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
BarredPprintInput bool
IncrementImplicitKey bool
CommentHandling TCommentHandling
CommentString string

View file

@ -14,7 +14,7 @@ import (
"github.com/johnkerl/miller/v6/pkg/types"
)
// splitter_DKVP_NIDX is a function type for the one bit of code differing
// line_splitter_DKVP_NIDX is a function type for the one bit of code differing
// between the DKVP reader and the NIDX reader, namely, how it splits lines.
type line_splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error)
@ -169,25 +169,42 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
pairs := reader.fieldSplitter.Split(line)
// Without --incr-key:
// echo 'a,z=b,c' | mlr cat gives 1=a,z=b,3=c
// I.e. implicit keys are taken from the 1-up field counter.
// With it:
// echo 'a,z=b,c' | mlr cat gives 1=a,z=b,2=c
// I.e. implicit keys are taken from a 1-up count of fields lacking explicit keys.
incr_key := 0
for i, pair := range pairs {
kv := reader.pairSplitter.Split(pair)
if len(kv) == 0 || (len(kv) == 1 && kv[0] == "") {
// Ignore. This is expected when splitting with repeated IFS.
} else if len(kv) == 1 {
// E.g the pair has no equals sign: "a" rather than "a=1" or
// E.g. the pair has no equals sign: "a" rather than "a=1" or
// "a=". Here we use the positional index as the key. This way
// DKVP is a generalization of NIDX.
key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up
//
// Also: recall that Miller userspace indices are 1-up.
var int_key int
if reader.readerOptions.IncrementImplicitKey {
int_key = incr_key
} else {
int_key = i
}
str_key := strconv.Itoa(int_key + 1)
incr_key++
value := mlrval.FromDeferredType(kv[0])
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
} else {
key := kv[0]
str_key := kv[0]
value := mlrval.FromDeferredType(kv[1])
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
@ -204,9 +221,9 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
var i int = 0
for _, value := range values {
i++
key := strconv.Itoa(i)
str_key := strconv.Itoa(i)
mval := mlrval.FromDeferredType(value)
record.PutReference(key, mval)
record.PutReference(str_key, mval)
}
return record, nil
}