From fccdf215e6c0df617c8b02108de21da2a006b2b0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 20 Jul 2025 17:05:24 -0400 Subject: [PATCH] DKVP `--incr-key` option (#1839) * Code support for --incr-key * Add source code for online help for new flag * Run `make dev` --- docs/src/manpage.md | 11 ++++++++++ docs/src/manpage.txt | 11 ++++++++++ docs/src/online-help.md | 1 + docs/src/reference-main-flag-list.md | 9 ++++++++ man/manpage.txt | 11 ++++++++++ man/mlr.1 | 19 ++++++++++++++++ pkg/cli/option_parse.go | 26 ++++++++++++++++++++++ pkg/cli/option_types.go | 11 +++++----- pkg/input/record_reader_dkvp_nidx.go | 33 +++++++++++++++++++++------- 9 files changed, 119 insertions(+), 13 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index e6cf26b77..2bc383fe7 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -145,6 +145,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv/tsv-only-flags + mlr help dkvp-only-flags mlr help file-format-flags mlr help flatten-unflatten-flags mlr help format-conversion-keystroke-saver-flags @@ -356,6 +357,16 @@ This is simply a copy of what you should see on running `man mlr` at a command p -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. +1mDKVP-ONLY FLAGS0m + These are flags which are applicable to DKVP format. + + --incr-key Without this option, keyless DKVP fields are keyed by + field number. For example: `a=10,b=20,30,d=40,50` is + ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With + this option, they're keyed by a running counter of + keyless fields. For example: `a=10,b=20,30,d=40,50` + is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`. + 1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 130bbcabd..bf4e6c618 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -124,6 +124,7 @@ mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv/tsv-only-flags + mlr help dkvp-only-flags mlr help file-format-flags mlr help flatten-unflatten-flags mlr help format-conversion-keystroke-saver-flags @@ -335,6 +336,16 @@ -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. +1mDKVP-ONLY FLAGS0m + These are flags which are applicable to DKVP format. + + --incr-key Without this option, keyless DKVP fields are keyed by + field number. For example: `a=10,b=20,30,d=40,50` is + ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With + this option, they're keyed by a running counter of + keyless fields. For example: `a=10,b=20,30,d=40,50` + is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`. + 1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. diff --git a/docs/src/online-help.md b/docs/src/online-help.md index f1b8d4d18..bb8185e10 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -55,6 +55,7 @@ Flags: mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv/tsv-only-flags + mlr help dkvp-only-flags mlr help file-format-flags mlr help flatten-unflatten-flags mlr help format-conversion-keystroke-saver-flags diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 28adc12f6..86d3ce042 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -128,6 +128,15 @@ These are flags which are applicable to CSV format. * `--quote-all`: Force double-quoting of CSV fields. * `-N`: Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. +## DKVP-only flags + +These are flags which are applicable to DKVP format. 
+ + +**Flags:** + +* `--incr-key`: Without this option, keyless DKVP fields are keyed by field number. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With this option, they're keyed by a running counter of keyless fields. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`. + ## File-format flags See the File formats doc page, and or `mlr help file-formats`, for more diff --git a/man/manpage.txt b/man/manpage.txt index 130bbcabd..bf4e6c618 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -124,6 +124,7 @@ mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv/tsv-only-flags + mlr help dkvp-only-flags mlr help file-format-flags mlr help flatten-unflatten-flags mlr help format-conversion-keystroke-saver-flags @@ -335,6 +336,16 @@ -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. +1mDKVP-ONLY FLAGS0m + These are flags which are applicable to DKVP format. + + --incr-key Without this option, keyless DKVP fields are keyed by + field number. For example: `a=10,b=20,30,d=40,50` is + ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With + this option, they're keyed by a running counter of + keyless fields. For example: `a=10,b=20,30,d=40,50` + is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`. + 1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. diff --git a/man/mlr.1 b/man/mlr.1 index 3dca0b9a7..8d32a9817 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -161,6 +161,7 @@ Flags: mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv/tsv-only-flags + mlr help dkvp-only-flags mlr help file-format-flags mlr help flatten-unflatten-flags mlr help format-conversion-keystroke-saver-flags @@ -410,6 +411,24 @@ These are flags which are applicable to CSV format. 
.fi .if n \{\ .RE +.SH "DKVP-ONLY FLAGS" +.sp + +.if n \{\ +.RS 0 +.\} +.nf +These are flags which are applicable to DKVP format. + +--incr-key Without this option, keyless DKVP fields are keyed by + field number. For example: `a=10,b=20,30,d=40,50` is + ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With + this option, they're keyed by a running counter of + keyless fields. For example: `a=10,b=20,30,d=40,50` + is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`. +.fi +.if n \{\ +.RE .SH "FILE-FORMAT FLAGS" .sp diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 156a056a3..dd5ede99f 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -104,6 +104,7 @@ var FLAG_TABLE = FlagTable{ &CSVTSVOnlyFlagSection, &JSONOnlyFlagSection, &PPRINTOnlyFlagSection, + &DKVPOnlyFlagSection, &CompressedDataFlagSection, &CommentsInDataFlagSection, &OutputColorizationFlagSection, @@ -523,6 +524,31 @@ var PPRINTOnlyFlagSection = FlagSection{ }, } +// ================================================================ +// DKVP-ONLY FLAGS + +func DKVPOnlyPrintInfo() { + fmt.Println("These are flags which are applicable to DKVP format.") +} + +func init() { DKVPOnlyFlagSection.Sort() } + +var DKVPOnlyFlagSection = FlagSection{ + name: "DKVP-only flags", + infoPrinter: DKVPOnlyPrintInfo, + flags: []Flag{ + + { + name: "--incr-key", + help: "Without this option, keyless DKVP fields are keyed by field number. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With this option, they're keyed by a running counter of keyless fields. 
For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + options.ReaderOptions.IncrementImplicitKey = true + *pargi += 1 + }, + }, + }, +} + // ================================================================ // LEGACY FLAGS diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go index 19227fd73..58917728a 100644 --- a/pkg/cli/option_types.go +++ b/pkg/cli/option_types.go @@ -53,11 +53,12 @@ type TReaderOptions struct { irsWasSpecified bool allowRepeatIFSWasSpecified bool - UseImplicitHeader bool - AllowRaggedCSVInput bool - CSVLazyQuotes bool - CSVTrimLeadingSpace bool - BarredPprintInput bool + UseImplicitHeader bool + AllowRaggedCSVInput bool + CSVLazyQuotes bool + CSVTrimLeadingSpace bool + BarredPprintInput bool + IncrementImplicitKey bool CommentHandling TCommentHandling CommentString string diff --git a/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go index efc0ae385..6a53c8c26 100644 --- a/pkg/input/record_reader_dkvp_nidx.go +++ b/pkg/input/record_reader_dkvp_nidx.go @@ -14,7 +14,7 @@ import ( "github.com/johnkerl/miller/v6/pkg/types" ) -// splitter_DKVP_NIDX is a function type for the one bit of code differing +// line_splitter_DKVP_NIDX is a function type for the one bit of code differing // between the DKVP reader and the NIDX reader, namely, how it splits lines. type line_splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) @@ -169,25 +169,42 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm pairs := reader.fieldSplitter.Split(line) + // Without --incr-key: + // echo 'a,z=b,c' | mlr cat gives 1=a,z=b,3=c + // I.e. implicit keys are taken from the 1-up field counter. + // With it: + // echo 'a,z=b,c' | mlr cat gives 1=a,z=b,2=c + // I.e. implicit keys are taken from a 1-up count of fields lacking explicit keys. 
+ incr_key := 0 + for i, pair := range pairs { kv := reader.pairSplitter.Split(pair) if len(kv) == 0 || (len(kv) == 1 && kv[0] == "") { // Ignore. This is expected when splitting with repeated IFS. } else if len(kv) == 1 { - // E.g the pair has no equals sign: "a" rather than "a=1" or + // E.g. the pair has no equals sign: "a" rather than "a=1" or // "a=". Here we use the positional index as the key. This way // DKVP is a generalization of NIDX. - key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up + // + // Also: recall that Miller userspace indices are 1-up. + var int_key int + if reader.readerOptions.IncrementImplicitKey { + int_key = incr_key + } else { + int_key = i + } + str_key := strconv.Itoa(int_key + 1) + incr_key++ value := mlrval.FromDeferredType(kv[0]) - _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + _, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames) if err != nil { return nil, err } } else { - key := kv[0] + str_key := kv[0] value := mlrval.FromDeferredType(kv[1]) - _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + _, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames) if err != nil { return nil, err } @@ -204,9 +221,9 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm var i int = 0 for _, value := range values { i++ - key := strconv.Itoa(i) + str_key := strconv.Itoa(i) mval := mlrval.FromDeferredType(value) - record.PutReference(key, mval) + record.PutReference(str_key, mval) } return record, nil }