DKVP --incr-key option (#1839)

* Code support for --incr-key

* Add source code for online help for new flag

* Run `make dev`
This commit is contained in:
John Kerl 2025-07-20 17:05:24 -04:00 committed by GitHub
parent d264f562dc
commit fccdf215e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 119 additions and 13 deletions

View file

@ -104,6 +104,7 @@ var FLAG_TABLE = FlagTable{
&CSVTSVOnlyFlagSection,
&JSONOnlyFlagSection,
&PPRINTOnlyFlagSection,
&DKVPOnlyFlagSection,
&CompressedDataFlagSection,
&CommentsInDataFlagSection,
&OutputColorizationFlagSection,
@ -523,6 +524,31 @@ var PPRINTOnlyFlagSection = FlagSection{
},
}
// ================================================================
// DKVP-ONLY FLAGS
// DKVPOnlyPrintInfo prints the one-line introduction for the DKVP-only flag
// section to standard output. It is used as that section's infoPrinter.
func DKVPOnlyPrintInfo() {
	const info = "These are flags which are applicable to DKVP format."
	fmt.Println(info)
}
// Sort the DKVP-only flag section once at package initialization.
// NOTE(review): presumably this orders the flags for help output, as the
// Sort method is not visible here -- confirm against FlagSection.Sort.
func init() {
	DKVPOnlyFlagSection.Sort()
}
// DKVPOnlyFlagSection holds the command-line flags which apply only to the
// DKVP file format. It is listed in FLAG_TABLE alongside the other
// format-specific sections.
var DKVPOnlyFlagSection = FlagSection{
	name:        "DKVP-only flags",
	infoPrinter: DKVPOnlyPrintInfo,
	flags: []Flag{
		{
			name: "--incr-key",
			help: "Without this option, keyless DKVP fields are keyed by field number. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With this option, they're keyed by a running counter of keyless fields. For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.",
			parser: func(args []string, argc int, pargi *int, options *TOptions) {
				// This is a reader-side option: the DKVP reader consults
				// IncrementImplicitKey when choosing keys for keyless
				// fields. It must not touch any writer/PPRINT option.
				options.ReaderOptions.IncrementImplicitKey = true
				// The flag takes no value, so step past just the flag itself.
				*pargi += 1
			},
		},
	},
}
// ================================================================
// LEGACY FLAGS

View file

@ -53,11 +53,12 @@ type TReaderOptions struct {
irsWasSpecified bool
allowRepeatIFSWasSpecified bool
UseImplicitHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
BarredPprintInput bool
UseImplicitHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
BarredPprintInput bool
IncrementImplicitKey bool
CommentHandling TCommentHandling
CommentString string

View file

@ -14,7 +14,7 @@ import (
"github.com/johnkerl/miller/v6/pkg/types"
)
// line_splitter_DKVP_NIDX is a function type for the one bit of code differing
// between the DKVP reader and the NIDX reader, namely, how it splits lines.
// Given the reader and a single input line, an implementation returns the
// record built from that line, or an error.
type line_splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error)
@ -169,25 +169,42 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
pairs := reader.fieldSplitter.Split(line)
// Without --incr-key:
// echo 'a,z=b,c' | mlr cat gives 1=a,z=b,3=c
// I.e. implicit keys are taken from the 1-up field counter.
// With it:
// echo 'a,z=b,c' | mlr cat gives 1=a,z=b,2=c
// I.e. implicit keys are taken from a 1-up count of fields lacking explicit keys.
incr_key := 0
for i, pair := range pairs {
kv := reader.pairSplitter.Split(pair)
if len(kv) == 0 || (len(kv) == 1 && kv[0] == "") {
// Ignore. This is expected when splitting with repeated IFS.
} else if len(kv) == 1 {
// E.g the pair has no equals sign: "a" rather than "a=1" or
// E.g. the pair has no equals sign: "a" rather than "a=1" or
// "a=". Here we use the positional index as the key. This way
// DKVP is a generalization of NIDX.
key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up
//
// Also: recall that Miller userspace indices are 1-up.
var int_key int
if reader.readerOptions.IncrementImplicitKey {
int_key = incr_key
} else {
int_key = i
}
str_key := strconv.Itoa(int_key + 1)
incr_key++
value := mlrval.FromDeferredType(kv[0])
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
} else {
key := kv[0]
str_key := kv[0]
value := mlrval.FromDeferredType(kv[1])
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
@ -204,9 +221,9 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
var i int = 0
for _, value := range values {
i++
key := strconv.Itoa(i)
str_key := strconv.Itoa(i)
mval := mlrval.FromDeferredType(value)
record.PutReference(key, mval)
record.PutReference(str_key, mval)
}
return record, nil
}