From e67bdef98eeecb6fdfdcb9c33a9500c07758b4bc Mon Sep 17 00:00:00 2001 From: Balki Date: Thu, 3 Jul 2025 22:54:09 +0000 Subject: [PATCH] cut: Consider `-o` flag even when using regexes with `-r` (#1823) * cut: Consider `-o` flag even when using regexes with `-r` * update doc for cut -r flag --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/mlr.1 | 2 +- docs/src/reference-verbs.md | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- pkg/transformers/cut.go | 31 ++++++++++++++++++++++++++++--- test/cases/cli-help/0001/expout | 2 +- 8 files changed, 35 insertions(+), 10 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index b9af6e51c..d17c5de45 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1033,7 +1033,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index aa0b21b9b..c19bbc49d 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1012,7 +1012,7 @@ -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status diff --git a/docs/src/mlr.1 b/docs/src/mlr.1 index 0a026dd4f..5aa5bd16f 100644 --- a/docs/src/mlr.1 +++ b/docs/src/mlr.1 @@ -722,7 +722,7 @@ Passes through input records with specified fields included/excluded. -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. Examples: mlr cut -f hostname,status mlr cut -x -f hostname,status diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index c3e121644..ab7a599fa 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -804,7 +804,7 @@ Options: -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status diff --git a/man/manpage.txt b/man/manpage.txt index aa0b21b9b..c19bbc49d 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1012,7 +1012,7 @@ -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status diff --git a/man/mlr.1 b/man/mlr.1 index 967d53e31..f862890cf 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -1251,7 +1251,7 @@ Options: -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status diff --git a/pkg/transformers/cut.go b/pkg/transformers/cut.go index f8933d586..e9e57d438 100644 --- a/pkg/transformers/cut.go +++ b/pkg/transformers/cut.go @@ -1,10 +1,12 @@ package transformers import ( + "cmp" "container/list" "fmt" "os" "regexp" + "slices" "strings" "github.com/johnkerl/miller/v6/pkg/cli" @@ -36,7 +38,7 @@ func transformerCutUsage( fmt.Fprintf(o, " -r Treat field names as regular expressions. \"ab\", \"a.*b\" will\n") fmt.Fprintf(o, " match any field name containing the substring \"ab\" or matching\n") fmt.Fprintf(o, " \"a.*b\", respectively; anchors of the form \"^ab$\", \"^a.*b$\" may\n") - fmt.Fprintf(o, " be used. The -o flag is ignored when -r is present.\n") + fmt.Fprintf(o, " be used.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") fmt.Fprintf(o, "Examples:\n") fmt.Fprintf(o, " %s %s -f hostname,status\n", "mlr", verbNameCut) @@ -129,6 +131,7 @@ type TransformerCut struct { fieldNameSet map[string]bool doComplement bool + doArgOrder bool regexes []*regexp.Regexp recordTransformerFunc RecordTransformerFunc @@ -143,6 +146,8 @@ func NewTransformerCut( tr := &TransformerCut{} + tr.doArgOrder = doArgOrder + if !doRegexes { tr.fieldNameList = fieldNames tr.fieldNameSet = lib.StringListToSet(fieldNames) @@ -257,6 +262,11 @@ func (tr *TransformerCut) exclude( outputRecordsAndContexts.PushBack(inrecAndContext) } +type entryIndex struct { + index int + entry *mlrval.MlrmapEntry +} + // ---------------------------------------------------------------- func (tr *TransformerCut) processWithRegexes( inrecAndContext *types.RecordAndContext, @@ -267,11 +277,14 @@ func (tr *TransformerCut) processWithRegexes( if !inrecAndContext.EndOfStream { inrec := inrecAndContext.Record newrec := mlrval.NewMlrmapAsRecord() + var entries []entryIndex for pe := inrec.Head; pe != nil; pe = pe.Next { matchesAny := false - for _, regex := range tr.regexes { + var index int + for i, regex := range tr.regexes { if regex.MatchString(pe.Key) { matchesAny = true + index = i break } } @@ -279,7 +292,19 @@ func (tr *TransformerCut) processWithRegexes( if matchesAny != tr.doComplement { // Pointer-motion is OK since the inrec is being hereby discarded. // We're simply transferring ownership to the newrec. - newrec.PutReference(pe.Key, pe.Value) + if tr.doArgOrder { + entries = append(entries, entryIndex{index, pe}) + } else { + newrec.PutReference(pe.Key, pe.Value) + } + } + } + if tr.doArgOrder { + slices.SortStableFunc(entries, func(a, b entryIndex) int { + return cmp.Compare(a.index, b.index) + }) + for _, ei := range entries { + newrec.PutReference(ei.entry.Key, ei.entry.Value) } } outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index e7eed5d74..a451ac0c5 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -138,7 +138,7 @@ Options: -r Treat field names as regular expressions. "ab", "a.*b" will match any field name containing the substring "ab" or matching "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may - be used. The -o flag is ignored when -r is present. + be used. -h|--help Show this message. Examples: mlr cut -f hostname,status