mlr sort -b feature (#1833)

* `mlr sort -b` feature

* mlr regtest -p test/cases/cli-help && make dev
This commit is contained in:
John Kerl 2025-07-11 12:41:04 -04:00 committed by GitHub
parent 865c9cc563
commit e7fe363d9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 33 additions and 10 deletions

View file

@ -26,7 +26,7 @@ Vertical-tabular format is good for a quick look at CSV data layout -- seeing wh
<b>wc -l data/flins.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
36635 data/flins.csv
36635 data/flins.csv
</pre>
<pre class="pre-highlight-in-pair">
@ -227,7 +227,7 @@ Peek at the data:
<b>wc -l data/colored-shapes.dkvp</b>
</pre>
<pre class="pre-non-highlight-in-pair">
10078 data/colored-shapes.dkvp
10078 data/colored-shapes.dkvp
</pre>
<pre class="pre-highlight-in-pair">

View file

@ -68,7 +68,7 @@ date,qoh
<b>wc -l data/miss-date.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
1372 data/miss-date.csv
1372 data/miss-date.csv
</pre>
Since there are 1372 lines in the data file, some automation is called for. To find the missing dates, you can convert the dates to seconds since the epoch using `strptime`, then compute adjacent differences (the `cat -n` simply inserts record-counters):

View file

@ -1837,6 +1837,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.
@ -3745,5 +3746,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p
MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
https://miller.readthedocs.io
2025-07-04 MILLER(1)
2025-07-11 MILLER(1)
</pre>

View file

@ -1816,6 +1816,7 @@
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.
@ -3724,4 +3725,4 @@
MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
https://miller.readthedocs.io
2025-07-04 MILLER(1)
2025-07-11 MILLER(1)

View file

@ -230,6 +230,7 @@ Options:
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.

View file

@ -2960,6 +2960,7 @@ Options:
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.
@ -4133,7 +4134,7 @@ There are two main ways to use `mlr uniq`: the first way is with `-g` to specify
<b>wc -l data/colored-shapes.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
10079 data/colored-shapes.csv
10079 data/colored-shapes.csv
</pre>
<pre class="pre-highlight-in-pair">
@ -4290,7 +4291,7 @@ color=purple,shape=square,flag=0
<b>wc -l data/repeats.dkvp</b>
</pre>
<pre class="pre-non-highlight-in-pair">
57 data/repeats.dkvp
57 data/repeats.dkvp
</pre>
<pre class="pre-highlight-in-pair">

View file

@ -1816,6 +1816,7 @@
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.
@ -3724,4 +3725,4 @@
MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
https://miller.readthedocs.io
2025-07-04 MILLER(1)
2025-07-11 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2025-07-04
.\" Date: 2025-07-11
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2025-07-04" "\ \&" "\ \&"
.TH "MILLER" "1" "2025-07-11" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -2289,6 +2289,7 @@ Options:
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.

View file

@ -83,6 +83,7 @@ func transformerSortUsage(
fmt.Fprintf(o, "-nf {comma-separated field names} Same as -n\n")
fmt.Fprintf(o, "-nr {comma-separated field names} Numerical descending; nulls sort first\n")
fmt.Fprintf(o, "-t {comma-separated field names} Natural ascending\n")
fmt.Fprintf(o, "-b Move sort fields to start of record, as in reorder -b\n")
fmt.Fprintf(o, "-tr|-rt {comma-separated field names} Natural descending\n")
fmt.Fprintf(o, "-h|--help Show this message.\n")
fmt.Fprintf(o, "\n")
@ -107,6 +108,7 @@ func transformerSortParseCLI(
groupByFieldNames := make([]string, 0)
comparatorFuncs := make([]mlrval.CmpFuncInt, 0)
doMoveToHead := false
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
opt := args[argi]
@ -255,6 +257,9 @@ func transformerSortParseCLI(
comparatorFuncs = append(comparatorFuncs, mlrval.NumericDescendingComparator)
}
} else if opt == "-b" {
doMoveToHead = true
} else {
transformerSortUsage(os.Stderr)
os.Exit(1)
@ -274,6 +279,7 @@ func transformerSortParseCLI(
transformer, err := NewTransformerSort(
groupByFieldNames,
comparatorFuncs,
doMoveToHead,
)
if err != nil {
fmt.Fprintln(os.Stderr, err)
@ -304,6 +310,7 @@ type TransformerSort struct {
// -- Input
groupByFieldNames []string
comparatorFuncs []mlrval.CmpFuncInt
doMoveToHead bool
// -- State
// Map from string to *list.List:
@ -316,11 +323,13 @@ type TransformerSort struct {
func NewTransformerSort(
groupByFieldNames []string,
comparatorFuncs []mlrval.CmpFuncInt,
doMoveToHead bool,
) (*TransformerSort, error) {
tr := &TransformerSort{
groupByFieldNames: groupByFieldNames,
comparatorFuncs: comparatorFuncs,
doMoveToHead: doMoveToHead,
recordListsByGroup: lib.NewOrderedMap(),
groupHeads: lib.NewOrderedMap(),
@ -346,6 +355,13 @@ func (tr *TransformerSort) Transform(
if !inrecAndContext.EndOfStream {
inrec := inrecAndContext.Record
if tr.doMoveToHead {
n := len(tr.groupByFieldNames)
for i := n - 1; i >= 0; i-- {
inrec.MoveToHead(tr.groupByFieldNames[i])
}
}
groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(
tr.groupByFieldNames,
)

View file

@ -982,6 +982,7 @@ Options:
-nf {comma-separated field names} Same as -n
-nr {comma-separated field names} Numerical descending; nulls sort first
-t {comma-separated field names} Natural ascending
-b Move sort fields to start of record, as in reorder -b
-tr|-rt {comma-separated field names} Natural descending
-h|--help Show this message.