minor to-do neatens including mlr histogram default nbins

This commit is contained in:
John Kerl 2021-11-03 20:44:35 -04:00
parent 95593d59e4
commit 041e09bf5b
13 changed files with 31 additions and 34 deletions

View file

@ -1172,7 +1172,7 @@ VERBS
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.
@ -2992,5 +2992,5 @@ SEE ALSO
2021-11-03 MILLER(1)
2021-11-04 MILLER(1)
</pre>

View file

@ -1151,7 +1151,7 @@ VERBS
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.
@ -2971,4 +2971,4 @@ SEE ALSO
2021-11-03 MILLER(1)
2021-11-04 MILLER(1)

View file

@ -202,6 +202,8 @@ Parse error on token ">" at line 63 columnn 7.
## Changes from Miller 5
The following differences are rather technical. If they don't sound familiar to you, not to worry! Most users won't be affected by the (relatively minor) changes between Miller 5 and Miller 6.
* Line endings: The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise for LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line ending is the one native to the platform Miller is running on. E.g. reading CR/LF files on Linux will now produce LF output.
* Since IFS and IPS can be regular expressions now, if your `IFS` or `IPS` is a `.` (which is a regular-expression metacharacter) you can either use `--ifs '\.'` or `--ifs . --no-ifs-regex` (and similarly for `IPS`). Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
* JSON formatting:

View file

@ -162,6 +162,8 @@ GENMD_EOF
## Changes from Miller 5
The following differences are rather technical. If they don't sound familiar to you, not to worry! Most users won't be affected by the (relatively minor) changes between Miller 5 and Miller 6.
* Line endings: The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise for LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line ending is the one native to the platform Miller is running on. E.g. reading CR/LF files on Linux will now produce LF output.
* Since IFS and IPS can be regular expressions now, if your `IFS` or `IPS` is a `.` (which is a regular-expression metacharacter) you can either use `--ifs '\.'` or `--ifs . --no-ifs-regex` (and similarly for `IPS`). Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
* JSON formatting:

View file

@ -1498,7 +1498,7 @@ Usage: mlr histogram [options]
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.

View file

@ -391,7 +391,7 @@ Usage: mlr histogram [options]
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.

View file

@ -12,6 +12,7 @@ import (
// ----------------------------------------------------------------
const verbNameHistogram = "histogram"
const histogramDefaultBinCount = 20
var HistogramSetup = TransformerSetup{
Verb: verbNameHistogram,
@ -32,7 +33,7 @@ func transformerHistogramUsage(
fmt.Fprintf(o, "-f {a,b,c} Value-field names for histogram counts\n")
fmt.Fprintf(o, "--lo {lo} Histogram low value\n")
fmt.Fprintf(o, "--hi {hi} Histogram high value\n")
fmt.Fprintf(o, "--nbins {n} Number of histogram bins\n")
fmt.Fprintf(o, "--nbins {n} Number of histogram bins. Defaults to %d.\n", histogramDefaultBinCount)
fmt.Fprintf(o, "--auto Automatically computes limits, ignoring --lo and --hi.\n")
fmt.Fprintf(o, " Holds all values in memory before producing any output.\n")
fmt.Fprintf(o, "-o {prefix} Prefix for output field name. Default: no prefix.\n")
@ -58,7 +59,7 @@ func transformerHistogramParseCLI(
// Parse local flags
var valueFieldNames []string = nil
lo := 0.0
nbins := 0
nbins := histogramDefaultBinCount
hi := 0.0
doAuto := false
outputPrefix := ""
@ -100,7 +101,7 @@ func transformerHistogramParseCLI(
transformerHistogramUsage(os.Stderr, true, 1)
}
if nbins == 0 {
if nbins <= 0 {
transformerHistogramUsage(os.Stderr, true, 1)
}

View file

@ -132,7 +132,7 @@ func (mlrmap *Mlrmap) PutCopyWithMlrvalIndex(key *Mlrval, value *Mlrval) error {
return nil
} else {
return errors.New(
"mlr: record/map indices must be string or int; got " + key.GetTypeName(),
"mlr: record/map indices must be string, int, or array thereof; got " + key.GetTypeName(),
)
}
}
@ -263,7 +263,7 @@ func (mlrmap *Mlrmap) getWithMlrvalSingleIndex(index *Mlrval) (*Mlrval, error) {
return mlrmap.Get(index.String()), nil
} else {
return nil, errors.New(
"Record/map indices must be string or int; got " + index.GetTypeName(),
"Record/map indices must be string, int, or array thereof; got " + index.GetTypeName(),
)
}
}

View file

@ -343,7 +343,7 @@ func putIndexedOnMap(baseMap *Mlrmap, indices []*Mlrval, rvalue *Mlrval) error {
if baseIndex.mvtype != MT_STRING && baseIndex.mvtype != MT_INT {
// Base is map, index is invalid type
return errors.New(
"mlr: map indices must be string or int; got " + baseIndex.GetTypeName(),
"mlr: map indices must be string, int, or array thereof; got " + baseIndex.GetTypeName(),
)
}
@ -416,7 +416,7 @@ func putIndexedOnArray(
}
} else {
return errors.New(
"mlr: indices must be string or int; got " + nextIndex.GetTypeName(),
"mlr: indices must be string, int, or array thereof; got " + nextIndex.GetTypeName(),
)
}
@ -469,7 +469,7 @@ func removeIndexedOnMap(baseMap *Mlrmap, indices []*Mlrval) error {
return nil
} else {
return errors.New(
"mlr: map indices must be string or int; got " +
"mlr: map indices must be string, int, or array thereof; got " +
baseIndex.GetTypeName(),
)
}
@ -486,7 +486,7 @@ func removeIndexedOnMap(baseMap *Mlrmap, indices []*Mlrval) error {
} else {
// Base is map, index is invalid type
return errors.New(
"mlr: map indices must be string or int; got " + baseIndex.GetTypeName(),
"mlr: map indices must be string, int, or array thereof; got " + baseIndex.GetTypeName(),
)
}

View file

@ -193,7 +193,7 @@ func NewMlrvalForAutoDeepen(mvtype MVType) (*Mlrval, error) {
return empty, nil
} else {
return nil, errors.New(
"mlr: indices must be string or int; got " + GetTypeName(mvtype),
"mlr: indices must be string, int, or array thereof; got " + GetTypeName(mvtype),
)
}
}

View file

@ -6,31 +6,23 @@ PUNCHDOWN LIST
* $cpg
o merge-field voids
o check all formats for when/when not multi-char (or any non-LF) IRS
o mapexcept ...
o xtab crlf & non-std irs generally ...
o fmtnum "%3d%%" -- ?
o release notes per se
o ./configure whatever equivalent
* separators:
o xtab crlf & non-std irs generally ...
o check all formats for when/when not multi-char (or any non-LF) IRS
o csv force ifslen==1
o csv force irs == "\n"
o csv force ors == "\n"
o separators page 'IRS must be a single character (nominally \n).' -> not true anymore
o xtab ifs/irs
* histogram nbins required: make it more clear
o maybe default nbins
* RT
o CASEDIR cmd -> mlr via env not oosvar
* cases/dsl-min-max-types: cmp-matrices need to be fixed to follow the advertised rule for mixed types
NUMERICS < BOOL < VOID < STRING
* 'string or int' -> 'string, int, or array thereof' throughout
* regex
o authoritative regex docs accompanied by thorough UT
- expand existing regex webdoc
@ -63,9 +55,9 @@ PUNCHDOWN LIST
o doc6
o check issues
o TODO in *.go & *.mi
o nim6 disclaimer: these are all nitty so you can probably disregard & just keep going
! readthedocs -- find out what's necessary to get per-version history
o release notes per se
o ./configure whatever equivalent
o readthedocs -- find out what's necessary to get per-version history
* doc
o new-in-miller-6: missings:

View file

@ -1151,7 +1151,7 @@ VERBS
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.
@ -2971,4 +2971,4 @@ SEE ALSO
2021-11-03 MILLER(1)
2021-11-04 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2021-11-03
.\" Date: 2021-11-04
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2021-11-03" "\ \&" "\ \&"
.TH "MILLER" "1" "2021-11-04" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -1458,7 +1458,7 @@ Usage: mlr histogram [options]
-f {a,b,c} Value-field names for histogram counts
--lo {lo} Histogram low value
--hi {hi} Histogram high value
--nbins {n} Number of histogram bins
--nbins {n} Number of histogram bins. Defaults to 20.
--auto Automatically computes limits, ignoring --lo and --hi.
Holds all values in memory before producing any output.
-o {prefix} Prefix for output field name. Default: no prefix.