Make fmtnum accept C-style printf format strings for backward compatibility (#717)

This commit is contained in:
John Kerl 2021-10-30 23:18:05 -04:00 committed by GitHub
parent 36459281a4
commit 2be2badfd2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 75 additions and 45 deletions

View file

@ -2056,7 +2056,7 @@ FUNCTIONS FOR FILTER/PUT
(class=math #args=1) Floor: nearest integer at or below.
fmtnum
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%06lld")'.
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.
fold
(class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument.
@ -2968,5 +2968,5 @@ SEE ALSO
2021-10-28 MILLER(1)
2021-10-31 MILLER(1)
</pre>

View file

@ -2035,7 +2035,7 @@ FUNCTIONS FOR FILTER/PUT
(class=math #args=1) Floor: nearest integer at or below.
fmtnum
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%06lld")'.
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.
fold
(class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument.
@ -2947,4 +2947,4 @@ SEE ALSO
2021-10-28 MILLER(1)
2021-10-31 MILLER(1)

View file

@ -477,7 +477,7 @@ float (class=conversion #args=1) Convert int/float/bool/string to float.
### fmtnum
<pre class="pre-non-highlight-non-pair">
fmtnum (class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%06lld")'.
fmtnum (class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.
</pre>

View file

@ -1083,24 +1083,24 @@ wye pan 5.000000 0.573288 0.863624
<b>mlr --opprint format-values -i %08llx -f %.6le -s X%sX data/small</b>
</pre>
<pre class="pre-non-highlight-in-pair">
a b i x y
XpanX XpanX %!l(int=00000001)lx %!l(float64=0.346791)e %!l(float64=0.726802)e
XeksX XpanX %!l(int=00000002)lx %!l(float64=0.758679)e %!l(float64=0.522151)e
XwyeX XwyeX %!l(int=00000003)lx %!l(float64=0.204603)e %!l(float64=0.338318)e
XeksX XwyeX %!l(int=00000004)lx %!l(float64=0.381399)e %!l(float64=0.134188)e
XwyeX XpanX %!l(int=00000005)lx %!l(float64=0.573288)e %!l(float64=0.863624)e
a b i x y
XpanX XpanX 00000001 3.467910e-01 7.268020e-01
XeksX XpanX 00000002 7.586790e-01 5.221510e-01
XwyeX XwyeX 00000003 2.046030e-01 3.383180e-01
XeksX XwyeX 00000004 3.813990e-01 1.341880e-01
XwyeX XpanX 00000005 5.732880e-01 8.636240e-01
</pre>
<pre class="pre-highlight-in-pair">
<b>mlr --opprint format-values -i %08llx -f %.6le -s X%sX -n data/small</b>
</pre>
<pre class="pre-non-highlight-in-pair">
a b i x y
XpanX XpanX %!l(float64=1)e %!l(float64=0.346791)e %!l(float64=0.726802)e
XeksX XpanX %!l(float64=2)e %!l(float64=0.758679)e %!l(float64=0.522151)e
XwyeX XwyeX %!l(float64=3)e %!l(float64=0.204603)e %!l(float64=0.338318)e
XeksX XwyeX %!l(float64=4)e %!l(float64=0.381399)e %!l(float64=0.134188)e
XwyeX XpanX %!l(float64=5)e %!l(float64=0.573288)e %!l(float64=0.863624)e
a b i x y
XpanX XpanX 1.000000e+00 3.467910e-01 7.268020e-01
XeksX XpanX 2.000000e+00 7.586790e-01 5.221510e-01
XwyeX XwyeX 3.000000e+00 2.046030e-01 3.383180e-01
XeksX XwyeX 4.000000e+00 3.813990e-01 1.341880e-01
XwyeX XpanX 5.000000e+00 5.732880e-01 8.636240e-01
</pre>
## fraction

View file

@ -1351,7 +1351,7 @@ strftime_local.`,
name: "fmtnum",
class: FUNC_CLASS_CONVERSION,
help: `Convert int/float/bool to string using printf-style format string, e.g.
'$s = fmtnum($n, "%06lld")'.`,
'$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.`,
binaryFunc: types.BIF_fmtnum,
},

View file

@ -1,6 +1,7 @@
package types
import (
"errors"
"fmt"
"strconv"
"strings"
@ -52,6 +53,8 @@ func GetMlrvalFormatter(
// Cache miss
formatter, err := newMlrvalFormatter(userLevelFormatString)
if err != nil {
// TODO: temp exit
fmt.Printf("mlr: %v\n", err)
return nil, err
}
@ -65,37 +68,64 @@ type IMlrvalFormatter interface {
FormatFloat(floatValue float64) string // for --ofmt
}
// People can pass in things like "X%sX" unfortunately :(
func newMlrvalFormatter(
userLevelFormatString string,
) (IMlrvalFormatter, error) {
// TODO: very temporary. Pending full parse.
// Including but not limited to "%08lld" -> "%08d" C-impl back-compat etc.
if strings.HasSuffix(userLevelFormatString, "d") {
return newMlrvalFormatterToInt(userLevelFormatString), nil
numPercents := strings.Count(userLevelFormatString, "%")
if numPercents < 1 {
return nil, errors.New(
fmt.Sprintf("unhandled format string \"%s\": no leading \"%%\"", userLevelFormatString),
)
}
if strings.HasSuffix(userLevelFormatString, "x") {
return newMlrvalFormatterToInt(userLevelFormatString), nil
if numPercents > 1 {
return nil, errors.New(
fmt.Sprintf("unhandled format string \"%s\": needs no \"%%\" after the first", userLevelFormatString),
)
}
if strings.HasSuffix(userLevelFormatString, "f") {
return newMlrvalFormatterToFloat(userLevelFormatString), nil
// TODO: perhaps a full format-string parser. At present, there's nothing to stop people
// from doing silly things like "%lllld".
goFormatString := userLevelFormatString
goFormatString = strings.ReplaceAll(goFormatString, "lld", "d")
goFormatString = strings.ReplaceAll(goFormatString, "llx", "x")
goFormatString = strings.ReplaceAll(goFormatString, "ld", "d")
goFormatString = strings.ReplaceAll(goFormatString, "lx", "x")
goFormatString = strings.ReplaceAll(goFormatString, "lf", "f")
goFormatString = strings.ReplaceAll(goFormatString, "le", "e")
goFormatString = strings.ReplaceAll(goFormatString, "lg", "g")
// Miller 5 and below required C format strings compatible with 64-bit ints
// and double-precision floats: e.g. "%08lld" and "%9.6lf". For Miller 6,
// we must still accept these for backward compatibility.
if strings.HasSuffix(goFormatString, "d") {
return newMlrvalFormatterToInt(goFormatString), nil
}
if strings.HasSuffix(userLevelFormatString, "e") {
return newMlrvalFormatterToFloat(userLevelFormatString), nil
}
if strings.HasSuffix(userLevelFormatString, "g") {
return newMlrvalFormatterToFloat(userLevelFormatString), nil
if strings.HasSuffix(goFormatString, "x") {
return newMlrvalFormatterToInt(goFormatString), nil
}
if strings.HasSuffix(userLevelFormatString, "s") {
return newMlrvalFormatterToString(userLevelFormatString), nil
if strings.HasSuffix(goFormatString, "f") {
return newMlrvalFormatterToFloat(goFormatString), nil
}
if strings.HasSuffix(goFormatString, "e") {
return newMlrvalFormatterToFloat(goFormatString), nil
}
if strings.HasSuffix(goFormatString, "g") {
return newMlrvalFormatterToFloat(goFormatString), nil
}
// TODO: finish porting
// return nil, errors.New("TBD") // TODO
return newMlrvalFormatterToString(userLevelFormatString), nil
if strings.HasSuffix(goFormatString, "s") {
return newMlrvalFormatterToString(goFormatString), nil
}
// TODO:
// return nil, errors.New(fmt.Sprintf("unhandled format string \"%s\"", userLevelFormatString))
return newMlrvalFormatterToString(goFormatString), nil
}
//func regularizeFormat
// ----------------------------------------------------------------
type mlrvalFormatterToFloat struct {
goFormatString string

View file

@ -5,7 +5,6 @@ TOP OF LIST:
* nikos materials -> fold in
* CSV BOM
* fmtnum
* JSON comment-handling
* r-strings/implicit-r/297: double-check end of reference-main-data-types.md.in
@ -29,8 +28,6 @@ TOP OF LIST:
o $cpg
! csv bom
! fmtnum
- merge-field voids
- json comments
@ -205,6 +202,8 @@ NON-BLOCKERS
* print w/ #{...}; defer variadic printf
* meta: nf,nr,keys?
* mlr -f {arg}, mlr -F {arg}, etc
* non-streaming DSL-enabled cut
https://github.com/johnkerl/miller/discussions/613
@ -223,6 +222,8 @@ NON-BLOCKERS
* precedence for `:` in slicing syntax
* full format-string parser for corner cases like "X%08lldX"
* more of:
o colored-shapes.dkvp -> csv; also mkdat2
o data/small -> csv throughout. and/or just use example.csv
@ -435,7 +436,6 @@ i emitp/emitf -- note for-loops didn't appear until 4.1.0 & emits are much older
o if keep, need careful MT_VOID at from-string constructor -- ? or not ?
o comment clearly regardless
* bitwise_and_dispositions et al should not have _absn for collections -- _erro instead
* fmtnum: auto-replace %l_, %ll_ -> no l/ll for backwards compatibility, and comment accordingly
* ast-parex separate mlr auxents entrypoint?
* port u/window*.mlr from mlrc to mlrgo (actually, fix mlrgo of course)
* line/column caret at parse-error messages -- would require some GOCC refactoring

View file

@ -2035,7 +2035,7 @@ FUNCTIONS FOR FILTER/PUT
(class=math #args=1) Floor: nearest integer at or below.
fmtnum
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%06lld")'.
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.
fold
(class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument.
@ -2947,4 +2947,4 @@ SEE ALSO
2021-10-28 MILLER(1)
2021-10-31 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2021-10-28
.\" Date: 2021-10-31
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2021-10-28" "\ \&" "\ \&"
.TH "MILLER" "1" "2021-10-31" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -2866,7 +2866,7 @@ Two-argument version: flatten($*, ".") is the same as flatten("", ".", $*).
.RS 0
.\}
.nf
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%06lld")'.
(class=conversion #args=2) Convert int/float/bool to string using printf-style format string, e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'.
.fi
.if n \{\
.RE