diff --git a/docs/src/manpage.md b/docs/src/manpage.md index eee87469b..120b4b550 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2388,7 +2388,15 @@ MILLER(1) MILLER(1) (class=system #args=0) Returns the hostname as a string. 1mint0m - (class=conversion #args=1) Convert int/float/bool/string to int. + (class=conversion #args=1,2) Convert int/float/bool/string to int. If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix. If the second argument is provided and the first argument is a string, the second argument is used as the base. If the second argument is provided and the first argument is not a string, the second argument is ignored. + Examples: + int("345") gives decimal 345 (base-10/decimal input is inferred) + int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred) + int("0377") gives decimal 255 (base-8/octal input is inferred) + int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred) + int("0377", 10) gives decimal 377 + int(345, 16) gives decimal 345 + int(string(345), 16) gives decimal 837 1minvqnorm0m (class=math #args=1) Inverse of normal cumulative distribution function. Note that invqorm(urand()) is normally distributed. @@ -3338,5 +3346,5 @@ MILLER(1) MILLER(1) - 2023-03-12 MILLER(1) + 2023-03-24 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 72c8a2c67..c78f1b415 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2367,7 +2367,15 @@ MILLER(1) MILLER(1) (class=system #args=0) Returns the hostname as a string. 1mint0m - (class=conversion #args=1) Convert int/float/bool/string to int. + (class=conversion #args=1,2) Convert int/float/bool/string to int. If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix. If the second argument is provided and the first argument is a string, the second argument is used as the base. 
If the second argument is provided and the first argument is not a string, the second argument is ignored. + Examples: + int("345") gives decimal 345 (base-10/decimal input is inferred) + int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred) + int("0377") gives decimal 255 (base-8/octal input is inferred) + int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred) + int("0377", 10) gives decimal 377 + int(345, 16) gives decimal 345 + int(string(345), 16) gives decimal 837 1minvqnorm0m (class=math #args=1) Inverse of normal cumulative distribution function. Note that invqorm(urand()) is normally distributed. @@ -3317,4 +3325,4 @@ MILLER(1) MILLER(1) - 2023-03-12 MILLER(1) + 2023-03-24 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 116c00be7..90bf09e00 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -547,7 +547,15 @@ hexfmt (class=conversion #args=1) Convert int to hex string, e.g. 255 to "0xff" ### int
-int (class=conversion #args=1) Convert int/float/bool/string to int.
+int (class=conversion #args=1,2) Convert int/float/bool/string to int. If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix. If the second argument is provided and the first argument is a string, the second argument is used as the base. If the second argument is provided and the first argument is not a string, the second argument is ignored.
+Examples:
+int("345") gives decimal 345 (base-10/decimal input is inferred)
+int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred)
+int("0377") gives decimal 255 (base-8/octal input is inferred)
+int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred)
+int("0377", 10) gives decimal 377
+int(345, 16) gives decimal 345
+int(string(345), 16) gives decimal 837
diff --git a/internal/pkg/bifs/types.go b/internal/pkg/bifs/types.go
index f1e1b6a30..b876aa780 100644
--- a/internal/pkg/bifs/types.go
+++ b/internal/pkg/bifs/types.go
@@ -55,6 +55,53 @@ func BIF_int(input1 *mlrval.Mlrval) *mlrval.Mlrval {
return to_int_dispositions[input1.Type()](input1)
}
+// ----------------------------------------------------------------
+// string_to_int_with_base parses the string value of input1 as an integer,
+// using the int value of input2 as the base. It returns mlrval.ERROR when
+// lib.TryIntFromStringWithBase reports that the parse failed.
+func string_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
+	text := input1.AcquireStringValue()
+	base := input2.AcquireIntValue()
+	parsed, ok := lib.TryIntFromStringWithBase(text, base)
+	if !ok {
+		return mlrval.ERROR
+	}
+	return mlrval.FromInt(parsed)
+}
+
+// int_to_int_with_base handles int(x, base) when x is already an int: the
+// result is a new int mlrval carrying the same value. input2 (the base) is
+// not consulted.
+func int_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
+	// AcquireIntValue already yields an int64, so no conversion is needed.
+	return mlrval.FromInt(input1.AcquireIntValue())
+}
+
+// float_to_int_with_base handles int(x, base) when x is a float: the float
+// value is converted with Go's int64() conversion, which drops the fractional
+// part. input2 (the base) is not consulted.
+func float_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
+	truncated := int64(input1.AcquireFloatValue())
+	return mlrval.FromInt(truncated)
+}
+
+// bool_to_int_with_base handles int(x, base) when x is a boolean: true maps
+// to int 1 and false to int 0. input2 (the base) is not consulted.
+func bool_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
+	if input1.AcquireBoolValue() {
+		return mlrval.FromInt(1)
+	}
+	return mlrval.FromInt(0)
+}
+
+// to_int_with_base_dispositions selects the two-argument int conversion by
+// the type of the first argument: BIF_int_with_base indexes it with
+// input1.Type(). The per-line labels name the type each slot is meant for.
+// NOTE(review): the entries are positional, so their order presumably has to
+// match the declaration order of the mlrval type constants -- confirm.
+var to_int_with_base_dispositions = [mlrval.MT_DIM]BinaryFunc{
+ /*INT */ int_to_int_with_base,
+ /*FLOAT */ float_to_int_with_base,
+ /*BOOL */ bool_to_int_with_base,
+ /*VOID */ _void,
+ /*STRING */ string_to_int_with_base,
+ /*ARRAY */ _erro,
+ /*MAP */ _erro,
+ /*FUNC */ _erro,
+ /*ERROR */ _erro,
+ /*NULL */ _null,
+ /*ABSENT */ _absn,
+}
+
+// BIF_int_with_base is the two-argument form of the DSL int function:
+// input1 is the value to convert and input2 is the base. It returns
+// mlrval.ERROR unless input2 is an int; otherwise the conversion is chosen
+// from to_int_with_base_dispositions by the type of input1.
+//
+// NOTE(review): the function's help text says the second argument is ignored
+// when the first is not a string, yet a non-int second argument produces an
+// error here for every input1 type -- confirm that is intended.
+func BIF_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
+	if !input2.IsInt() {
+		return mlrval.ERROR
+	}
+	convert := to_int_with_base_dispositions[input1.Type()]
+	return convert(input1, input2)
+}
+
// ----------------------------------------------------------------
func string_to_float(input1 *mlrval.Mlrval) *mlrval.Mlrval {
f, ok := lib.TryFloatFromString(input1.AcquireStringValue())
diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go
index 72ae7558b..8a6ca2d67 100644
--- a/internal/pkg/dsl/cst/builtin_function_manager.go
+++ b/internal/pkg/dsl/cst/builtin_function_manager.go
@@ -1529,10 +1529,25 @@ Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather t
},
{
- name: "int",
- class: FUNC_CLASS_CONVERSION,
- help: "Convert int/float/bool/string to int.",
- unaryFunc: bifs.BIF_int,
+ name: "int",
+ class: FUNC_CLASS_CONVERSION,
+ help: `Convert int/float/bool/string to int.
+If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix.
+If the second argument is provided and the first argument is a string, the second argument is used as the base.
+If the second argument is provided and the first argument is not a string, the second argument is ignored.`,
+
+ unaryFunc: bifs.BIF_int,
+ binaryFunc: bifs.BIF_int_with_base,
+ hasMultipleArities: true,
+ examples: []string{
+ `int("345") gives decimal 345 (base-10/decimal input is inferred)`,
+ `int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred)`,
+ `int("0377") gives decimal 255 (base-8/octal input is inferred)`,
+ `int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred)`,
+ `int("0377", 10) gives decimal 377`,
+ `int(345, 16) gives decimal 345`,
+ `int(string(345), 16) gives decimal 837`,
+ },
},
{
diff --git a/internal/pkg/lib/util.go b/internal/pkg/lib/util.go
index 00c139da3..4a8faa86d 100644
--- a/internal/pkg/lib/util.go
+++ b/internal/pkg/lib/util.go
@@ -122,6 +122,27 @@ func TryIntFromString(input string) (int64, bool) {
return 0, false
}
+// TryIntFromStringWithBase parses input as an integer in the caller-chosen
+// base, rather than inferring the base from a 0x prefix, etc. as
+// TryIntFromString does. It returns the parsed value and true on success, or
+// 0 and false when input cannot be parsed in that base.
+//
+// Bases 2 through 36 are accepted, as is 0, which strconv treats as "infer
+// the base from the prefix". Values that fit in 64 bits only as unsigned are
+// reinterpreted as signed, so "ffffffffffffffff" in base 16 comes back as -1.
+func TryIntFromStringWithBase(input string, base int64) (int64, bool) {
+	// Reject unusable bases while base is still an int64. Narrowing to int
+	// first could wrap an out-of-range value into a valid base on platforms
+	// where int is 32 bits.
+	if base < 0 || base == 1 || base > 36 {
+		return 0, false
+	}
+	radix := int(base)
+
+	// Go's strconv parses "1_2" as 12; not OK for Miller syntax. (Also not valid JSON.)
+	for i := 0; i < len(input); i++ {
+		if input[i] == '_' {
+			return 0, false
+		}
+	}
+
+	if i64, err := strconv.ParseInt(input, radix, 64); err == nil {
+		return i64, true
+	}
+	// Signed parsing rejects magnitudes above the int64 maximum; fall back to
+	// unsigned parsing and reinterpret the bits as signed.
+	if u64, err := strconv.ParseUint(input, radix, 64); err == nil {
+		return int64(u64), true
+	}
+	return 0, false
+}
+
+
func TryFloatFromString(input string) (float64, bool) {
// Go's strconv parses "1_2.3_4" as 12.34; not OK for Miller syntax. (Also not valid JSON.)
for i := 0; i < len(input); i++ {
diff --git a/man/manpage.txt b/man/manpage.txt
index 72c8a2c67..c78f1b415 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -2367,7 +2367,15 @@ MILLER(1) MILLER(1)
(class=system #args=0) Returns the hostname as a string.
1mint0m
- (class=conversion #args=1) Convert int/float/bool/string to int.
+ (class=conversion #args=1,2) Convert int/float/bool/string to int. If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix. If the second argument is provided and the first argument is a string, the second argument is used as the base. If the second argument is provided and the first argument is not a string, the second argument is ignored.
+ Examples:
+ int("345") gives decimal 345 (base-10/decimal input is inferred)
+ int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred)
+ int("0377") gives decimal 255 (base-8/octal input is inferred)
+ int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred)
+ int("0377", 10) gives decimal 377
+ int(345, 16) gives decimal 345
+ int(string(345), 16) gives decimal 837
1minvqnorm0m
(class=math #args=1) Inverse of normal cumulative distribution function. Note that invqorm(urand()) is normally distributed.
@@ -3317,4 +3325,4 @@ MILLER(1) MILLER(1)
- 2023-03-12 MILLER(1)
+ 2023-03-24 MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 296057e05..a56102ec0 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
-.\" Date: 2023-03-12
+.\" Date: 2023-03-24
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
-.TH "MILLER" "1" "2023-03-12" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-03-24" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3362,7 +3362,15 @@ gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\e1 : \e2]") gives "[prefix
.RS 0
.\}
.nf
- (class=conversion #args=1) Convert int/float/bool/string to int.
+ (class=conversion #args=1,2) Convert int/float/bool/string to int. If the second argument is omitted and the first argument is a string, base is inferred from the first argument's prefix. If the second argument is provided and the first argument is a string, the second argument is used as the base. If the second argument is provided and the first argument is not a string, the second argument is ignored.
+Examples:
+int("345") gives decimal 345 (base-10/decimal input is inferred)
+int("0xff") gives decimal 255 (base-16/hexadecimal input is inferred)
+int("0377") gives decimal 255 (base-8/octal input is inferred)
+int("0b11010011") gives decimal 211 which is hexadecimal 0xd3 (base-2/binary input is inferred)
+int("0377", 10) gives decimal 377
+int(345, 16) gives decimal 345
+int(string(345), 16) gives decimal 837
.fi
.if n \{\
.RE
diff --git a/test/cases/dsl-int-function/0001/cmd b/test/cases/dsl-int-function/0001/cmd
new file mode 100644
index 000000000..d7f76d762
--- /dev/null
+++ b/test/cases/dsl-int-function/0001/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input -S put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0001/experr b/test/cases/dsl-int-function/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0001/expout b/test/cases/dsl-int-function/0001/expout
new file mode 100644
index 000000000..ab8496f1a
--- /dev/null
+++ b/test/cases/dsl-int-function/0001/expout
@@ -0,0 +1,4 @@
+345 345
+0xff 255
+0377 255
+0b11010011 211
diff --git a/test/cases/dsl-int-function/0001/input b/test/cases/dsl-int-function/0001/input
new file mode 100644
index 000000000..99c79a8dc
--- /dev/null
+++ b/test/cases/dsl-int-function/0001/input
@@ -0,0 +1,4 @@
+345
+0xff
+0377
+0b11010011
diff --git a/test/cases/dsl-int-function/0001/mlr b/test/cases/dsl-int-function/0001/mlr
new file mode 100644
index 000000000..7296cbee4
--- /dev/null
+++ b/test/cases/dsl-int-function/0001/mlr
@@ -0,0 +1 @@
+$2 = int($1) + 0
diff --git a/test/cases/dsl-int-function/0002/cmd b/test/cases/dsl-int-function/0002/cmd
new file mode 100644
index 000000000..d7f76d762
--- /dev/null
+++ b/test/cases/dsl-int-function/0002/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input -S put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0002/experr b/test/cases/dsl-int-function/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0002/expout b/test/cases/dsl-int-function/0002/expout
new file mode 100644
index 000000000..718c74309
--- /dev/null
+++ b/test/cases/dsl-int-function/0002/expout
@@ -0,0 +1,4 @@
+345 345
+ff (error)
+0377 377
+11010011 11010011
diff --git a/test/cases/dsl-int-function/0002/input b/test/cases/dsl-int-function/0002/input
new file mode 100644
index 000000000..a96c3f79b
--- /dev/null
+++ b/test/cases/dsl-int-function/0002/input
@@ -0,0 +1,4 @@
+345
+ff
+0377
+11010011
diff --git a/test/cases/dsl-int-function/0002/mlr b/test/cases/dsl-int-function/0002/mlr
new file mode 100644
index 000000000..6abe84bfd
--- /dev/null
+++ b/test/cases/dsl-int-function/0002/mlr
@@ -0,0 +1 @@
+$2 = int($1, 10) + 0
diff --git a/test/cases/dsl-int-function/0003/cmd b/test/cases/dsl-int-function/0003/cmd
new file mode 100644
index 000000000..d7f76d762
--- /dev/null
+++ b/test/cases/dsl-int-function/0003/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input -S put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0003/experr b/test/cases/dsl-int-function/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0003/expout b/test/cases/dsl-int-function/0003/expout
new file mode 100644
index 000000000..221c7d603
--- /dev/null
+++ b/test/cases/dsl-int-function/0003/expout
@@ -0,0 +1,4 @@
+345 837
+ff 255
+0377 887
+11010011 285278225
diff --git a/test/cases/dsl-int-function/0003/input b/test/cases/dsl-int-function/0003/input
new file mode 100644
index 000000000..a96c3f79b
--- /dev/null
+++ b/test/cases/dsl-int-function/0003/input
@@ -0,0 +1,4 @@
+345
+ff
+0377
+11010011
diff --git a/test/cases/dsl-int-function/0003/mlr b/test/cases/dsl-int-function/0003/mlr
new file mode 100644
index 000000000..200604370
--- /dev/null
+++ b/test/cases/dsl-int-function/0003/mlr
@@ -0,0 +1 @@
+$2 = int($1, 16) + 0
diff --git a/test/cases/dsl-int-function/0004/cmd b/test/cases/dsl-int-function/0004/cmd
new file mode 100644
index 000000000..d7f76d762
--- /dev/null
+++ b/test/cases/dsl-int-function/0004/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input -S put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0004/experr b/test/cases/dsl-int-function/0004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0004/expout b/test/cases/dsl-int-function/0004/expout
new file mode 100644
index 000000000..5aa6437f4
--- /dev/null
+++ b/test/cases/dsl-int-function/0004/expout
@@ -0,0 +1,4 @@
+345 229
+ff (error)
+0377 255
+11010011 2363401
diff --git a/test/cases/dsl-int-function/0004/input b/test/cases/dsl-int-function/0004/input
new file mode 100644
index 000000000..a96c3f79b
--- /dev/null
+++ b/test/cases/dsl-int-function/0004/input
@@ -0,0 +1,4 @@
+345
+ff
+0377
+11010011
diff --git a/test/cases/dsl-int-function/0004/mlr b/test/cases/dsl-int-function/0004/mlr
new file mode 100644
index 000000000..b43ceab48
--- /dev/null
+++ b/test/cases/dsl-int-function/0004/mlr
@@ -0,0 +1 @@
+$2 = int($1, 8) + 0
diff --git a/test/cases/dsl-int-function/0005/cmd b/test/cases/dsl-int-function/0005/cmd
new file mode 100644
index 000000000..d7f76d762
--- /dev/null
+++ b/test/cases/dsl-int-function/0005/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input -S put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0005/experr b/test/cases/dsl-int-function/0005/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0005/expout b/test/cases/dsl-int-function/0005/expout
new file mode 100644
index 000000000..edd424e78
--- /dev/null
+++ b/test/cases/dsl-int-function/0005/expout
@@ -0,0 +1,4 @@
+345 (error)
+ff (error)
+0377 (error)
+11010011 211
diff --git a/test/cases/dsl-int-function/0005/input b/test/cases/dsl-int-function/0005/input
new file mode 100644
index 000000000..a96c3f79b
--- /dev/null
+++ b/test/cases/dsl-int-function/0005/input
@@ -0,0 +1,4 @@
+345
+ff
+0377
+11010011
diff --git a/test/cases/dsl-int-function/0005/mlr b/test/cases/dsl-int-function/0005/mlr
new file mode 100644
index 000000000..c7010266e
--- /dev/null
+++ b/test/cases/dsl-int-function/0005/mlr
@@ -0,0 +1 @@
+$2 = int($1, 2) + 0
diff --git a/test/cases/dsl-int-function/0006/cmd b/test/cases/dsl-int-function/0006/cmd
new file mode 100644
index 000000000..44a3e2871
--- /dev/null
+++ b/test/cases/dsl-int-function/0006/cmd
@@ -0,0 +1 @@
+mlr --nidx --from ${CASEDIR}/input put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-int-function/0006/experr b/test/cases/dsl-int-function/0006/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-int-function/0006/expout b/test/cases/dsl-int-function/0006/expout
new file mode 100644
index 000000000..0e069fb60
--- /dev/null
+++ b/test/cases/dsl-int-function/0006/expout
@@ -0,0 +1,4 @@
+345 345
+ff (error)
+0377 255
+11010011 11010011
diff --git a/test/cases/dsl-int-function/0006/input b/test/cases/dsl-int-function/0006/input
new file mode 100644
index 000000000..a96c3f79b
--- /dev/null
+++ b/test/cases/dsl-int-function/0006/input
@@ -0,0 +1,4 @@
+345
+ff
+0377
+11010011
diff --git a/test/cases/dsl-int-function/0006/mlr b/test/cases/dsl-int-function/0006/mlr
new file mode 100644
index 000000000..b43ceab48
--- /dev/null
+++ b/test/cases/dsl-int-function/0006/mlr
@@ -0,0 +1 @@
+$2 = int($1, 8) + 0