Have clean_whitespace re-run type inference (#1464)

* Have `clean_whitespace` re-infer types

* make dev output

* unit-test files

* drive-by typofix

* make dev output
This commit is contained in:
John Kerl 2024-01-01 16:39:27 -07:00 committed by GitHub
parent 2f42c6f508
commit d2559b8387
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 38 additions and 13 deletions

View file

@ -2312,7 +2312,7 @@ MILLER(1) MILLER(1)
(class=math #args=1) Ceiling: nearest integer at or above.
1mclean_whitespace0m
(class=string #args=1) Same as collapse_whitespace and strip.
(class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
1mcollapse_whitespace0m
(class=string #args=1) Strip repeated whitespace from string.
@ -3011,7 +3011,7 @@ MILLER(1) MILLER(1)
strmatch(12345, "34") is true
1mstrmatchx0m
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
Examples:
strmatchx("a", "abc") returns:
{

View file

@ -2291,7 +2291,7 @@ MILLER(1) MILLER(1)
(class=math #args=1) Ceiling: nearest integer at or above.
1mclean_whitespace0m
(class=string #args=1) Same as collapse_whitespace and strip.
(class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
1mcollapse_whitespace0m
(class=string #args=1) Strip repeated whitespace from string.
@ -2990,7 +2990,7 @@ MILLER(1) MILLER(1)
strmatch(12345, "34") is true
1mstrmatchx0m
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
Examples:
strmatchx("a", "abc") returns:
{

View file

@ -1209,7 +1209,7 @@ capitalize (class=string #args=1) Convert string's first character to uppercase
### clean_whitespace
<pre class="pre-non-highlight-non-pair">
clean_whitespace (class=string #args=1) Same as collapse_whitespace and strip.
clean_whitespace (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
</pre>
@ -1364,7 +1364,7 @@ strmatch(12345, "34") is true
### strmatchx
<pre class="pre-non-highlight-non-pair">
strmatchx (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
strmatchx (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
Examples:
strmatchx("a", "abc") returns:
{

View file

@ -2291,7 +2291,7 @@ MILLER(1) MILLER(1)
(class=math #args=1) Ceiling: nearest integer at or above.
1mclean_whitespace0m
(class=string #args=1) Same as collapse_whitespace and strip.
(class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
1mcollapse_whitespace0m
(class=string #args=1) Strip repeated whitespace from string.
@ -2990,7 +2990,7 @@ MILLER(1) MILLER(1)
strmatch(12345, "34") is true
1mstrmatchx0m
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
Examples:
strmatchx("a", "abc") returns:
{

View file

@ -3100,7 +3100,7 @@ Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2}
.RS 0
.\}
.nf
(class=string #args=1) Same as collapse_whitespace and strip.
(class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
.fi
.if n \{\
.RE
@ -4675,7 +4675,7 @@ strmatch(12345, "34") is true
.RS 0
.\}
.nf
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
(class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
Examples:
strmatchx("a", "abc") returns:
{

View file

@ -344,11 +344,12 @@ func BIF_capitalize(input1 *mlrval.Mlrval) *mlrval.Mlrval {
// ----------------------------------------------------------------
func BIF_clean_whitespace(input1 *mlrval.Mlrval) *mlrval.Mlrval {
return BIF_strip(
mv := BIF_strip(
BIF_collapse_whitespace_regexp(
input1, _whitespace_regexp,
),
)
return mlrval.FromInferredType(mv.String())
}
// ================================================================

View file

@ -355,7 +355,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib
{
name: "strmatchx",
class: FUNC_CLASS_STRING,
help: `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.",
help: `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.",
examples: []string{
`strmatchx("a", "abc") returns:`,
` {`,
@ -444,7 +444,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib
{
name: "clean_whitespace",
class: FUNC_CLASS_STRING,
help: "Same as collapse_whitespace and strip.",
help: "Same as collapse_whitespace and strip, followed by type inference.",
unaryFunc: bifs.BIF_clean_whitespace,
},

View file

@ -0,0 +1 @@
mlr --icsv --ojson clean-whitespace then put -f ${CASEDIR}/mlr ${CASEDIR}/input.csv

View file

@ -0,0 +1,18 @@
[
{
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"e": 9,
"t": "int"
},
{
"a": 5,
"b": 6,
"c": 7,
"d": 8,
"e": 13,
"t": "int"
}
]

View file

@ -0,0 +1,3 @@
a, b, c, d
1, 2, 3, 4
5, 6, 7, 8
1 a b c d
2 1 2 3 4
3 5 6 7 8

View file

@ -0,0 +1,2 @@
$e = $d + 5;
$t = typeof($d)