Absent variable on left side of boolean OR (||) expression makes it absent (#1434)

* Absent-handling with short-circuiting operators `&&` and `||`

* add a missing file

* artifacts from make dev

* type-errors

* doc content

* artifacts from make dev
This commit is contained in:
John Kerl 2023-12-02 16:00:05 -05:00 committed by GitHub
parent 3a3595e404
commit bae1daf847
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 268 additions and 72 deletions

View file

@ -11,6 +11,9 @@ build:
@echo "Build complete. The Miller executable is ./mlr (or .\mlr.exe on Windows)."
@echo "You can use 'make check' to run tests".
quiet:
@go build github.com/johnkerl/miller/cmd/mlr
# For interactive use, 'mlr regtest' offers more options and transparency.
check: unit-test regression-test
@echo "Tests complete. You can use 'make install' if you like, optionally preceded"

View file

@ -0,0 +1,2 @@
data/a.csv
data/b.csv

View file

@ -178,6 +178,7 @@ MILLER(1) MILLER(1)
mlr help mlrrc
mlr help output-colorization
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
Shorthands:
mlr -g = mlr help flags
mlr -l = mlr help list-verbs
@ -3648,5 +3649,5 @@ MILLER(1) MILLER(1)
2023-11-12 MILLER(1)
2023-12-02 MILLER(1)
</pre>

View file

@ -157,6 +157,7 @@ MILLER(1) MILLER(1)
mlr help mlrrc
mlr help output-colorization
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
Shorthands:
mlr -g = mlr help flags
mlr -l = mlr help list-verbs
@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1)
2023-11-12 MILLER(1)
2023-12-02 MILLER(1)

View file

@ -86,6 +86,7 @@ Other:
mlr help mlrrc
mlr help output-colorization
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
Shorthands:
mlr -g = mlr help flags
mlr -l = mlr help list-verbs

View file

@ -154,8 +154,7 @@ with 1) for too-long rows:
},
{
"a": 4,
"b": 5,
"c": ""
"b": 5
},
{
"a": 7,
@ -455,7 +454,9 @@ Miller handles explicit header changes as just shown. If your CSV input contains
<pre class="pre-non-highlight-in-pair">
a,b,c
1,2,3
4,5,
a,b
4,5
a,b,c,4
7,8,9,10

View file

@ -239,17 +239,44 @@ resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970
## Arithmetic rules
If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar):
If you're interested in a formal description of how empty and absent fields participate in arithmetic, here are tables for `+`, `&&`, and `||`. Notes:
* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`.
* The `&&` and `||` obey _short-circuiting semantics_. That is:
* `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
* `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
* This means in particular that:
* `false && X` is false even if `X` is an error, a non-boolean type, etc.
* `true || X` is true even if `X` is an error, a non-boolean type, etc.
<pre class="pre-highlight-in-pair">
<b>mlr help type-arithmetic-info</b>
<b>mlr help type-arithmetic-info-extended</b>
</pre>
<pre class="pre-non-highlight-in-pair">
(+) | 1 2.5 (empty) (absent) (error)
------ + ------ ------ ------ ------ ------
1 | 2 3.5 1 1 (error)
2.5 | 3.5 5 2.5 2.5 (error)
(empty) | 1 2.5 (empty) (absent) (error)
(absent) | 1 2.5 (absent) (absent) (error)
(error) | (error) (error) (error) (error) (error)
(+) | 1 2.5 true (empty) (absent) (error)
------ + ------ ------ ------ ------ ------ ------
1 | 2 3.5 (error) 1 1 (error)
2.5 | 3.5 5 (error) 2.5 2.5 (error)
true | (error) (error) (error) (error) (error) (error)
(empty) | 1 2.5 (error) (empty) (absent) (error)
(absent) | 1 2.5 (error) (absent) (absent) (error)
(error) | (error) (error) (error) (error) (error) (error)
(&&) | true false 3 (empty) (absent) (error)
------ + ------ ------ ------ ------ ------ ------
true | true false (error) (error) (absent) (error)
false | false false false false false false
3 | (error) (error) (error) (error) (absent) (error)
(empty) | true false (error) (error) (absent) (error)
(absent) | true false (error) (absent) (absent) (error)
(error) | (error) (error) (error) (error) (error) (error)
(||) | true false 3 (empty) (absent) (error)
------ + ------ ------ ------ ------ ------ ------
true | true true true true true true
false | true false (error) (error) (absent) (error)
3 | (error) (error) (error) (error) (absent) (error)
(empty) | true false (error) (error) (absent) (error)
(absent) | true false (error) (absent) (absent) (error)
(error) | (error) (error) (error) (error) (error) (error)
</pre>

View file

@ -119,8 +119,16 @@ GENMD-EOF
## Arithmetic rules
If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar):
If you're interested in a formal description of how empty and absent fields participate in arithmetic, here are tables for `+`, `&&`, and `||`. Notes:
* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`.
* The `&&` and `||` obey _short-circuiting semantics_. That is:
* `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
* `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
* This means in particular that:
* `false && X` is false even if `X` is an error, a non-boolean type, etc.
* `true || X` is true even if `X` is an error, a non-boolean type, etc.
GENMD-RUN-COMMAND
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
GENMD-EOF

View file

@ -157,6 +157,7 @@ MILLER(1) MILLER(1)
mlr help mlrrc
mlr help output-colorization
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
Shorthands:
mlr -g = mlr help flags
mlr -l = mlr help list-verbs
@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1)
2023-11-12 MILLER(1)
2023-12-02 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2023-11-11
.\" Date: 2023-12-02
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2023-11-11" "\ \&" "\ \&"
.TH "MILLER" "1" "2023-12-02" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -192,6 +192,7 @@ Other:
mlr help mlrrc
mlr help output-colorization
mlr help type-arithmetic-info
mlr help type-arithmetic-info-extended
Shorthands:
mlr -g = mlr help flags
mlr -l = mlr help list-verbs

View file

@ -32,7 +32,7 @@ func (root *RootNode) BuildBuiltinFunctionCallsiteNode(
if builtinFunctionInfo.hasMultipleArities { // E.g. "+" and "-"
return root.BuildMultipleArityFunctionCallsiteNode(astNode, builtinFunctionInfo)
} else if builtinFunctionInfo.zaryFunc != nil {
return root.BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo)
return BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo)
} else if builtinFunctionInfo.unaryFunc != nil {
return root.BuildUnaryFunctionCallsiteNode(astNode, builtinFunctionInfo)
} else if builtinFunctionInfo.unaryFuncWithContext != nil {
@ -89,7 +89,7 @@ type ZaryFunctionCallsiteNode struct {
zaryFunc bifs.ZaryFunc
}
func (root *RootNode) BuildZaryFunctionCallsiteNode(
func BuildZaryFunctionCallsiteNode(
astNode *dsl.ASTNode,
builtinFunctionInfo *BuiltinFunctionInfo,
) (IEvaluable, error) {
@ -228,25 +228,25 @@ func (root *RootNode) BuildBinaryFunctionCallsiteNode(
// Special short-circuiting cases
if builtinFunctionInfo.name == "&&" {
return root.BuildLogicalANDOperatorNode(
return BuildLogicalANDOperatorNode(
evaluable1,
evaluable2,
), nil
}
if builtinFunctionInfo.name == "||" {
return root.BuildLogicalOROperatorNode(
return BuildLogicalOROperatorNode(
evaluable1,
evaluable2,
), nil
}
if builtinFunctionInfo.name == "??" {
return root.BuildAbsentCoalesceOperatorNode(
return BuildAbsentCoalesceOperatorNode(
evaluable1,
evaluable2,
), nil
}
if builtinFunctionInfo.name == "???" {
return root.BuildEmptyCoalesceOperatorNode(
return BuildEmptyCoalesceOperatorNode(
evaluable1,
evaluable2,
), nil
@ -557,7 +557,7 @@ func (root *RootNode) BuildTernaryFunctionCallsiteNode(
// Special short-circuiting case
if builtinFunctionInfo.name == "?:" {
return root.BuildStandardTernaryOperatorNode(
return BuildStandardTernaryOperatorNode(
evaluable1,
evaluable2,
evaluable3,
@ -703,7 +703,7 @@ type LogicalANDOperatorNode struct {
a, b IEvaluable
}
func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode {
func BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode {
return &LogicalANDOperatorNode{
a: a,
b: b,
@ -712,53 +712,74 @@ func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOp
// This is different from most of the evaluator functions in that it does
// short-circuiting: since this is logical AND, the second argument is not evaluated
// if the first argument is false.
// if the first argument is false. Thus we cannot use disposition matrices.
//
// Disposition matrix:
//
// {
//a b ERROR ABSENT EMPTY STRING INT FLOAT BOOL
//ERROR : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
//ABSENT : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, absent},
//EMPTY : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
//STRING : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
//INT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
//FLOAT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
//BOOL : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, a&&b},
// }
//
// which without the all-error rows/columns reduces to
//
// {
//a b ABSENT BOOL
//ABSENT : {absent, absent},
//BOOL : {absent, a&&b},
// }
//
// So:
// * Evaluate a
// * If a is not absent or bool: return error
// * If a is absent: return absent
// * If a is false: return a
// * Now a is boolean true
// * Evaluate b
// * If b is not absent or bool: return error
// * If b is absent: return absent
// * Return a && b
// * evaluate a
// * if a is error:
// * return a
// * elif a is absent:
// * Evaluate b
// * if b is error: return error
// * elif b is empty or absent: return absent
// * elif b is not boolean: return error
// * else: return b
// * elif a is empty:
// * evaluate b
// * if b is error: return error
// * elif b is empty: return error
// * elif b is absent: return absent
// * else: return b
// * else:
// * return the BIF (using its disposition matrix)
// mlr help type-arithmetic-info-extended | lumin -c red .error. | lumin -c blue .absent. | lumin -c green .empty.
func (node *LogicalANDOperatorNode) Evaluate(
state *runtime.State,
) *mlrval.Mlrval {
aout := node.a.Evaluate(state)
atype := aout.Type()
if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) {
return mlrval.FromNotNamedTypeError("&&", aout, "absent or boolean")
if atype == mlrval.MT_ERROR {
return aout
}
if atype == mlrval.MT_ABSENT {
return mlrval.ABSENT
bout := node.b.Evaluate(state)
btype := bout.Type()
if btype == mlrval.MT_ERROR {
return bout
}
if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT {
return mlrval.ABSENT
}
if btype != mlrval.MT_BOOL {
return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean")
}
return bout
}
if atype == mlrval.MT_VOID {
bout := node.b.Evaluate(state)
btype := bout.Type()
if btype == mlrval.MT_ERROR {
return bout
}
if btype == mlrval.MT_VOID {
return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean")
}
if btype == mlrval.MT_ABSENT {
return mlrval.ABSENT
}
if btype != mlrval.MT_BOOL {
return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean")
}
return bout
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if aout.IsFalse() {
// This means false && bogus type evaluates to true, which is sad but
// This means false && bogus type evaluates to false, which is sad but
// which we MUST do in order to not violate the short-circuiting
// property. We would have to evaluate b to know if it were error or
// not.
@ -782,7 +803,7 @@ type LogicalOROperatorNode struct {
a, b IEvaluable
}
func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode {
func BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode {
return &LogicalOROperatorNode{
a: a,
b: b,
@ -792,19 +813,54 @@ func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROper
// This is different from most of the evaluator functions in that it does
// short-circuiting: since this is logical OR, the second argument is not evaluated
// if the first argument is true.
//
// See the disposition-matrix discussion for LogicalANDOperator.
func (node *LogicalOROperatorNode) Evaluate(
state *runtime.State,
) *mlrval.Mlrval {
aout := node.a.Evaluate(state)
atype := aout.Type()
if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) {
return mlrval.FromNotNamedTypeError("||", aout, "absent or boolean")
if atype == mlrval.MT_ERROR {
return aout
}
if atype == mlrval.MT_ABSENT {
return mlrval.ABSENT
bout := node.b.Evaluate(state)
btype := bout.Type()
if btype == mlrval.MT_ERROR {
return bout
}
if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT {
return mlrval.ABSENT
}
if btype == mlrval.MT_VOID {
return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean")
}
if btype != mlrval.MT_BOOL {
return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean")
}
return bout
}
if atype == mlrval.MT_VOID {
bout := node.b.Evaluate(state)
btype := bout.Type()
if btype == mlrval.MT_ERROR {
return bout
}
if btype == mlrval.MT_VOID {
return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean")
}
if btype == mlrval.MT_ABSENT {
return mlrval.ABSENT
}
if btype != mlrval.MT_BOOL {
return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean")
}
return bout
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if aout.IsTrue() {
// This means true || bogus type evaluates to true, which is sad but
// which we MUST do in order to not violate the short-circuiting
@ -821,6 +877,7 @@ func (node *LogicalOROperatorNode) Evaluate(
if btype == mlrval.MT_ABSENT {
return mlrval.ABSENT
}
return bifs.BIF_logical_OR(aout, bout)
}
@ -829,7 +886,7 @@ func (node *LogicalOROperatorNode) Evaluate(
// current record has no field $foo.
type AbsentCoalesceOperatorNode struct{ a, b IEvaluable }
func (root *RootNode) BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode {
func BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode {
return &AbsentCoalesceOperatorNode{a: a, b: b}
}
@ -852,7 +909,7 @@ func (node *AbsentCoalesceOperatorNode) Evaluate(
// when the current record has no field $foo, or when $foo is empty.
type EmptyCoalesceOperatorNode struct{ a, b IEvaluable }
func (root *RootNode) BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode {
func BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode {
return &EmptyCoalesceOperatorNode{a: a, b: b}
}
@ -874,7 +931,7 @@ func (node *EmptyCoalesceOperatorNode) Evaluate(
// ================================================================
type StandardTernaryOperatorNode struct{ a, b, c IEvaluable }
func (root *RootNode) BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode {
func BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode {
return &StandardTernaryOperatorNode{a: a, b: b, c: c}
}
func (node *StandardTernaryOperatorNode) Evaluate(

View file

@ -375,6 +375,24 @@ func (node *NullLiteralNode) Evaluate(
return node.literal
}
// ----------------------------------------------------------------
// MlrvalLiteralNode is an evaluable node wrapping a single fixed mlrval.
// Used for testing purposes; not used by the main DSL.
type MlrvalLiteralNode struct {
literal *mlrval.Mlrval
}
// BuildMlrvalLiteralNode returns a new MlrvalLiteralNode whose stored value
// is literal.Copy(), rather than the pointer that was passed in.
func BuildMlrvalLiteralNode(literal *mlrval.Mlrval) *MlrvalLiteralNode {
	node := new(MlrvalLiteralNode)
	node.literal = literal.Copy()
	return node
}
// Evaluate returns the mlrval held by the node. The same pointer is returned
// on every call, and the state argument is not consulted.
func (node *MlrvalLiteralNode) Evaluate(
state *runtime.State,
) *mlrval.Mlrval {
return node.literal
}
// ================================================================
func (root *RootNode) BuildContextVariableNode(astNode *dsl.ASTNode) (IEvaluable, error) {
lib.InternalCodingErrorIf(astNode.Token == nil)

View file

@ -16,6 +16,7 @@ import (
"github.com/johnkerl/miller/pkg/dsl/cst"
"github.com/johnkerl/miller/pkg/lib"
"github.com/johnkerl/miller/pkg/mlrval"
"github.com/johnkerl/miller/pkg/runtime"
"github.com/johnkerl/miller/pkg/transformers"
)
@ -114,6 +115,7 @@ func init() {
{name: "mlrrc", zaryHandlerFunc: helpMlrrc},
{name: "output-colorization", zaryHandlerFunc: helpOutputColorization},
{name: "type-arithmetic-info", zaryHandlerFunc: helpTypeArithmeticInfo},
{name: "type-arithmetic-info-extended", zaryHandlerFunc: helpTypeArithmeticInfoExtended},
},
},
{
@ -483,9 +485,18 @@ func helpOutputColorization() {
// ----------------------------------------------------------------
// helpTypeArithmeticInfo is the handler registered for
// 'mlr help type-arithmetic-info'. It asks helpTypeArithmeticInfoAux for the
// non-extended output, i.e. without the '&&' and '||' tables.
func helpTypeArithmeticInfo() {
helpTypeArithmeticInfoAux(false)
}
// helpTypeArithmeticInfoExtended is the handler registered for
// 'mlr help type-arithmetic-info-extended'. It asks helpTypeArithmeticInfoAux
// for the extended output, which additionally prints the '&&' and '||' tables.
func helpTypeArithmeticInfoExtended() {
helpTypeArithmeticInfoAux(true)
}
func helpTypeArithmeticInfoAux(extended bool) {
mlrvals := []*mlrval.Mlrval{
mlrval.FromInt(1),
mlrval.FromFloat(2.5),
mlrval.FromBool(true),
mlrval.VOID,
mlrval.ABSENT,
mlrval.FromAnonymousError(),
@ -524,6 +535,70 @@ func helpTypeArithmeticInfo() {
fmt.Println()
}
if !extended {
return
}
mlrvals = []*mlrval.Mlrval{
mlrval.FromBool(true),
mlrval.FromBool(false),
mlrval.FromInt(3),
mlrval.VOID,
mlrval.ABSENT,
mlrval.FromAnonymousError(),
}
n = len(mlrvals)
state := runtime.NewEmptyState(cli.DefaultOptions(), false)
descs := []string{"(&&)", "(||)"}
for k, desc := range descs {
fmt.Println()
for i := -2; i < n; i++ {
if i == -2 {
fmt.Printf("%-10s |", desc)
} else if i == -1 {
fmt.Printf("%-10s +", "------")
} else if mlrvals[i].IsVoid() {
fmt.Printf("%-10s |", "(empty)")
} else {
fmt.Printf("%-10s |", mlrvals[i].String())
}
for j := 0; j < n; j++ {
if i == -2 {
if mlrvals[j].IsVoid() {
fmt.Printf("%-10s", "(empty)")
} else {
fmt.Printf(" %-10s", mlrvals[j].String())
}
} else if i == -1 {
fmt.Printf(" %-10s", "------")
} else {
inode := cst.BuildMlrvalLiteralNode(mlrvals[i])
jnode := cst.BuildMlrvalLiteralNode(mlrvals[j])
var binary_node cst.IEvaluable
if k == 0 {
binary_node = cst.BuildLogicalANDOperatorNode(inode, jnode)
} else {
binary_node = cst.BuildLogicalOROperatorNode(inode, jnode)
}
output := binary_node.Evaluate(state)
if output.IsVoid() {
fmt.Printf(" %-10s", "(empty)")
} else {
fmt.Printf(" %-10s", output.String())
}
}
}
fmt.Println()
}
}
}
// ----------------------------------------------------------------