diff --git a/internal/pkg/auxents/repl/entry.go b/internal/pkg/auxents/repl/entry.go index 9ebc8a4b5..a911767b8 100644 --- a/internal/pkg/auxents/repl/entry.go +++ b/internal/pkg/auxents/repl/entry.go @@ -88,6 +88,7 @@ func ReplMain(args []string) int { showPrompts := true astPrintMode := ASTPrintNone doWarnings := false + strictMode := false options := cli.DefaultOptions() for argi < argc /* variable increment: 1 or 2 depending on flag */ { @@ -117,6 +118,10 @@ func ReplMain(args []string) int { doWarnings = true argi++ + } else if args[argi] == "-z" { + strictMode = true + argi++ + } else if args[argi] == "--load" { if argc-argi < 2 { replUsage(replName, os.Stderr, 1) @@ -162,6 +167,7 @@ func ReplMain(args []string) int { showPrompts, astPrintMode, doWarnings, + strictMode, options, recordOutputFileName, recordOutputStream, diff --git a/internal/pkg/auxents/repl/session.go b/internal/pkg/auxents/repl/session.go index 4a73c3523..797d1f0fd 100644 --- a/internal/pkg/auxents/repl/session.go +++ b/internal/pkg/auxents/repl/session.go @@ -43,6 +43,7 @@ func NewRepl( showPrompts bool, astPrintMode ASTPrintMode, doWarnings bool, + strictMode bool, options *cli.TOptions, recordOutputFileName string, recordOutputStream *os.File, @@ -63,7 +64,7 @@ func NewRepl( // NR is 0, etc until/unless the user opens a file and reads records from it. context := types.NewContext() - runtimeState := runtime.NewEmptyState(options) + runtimeState := runtime.NewEmptyState(options, strictMode) runtimeState.Update(inrec, context) // The filter expression for the main Miller DSL is any non-assignment // statement like 'true' or '$x > 0.5' etc. 
For the REPL, we re-use this for @@ -78,7 +79,9 @@ func NewRepl( signal.Notify(sysToSignalHandlerChannel, os.Interrupt, syscall.SIGTERM) go controlCHandler(sysToSignalHandlerChannel, appSignalNotificationChannel) - cstRootNode := cst.NewEmptyRoot(&options.WriterOptions, cst.DSLInstanceTypeREPL).WithRedefinableUDFUDS() + cstRootNode := cst.NewEmptyRoot( + &options.WriterOptions, cst.DSLInstanceTypeREPL, + ).WithRedefinableUDFUDS().WithStrictMode(strictMode) // TODO diff --git a/internal/pkg/bifs/strings.go b/internal/pkg/bifs/strings.go index 46c168f7f..def2d9349 100644 --- a/internal/pkg/bifs/strings.go +++ b/internal/pkg/bifs/strings.go @@ -182,7 +182,7 @@ func BIF_strip(input1 *mlrval.Mlrval) *mlrval.Mlrval { // ---------------------------------------------------------------- func BIF_collapse_whitespace(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return BIF_collapse_whitespace_regexp(input1, WhitespaceRegexp()) + return BIF_collapse_whitespace_regexp(input1, _whitespace_regexp) } func BIF_collapse_whitespace_regexp(input1 *mlrval.Mlrval, whitespaceRegexp *regexp.Regexp) *mlrval.Mlrval { @@ -193,9 +193,7 @@ func BIF_collapse_whitespace_regexp(input1 *mlrval.Mlrval, whitespaceRegexp *reg } } -func WhitespaceRegexp() *regexp.Regexp { - return regexp.MustCompile(`\s+`) -} +var _whitespace_regexp = regexp.MustCompile(`\s+`) // ================================================================ func BIF_toupper(input1 *mlrval.Mlrval) *mlrval.Mlrval { @@ -239,7 +237,7 @@ func BIF_capitalize(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_clean_whitespace(input1 *mlrval.Mlrval) *mlrval.Mlrval { return BIF_strip( BIF_collapse_whitespace_regexp( - input1, WhitespaceRegexp(), + input1, _whitespace_regexp, ), ) } diff --git a/internal/pkg/dsl/cst/builtin_functions.go b/internal/pkg/dsl/cst/builtin_functions.go index 922e4fdcc..eb8ec1912 100644 --- a/internal/pkg/dsl/cst/builtin_functions.go +++ b/internal/pkg/dsl/cst/builtin_functions.go @@ -498,6 +498,7 @@ func (root 
*RootNode) BuildDotCallsiteNode( func (node *DotCallsiteNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { + // For strict mode, absence should be detected on the node.evaluable1 evaluator. value1 := node.evaluable1.Evaluate(state) mapvalue1 := value1.GetMap() @@ -506,7 +507,7 @@ func (node *DotCallsiteNode) Evaluate( // Case 1: map.attribute as shorthand for map["attribute"] value2 := mapvalue1.Get(node.string2) if value2 == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "map access ["+node.string2+"]") } else { return value2 } diff --git a/internal/pkg/dsl/cst/collections.go b/internal/pkg/dsl/cst/collections.go index 7cf4b2727..bf117b00e 100644 --- a/internal/pkg/dsl/cst/collections.go +++ b/internal/pkg/dsl/cst/collections.go @@ -52,7 +52,7 @@ func (node *ArrayLiteralNode) Evaluate( } // ---------------------------------------------------------------- -type CollectionIndexAccessNode struct { +type ArrayOrMapIndexAccessNode struct { baseEvaluable IEvaluable indexEvaluable IEvaluable } @@ -75,13 +75,13 @@ func (node *RootNode) BuildArrayOrMapIndexAccessNode( return nil, err } - return &CollectionIndexAccessNode{ + return &ArrayOrMapIndexAccessNode{ baseEvaluable: baseEvaluable, indexEvaluable: indexEvaluable, }, nil } -func (node *CollectionIndexAccessNode) Evaluate( +func (node *ArrayOrMapIndexAccessNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { baseMlrval := node.baseEvaluable.Evaluate(state) @@ -109,6 +109,7 @@ func (node *CollectionIndexAccessNode) Evaluate( return mlrval.FromString(string(runes[zindex])) } else if baseMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators.
return mlrval.ABSENT } else { return mlrval.ERROR @@ -162,6 +163,7 @@ func (node *ArraySliceAccessNode) Evaluate( upperIndexMlrval := node.upperIndexEvaluable.Evaluate(state) if baseMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } if baseMlrval.IsString() { @@ -229,7 +231,7 @@ func (node *PositionalFieldNameNode) Evaluate( ) *mlrval.Mlrval { indexMlrval := node.indexEvaluable.Evaluate(state) if indexMlrval.IsAbsent() { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$[[(absent)]]") } index, ok := indexMlrval.GetIntValue() @@ -239,7 +241,7 @@ func (node *PositionalFieldNameNode) Evaluate( name, ok := state.Inrec.GetNameAtPositionalIndex(index) if !ok { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$[["+indexMlrval.String()+"]]") } return mlrval.FromString(name) @@ -275,7 +277,7 @@ func (node *PositionalFieldValueNode) Evaluate( ) *mlrval.Mlrval { indexMlrval := node.indexEvaluable.Evaluate(state) if indexMlrval.IsAbsent() { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$[[[(absent)]]]") } index, ok := indexMlrval.GetIntValue() @@ -285,7 +287,7 @@ func (node *PositionalFieldValueNode) Evaluate( retval := state.Inrec.GetWithPositionalIndex(index) if retval == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$[[["+indexMlrval.String()+"]]]") } return retval @@ -330,6 +332,7 @@ func (node *ArrayOrMapPositionalNameAccessNode) Evaluate( indexMlrval := node.indexEvaluable.Evaluate(state) if indexMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } @@ -356,6 +359,7 @@ func (node *ArrayOrMapPositionalNameAccessNode) Evaluate( } } else if baseMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. 
return mlrval.ABSENT } else { @@ -402,6 +406,7 @@ func (node *ArrayOrMapPositionalValueAccessNode) Evaluate( indexMlrval := node.indexEvaluable.Evaluate(state) if indexMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } @@ -418,12 +423,14 @@ func (node *ArrayOrMapPositionalValueAccessNode) Evaluate( } else if baseMlrval.IsMap() { value := baseMlrval.GetMap().GetWithPositionalIndex(index) if value == nil { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } return value } else if baseMlrval.IsAbsent() { + // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } else { diff --git a/internal/pkg/dsl/cst/env.go b/internal/pkg/dsl/cst/env.go index d53fb80d9..24e2647bb 100644 --- a/internal/pkg/dsl/cst/env.go +++ b/internal/pkg/dsl/cst/env.go @@ -38,7 +38,7 @@ func (node *EnvironmentVariableNode) Evaluate( ) *mlrval.Mlrval { name := node.nameEvaluable.Evaluate(state) if name.IsAbsent() { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "ENV[(absent)]") } if !name.IsString() { return mlrval.ERROR diff --git a/internal/pkg/dsl/cst/evaluable.go b/internal/pkg/dsl/cst/evaluable.go index 203ee438b..82e1e063e 100644 --- a/internal/pkg/dsl/cst/evaluable.go +++ b/internal/pkg/dsl/cst/evaluable.go @@ -109,7 +109,7 @@ func (node *IndirectFieldValueNode) Evaluate( ) *mlrval.Mlrval { // TODO: err fieldName := node.fieldNameEvaluable.Evaluate(state) if fieldName.IsAbsent() { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$[(absent)]") } // For normal DSL use the CST validator will prohibit this from being @@ -118,7 +118,7 @@ func (node *IndirectFieldValueNode) Evaluate( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. 
if state.Inrec == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$*") } value, err := state.Inrec.GetWithMlrvalIndex(fieldName) @@ -129,7 +129,7 @@ func (node *IndirectFieldValueNode) Evaluate( os.Exit(1) } if value == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$["+fieldName.String()+"]") } return value } @@ -159,12 +159,12 @@ func (node *IndirectOosvarValueNode) Evaluate( ) *mlrval.Mlrval { // TODO: err oosvarName := node.oosvarNameEvaluable.Evaluate(state) if oosvarName.IsAbsent() { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "@[(absent)]") } value := state.Oosvars.Get(oosvarName.String()) if value == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "@["+oosvarName.String()+"]") } return value diff --git a/internal/pkg/dsl/cst/leaves.go b/internal/pkg/dsl/cst/leaves.go index e8f158b37..c5aeb56b0 100644 --- a/internal/pkg/dsl/cst/leaves.go +++ b/internal/pkg/dsl/cst/leaves.go @@ -102,11 +102,11 @@ func (node *DirectFieldRvalueNode) Evaluate( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator.
if state.Inrec == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "$*") } else { return mlrval.FromMap(state.Inrec) } @@ -149,7 +149,7 @@ func (node *DirectOosvarRvalueNode) Evaluate( ) *mlrval.Mlrval { value := state.Oosvars.Get(node.variableName) if value == nil { - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "@"+node.variableName) } else { return value } @@ -206,7 +206,7 @@ func (node *LocalVariableNode) Evaluate( // prerequisite since UDFs and BIFs are managed in quite different // structures. - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "local variable "+node.stackVariable.GetName()) } // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/lvalues.go b/internal/pkg/dsl/cst/lvalues.go index f6cb18171..826bb0d40 100644 --- a/internal/pkg/dsl/cst/lvalues.go +++ b/internal/pkg/dsl/cst/lvalues.go @@ -779,10 +779,19 @@ type LocalVariableLvalueNode struct { func (root *RootNode) BuildLocalVariableLvalueNode(astNode *dsl.ASTNode) (IAssignable, error) { lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeLocalVariable) + // TODO require type mask in strict mode + variableName := string(astNode.Token.Lit) typeName := "any" defineTypedAtScope := false - if astNode.Children != nil { // typed, like 'num x = 3' + if astNode.Children == nil { // untyped, like 'x = 3' + if root.strictMode { + return nil, fmt.Errorf( + "mlr: need typedecl such as \"var\", \"str\", \"num\", etc. 
for variable \"%s\" in strict mode", + variableName, + ) + } + } else { // typed, like 'num x = 3' typeNode := astNode.Children[0] lib.InternalCodingErrorIf(typeNode.Type != dsl.NodeTypeTypedecl) typeName = string(typeNode.Token.Lit) diff --git a/internal/pkg/dsl/cst/root.go b/internal/pkg/dsl/cst/root.go index eda36edc6..9a48f2a48 100644 --- a/internal/pkg/dsl/cst/root.go +++ b/internal/pkg/dsl/cst/root.go @@ -52,6 +52,12 @@ func (root *RootNode) WithRedefinableUDFUDS() *RootNode { return root } +// WithStrictMode allows for runtime handling of absent-reads and untyped assignments. +func (root *RootNode) WithStrictMode(strictMode bool) *RootNode { + root.strictMode = strictMode + return root +} + // ---------------------------------------------------------------- // ASTBuildVisitorFunc is a callback, used by RootNode's Build method, which diff --git a/internal/pkg/dsl/cst/types.go b/internal/pkg/dsl/cst/types.go index 39b1d121b..11464f90d 100644 --- a/internal/pkg/dsl/cst/types.go +++ b/internal/pkg/dsl/cst/types.go @@ -49,6 +49,7 @@ type RootNode struct { outputHandlerManagers *list.List recordWriterOptions *cli.TWriterOptions dslInstanceType DSLInstanceType // put, filter, repl + strictMode bool } // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/udf.go b/internal/pkg/dsl/cst/udf.go index 40b33b3e5..aafc2bd1b 100644 --- a/internal/pkg/dsl/cst/udf.go +++ b/internal/pkg/dsl/cst/udf.go @@ -265,7 +265,10 @@ func (site *UDFCallsite) EvaluateWithArguments( fmt.Fprint(os.Stderr, err) os.Exit(1) } - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck( + state.StrictMode, + "function "+udf.signature.funcOrSubrName+" implicit return value", + ) } // TODO: should be an internal coding error. 
This would be break or @@ -277,7 +280,10 @@ func (site *UDFCallsite) EvaluateWithArguments( fmt.Fprint(os.Stderr, err) os.Exit(1) } - return mlrval.ABSENT + return mlrval.ABSENT.StrictModeCheck( + state.StrictMode, + "function "+udf.signature.funcOrSubrName+" abnormal exit", + ) } // Definitely a Miller internal coding error if the user put 'return x' in @@ -290,6 +296,12 @@ func (site *UDFCallsite) EvaluateWithArguments( fmt.Fprint(os.Stderr, err) os.Exit(1) } + + blockExitPayload.blockReturnValue.StrictModeCheck( + state.StrictMode, + "function "+udf.signature.funcOrSubrName+" return value", + ) + return blockExitPayload.blockReturnValue.Copy() } diff --git a/internal/pkg/mlrval/mlrval_get.go b/internal/pkg/mlrval/mlrval_get.go index 43ea67a5a..cdf9775f9 100644 --- a/internal/pkg/mlrval/mlrval_get.go +++ b/internal/pkg/mlrval/mlrval_get.go @@ -146,3 +146,13 @@ func (mv *Mlrval) GetNumericToFloatValueOrDie() (floatValue float64) { func (mv *Mlrval) AssertNumeric() { _ = mv.GetNumericToFloatValueOrDie() } + +// StrictModeCheck returns mv unchanged unless strictMode is true and mv is absent; in that +// case it prints a message naming description to stderr and exits the process with status 1. +func (mv *Mlrval) StrictModeCheck(strictMode bool, description string) *Mlrval { + if strictMode && mv.IsAbsent() { + fmt.Fprintf(os.Stderr, "mlr: %s is absent and strict mode was requested.\n", description) + os.Exit(1) + } + return mv +} diff --git a/internal/pkg/runtime/state.go b/internal/pkg/runtime/state.go index 7df865705..8c3a6caca 100644 --- a/internal/pkg/runtime/state.go +++ b/internal/pkg/runtime/state.go @@ -27,9 +27,12 @@ type State struct { // '$x =~ "(..)_(...)"', and interpolated via things like '$y = "\2:\1"'. RegexCaptures []string Options *cli.TOptions + + // StrictMode allows for runtime handling of absent-reads and untyped assignments.
+ StrictMode bool } -func NewEmptyState(options *cli.TOptions) *State { +func NewEmptyState(options *cli.TOptions, strictMode bool) *State { oosvars := mlrval.NewMlrmap() return &State{ Inrec: nil, @@ -43,6 +46,8 @@ func NewEmptyState(options *cli.TOptions) *State { // See lib.MakeEmptyRegexCaptures for context. RegexCaptures: lib.MakeEmptyRegexCaptures(), Options: options, + + StrictMode: strictMode, } } diff --git a/internal/pkg/transformers/put_or_filter.go b/internal/pkg/transformers/put_or_filter.go index 00adbd6e4..d08ab6cf2 100644 --- a/internal/pkg/transformers/put_or_filter.go +++ b/internal/pkg/transformers/put_or_filter.go @@ -206,6 +206,7 @@ func transformerPutOrFilterParseCLI( exitAfterParse := false doWarnings := false warningsAreFatal := false + strictMode := false invertFilter := false suppressOutputRecord := false presets := make([]string, 0) @@ -291,6 +292,11 @@ func transformerPutOrFilterParseCLI( } else if opt == "-w" { doWarnings = true warningsAreFatal = false + } else if opt == "-z" { + // TODO: perhaps doWarnings and warningsAreFatal as well. + // But first I want to see what can be caught at runtime + // without static analysis. 
+ strictMode = true } else if opt == "-W" { doWarnings = true warningsAreFatal = true @@ -355,6 +361,7 @@ func transformerPutOrFilterParseCLI( exitAfterParse, doWarnings, warningsAreFatal, + strictMode, invertFilter, suppressOutputRecord, options, @@ -388,12 +395,13 @@ func NewTransformerPut( exitAfterParse bool, doWarnings bool, warningsAreFatal bool, + strictMode bool, invertFilter bool, suppressOutputRecord bool, options *cli.TOptions, ) (*TransformerPut, error) { - cstRootNode := cst.NewEmptyRoot(&options.WriterOptions, dslInstanceType) + cstRootNode := cst.NewEmptyRoot(&options.WriterOptions, dslInstanceType).WithStrictMode(strictMode) err := cstRootNode.Build( dslStrings, @@ -434,7 +442,7 @@ func NewTransformerPut( return nil, err } - runtimeState := runtime.NewEmptyState(options) + runtimeState := runtime.NewEmptyState(options, strictMode) // E.g. // mlr put -s sum=0 diff --git a/todo.txt b/todo.txt index 8dcf82f99..24f2b76cc 100644 --- a/todo.txt +++ b/todo.txt @@ -25,6 +25,40 @@ RELEASES ================================================================ FEATURES +---------------------------------------------------------------- +STRICT MODE + +i theme is handling of 'absent' + +? what about handling of 'error' ? + +* improve wording: + mlr: couldn't assign variable int function return value from value absent (absent) + +* need $?x and @?x in the grammar & CST + +* flags: + o mlr -z and mlr put -z + o note put has -w (warn) and -W (fatal) + - then strict mode includes -W? 
+ +* tests: + mlr --csv --from $exv put -z 'x = 1' + mlr --csv --from $exv put -z 'var x = a' + mlr --csv --from $exv put -z 'var x = $nonesuch' + mlr --csv --from $exv put -z 'var x = $["asdf"]' + mlr --csv --from $exv put -z 'var x = $[nonesuch]' + mlr --csv --from $exv put -z 'var x = $[[999]]' + mlr --csv --from $exv put -z 'var x = $[[[999]]]' + mlr --csv --from $exv put -z 'begin { var m = $* }' + mlr --csv --from $exv put -z 'var x = @nonesuch' + mlr --csv --from $exv put -z 'var x = @["nonesuch"]' + mlr --csv --from $exv put -z 'func f(): int {}; $x = f()' + mlr --csv --from $exv put -z 'func f() {}; $x = f()' + mlr --csv --from $exv put -z 'func f() {return nonesuch}; $x = f()' + mlr --csv --from $exv put -z '$env = ENV[nonesuch]' + mlr --csv --from $exv put -z '$env = ENV["nonesuch"]' + ---------------------------------------------------------------- EXTENDED FIELD ACCESSORS @@ -94,16 +128,6 @@ inference: o webdocs as in #933 description * for data files: --symbol-true yes --symbol-false off --symbol-infinity inf --symbol-not-available N/A ----------------------------------------------------------------- -strict-mode ideas -* localvar: - o LHS: just require typedecl (even just var) - o RHS: like put -w, but with turning warnings into errors -* oosvar: - o abend unless @?x -- ? -* srec: - o abend unless $?x -- ? - ---------------------------------------------------------------- ! sysdate, sysdate_local; datediff ...