From 0ed9f11fbc60e2f2e52e5903786083080cb3f0f5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 3 Feb 2021 03:52:02 +0000 Subject: [PATCH] separate ingest-AST and resolve passes for the CST-builder --- go/.gitignore | 1 + .../cases/case-dsl-multipart-scripts.sh | 1 - .../case-dsl-multipart-scripts.sh.out | 13 +-- go/reg-test/input/filter-script-piece-1 | 2 +- go/reg-test/input/filter-script-piece-2 | 2 +- go/src/miller/auxents/repl.go | 6 +- go/src/miller/dsl/cst/root.go | 13 ++- go/src/miller/transformers/put_or_filter.go | 89 ++++++++++--------- go/todo.txt | 4 + 9 files changed, 73 insertions(+), 58 deletions(-) diff --git a/go/.gitignore b/go/.gitignore index 1313bac73..7aa2b4dad 100644 --- a/go/.gitignore +++ b/go/.gitignore @@ -1,6 +1,7 @@ big big.json bin/ +pkg/ *.ast r s diff --git a/go/reg-test/cases/case-dsl-multipart-scripts.sh b/go/reg-test/cases/case-dsl-multipart-scripts.sh index 795dc6ee0..5f71bfc71 100644 --- a/go/reg-test/cases/case-dsl-multipart-scripts.sh +++ b/go/reg-test/cases/case-dsl-multipart-scripts.sh @@ -8,5 +8,4 @@ run_mlr --opprint --from $indir/abixy filter -e 'NR == 7' run_mlr --opprint --from $indir/abixy put -e 'print "PRE";' -f $indir/put-script-piece-1 -f $indir/put-script-piece-2 -f $indir/put-script-piece-3 -e 'print "POST"' -run_mlr --opprint --from $indir/abixy filter -f $indir/filter-script-piece-1 run_mlr --opprint --from $indir/abixy filter -f $indir/filter-script-piece-1 -f $indir/filter-script-piece-2 diff --git a/go/reg-test/expected/case-dsl-multipart-scripts.sh.out b/go/reg-test/expected/case-dsl-multipart-scripts.sh.out index de3b9ef45..6cc6faa37 100644 --- a/go/reg-test/expected/case-dsl-multipart-scripts.sh.out +++ b/go/reg-test/expected/case-dsl-multipart-scripts.sh.out @@ -88,16 +88,11 @@ zee wye 8 0.5985540091064224 0.976181385699006 9.574735394805428 8 YES hat wye 9 0.03144187646093577 0.7495507603507059 9.780992636811641 9 YES pan wye 10 0.5026260055412137 0.9526183602969864 11.455244365838201 10 YES -mlr --opprint --from ./reg-test/input/abixy filter -f ./reg-test/input/filter-script-piece-1 -a b i x y -eks pan 2 0.7586799647899636 0.5221511083334797 -wye pan 5 0.5732889198020006 0.8636244699032729 -zee pan 6 0.5271261600918548 0.49322128674835697 -eks zee 7 0.6117840605678454 0.1878849191181694 -zee wye 8 0.5985540091064224 0.976181385699006 -pan wye 10 0.5026260055412137 0.9526183602969864 - mlr --opprint --from ./reg-test/input/abixy filter -f ./reg-test/input/filter-script-piece-1 -f ./reg-test/input/filter-script-piece-2 a b i x y eks pan 2 0.7586799647899636 0.5221511083334797 +wye pan 5 0.5732889198020006 0.8636244699032729 +zee pan 6 0.5271261600918548 0.49322128674835697 +eks zee 7 0.6117840605678454 0.1878849191181694 +zee wye 8 0.5985540091064224 0.976181385699006 diff --git a/go/reg-test/input/filter-script-piece-1 b/go/reg-test/input/filter-script-piece-1 index 55a59de8e..58e4e5ebe 100644 --- a/go/reg-test/input/filter-script-piece-1 +++ b/go/reg-test/input/filter-script-piece-1 @@ -1 +1 @@ -$x > 0.5 +f($x) > 0.5 diff --git a/go/reg-test/input/filter-script-piece-2 b/go/reg-test/input/filter-script-piece-2 index edb1568dc..1b2f7a773 100644 --- a/go/reg-test/input/filter-script-piece-2 +++ b/go/reg-test/input/filter-script-piece-2 @@ -1 +1 @@ -&& $i < 5 +func f(x) { return $x * 0.99 } diff --git a/go/src/miller/auxents/repl.go b/go/src/miller/auxents/repl.go index a001d57b9..a4be53a35 100644 --- a/go/src/miller/auxents/repl.go +++ b/go/src/miller/auxents/repl.go @@ -460,7 +460,11 @@ func (this *Repl) HandleDSLString(dslString string) error { } this.cstRootNode.ResetForREPL() - err = this.cstRootNode.Build(astRootNode, this.isFilter) + err = this.cstRootNode.IngestAST(astRootNode, this.isFilter) + if err != nil { + return err + } + err = this.cstRootNode.Resolve() if err != nil { return err } diff --git a/go/src/miller/dsl/cst/root.go b/go/src/miller/dsl/cst/root.go index 5ac816ece..69a916d21 100644 --- a/go/src/miller/dsl/cst/root.go +++ b/go/src/miller/dsl/cst/root.go @@ -46,7 +46,11 @@ func (this *RootNode) WithRedefinableUDFS() *RootNode { } // ---------------------------------------------------------------- -func (this *RootNode) Build( +// If the user has multiple put -f / put -e pieces, we can AST-parse each +// separately and build them. However we cannot resolve UDF/UDS references +// until after they're all ingested -- e.g. first piece calls a function which +// the second defines, or mutual recursion across pieces, etc. +func (this *RootNode) IngestAST( ast *dsl.AST, isFilter bool, // false for 'mlr put', true for 'mlr filter' ) error { @@ -67,7 +71,12 @@ func (this *RootNode) Build( return err } - err = this.resolveFunctionCallsites() + return nil +} + +func (this *RootNode) Resolve() error { + + err := this.resolveFunctionCallsites() if err != nil { return err } diff --git a/go/src/miller/transformers/put_or_filter.go b/go/src/miller/transformers/put_or_filter.go index 9c9cefec7..49be1176e 100644 --- a/go/src/miller/transformers/put_or_filter.go +++ b/go/src/miller/transformers/put_or_filter.go @@ -50,13 +50,12 @@ func transformerPutOrFilterParseCLI( verb := args[argi] argi++ - dslString := "" + var dslStrings []string = make([]string, 0) verbose := false printASTOnly := false printASTSingleLine := false invertFilter := false suppressOutputRecord := false - needExpressionArg := true presets := make([]string, 0) // TODO: make sure this is a full nested-struct copy. @@ -89,19 +88,12 @@ func transformerPutOrFilterParseCLI( fmt.Println(err) return nil } - if dslString != "" { - dslString += "\n" - } - dslString += string(data) - - needExpressionArg = false + dslString := string(data) + dslStrings = append(dslStrings, dslString) } else if opt == "-e" { - if dslString != "" { - dslString += ";\n" - } - dslString += cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - needExpressionArg = false + dslString := cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + dslStrings = append(dslStrings, dslString) } else if opt == "-s" { // E.g. @@ -155,33 +147,36 @@ func transformerPutOrFilterParseCLI( // If they've used either of 'mlr put -f {filename}' or 'mlr put -e // {expression}' then that specifies their DSL expression. But if they've // done neither then we expect 'mlr put {expression}'. - if needExpressionArg { + if len(dslStrings) == 0 { // Get the DSL string from the command line, after the flags if argi >= argc { transformerPutUsage(os.Stderr, true, 1) } - dslString = args[argi] + dslString := args[argi] + dslStrings = append(dslStrings, dslString) argi++ } if printASTOnly { - astRootNode, err := BuildASTFromStringWithMessage(dslString, false) - if err == nil { - if printASTSingleLine { - astRootNode.PrintParexOneLine() + for _, dslString := range dslStrings { + astRootNode, err := BuildASTFromStringWithMessage(dslString, false) + if err == nil { + if printASTSingleLine { + astRootNode.PrintParexOneLine() + } else { + astRootNode.PrintParex() + } + os.Exit(0) } else { - astRootNode.PrintParex() + // error message already printed out + os.Exit(1) } - os.Exit(0) - } else { - // error message already printed out - os.Exit(1) } } isFilter := verb == "filter" transformer, err := NewTransformerPut( - dslString, + dslStrings, isFilter, presets, verbose, @@ -273,7 +268,6 @@ semicolons to separate expressions.) // ---------------------------------------------------------------- type TransformerPut struct { - astRootNode *dsl.AST cstRootNode *cst.RootNode runtimeState *runtime.State callCount int @@ -283,7 +277,7 @@ type TransformerPut struct { } func NewTransformerPut( - dslString string, + dslStrings []string, isFilter bool, presets []string, verbose bool, @@ -292,26 +286,36 @@ func NewTransformerPut( recordWriterOptions *cliutil.TWriterOptions, ) (*TransformerPut, error) { - astRootNode, err := BuildASTFromStringWithMessage(dslString, verbose) - if err != nil { - // Error message already printed out - return nil, err - } - - if verbose { - fmt.Println("DSL EXPRESSION:") - fmt.Println(dslString) - fmt.Println("RAW AST:") - astRootNode.Print() - fmt.Println() - } - cstRootNode := cst.NewEmptyRoot(recordWriterOptions) - err = cstRootNode.Build(astRootNode, isFilter) + + for _, dslString := range dslStrings { + astRootNode, err := BuildASTFromStringWithMessage(dslString, verbose) + if err != nil { + // Error message already printed out + return nil, err + } + + if verbose { + fmt.Println("DSL EXPRESSION:") + fmt.Println(dslString) + fmt.Println("RAW AST:") + astRootNode.Print() + fmt.Println() + } + + err = cstRootNode.IngestAST(astRootNode, isFilter) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return nil, err + } + } + + err := cstRootNode.Resolve() if err != nil { fmt.Fprintln(os.Stderr, err) return nil, err } + runtimeState := runtime.NewEmptyState() // E.g. @@ -337,7 +341,6 @@ func NewTransformerPut( } return &TransformerPut{ - astRootNode: astRootNode, cstRootNode: cstRootNode, runtimeState: runtimeState, callCount: 0, diff --git a/go/todo.txt b/go/todo.txt index 86f240048..dcfc2b289 100644 --- a/go/todo.txt +++ b/go/todo.txt @@ -19,6 +19,8 @@ TOP OF LIST: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * repl foo: + * mutual recursion fg.mlr / gf.mlr -- needs a single resolver at the end ... + - maybe separate ingest-ASTs and resolver passes d doc MLR_REPL_PS1/MLR_REPL_PS2 env vars d :load {source files} d :begin and :main and :end ... @@ -514,3 +516,5 @@ i https://en.wikipedia.org/wiki/Delimiter#Delimiter_collision // Unlike other transformers, we can't use flagSet here. The syntax of 'mlr put' // and 'mlr filter' is they need to be able to take -f and/or -e more than // once, and Go flags can't handle that. + +* doc re multi-load: can't '$x >' and '3' in separate -f anymore. no worries.