separate ingest-AST and resolve passes for the CST-builder

This commit is contained in:
John Kerl 2021-02-03 03:52:02 +00:00
parent b956198f72
commit 0ed9f11fbc
9 changed files with 73 additions and 58 deletions

1
go/.gitignore vendored
View file

@ -1,6 +1,7 @@
big
big.json
bin/
pkg/
*.ast
r
s

View file

@ -8,5 +8,4 @@ run_mlr --opprint --from $indir/abixy filter -e 'NR == 7'
run_mlr --opprint --from $indir/abixy put -e 'print "PRE";' -f $indir/put-script-piece-1 -f $indir/put-script-piece-2 -f $indir/put-script-piece-3 -e 'print "POST"'
run_mlr --opprint --from $indir/abixy filter -f $indir/filter-script-piece-1
run_mlr --opprint --from $indir/abixy filter -f $indir/filter-script-piece-1 -f $indir/filter-script-piece-2

View file

@ -88,16 +88,11 @@ zee wye 8 0.5985540091064224 0.976181385699006 9.574735394805428 8 YES
hat wye 9 0.03144187646093577 0.7495507603507059 9.780992636811641 9 YES
pan wye 10 0.5026260055412137 0.9526183602969864 11.455244365838201 10 YES
mlr --opprint --from ./reg-test/input/abixy filter -f ./reg-test/input/filter-script-piece-1
a b i x y
eks pan 2 0.7586799647899636 0.5221511083334797
wye pan 5 0.5732889198020006 0.8636244699032729
zee pan 6 0.5271261600918548 0.49322128674835697
eks zee 7 0.6117840605678454 0.1878849191181694
zee wye 8 0.5985540091064224 0.976181385699006
pan wye 10 0.5026260055412137 0.9526183602969864
mlr --opprint --from ./reg-test/input/abixy filter -f ./reg-test/input/filter-script-piece-1 -f ./reg-test/input/filter-script-piece-2
a b i x y
eks pan 2 0.7586799647899636 0.5221511083334797
wye pan 5 0.5732889198020006 0.8636244699032729
zee pan 6 0.5271261600918548 0.49322128674835697
eks zee 7 0.6117840605678454 0.1878849191181694
zee wye 8 0.5985540091064224 0.976181385699006

View file

@ -1 +1 @@
$x > 0.5
f($x) > 0.5

View file

@ -1 +1 @@
&& $i < 5
func f(x) { return $x * 0.99 }

View file

@ -460,7 +460,11 @@ func (this *Repl) HandleDSLString(dslString string) error {
}
this.cstRootNode.ResetForREPL()
err = this.cstRootNode.Build(astRootNode, this.isFilter)
err = this.cstRootNode.IngestAST(astRootNode, this.isFilter)
if err != nil {
return err
}
err = this.cstRootNode.Resolve()
if err != nil {
return err
}

View file

@ -46,7 +46,11 @@ func (this *RootNode) WithRedefinableUDFS() *RootNode {
}
// ----------------------------------------------------------------
func (this *RootNode) Build(
// If the user has multiple put -f / put -e pieces, we can AST-parse each one
// separately and ingest it. However, we cannot resolve UDF/UDS references
// until after all pieces are ingested -- e.g. the first piece calls a function
// which the second defines, or there is mutual recursion across pieces, etc.
func (this *RootNode) IngestAST(
ast *dsl.AST,
isFilter bool, // false for 'mlr put', true for 'mlr filter'
) error {
@ -67,7 +71,12 @@ func (this *RootNode) Build(
return err
}
err = this.resolveFunctionCallsites()
return nil
}
func (this *RootNode) Resolve() error {
err := this.resolveFunctionCallsites()
if err != nil {
return err
}

View file

@ -50,13 +50,12 @@ func transformerPutOrFilterParseCLI(
verb := args[argi]
argi++
dslString := ""
var dslStrings []string = make([]string, 0)
verbose := false
printASTOnly := false
printASTSingleLine := false
invertFilter := false
suppressOutputRecord := false
needExpressionArg := true
presets := make([]string, 0)
// TODO: make sure this is a full nested-struct copy.
@ -89,19 +88,12 @@ func transformerPutOrFilterParseCLI(
fmt.Println(err)
return nil
}
if dslString != "" {
dslString += "\n"
}
dslString += string(data)
needExpressionArg = false
dslString := string(data)
dslStrings = append(dslStrings, dslString)
} else if opt == "-e" {
if dslString != "" {
dslString += ";\n"
}
dslString += cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
needExpressionArg = false
dslString := cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
dslStrings = append(dslStrings, dslString)
} else if opt == "-s" {
// E.g.
@ -155,33 +147,36 @@ func transformerPutOrFilterParseCLI(
// If they've used either 'mlr put -f {filename}' or 'mlr put -e
// {expression}', then that specifies their DSL expression. But if they've
// done neither, then we expect 'mlr put {expression}'.
if needExpressionArg {
if len(dslStrings) == 0 {
// Get the DSL string from the command line, after the flags
if argi >= argc {
transformerPutUsage(os.Stderr, true, 1)
}
dslString = args[argi]
dslString := args[argi]
dslStrings = append(dslStrings, dslString)
argi++
}
if printASTOnly {
astRootNode, err := BuildASTFromStringWithMessage(dslString, false)
if err == nil {
if printASTSingleLine {
astRootNode.PrintParexOneLine()
for _, dslString := range dslStrings {
astRootNode, err := BuildASTFromStringWithMessage(dslString, false)
if err == nil {
if printASTSingleLine {
astRootNode.PrintParexOneLine()
} else {
astRootNode.PrintParex()
}
os.Exit(0)
} else {
astRootNode.PrintParex()
// error message already printed out
os.Exit(1)
}
os.Exit(0)
} else {
// error message already printed out
os.Exit(1)
}
}
isFilter := verb == "filter"
transformer, err := NewTransformerPut(
dslString,
dslStrings,
isFilter,
presets,
verbose,
@ -273,7 +268,6 @@ semicolons to separate expressions.)
// ----------------------------------------------------------------
type TransformerPut struct {
astRootNode *dsl.AST
cstRootNode *cst.RootNode
runtimeState *runtime.State
callCount int
@ -283,7 +277,7 @@ type TransformerPut struct {
}
func NewTransformerPut(
dslString string,
dslStrings []string,
isFilter bool,
presets []string,
verbose bool,
@ -292,26 +286,36 @@ func NewTransformerPut(
recordWriterOptions *cliutil.TWriterOptions,
) (*TransformerPut, error) {
astRootNode, err := BuildASTFromStringWithMessage(dslString, verbose)
if err != nil {
// Error message already printed out
return nil, err
}
if verbose {
fmt.Println("DSL EXPRESSION:")
fmt.Println(dslString)
fmt.Println("RAW AST:")
astRootNode.Print()
fmt.Println()
}
cstRootNode := cst.NewEmptyRoot(recordWriterOptions)
err = cstRootNode.Build(astRootNode, isFilter)
for _, dslString := range dslStrings {
astRootNode, err := BuildASTFromStringWithMessage(dslString, verbose)
if err != nil {
// Error message already printed out
return nil, err
}
if verbose {
fmt.Println("DSL EXPRESSION:")
fmt.Println(dslString)
fmt.Println("RAW AST:")
astRootNode.Print()
fmt.Println()
}
err = cstRootNode.IngestAST(astRootNode, isFilter)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return nil, err
}
}
err := cstRootNode.Resolve()
if err != nil {
fmt.Fprintln(os.Stderr, err)
return nil, err
}
runtimeState := runtime.NewEmptyState()
// E.g.
@ -337,7 +341,6 @@ func NewTransformerPut(
}
return &TransformerPut{
astRootNode: astRootNode,
cstRootNode: cstRootNode,
runtimeState: runtimeState,
callCount: 0,

View file

@ -19,6 +19,8 @@ TOP OF LIST:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* repl foo:
* mutual recursion fg.mlr / gf.mlr -- needs a single resolver at the end ...
- maybe separate ingest-ASTs and resolver passes
d doc MLR_REPL_PS1/MLR_REPL_PS2 env vars
d :load {source files}
d :begin and :main and :end ...
@ -514,3 +516,5 @@ i https://en.wikipedia.org/wiki/Delimiter#Delimiter_collision
// Unlike other transformers, we can't use flagSet here. The syntax of 'mlr put'
// and 'mlr filter' is they need to be able to take -f and/or -e more than
// once, and Go flags can't handle that.
* doc re multi-load: can't split an expression like '$x >' and '3' across separate -f files anymore, since each piece is now parsed separately. no worries.