Proof of concept for extended verb-field-accessor feature

This commit is contained in:
John Kerl 2022-02-19 00:44:43 -05:00
parent f6d897bf7d
commit 5a56846c13
9 changed files with 191 additions and 7 deletions

View file

@ -53,7 +53,7 @@ func (repl *Repl) handleDSLStringAux(
isReplImmediate,
doWarnings,
false, // warningsAreFatal
func(dslString string, astNode *dsl.AST) {
func(dslString string, astNode *dsl.AST) error {
if repl.astPrintMode == ASTPrintParex {
astNode.PrintParex()
} else if repl.astPrintMode == ASTPrintParexOneLine {
@ -61,6 +61,7 @@ func (repl *Repl) handleDSLStringAux(
} else if repl.astPrintMode == ASTPrintIndent {
astNode.Print()
}
return nil
},
)
if err != nil {

View file

@ -482,7 +482,7 @@ func handleSkipOrProcessUntil(repl *Repl, dslString string, processingNotSkippin
true, // isReplImmediate
repl.doWarnings,
false, // warningsAreFatal
func(dslString string, astNode *dsl.AST) {
func(dslString string, astNode *dsl.AST) error {
if repl.astPrintMode == ASTPrintParex {
astNode.PrintParex()
} else if repl.astPrintMode == ASTPrintParexOneLine {
@ -490,6 +490,7 @@ func handleSkipOrProcessUntil(repl *Repl, dslString string, processingNotSkippin
} else if repl.astPrintMode == ASTPrintIndent {
astNode.Print()
}
return nil
},
)
if err != nil {

View file

@ -21,6 +21,7 @@ package cst
import (
"github.com/johnkerl/miller/internal/pkg/dsl"
"github.com/johnkerl/miller/internal/pkg/lib"
"github.com/johnkerl/miller/internal/pkg/mlrval" // xxx temp
"github.com/johnkerl/miller/internal/pkg/runtime"
)
@ -61,6 +62,11 @@ func (node *BareBooleanStatementNode) Execute(state *runtime.State) (*BlockExitP
return nil, nil
}
// xxx temp
//
// Evaluate returns the value produced by the statement's underlying
// bare-boolean evaluable for the given runtime state. Unlike Execute, it hands
// the evaluated value back to the caller.
func (node *BareBooleanStatementNode) Evaluate(state *runtime.State) *mlrval.Mlrval {
	evaluable := node.bareBooleanEvaluable
	return evaluable.Evaluate(state)
}
// ----------------------------------------------------------------
// FilterStatementNode is for explicit filter statements such as mlr put
// 'filter NR < 10', where the word "filter" appears within the single quotes

View file

@ -57,7 +57,7 @@ func (root *RootNode) WithRedefinableUDFUDS() *RootNode {
// ASTBuildVisitorFunc is a callback, used by RootNode's Build method, which
// CST-builder callsites can use to visit parse-to-AST result of multi-string
// DSL inputs. Nominal use: mlr put -v, mlr put -d, etc.
type ASTBuildVisitorFunc func(dslString string, astNode *dsl.AST)
type ASTBuildVisitorFunc func(dslString string, astNode *dsl.AST) error
// Used by DSL -> AST -> CST callsites including mlr put, mlr filter, and mlr
// repl. The RootNode must be separately instantiated (e.g. NewEmptyRoot())
@ -81,7 +81,10 @@ func (root *RootNode) Build(
// E.g. mlr put -v -- let it print out what it needs to.
if astBuildVisitorFunc != nil {
astBuildVisitorFunc(dslString, astRootNode)
err := astBuildVisitorFunc(dslString, astRootNode)
if err != nil {
return err
}
}
err = root.IngestAST(

View file

@ -32,6 +32,7 @@ const (
DSLInstanceTypePut = iota
DSLInstanceTypeFilter
DSLInstanceTypeREPL
DSLInstanceTypeVerbFieldAccessor
)
// ----------------------------------------------------------------

View file

@ -0,0 +1,124 @@
// ================================================================
// This is an experimental technique for doing things like
//
// mlr cut -f 'item,[[[3]]],item.name,foo["bar"]'
//
// where the expressions aren't simple strings but rather correspond to DSL expressions like
//
// $item $[[[3]]] $item.name $foo["bar"]
//
// ================================================================
package cst
import (
"fmt"
"github.com/johnkerl/miller/internal/pkg/cli"
"github.com/johnkerl/miller/internal/pkg/dsl"
"github.com/johnkerl/miller/internal/pkg/mlrval"
"github.com/johnkerl/miller/internal/pkg/runtime"
)
//type State struct {
// Inrec *mlrval.Mlrmap
// Context *types.Context
// Oosvars *mlrval.Mlrmap
// FilterExpression *mlrval.Mlrval
// Stack *Stack
// OutputRecordsAndContexts *list.List // list of *types.RecordAndContext
//
// // For holding "\0".."\9" between where they are set via things like
// // '$x =~ "(..)_(...)"', and interpolated via things like '$y = "\2:\1"'.
// RegexCaptures []string
// Options *cli.TOptions
//}
// VerbFieldAccessor pairs a CST built from a single field-selector expression
// with the runtime state used to evaluate that expression against records.
// Instances are created by NewVerbFieldAccessor and queried with Get.
type VerbFieldAccessor struct {
// cstRootNode is the CST root built from the "$"-prefixed selector string.
cstRootNode *RootNode
// runtimeState is the evaluation state; Get stores the current record in
// its Inrec field before evaluating.
runtimeState *runtime.State
}
// NodeTypeDirectFieldValue
// NodeTypeIndirectFieldValue
// (BracedFieldValue is DirectFieldValue)
// Indexed with .
// Indexed with []
// NodeTypePositionalFieldName
// NodeTypePositionalFieldValue
// mlr -n put -v '$item; ${item}; $["item"]; $item.name; $item["name"]; $[[3]]; $[[[3]]]'
// AST:
// * statement block
// * bare boolean
// * direct field value "item"
// * bare boolean
// * direct field value "item"
// * bare boolean
// * indirect field value "$[]"
// * string literal "item"
// * bare boolean
// * dot operator "."
// * direct field value "item"
// * local variable "name"
// * bare boolean
// * array or map index access "[]"
// * direct field value "item"
// * string literal "name"
// * bare boolean
// * positionally-indexed field name "$[]"
// * int literal "3"
// * bare boolean
// * positionally-indexed field value "$[]"
// * int literal "3"
// verbFieldAccessorASTValidator is the AST-build visitor used when compiling a
// verb field accessor. It accepts only an AST whose root is a statement block
// containing exactly one bare-boolean statement, which itself has exactly one
// child. Any other shape is rejected with a "malformed field-selector syntax"
// error that quotes dslString.
func verbFieldAccessorASTValidator(dslString string, astNode *dsl.AST) error {
	// TODO: flesh this out
	root := astNode.RootNode

	// Checks run in order and short-circuit, so Children[0] is only touched
	// once the root is known to have exactly one child.
	wellFormed := root.Type == dsl.NodeTypeStatementBlock &&
		len(root.Children) == 1 &&
		root.Children[0].Type == dsl.NodeTypeBareBoolean &&
		len(root.Children[0].Children) == 1
	if wellFormed {
		return nil
	}

	return fmt.Errorf("malformed field-selector syntax: \"%s\"", dslString)
}
// NewVerbFieldAccessor compiles the field selector input into a
// VerbFieldAccessor. The selector is prefixed with "$" to form a DSL
// expression, built into a CST with warnings disabled, and checked by
// verbFieldAccessorASTValidator. Any error from the build (including a
// validator rejection) is returned as-is with a nil accessor. On success the
// accessor carries the built CST plus a fresh empty runtime state created from
// the default CLI options.
func NewVerbFieldAccessor(input string) (*VerbFieldAccessor, error) {
	root := NewEmptyRoot(nil, DSLInstanceTypeVerbFieldAccessor)

	// Prefixing "$" turns the bare selector into a DSL field expression.
	dslStrings := []string{"$" + input}

	if err := root.Build(
		dslStrings,
		DSLInstanceTypeVerbFieldAccessor,
		false, // isReplImmediate
		false, // doWarnings
		false, // warningsAreFatal
		verbFieldAccessorASTValidator,
	); err != nil {
		return nil, err
	}

	accessor := &VerbFieldAccessor{
		cstRootNode:  root,
		runtimeState: runtime.NewEmptyState(cli.DefaultOptions()),
	}
	return accessor, nil
}
// Get evaluates the accessor's compiled expression against record and returns
// the resulting value. It stores record as Inrec in the accessor's runtime
// state before evaluating. The first executable of the CST's main block is
// type-asserted, unchecked, to *BareBooleanStatementNode, so it panics if the
// CST does not have that shape.
func (g *VerbFieldAccessor) Get(record *mlrval.Mlrmap) *mlrval.Mlrval {
	// TODO: rework all the CST stuff to not have so much extra.
	// This is just a POC for now.
	state := g.runtimeState
	state.Inrec = record

	statement := g.cstRootNode.mainBlock.executables[0]
	return statement.(*BareBooleanStatementNode).Evaluate(state)
}

View file

@ -8,6 +8,7 @@ import (
"strings"
"github.com/johnkerl/miller/internal/pkg/cli"
"github.com/johnkerl/miller/internal/pkg/dsl/cst"
"github.com/johnkerl/miller/internal/pkg/lib"
"github.com/johnkerl/miller/internal/pkg/mlrval"
"github.com/johnkerl/miller/internal/pkg/types"
@ -69,6 +70,7 @@ func transformerCutParseCLI(
doArgOrder := false
doComplement := false
doRegexes := false
doExtended := false // xxx temp/experimental
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
opt := args[argi]
@ -92,6 +94,9 @@ func transformerCutParseCLI(
} else if opt == "-x" {
doComplement = true
} else if opt == "-e" {
doExtended = true
} else if opt == "--complement" {
doComplement = true
@ -117,6 +122,7 @@ func transformerCutParseCLI(
doArgOrder,
doComplement,
doRegexes,
doExtended,
)
if err != nil {
fmt.Fprintln(os.Stderr, err)
@ -135,6 +141,9 @@ type TransformerCut struct {
regexes []*regexp.Regexp
recordTransformerFunc RecordTransformerFunc
// xxx temp/experimental
verbFieldAccessors []*cst.VerbFieldAccessor
}
func NewTransformerCut(
@ -142,11 +151,23 @@ func NewTransformerCut(
doArgOrder bool,
doComplement bool,
doRegexes bool,
doExtended bool,
) (*TransformerCut, error) {
tr := &TransformerCut{}
if doExtended {
// xxx temp/experimental
tr.verbFieldAccessors = make([]*cst.VerbFieldAccessor, len(fieldNames))
for i, fieldName := range fieldNames {
verbFieldAccessor, err := cst.NewVerbFieldAccessor(fieldName)
if err != nil {
return nil, err
}
tr.verbFieldAccessors[i] = verbFieldAccessor
}
tr.recordTransformerFunc = tr.extended
if !doRegexes {
} else if !doRegexes {
tr.fieldNameList = fieldNames
tr.fieldNameSet = lib.StringListToSet(fieldNames)
if !doComplement {
@ -158,6 +179,7 @@ func NewTransformerCut(
} else {
tr.recordTransformerFunc = tr.exclude
}
} else {
tr.doComplement = doComplement
tr.regexes = make([]*regexp.Regexp, len(fieldNames))
@ -290,3 +312,28 @@ func (tr *TransformerCut) processWithRegexes(
outputRecordsAndContexts.PushBack(inrecAndContext)
}
}
// ----------------------------------------------------------------
// mlr cut -e -f a,b,c
//
// extended is the record-transformer function installed when the experimental
// -e flag is given. For each non-end-of-stream input it builds a new record
// from the values returned by the verb field accessors, skipping nil and
// absent values, and emits it with the input's context. The end-of-stream
// marker is passed through unchanged.
//
// NOTE: every value is currently stored under the literal key "temp", so when
// several accessors yield values only the last one remains in the output
// record.
func (tr *TransformerCut) extended(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	// End of stream: forward the marker as-is.
	if inrecAndContext.EndOfStream {
		outputRecordsAndContexts.PushBack(inrecAndContext)
		return
	}

	source := inrecAndContext.Record
	result := mlrval.NewMlrmap()
	for _, accessor := range tr.verbFieldAccessors {
		value := accessor.Get(source)
		if value == nil || value.IsAbsent() {
			continue
		}
		// TODO: needs a Put too ...
		result.PutReference("temp", value) // inrec will be GC'ed
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(result, &inrecAndContext.Context),
	)
}

View file

@ -402,7 +402,7 @@ func NewTransformerPut(
doWarnings,
warningsAreFatal,
func(dslString string, astNode *dsl.AST) {
func(dslString string, astNode *dsl.AST) error {
if echoDSLString {
fmt.Println("DSL EXPRESSION:")
@ -423,6 +423,7 @@ func NewTransformerPut(
fmt.Println()
}
return nil
},
)

View file

@ -1,5 +1,5 @@
===============================================================
RELEASES
* plan 6.1.0
o unsparsify -f CSV by default -- ? into CSV record-writer -- ? caveat that record 1 controls all ...
o mlr join --left-fields a,b,c