mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Proof of concept for extended verb-field-accessor feature
This commit is contained in:
parent
f6d897bf7d
commit
5a56846c13
9 changed files with 191 additions and 7 deletions
|
|
@ -53,7 +53,7 @@ func (repl *Repl) handleDSLStringAux(
|
|||
isReplImmediate,
|
||||
doWarnings,
|
||||
false, // warningsAreFatal
|
||||
func(dslString string, astNode *dsl.AST) {
|
||||
func(dslString string, astNode *dsl.AST) error {
|
||||
if repl.astPrintMode == ASTPrintParex {
|
||||
astNode.PrintParex()
|
||||
} else if repl.astPrintMode == ASTPrintParexOneLine {
|
||||
|
|
@ -61,6 +61,7 @@ func (repl *Repl) handleDSLStringAux(
|
|||
} else if repl.astPrintMode == ASTPrintIndent {
|
||||
astNode.Print()
|
||||
}
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -482,7 +482,7 @@ func handleSkipOrProcessUntil(repl *Repl, dslString string, processingNotSkippin
|
|||
true, // isReplImmediate
|
||||
repl.doWarnings,
|
||||
false, // warningsAreFatal
|
||||
func(dslString string, astNode *dsl.AST) {
|
||||
func(dslString string, astNode *dsl.AST) error {
|
||||
if repl.astPrintMode == ASTPrintParex {
|
||||
astNode.PrintParex()
|
||||
} else if repl.astPrintMode == ASTPrintParexOneLine {
|
||||
|
|
@ -490,6 +490,7 @@ func handleSkipOrProcessUntil(repl *Repl, dslString string, processingNotSkippin
|
|||
} else if repl.astPrintMode == ASTPrintIndent {
|
||||
astNode.Print()
|
||||
}
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ package cst
|
|||
import (
|
||||
"github.com/johnkerl/miller/internal/pkg/dsl"
|
||||
"github.com/johnkerl/miller/internal/pkg/lib"
|
||||
"github.com/johnkerl/miller/internal/pkg/mlrval" // xxx temp
|
||||
"github.com/johnkerl/miller/internal/pkg/runtime"
|
||||
)
|
||||
|
||||
|
|
@ -61,6 +62,11 @@ func (node *BareBooleanStatementNode) Execute(state *runtime.State) (*BlockExitP
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// xxx temp
|
||||
// Evaluate returns the value produced by the node's underlying
// bareBooleanEvaluable for the given runtime state. It is a thin delegate;
// it is flagged as temporary ("xxx temp") and is called by
// VerbFieldAccessor.Get in the experimental verb-field-accessor code.
func (node *BareBooleanStatementNode) Evaluate(state *runtime.State) *mlrval.Mlrval {
|
||||
return node.bareBooleanEvaluable.Evaluate(state)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// FilterStatementNode is for explicit filter statements such as mlr put
|
||||
// 'filter NR < 10', where the word "filter" appears within the single quotes
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ func (root *RootNode) WithRedefinableUDFUDS() *RootNode {
|
|||
// ASTBuildVisitorFunc is a callback, used by RootNode's Build method, which
|
||||
// CST-builder callsites can use to visit parse-to-AST result of multi-string
|
||||
// DSL inputs. Nominal use: mlr put -v, mlr put -d, etc.
|
||||
type ASTBuildVisitorFunc func(dslString string, astNode *dsl.AST)
|
||||
type ASTBuildVisitorFunc func(dslString string, astNode *dsl.AST) error
|
||||
|
||||
// Used by DSL -> AST -> CST callsites including mlr put, mlr filter, and mlr
|
||||
// repl. The RootNode must be separately instantiated (e.g. NewEmptyRoot())
|
||||
|
|
@ -81,7 +81,10 @@ func (root *RootNode) Build(
|
|||
|
||||
// E.g. mlr put -v -- let it print out what it needs to.
|
||||
if astBuildVisitorFunc != nil {
|
||||
astBuildVisitorFunc(dslString, astRootNode)
|
||||
err := astBuildVisitorFunc(dslString, astRootNode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err = root.IngestAST(
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ const (
|
|||
DSLInstanceTypePut = iota
|
||||
DSLInstanceTypeFilter
|
||||
DSLInstanceTypeREPL
|
||||
DSLInstanceTypeVerbFieldAccessor
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
124
internal/pkg/dsl/cst/verb_field_accessor.go
Normal file
124
internal/pkg/dsl/cst/verb_field_accessor.go
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
// ================================================================
|
||||
// This is an experimental technique for doing things like
|
||||
//
|
||||
// mlr cut -f 'item,[[[3]]],item.name,foo["bar"]'
|
||||
//
|
||||
// where the expressions aren't simple strings but rather correspond to DSL expressions like
|
||||
//
|
||||
// $item $[[[3]]] $item.name $foo["bar"]
|
||||
//
|
||||
// ================================================================
|
||||
|
||||
package cst
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/johnkerl/miller/internal/pkg/cli"
|
||||
"github.com/johnkerl/miller/internal/pkg/dsl"
|
||||
"github.com/johnkerl/miller/internal/pkg/mlrval"
|
||||
"github.com/johnkerl/miller/internal/pkg/runtime"
|
||||
)
|
||||
|
||||
//type State struct {
|
||||
// Inrec *mlrval.Mlrmap
|
||||
// Context *types.Context
|
||||
// Oosvars *mlrval.Mlrmap
|
||||
// FilterExpression *mlrval.Mlrval
|
||||
// Stack *Stack
|
||||
// OutputRecordsAndContexts *list.List // list of *types.RecordAndContext
|
||||
//
|
||||
// // For holding "\0".."\9" between where they are set via things like
|
||||
// // '$x =~ "(..)_(...)"', and interpolated via things like '$y = "\2:\1"'.
|
||||
// RegexCaptures []string
|
||||
// Options *cli.TOptions
|
||||
//}
|
||||
|
||||
// VerbFieldAccessor evaluates one field-selector expression (as typed on a
// verb's command line) against records. It is constructed by
// NewVerbFieldAccessor and queried with Get.
type VerbFieldAccessor struct {
|
||||
// cstRootNode is the CST built from the selector text prefixed with "$".
cstRootNode *RootNode
|
||||
// runtimeState is the evaluation state; Get overwrites its Inrec field
// with the record being queried on every call.
runtimeState *runtime.State
|
||||
}
|
||||
|
||||
// NodeTypeDirectFieldValue
|
||||
// NodeTypeIndirectFieldValue
|
||||
// (BracedFieldValue is DirectFieldValue)
|
||||
// Indexed with .
|
||||
// Indexed with []
|
||||
// NodeTypePositionalFieldName
|
||||
// NodeTypePositionalFieldValue
|
||||
|
||||
// mlr -n put -v '$item; ${item}; $["item"]; $item.name; $item["name"]; $[[3]]; $[[[3]]]'
|
||||
// AST:
|
||||
// * statement block
|
||||
// * bare boolean
|
||||
// * direct field value "item"
|
||||
// * bare boolean
|
||||
// * direct field value "item"
|
||||
// * bare boolean
|
||||
// * indirect field value "$[]"
|
||||
// * string literal "item"
|
||||
// * bare boolean
|
||||
// * dot operator "."
|
||||
// * direct field value "item"
|
||||
// * local variable "name"
|
||||
// * bare boolean
|
||||
// * array or map index access "[]"
|
||||
// * direct field value "item"
|
||||
// * string literal "name"
|
||||
// * bare boolean
|
||||
// * positionally-indexed field name "$[]"
|
||||
// * int literal "3"
|
||||
// * bare boolean
|
||||
// * positionally-indexed field value "$[]"
|
||||
// * int literal "3"
|
||||
|
||||
func verbFieldAccessorASTValidator(dslString string, astNode *dsl.AST) error {
|
||||
// TODO: flesh this out
|
||||
err := fmt.Errorf("malformed field-selector syntax: \"%s\"", dslString)
|
||||
|
||||
if astNode.RootNode.Type != dsl.NodeTypeStatementBlock {
|
||||
return err
|
||||
}
|
||||
if len(astNode.RootNode.Children) != 1 {
|
||||
return err
|
||||
}
|
||||
if astNode.RootNode.Children[0].Type != dsl.NodeTypeBareBoolean {
|
||||
return err
|
||||
}
|
||||
if len(astNode.RootNode.Children[0].Children) != 1 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewVerbFieldAccessor(input string) (*VerbFieldAccessor, error) {
|
||||
cstRootNode := NewEmptyRoot(nil, DSLInstanceTypeVerbFieldAccessor)
|
||||
err := cstRootNode.Build(
|
||||
[]string{"$" + input}, // dslStrings []string
|
||||
DSLInstanceTypeVerbFieldAccessor, // dslInstanceType DSLInstanceType
|
||||
false, // isReplImmediate bool
|
||||
false, // doWarnings bool
|
||||
false, // warningsAreFatal bool
|
||||
verbFieldAccessorASTValidator, // astBuildVisitorFunc ASTBuildVisitorFunc
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
options := cli.DefaultOptions()
|
||||
runtimeState := runtime.NewEmptyState(options)
|
||||
|
||||
return &VerbFieldAccessor{
|
||||
cstRootNode,
|
||||
runtimeState,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (g *VerbFieldAccessor) Get(record *mlrval.Mlrmap) *mlrval.Mlrval {
|
||||
// TODO: rework all the CST stuff to not have so much extra.
|
||||
// This is just a POC for now.
|
||||
g.runtimeState.Inrec = record
|
||||
node := g.cstRootNode.mainBlock.executables[0].(*BareBooleanStatementNode)
|
||||
return node.Evaluate(g.runtimeState)
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/johnkerl/miller/internal/pkg/cli"
|
||||
"github.com/johnkerl/miller/internal/pkg/dsl/cst"
|
||||
"github.com/johnkerl/miller/internal/pkg/lib"
|
||||
"github.com/johnkerl/miller/internal/pkg/mlrval"
|
||||
"github.com/johnkerl/miller/internal/pkg/types"
|
||||
|
|
@ -69,6 +70,7 @@ func transformerCutParseCLI(
|
|||
doArgOrder := false
|
||||
doComplement := false
|
||||
doRegexes := false
|
||||
doExtended := false // xxx temp/experimental
|
||||
|
||||
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
||||
opt := args[argi]
|
||||
|
|
@ -92,6 +94,9 @@ func transformerCutParseCLI(
|
|||
} else if opt == "-x" {
|
||||
doComplement = true
|
||||
|
||||
} else if opt == "-e" {
|
||||
doExtended = true
|
||||
|
||||
} else if opt == "--complement" {
|
||||
doComplement = true
|
||||
|
||||
|
|
@ -117,6 +122,7 @@ func transformerCutParseCLI(
|
|||
doArgOrder,
|
||||
doComplement,
|
||||
doRegexes,
|
||||
doExtended,
|
||||
)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
|
|
@ -135,6 +141,9 @@ type TransformerCut struct {
|
|||
regexes []*regexp.Regexp
|
||||
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
|
||||
// xxx temp/experimental
|
||||
verbFieldAccessors []*cst.VerbFieldAccessor
|
||||
}
|
||||
|
||||
func NewTransformerCut(
|
||||
|
|
@ -142,11 +151,23 @@ func NewTransformerCut(
|
|||
doArgOrder bool,
|
||||
doComplement bool,
|
||||
doRegexes bool,
|
||||
doExtended bool,
|
||||
) (*TransformerCut, error) {
|
||||
|
||||
tr := &TransformerCut{}
|
||||
if doExtended {
|
||||
// xxx temp/experimental
|
||||
tr.verbFieldAccessors = make([]*cst.VerbFieldAccessor, len(fieldNames))
|
||||
for i, fieldName := range fieldNames {
|
||||
verbFieldAccessor, err := cst.NewVerbFieldAccessor(fieldName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tr.verbFieldAccessors[i] = verbFieldAccessor
|
||||
}
|
||||
tr.recordTransformerFunc = tr.extended
|
||||
|
||||
if !doRegexes {
|
||||
} else if !doRegexes {
|
||||
tr.fieldNameList = fieldNames
|
||||
tr.fieldNameSet = lib.StringListToSet(fieldNames)
|
||||
if !doComplement {
|
||||
|
|
@ -158,6 +179,7 @@ func NewTransformerCut(
|
|||
} else {
|
||||
tr.recordTransformerFunc = tr.exclude
|
||||
}
|
||||
|
||||
} else {
|
||||
tr.doComplement = doComplement
|
||||
tr.regexes = make([]*regexp.Regexp, len(fieldNames))
|
||||
|
|
@ -290,3 +312,28 @@ func (tr *TransformerCut) processWithRegexes(
|
|||
outputRecordsAndContexts.PushBack(inrecAndContext)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// mlr cut -e -f a,b,c
|
||||
func (tr *TransformerCut) extended(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
||||
inputDownstreamDoneChannel <-chan bool,
|
||||
outputDownstreamDoneChannel chan<- bool,
|
||||
) {
|
||||
if !inrecAndContext.EndOfStream {
|
||||
inrec := inrecAndContext.Record
|
||||
outrec := mlrval.NewMlrmap()
|
||||
for _, verbFieldAccessor := range tr.verbFieldAccessors {
|
||||
value := verbFieldAccessor.Get(inrec)
|
||||
if value != nil && !value.IsAbsent() {
|
||||
// TODO: needs a Put too ...
|
||||
outrec.PutReference("temp", value) // inrec will be GC'ed
|
||||
}
|
||||
}
|
||||
outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context)
|
||||
outputRecordsAndContexts.PushBack(outrecAndContext)
|
||||
} else {
|
||||
outputRecordsAndContexts.PushBack(inrecAndContext)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -402,7 +402,7 @@ func NewTransformerPut(
|
|||
doWarnings,
|
||||
warningsAreFatal,
|
||||
|
||||
func(dslString string, astNode *dsl.AST) {
|
||||
func(dslString string, astNode *dsl.AST) error {
|
||||
|
||||
if echoDSLString {
|
||||
fmt.Println("DSL EXPRESSION:")
|
||||
|
|
@ -423,6 +423,7 @@ func NewTransformerPut(
|
|||
fmt.Println()
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
)
|
||||
|
||||
|
|
|
|||
2
todo.txt
2
todo.txt
|
|
@ -1,5 +1,5 @@
|
|||
===============================================================
|
||||
RELEASES
|
||||
|
||||
* plan 6.1.0
|
||||
o unsparsify -f CSV by default -- ? into CSV record-writer -- ? caveat that record 1 controls all ...
|
||||
o mlr join --left-fields a,b,c
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue