This commit is contained in:
John Kerl 2020-10-11 17:01:32 -04:00
parent c2ea680188
commit f6df932b8a
6 changed files with 201 additions and 1 deletions

View file

@ -16,6 +16,7 @@ var MAPPER_LOOKUP_TABLE = []mapping.MapperSetup{
mappers.CountSetup,
mappers.CutSetup,
mappers.FilterSetup,
mappers.GrepSetup,
mappers.GroupBySetup,
mappers.GroupLikeSetup,
mappers.HeadSetup,

View file

@ -0,0 +1,164 @@
package mappers
import (
"flag"
"fmt"
"os"
"regexp"
"miller/clitypes"
"miller/mapping"
"miller/types"
)
// ----------------------------------------------------------------
var GrepSetup = mapping.MapperSetup{
Verb: "grep",
ParseCLIFunc: mapperGrepParseCLI,
IgnoresInput: false,
}
func mapperGrepParseCLI(
pargi *int,
argc int,
args []string,
errorHandling flag.ErrorHandling, // ContinueOnError or ExitOnError
_ *clitypes.TReaderOptions,
__ *clitypes.TWriterOptions,
) mapping.IRecordMapper {
// Get the verb name from the current spot in the mlr command line
argi := *pargi
verb := args[argi]
argi++
// Parse local flags
flagSet := flag.NewFlagSet(verb, errorHandling)
pIgnoreCase := flagSet.Bool(
"i",
false,
`Use case-insensitive search`,
)
pInvert := flagSet.Bool(
"v",
false,
`Invert: pass through records which do not match the regex.`,
)
flagSet.Usage = func() {
ostream := os.Stderr
if errorHandling == flag.ContinueOnError { // help intentionally requested
ostream = os.Stdout
}
mapperGrepUsage(ostream, args[0], verb, flagSet)
}
flagSet.Parse(args[argi:])
if errorHandling == flag.ContinueOnError { // help intentionally requested
return nil
}
// Find out how many flags were consumed by this verb and advance for the
// next verb
argi = len(args) - len(flagSet.Args())
// Get the regex from the command line
if argi >= argc {
flagSet.Usage()
os.Exit(1)
}
pattern := args[argi]
argi += 1
// TODO: maybe CompilePOSIX
regexp, err := regexp.Compile(pattern)
if err != nil {
fmt.Fprintf(os.Stderr, "%s %s: couldn't compile regex \"%s\"\n",
args[0], verb, pattern)
os.Exit(1)
}
mapper, _ := NewMapperGrep(
regexp,
*pIgnoreCase,
*pInvert,
)
*pargi = argi
return mapper
}
func mapperGrepUsage(
o *os.File,
argv0 string,
verb string,
flagSet *flag.FlagSet,
) {
fmt.Fprintf(o, "Usage: %s %s [options] {regular expression}\n", argv0, verb)
fmt.Fprintf(o, "Passes through records which match the regular expression.\n")
// flagSet.PrintDefaults() doesn't let us control stdout vs stderr
fmt.Fprint(o, "Options:\n")
flagSet.VisitAll(func(f *flag.Flag) {
fmt.Fprintf(o, " -%v (default %v) %v\n", f.Name, f.Value, f.Usage) // f.Name, f.Value
})
fmt.Fprint(o, `Note that "mlr filter" is more powerful, but requires you to know field names.
By contrast, "mlr grep" allows you to regex-match the entire record. It does
this by formatting each record in memory as DKVP, using command-line-specified
ORS/OFS/OPS, and matching the resulting line against the regex specified
here. In particular, the regex is not applied to the input stream: if you
have CSV with header line "x,y,z" and data line "1,2,3" then the regex will
be matched, not against either of these lines, but against the DKVP line
"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported,
and this command is intended to be merely a keystroke-saver. To get all the
features of system grep, you can do
"mlr --odkvp ... | grep ... | mlr --idkvp ..."
`)
}
// ----------------------------------------------------------------
type MapperGrep struct {
regexp *regexp.Regexp
ignoreCase bool
invert bool
}
func NewMapperGrep(
regexp *regexp.Regexp,
ignoreCase bool,
invert bool,
) (*MapperGrep, error) {
this := &MapperGrep{
regexp: regexp,
ignoreCase: ignoreCase,
invert: invert,
}
return this, nil
}
// ----------------------------------------------------------------
func (this *MapperGrep) Map(
inrecAndContext *types.RecordAndContext,
outputChannel chan<- *types.RecordAndContext,
) {
inrec := inrecAndContext.Record
if inrec != nil { // not end of record stream
inrecAsString := inrec.ToDKVPString()
// TODO: ignore case
matches := this.regexp.Match([]byte(inrecAsString))
if this.invert {
if !matches {
outputChannel <- inrecAndContext
}
} else {
if matches {
outputChannel <- inrecAndContext
}
}
} else {
outputChannel <- inrecAndContext
}
}

View file

@ -4,8 +4,10 @@ TOP OF LIST:
* UDFs
* local vars w/ typing
* grep -i
* some more verbs
o altkv decimate grep having-fields remove-empty-columns reorder
o altkv decimate having-fields remove-empty-columns reorder
o stats1 stats2
* double-check rand-seeding

View file

@ -44,6 +44,24 @@ TODO: put detailed on-line help here.
full transparency on the precedence and associativity rules of
Miller's grammar, to stdout.
================================================================
Usage: ./mlr grep [options] {regular expression}
Passes through records which match the regular expression.
Options:
-i (default false) Use case-insensitive search
-v (default false) Invert: pass through records which do not match the regex.
Note that "mlr filter" is more powerful, but requires you to know field names.
By contrast, "mlr grep" allows you to regex-match the entire record. It does
this by formatting each record in memory as DKVP, using command-line-specified
ORS/OFS/OPS, and matching the resulting line against the regex specified
here. In particular, the regex is not applied to the input stream: if you
have CSV with header line "x,y,z" and data line "1,2,3" then the regex will
be matched, not against either of these lines, but against the DKVP line
"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported,
and this command is intended to be merely a keystroke-saver. To get all the
features of system grep, you can do
"mlr --odkvp ... | grep ... | mlr --idkvp ..."
================================================================
Usage: ./mlr group-by [options]
Outputs records in batches having identical values at specified field names.

View file

@ -66,3 +66,6 @@ run_mlr --opprint --from u/medium.dkvp count -g a -n
run_mlr --opprint --from u/medium.dkvp count -g a,b -n
run_mlr --opprint --from u/medium.dkvp count -o NAME
run_mlr --opprint --from u/medium.dkvp count -g a -o NAME
run_mlr --opprint --from u/s.dkvp grep pan
run_mlr --opprint --from u/s.dkvp grep -v pan

View file

@ -484,3 +484,15 @@ eks 10
wye 7
zee 8
hat 7
----------------------------------------------------------------
mlr --opprint --from u/s.dkvp grep pan
a b i x y
pan pan 1 0.3467901443380824 0.7268028627434533
eks pan 2 0.7586799647899636 0.5221511083334797
----------------------------------------------------------------
mlr --opprint --from u/s.dkvp grep -v pan
a b i x y
wye wye 3 0.20460330576630303 0.33831852551664776
eks wye 4 0.38139939387114097 0.13418874328430463