mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
* Update package version * Update makefile targets * Update readme packages * Remaining old packages via rg/sd
184 lines
4.6 KiB
Go
184 lines
4.6 KiB
Go
package transformers
|
|
|
|
import (
|
|
"container/list"
|
|
"fmt"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/johnkerl/miller/v6/pkg/cli"
|
|
"github.com/johnkerl/miller/v6/pkg/types"
|
|
)
|
|
|
|
// ----------------------------------------------------------------
|
|
const verbNameGrep = "grep"
|
|
|
|
var GrepSetup = TransformerSetup{
|
|
Verb: verbNameGrep,
|
|
UsageFunc: transformerGrepUsage,
|
|
ParseCLIFunc: transformerGrepParseCLI,
|
|
IgnoresInput: false,
|
|
}
|
|
|
|
func transformerGrepUsage(
|
|
o *os.File,
|
|
) {
|
|
fmt.Fprintf(o, "Usage: %s %s [options] {regular expression}\n", "mlr", verbNameGrep)
|
|
fmt.Fprintf(o, "Passes through records which match the regular expression.\n")
|
|
|
|
fmt.Fprint(o, "Options:\n")
|
|
fmt.Fprint(o, "-i Use case-insensitive search.\n")
|
|
fmt.Fprint(o, "-v Invert: pass through records which do not match the regex.\n")
|
|
fmt.Fprint(o, "-a Only grep for values, not keys and values.\n")
|
|
fmt.Fprintf(o, "-h|--help Show this message.\n")
|
|
|
|
fmt.Fprintf(o, `Note that "%s filter" is more powerful, but requires you to know field names.
|
|
By contrast, "%s grep" allows you to regex-match the entire record. It does this
|
|
by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using
|
|
OFS "," and OPS "=", and matching the resulting line against the regex specified
|
|
here. In particular, the regex is not applied to the input stream: if you have
|
|
CSV with header line "x,y,z" and data line "1,2,3" then the regex will be
|
|
matched, not against either of these lines, but against the DKVP line
|
|
"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported,
|
|
and this command is intended to be merely a keystroke-saver. To get all the
|
|
features of system grep, you can do
|
|
"%s --odkvp ... | grep ... | %s --idkvp ..."
|
|
`, "mlr", "mlr", "mlr", "mlr")
|
|
}
|
|
|
|
func transformerGrepParseCLI(
|
|
pargi *int,
|
|
argc int,
|
|
args []string,
|
|
_ *cli.TOptions,
|
|
doConstruct bool, // false for first pass of CLI-parse, true for second pass
|
|
) IRecordTransformer {
|
|
|
|
// Skip the verb name from the current spot in the mlr command line
|
|
argi := *pargi
|
|
verb := args[argi]
|
|
argi++
|
|
|
|
ignoreCase := false
|
|
invert := false
|
|
valuesOnly := false
|
|
|
|
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
|
opt := args[argi]
|
|
if !strings.HasPrefix(opt, "-") {
|
|
break // No more flag options to process
|
|
}
|
|
if args[argi] == "--" {
|
|
break // All transformers must do this so main-flags can follow verb-flags
|
|
}
|
|
argi++
|
|
|
|
if opt == "-h" || opt == "--help" {
|
|
transformerGrepUsage(os.Stdout)
|
|
os.Exit(0)
|
|
|
|
} else if opt == "-i" {
|
|
ignoreCase = true
|
|
|
|
} else if opt == "-v" {
|
|
invert = true
|
|
|
|
} else if opt == "-a" {
|
|
valuesOnly = true
|
|
|
|
} else {
|
|
transformerGrepUsage(os.Stderr)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// Get the regex from the command line
|
|
if argi >= argc {
|
|
transformerGrepUsage(os.Stderr)
|
|
os.Exit(1)
|
|
}
|
|
pattern := args[argi]
|
|
argi++
|
|
|
|
if ignoreCase {
|
|
pattern = "(?i)" + pattern
|
|
}
|
|
|
|
// TODO: maybe CompilePOSIX
|
|
regexp, err := regexp.Compile(pattern)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "%s %s: couldn't compile regex \"%s\"\n",
|
|
"mlr", verb, pattern)
|
|
os.Exit(1)
|
|
}
|
|
|
|
*pargi = argi
|
|
if !doConstruct { // All transformers must do this for main command-line parsing
|
|
return nil
|
|
}
|
|
|
|
transformer, err := NewTransformerGrep(
|
|
regexp,
|
|
invert,
|
|
valuesOnly,
|
|
)
|
|
if err != nil {
|
|
fmt.Fprintln(os.Stderr, err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
return transformer
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
type TransformerGrep struct {
|
|
regexp *regexp.Regexp
|
|
invert bool
|
|
valuesOnly bool
|
|
}
|
|
|
|
func NewTransformerGrep(
|
|
regexp *regexp.Regexp,
|
|
invert bool,
|
|
valuesOnly bool,
|
|
) (*TransformerGrep, error) {
|
|
tr := &TransformerGrep{
|
|
regexp: regexp,
|
|
invert: invert,
|
|
valuesOnly: valuesOnly,
|
|
}
|
|
return tr, nil
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
func (tr *TransformerGrep) Transform(
|
|
inrecAndContext *types.RecordAndContext,
|
|
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
|
|
inputDownstreamDoneChannel <-chan bool,
|
|
outputDownstreamDoneChannel chan<- bool,
|
|
) {
|
|
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
|
if !inrecAndContext.EndOfStream {
|
|
inrec := inrecAndContext.Record
|
|
var inrecAsString string
|
|
if tr.valuesOnly {
|
|
inrecAsString = inrec.ToNIDXString()
|
|
} else {
|
|
inrecAsString = inrec.ToDKVPString()
|
|
}
|
|
matches := tr.regexp.MatchString(inrecAsString)
|
|
if tr.invert {
|
|
if !matches {
|
|
outputRecordsAndContexts.PushBack(inrecAndContext)
|
|
}
|
|
} else {
|
|
if matches {
|
|
outputRecordsAndContexts.PushBack(inrecAndContext)
|
|
}
|
|
}
|
|
} else {
|
|
outputRecordsAndContexts.PushBack(inrecAndContext)
|
|
}
|
|
}
|