miller/pkg/transformers/grep.go
Adam Lesperance 085e831668
The package version must match the major tag version (#1654)
* Update package version

* Update makefile targets

* Update readme packages

* Remaining old packages via rg/sd
2024-09-20 12:10:11 -04:00

184 lines
4.6 KiB
Go

package transformers
import (
"container/list"
"fmt"
"os"
"regexp"
"strings"
"github.com/johnkerl/miller/v6/pkg/cli"
"github.com/johnkerl/miller/v6/pkg/types"
)
// ----------------------------------------------------------------
// verbNameGrep is the verb name used in the usage text and registered in GrepSetup.
const verbNameGrep = "grep"
// GrepSetup bundles the grep verb's name, usage printer, and command-line
// parser into a TransformerSetup.
// NOTE(review): IgnoresInput is false, which presumably means the verb reads
// the record stream -- confirm against the TransformerSetup definition.
var GrepSetup = TransformerSetup{
Verb: verbNameGrep,
UsageFunc: transformerGrepUsage,
ParseCLIFunc: transformerGrepParseCLI,
IgnoresInput: false,
}
// transformerGrepUsage writes the help text for the grep verb to o: a usage
// line, a one-line summary, the list of supported flags, and a closing note
// comparing this verb with "mlr filter" and with system grep.
func transformerGrepUsage(
	o *os.File,
) {
	const exe = "mlr"

	// Fixed lines which need no formatting, emitted verbatim and in order.
	fixedLines := []string{
		"Passes through records which match the regular expression.\n",
		"Options:\n",
		"-i Use case-insensitive search.\n",
		"-v Invert: pass through records which do not match the regex.\n",
		"-a Only grep for values, not keys and values.\n",
		"-h|--help Show this message.\n",
	}

	// Closing note; each %s is filled in with the executable name.
	const closingNote = `Note that "%s filter" is more powerful, but requires you to know field names.
By contrast, "%s grep" allows you to regex-match the entire record. It does this
by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using
OFS "," and OPS "=", and matching the resulting line against the regex specified
here. In particular, the regex is not applied to the input stream: if you have
CSV with header line "x,y,z" and data line "1,2,3" then the regex will be
matched, not against either of these lines, but against the DKVP line
"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported,
and this command is intended to be merely a keystroke-saver. To get all the
features of system grep, you can do
"%s --odkvp ... | grep ... | %s --idkvp ..."
`

	fmt.Fprintf(o, "Usage: %s %s [options] {regular expression}\n", exe, verbNameGrep)
	for _, line := range fixedLines {
		fmt.Fprint(o, line)
	}
	fmt.Fprintf(o, closingNote, exe, exe, exe, exe)
}
// transformerGrepParseCLI parses the grep verb's portion of the command line.
//
// On entry *pargi indexes the verb name within args. Flags -i, -v, and -a are
// consumed first, then exactly one regular-expression argument. On return
// *pargi indexes the first argument after the regular expression.
//
// The usage text is printed and the process exits with status 0 for
// -h/--help, or status 1 for an unrecognized flag or a missing regular
// expression. A regular expression which fails to compile is reported on
// stderr and the process exits with status 1.
//
// When doConstruct is false the arguments are still validated and *pargi is
// still advanced, but nil is returned instead of a transformer.
func transformerGrepParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) IRecordTransformer {
	// The verb name sits at the current spot in the mlr command line: keep it
	// for diagnostics and step past it.
	argi := *pargi
	verb := args[argi]
	argi++

	var (
		caseInsensitive bool
		invertMatch     bool
		valuesOnly      bool
	)

	for argi < argc {
		flag := args[argi]
		// Stop at the first non-flag argument, or at "--" which is left
		// unconsumed: all transformers must do this so main-flags can follow
		// verb-flags.
		if !strings.HasPrefix(flag, "-") || flag == "--" {
			break
		}
		argi++

		switch flag {
		case "-h", "--help":
			transformerGrepUsage(os.Stdout)
			os.Exit(0)
		case "-i":
			caseInsensitive = true
		case "-v":
			invertMatch = true
		case "-a":
			valuesOnly = true
		default:
			transformerGrepUsage(os.Stderr)
			os.Exit(1)
		}
	}

	// The regular expression is the one required positional argument.
	if argi >= argc {
		transformerGrepUsage(os.Stderr)
		os.Exit(1)
	}
	pattern := args[argi]
	argi++

	// Case-insensitivity is requested through the regex-syntax flag prefix.
	if caseInsensitive {
		pattern = "(?i)" + pattern
	}

	// TODO: maybe CompilePOSIX
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s %s: couldn't compile regex \"%s\"\n",
			"mlr", verb, pattern)
		os.Exit(1)
	}

	*pargi = argi

	// All transformers must do this for main command-line parsing.
	if !doConstruct {
		return nil
	}

	transformer, err := NewTransformerGrep(compiled, invertMatch, valuesOnly)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	return transformer
}
// ----------------------------------------------------------------
// TransformerGrep holds the configuration for the grep verb.
type TransformerGrep struct {
	// regexp is the compiled pattern each record's string form is matched against.
	regexp *regexp.Regexp
	// invert selects records which do NOT match instead of those which do.
	invert bool
	// valuesOnly selects the NIDX string form of a record rather than the DKVP one.
	valuesOnly bool
}

// NewTransformerGrep returns a TransformerGrep holding the given compiled
// pattern and option flags. The returned error is always nil.
func NewTransformerGrep(
	regexp *regexp.Regexp,
	invert bool,
	valuesOnly bool,
) (*TransformerGrep, error) {
	return &TransformerGrep{
		regexp:     regexp,
		invert:     invert,
		valuesOnly: valuesOnly,
	}, nil
}
// ----------------------------------------------------------------
// Transform passes a record through to outputRecordsAndContexts when its
// string form matches the regular expression, or, if invert is set, when it
// does not match. The string form is produced by ToNIDXString when valuesOnly
// is set and by ToDKVPString otherwise. The end-of-stream marker is always
// passed through. Downstream-done handling is delegated to
// HandleDefaultDownstreamDone.
func (tr *TransformerGrep) Transform(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)

	// The end-of-stream marker carries no record to match: forward it as-is.
	if inrecAndContext.EndOfStream {
		outputRecordsAndContexts.PushBack(inrecAndContext)
		return
	}

	record := inrecAndContext.Record
	var line string
	if tr.valuesOnly {
		line = record.ToNIDXString()
	} else {
		line = record.ToDKVPString()
	}

	// Keep the record when the match result differs from the invert flag:
	// matched and not inverted, or unmatched and inverted.
	if tr.regexp.MatchString(line) != tr.invert {
		outputRecordsAndContexts.PushBack(inrecAndContext)
	}
}