mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
255 lines
7.6 KiB
Go
255 lines
7.6 KiB
Go
package transformers
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/johnkerl/miller/v6/pkg/cli"
|
|
"github.com/johnkerl/miller/v6/pkg/lib"
|
|
"github.com/johnkerl/miller/v6/pkg/types"
|
|
)
|
|
|
|
// ----------------------------------------------------------------
|
|
// verbNameRename is the verb name for this transformer; it is used both in
// RenameSetup and in the usage text.
const verbNameRename = "rename"
|
|
|
|
var RenameSetup = TransformerSetup{
|
|
Verb: verbNameRename,
|
|
UsageFunc: transformerRenameUsage,
|
|
ParseCLIFunc: transformerRenameParseCLI,
|
|
IgnoresInput: false,
|
|
}
|
|
|
|
func transformerRenameUsage(
|
|
o *os.File,
|
|
) {
|
|
exeName := "mlr"
|
|
verb := verbNameRename
|
|
|
|
fmt.Fprintf(o, "Usage: %s %s [options] {old1,new1,old2,new2,...}\n", "mlr", verbNameRename)
|
|
fmt.Fprintf(o, "Renames specified fields.\n")
|
|
fmt.Fprintf(o, "Options:\n")
|
|
fmt.Fprintf(o, "-r Treat old field names as regular expressions. \"ab\", \"a.*b\"\n")
|
|
fmt.Fprintf(o, " will match any field name containing the substring \"ab\" or\n")
|
|
fmt.Fprintf(o, " matching \"a.*b\", respectively; anchors of the form \"^ab$\",\n")
|
|
fmt.Fprintf(o, " \"^a.*b$\" may be used. New field names may be plain strings,\n")
|
|
fmt.Fprintf(o, " or may contain capture groups of the form \"\\1\" through\n")
|
|
fmt.Fprintf(o, " \"\\9\". Wrapping the regex in double quotes is optional, but\n")
|
|
fmt.Fprintf(o, " is required if you wish to follow it with 'i' to indicate\n")
|
|
fmt.Fprintf(o, " case-insensitivity.\n")
|
|
fmt.Fprintf(o, "-g Do global replacement within each field name rather than\n")
|
|
fmt.Fprintf(o, " first-match replacement.\n")
|
|
fmt.Fprintf(o, "-h|--help Show this message.\n")
|
|
fmt.Fprintf(o, "Examples:\n")
|
|
fmt.Fprintf(o, "%s %s old_name,new_name\n", exeName, verb)
|
|
fmt.Fprintf(o, "%s %s old_name_1,new_name_1,old_name_2,new_name_2\n", exeName, verb)
|
|
fmt.Fprintf(o, "%s %s -r 'Date_[0-9]+,Date' Rename all such fields to be \"Date\"\n", exeName, verb)
|
|
fmt.Fprintf(o, "%s %s -r '\"Date_[0-9]+\",Date' Same\n", exeName, verb)
|
|
fmt.Fprintf(o, "%s %s -r 'Date_([0-9]+).*,\\1' Rename all such fields to be of the form 20151015\n", exeName, verb)
|
|
fmt.Fprintf(o, "%s %s -r '\"name\"i,Name' Rename \"name\", \"Name\", \"NAME\", etc. to \"Name\"\n", exeName, verb)
|
|
}
|
|
|
|
func transformerRenameParseCLI(
|
|
pargi *int,
|
|
argc int,
|
|
args []string,
|
|
_ *cli.TOptions,
|
|
doConstruct bool, // false for first pass of CLI-parse, true for second pass
|
|
) IRecordTransformer {
|
|
|
|
// Skip the verb name from the current spot in the mlr command line
|
|
argi := *pargi
|
|
argi++
|
|
|
|
doRegexes := false
|
|
doGsub := false
|
|
|
|
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
|
opt := args[argi]
|
|
if !strings.HasPrefix(opt, "-") {
|
|
break // No more flag options to process
|
|
}
|
|
if args[argi] == "--" {
|
|
break // All transformers must do this so main-flags can follow verb-flags
|
|
}
|
|
argi++
|
|
|
|
if opt == "-h" || opt == "--help" {
|
|
transformerRenameUsage(os.Stdout)
|
|
os.Exit(0)
|
|
|
|
} else if opt == "-r" {
|
|
doRegexes = true
|
|
|
|
} else if opt == "-g" {
|
|
doGsub = true
|
|
|
|
} else {
|
|
transformerRenameUsage(os.Stderr)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
if doGsub {
|
|
doRegexes = true
|
|
}
|
|
|
|
// Get the rename field names from the command line
|
|
if argi >= argc {
|
|
transformerRenameUsage(os.Stderr)
|
|
os.Exit(1)
|
|
}
|
|
names := lib.SplitString(args[argi], ",")
|
|
if len(names)%2 != 0 {
|
|
transformerRenameUsage(os.Stderr)
|
|
os.Exit(1)
|
|
}
|
|
argi++
|
|
|
|
*pargi = argi
|
|
if !doConstruct { // All transformers must do this for main command-line parsing
|
|
return nil
|
|
}
|
|
|
|
transformer, err := NewTransformerRename(
|
|
names,
|
|
doRegexes,
|
|
doGsub,
|
|
)
|
|
if err != nil {
|
|
fmt.Fprintln(os.Stderr, err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
return transformer
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
// tRegexAndReplacement bundles one compiled old-name regex with the
// replacement text to be applied to field names that match it.
type tRegexAndReplacement struct {
	// regex is the compiled form of the old-name pattern.
	regex *regexp.Regexp
	// replacement is the new-name text exactly as given on the command line.
	replacement string
	// replacementCaptureMatrix is the second return value of
	// lib.ReplacementHasCaptures(replacement); it is handed back to
	// lib.RegexCompiledSub on the first-match (non -g) path.
	replacementCaptureMatrix [][]int
}
|
|
|
|
type TransformerRename struct {
|
|
oldToNewNames *lib.OrderedMap
|
|
regexesAndReplacements *types.List[*tRegexAndReplacement]
|
|
doGsub bool
|
|
recordTransformerFunc RecordTransformerFunc
|
|
}
|
|
|
|
func NewTransformerRename(
|
|
names []string,
|
|
doRegexes bool,
|
|
doGsub bool,
|
|
) (*TransformerRename, error) {
|
|
if len(names)%2 != 0 {
|
|
return nil, fmt.Errorf("mlr rename: names string must have even length")
|
|
}
|
|
|
|
oldToNewNames := lib.NewOrderedMap()
|
|
n := len(names)
|
|
for i := 0; i < n; i += 2 {
|
|
oldName := names[i]
|
|
newName := names[i+1]
|
|
oldToNewNames.Put(oldName, newName)
|
|
}
|
|
|
|
tr := &TransformerRename{}
|
|
|
|
if !doRegexes {
|
|
tr.oldToNewNames = oldToNewNames
|
|
tr.doGsub = false
|
|
tr.recordTransformerFunc = tr.transformWithoutRegexes
|
|
} else {
|
|
tr.regexesAndReplacements = types.NewList[*tRegexAndReplacement](10)
|
|
for pe := oldToNewNames.Head; pe != nil; pe = pe.Next {
|
|
regexString := pe.Key
|
|
regex := lib.CompileMillerRegexOrDie(regexString)
|
|
replacement := pe.Value.(string)
|
|
_, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
|
|
regexAndReplacement := tRegexAndReplacement{
|
|
regex: regex,
|
|
replacement: replacement,
|
|
replacementCaptureMatrix: replacementCaptureMatrix,
|
|
}
|
|
tr.regexesAndReplacements.PushBack(®exAndReplacement)
|
|
}
|
|
tr.doGsub = doGsub
|
|
tr.recordTransformerFunc = tr.transformWithRegexes
|
|
}
|
|
|
|
return tr, nil
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
func (tr *TransformerRename) Transform(
|
|
inrecAndContext *types.RecordAndContext,
|
|
outputRecordsAndContexts *types.List[*types.RecordAndContext],
|
|
inputDownstreamDoneChannel <-chan bool,
|
|
outputDownstreamDoneChannel chan<- bool,
|
|
) {
|
|
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
|
tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
func (tr *TransformerRename) transformWithoutRegexes(
|
|
inrecAndContext *types.RecordAndContext,
|
|
outputRecordsAndContexts *types.List[*types.RecordAndContext],
|
|
inputDownstreamDoneChannel <-chan bool,
|
|
outputDownstreamDoneChannel chan<- bool,
|
|
) {
|
|
if !inrecAndContext.EndOfStream {
|
|
inrec := inrecAndContext.Record
|
|
|
|
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
|
if tr.oldToNewNames.Has(pe.Key) {
|
|
newName := tr.oldToNewNames.Get(pe.Key).(string)
|
|
inrec.Rename(pe.Key, newName)
|
|
}
|
|
|
|
}
|
|
}
|
|
outputRecordsAndContexts.PushBack(inrecAndContext) // including end-of-stream marker
|
|
}
|
|
|
|
// ----------------------------------------------------------------
|
|
func (tr *TransformerRename) transformWithRegexes(
|
|
inrecAndContext *types.RecordAndContext,
|
|
outputRecordsAndContexts *types.List[*types.RecordAndContext],
|
|
inputDownstreamDoneChannel <-chan bool,
|
|
outputDownstreamDoneChannel chan<- bool,
|
|
) {
|
|
if !inrecAndContext.EndOfStream {
|
|
inrec := inrecAndContext.Record
|
|
|
|
for pr := tr.regexesAndReplacements.Front(); pr != nil; pr = pr.Next() {
|
|
regexAndReplacement := pr.Value.(*tRegexAndReplacement)
|
|
regex := regexAndReplacement.regex
|
|
replacement := regexAndReplacement.replacement
|
|
replacementCaptureMatrix := regexAndReplacement.replacementCaptureMatrix
|
|
|
|
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
|
oldName := pe.Key
|
|
if tr.doGsub {
|
|
newName := regex.ReplaceAllString(oldName, replacement)
|
|
if newName != oldName {
|
|
inrec.Rename(oldName, newName)
|
|
}
|
|
} else {
|
|
newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
|
|
if newName != oldName {
|
|
inrec.Rename(oldName, newName)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
outputRecordsAndContexts.PushBack(inrecAndContext)
|
|
} else {
|
|
outputRecordsAndContexts.PushBack(inrecAndContext) // including end-of-stream marker
|
|
}
|
|
}
|