miller/pkg/transformers/head.go
Adam Lesperance 085e831668
The package version must match the major tag version (#1654)
* Update package version

* Update makefile targets

* Update readme packages

* Remaining old packages via rg/sd
2024-09-20 12:10:11 -04:00

207 lines
5.6 KiB
Go

package transformers
import (
"container/list"
"fmt"
"os"
"strings"
"github.com/johnkerl/miller/v6/pkg/cli"
"github.com/johnkerl/miller/v6/pkg/types"
)
// ----------------------------------------------------------------
const verbNameHead = "head"
var HeadSetup = TransformerSetup{
Verb: verbNameHead,
UsageFunc: transformerHeadUsage,
ParseCLIFunc: transformerHeadParseCLI,
IgnoresInput: false,
}
func transformerHeadUsage(
o *os.File,
) {
fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameHead)
fmt.Fprintf(o, "Passes through the first n records, optionally by category.\n")
fmt.Fprintf(o, "Without -g, ceases consuming more input (i.e. is fast) when n records have been read.\n")
fmt.Fprintf(o, "Options:\n")
fmt.Fprintf(o, "-g {a,b,c} Optional group-by-field names for head counts, e.g. a,b,c.\n")
fmt.Fprintf(o, "-n {n} Head-count to print. Default 10.\n")
fmt.Fprintf(o, "-h|--help Show this message.\n")
}
func transformerHeadParseCLI(
pargi *int,
argc int,
args []string,
_ *cli.TOptions,
doConstruct bool, // false for first pass of CLI-parse, true for second pass
) IRecordTransformer {
// Skip the verb name from the current spot in the mlr command line
argi := *pargi
verb := args[argi]
argi++
headCount := int64(10)
var groupByFieldNames []string = nil
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
opt := args[argi]
if !strings.HasPrefix(opt, "-") {
break // No more flag options to process
}
if args[argi] == "--" {
break // All transformers must do this so main-flags can follow verb-flags
}
argi++
if opt == "-h" || opt == "--help" {
transformerHeadUsage(os.Stdout)
os.Exit(0)
} else if opt == "-n" {
headCount = cli.VerbGetIntArgOrDie(verb, opt, args, &argi, argc)
// This is a bit of a hack. In our Getoptify routine we preprocess
// the command line sending '-xyz' to '-x -y -z', but leaving
// '--xyz' as-is. Also, Unix-like tools often support 'head -n4'
// and 'tail -n4' in addition to 'head -n 4' and 'tail -n 4'. Our
// getoptify paradigm, combined with syntax familiar to users,
// means we get '-n -4' here. So, take the absolute value to handle this.
if headCount < 0 {
headCount = -headCount
}
} else if opt == "-g" {
groupByFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
} else {
transformerHeadUsage(os.Stderr)
os.Exit(1)
}
}
*pargi = argi
if !doConstruct { // All transformers must do this for main command-line parsing
return nil
}
transformer, err := NewTransformerHead(
headCount,
groupByFieldNames,
)
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
return transformer
}
// ----------------------------------------------------------------
type TransformerHead struct {
// input
headCount int64
groupByFieldNames []string
// state
recordTransformerFunc RecordTransformerFunc
unkeyedRecordCount int64
keyedRecordCounts map[string]int64
// See ChainTransformer
wroteDownstreamDone bool
}
func NewTransformerHead(
headCount int64,
groupByFieldNames []string,
) (*TransformerHead, error) {
tr := &TransformerHead{
headCount: headCount,
groupByFieldNames: groupByFieldNames,
unkeyedRecordCount: 0,
keyedRecordCounts: make(map[string]int64),
wroteDownstreamDone: false,
}
if groupByFieldNames == nil {
tr.recordTransformerFunc = tr.transformUnkeyed
} else {
tr.recordTransformerFunc = tr.transformKeyed
}
return tr, nil
}
// ----------------------------------------------------------------
func (tr *TransformerHead) Transform(
inrecAndContext *types.RecordAndContext,
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
inputDownstreamDoneChannel <-chan bool,
outputDownstreamDoneChannel chan<- bool,
) {
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
}
func (tr *TransformerHead) transformUnkeyed(
inrecAndContext *types.RecordAndContext,
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
inputDownstreamDoneChannel <-chan bool,
outputDownstreamDoneChannel chan<- bool,
) {
if !inrecAndContext.EndOfStream {
tr.unkeyedRecordCount++
if tr.unkeyedRecordCount <= tr.headCount {
outputRecordsAndContexts.PushBack(inrecAndContext)
} else if !tr.wroteDownstreamDone {
// Signify to data producers upstream that we'll ignore further
// data, so as far as we're concerned they can stop sending it. See
// ChainTransformer.
//TODO: maybe remove: outputRecordsAndContexts.PushBack(types.NewEndOfStreamMarker(&inrecAndContext.Context))
outputDownstreamDoneChannel <- true
tr.wroteDownstreamDone = true
}
} else {
outputRecordsAndContexts.PushBack(inrecAndContext)
}
}
func (tr *TransformerHead) transformKeyed(
inrecAndContext *types.RecordAndContext,
outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
inputDownstreamDoneChannel <-chan bool,
outputDownstreamDoneChannel chan<- bool,
) {
if !inrecAndContext.EndOfStream {
inrec := inrecAndContext.Record
groupingKey, ok := inrec.GetSelectedValuesJoined(tr.groupByFieldNames)
if !ok {
return
}
count, present := tr.keyedRecordCounts[groupingKey]
if !present { // first time
tr.keyedRecordCounts[groupingKey] = 1
count = 1
} else {
tr.keyedRecordCounts[groupingKey] += 1
count += 1
}
if count <= tr.headCount {
outputRecordsAndContexts.PushBack(inrecAndContext)
}
} else {
outputRecordsAndContexts.PushBack(inrecAndContext)
}
}