miller/pkg/transformers/count.go
Adam Lesperance 085e831668
The package version must match the major tag version (#1654)
* Update package version

* Update makefile targets

* Update readme packages

* Remaining old packages via rg/sd
2024-09-20 12:10:11 -04:00

244 lines
6.8 KiB
Go

package transformers
import (
"container/list"
"fmt"
"os"
"strings"
"github.com/johnkerl/miller/v6/pkg/cli"
"github.com/johnkerl/miller/v6/pkg/lib"
"github.com/johnkerl/miller/v6/pkg/mlrval"
"github.com/johnkerl/miller/v6/pkg/types"
)
// ----------------------------------------------------------------
// verbNameCount is the verb's name; it is used in CountSetup and in the
// first line of the usage text.
const verbNameCount = "count"

// CountSetup ties the verb name to its usage-printing and CLI-parsing
// functions.
var CountSetup = TransformerSetup{
	Verb:         verbNameCount,
	UsageFunc:    transformerCountUsage,
	ParseCLIFunc: transformerCountParseCLI,
	IgnoresInput: false,
}
// transformerCountUsage writes the help text for the count verb to o.
//
// The CLI parser for this verb calls it with os.Stdout for -h/--help and with
// os.Stderr when it encounters an unrecognized option.
func transformerCountUsage(
	o *os.File,
) {
	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameCount)
	fmt.Fprint(o,
		`Prints number of records, optionally grouped by distinct values for specified field names.
`)
	fmt.Fprint(o, "Options:\n")
	fmt.Fprint(o, "-g {a,b,c} Optional group-by-field names for counts, e.g. a,b,c\n")
	// -n is a boolean flag: the parser consumes no value after it, so the
	// help text must not advertise one.
	fmt.Fprint(o, "-n Show only the number of distinct values. Not interesting without -g.\n")
	fmt.Fprint(o, "-o {name} Field name for output-count. Default \"count\".\n")
	fmt.Fprint(o, "-h|--help Show this message.\n")
}
// transformerCountParseCLI parses the count verb's flags starting at
// args[*pargi] (which holds the verb name itself) and advances *pargi past
// everything it consumed.
//
// Recognized flags are -g (group-by field names), -n (show only the number of
// distinct groups), -o (output field name, default "count"), and -h/--help.
// An unrecognized flag prints usage to stderr and exits with status 1.
//
// When doConstruct is false only the argument index is advanced and nil is
// returned; otherwise the configured transformer is returned.
func transformerCountParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) IRecordTransformer {
	// Step over the verb name.
	argi := *pargi
	verb := args[argi]
	argi++

	// Option defaults.
	var groupByFieldNames []string
	showCountsOnly := false
	outputFieldName := "count"

	// Each pass consumes the flag itself; flags taking a value advance argi
	// once more inside the helper they call.
	for argi < argc {
		opt := args[argi]

		// Stop at the first non-flag argument, or at "--": all transformers
		// must stop there so main-flags can follow verb-flags.
		if !strings.HasPrefix(opt, "-") || opt == "--" {
			break
		}
		argi++

		switch opt {
		case "-h", "--help":
			transformerCountUsage(os.Stdout)
			os.Exit(0)
		case "-g":
			groupByFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
		case "-n":
			showCountsOnly = true
		case "-o":
			outputFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
		default:
			transformerCountUsage(os.Stderr)
			os.Exit(1)
		}
	}

	*pargi = argi

	// All transformers must return early here during main command-line parsing.
	if !doConstruct {
		return nil
	}

	transformer, err := NewTransformerCount(
		groupByFieldNames,
		showCountsOnly,
		outputFieldName,
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	return transformer
}
// ----------------------------------------------------------------
// TransformerCount holds the options and the running state for the count
// verb.
type TransformerCount struct {
	// input
	groupByFieldNames []string // group-by field names from -g; nil selects ungrouped counting
	showCountsOnly    bool     // set by -n: emit only the number of distinct groups
	outputFieldName   string   // name of the field carrying the count in output records

	// state
	recordTransformerFunc RecordTransformerFunc // countUngrouped or countGrouped, chosen in NewTransformerCount
	ungroupedCount        int64                 // records seen so far by countUngrouped

	// Example:
	// * Suppose group-by fields are a,b.
	// * One record has a=foo,b=bar
	// * Another record has a=baz,b=quux
	// * Map keys are strings "foo,bar" and "baz,quux".
	// * groupedCounts maps "foo,bar" to 1 and "baz,quux" to 1.
	// * groupingValues maps "foo,bar" to ["foo", "bar"] and "baz,quux" to ["baz", "quux"].
	groupedCounts  *lib.OrderedMap
	groupingValues *lib.OrderedMap
}
// NewTransformerCount returns a count transformer configured with the given
// options.
//
// A nil groupByFieldNames selects ungrouped counting; any non-nil slice
// selects grouped counting. The returned error is always nil.
func NewTransformerCount(
	groupByFieldNames []string,
	showCountsOnly bool,
	outputFieldName string,
) (*TransformerCount, error) {
	tr := new(TransformerCount)

	// Options as supplied by the caller.
	tr.groupByFieldNames = groupByFieldNames
	tr.showCountsOnly = showCountsOnly
	tr.outputFieldName = outputFieldName

	// Fresh state; ungroupedCount starts at its zero value.
	tr.groupedCounts = lib.NewOrderedMap()
	tr.groupingValues = lib.NewOrderedMap()

	// Grouped counting unless no group-by fields were specified at all.
	tr.recordTransformerFunc = tr.countGrouped
	if groupByFieldNames == nil {
		tr.recordTransformerFunc = tr.countUngrouped
	}

	return tr, nil
}
// ----------------------------------------------------------------
// Transform first calls HandleDefaultDownstreamDone with the two done-channels
// and then forwards all of its arguments to the counting function selected in
// NewTransformerCount (countUngrouped or countGrouped).
func (tr *TransformerCount) Transform(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
	tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
}
// ----------------------------------------------------------------
// countUngrouped tallies every record it is handed and emits nothing until
// end of stream. At end of stream it appends a single record holding the
// total under tr.outputFieldName, followed by the end-of-stream marker.
// The two channel parameters are unused here.
func (tr *TransformerCount) countUngrouped(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	// Ordinary record: count it; the record itself is not passed along.
	if !inrecAndContext.EndOfStream {
		tr.ungroupedCount++
		return
	}

	// End of stream: emit the total, then the end-of-stream marker.
	totalRecord := mlrval.NewMlrmapAsRecord()
	totalRecord.PutCopy(tr.outputFieldName, mlrval.FromInt(tr.ungroupedCount))
	totalRecordAndContext := types.NewRecordAndContext(totalRecord, &inrecAndContext.Context)

	outputRecordsAndContexts.PushBack(totalRecordAndContext)
	outputRecordsAndContexts.PushBack(inrecAndContext)
}
// ----------------------------------------------------------------
// countGrouped tallies records per distinct combination of values of the
// group-by fields and emits nothing until end of stream. Records for which
// GetSelectedValuesAndJoined reports not-ok are ignored.
//
// At end of stream it appends either one record holding the number of
// distinct groups (when tr.showCountsOnly is set), or one record per group
// holding the group-by fields and that group's count, in the iteration order
// of tr.groupedCounts. The end-of-stream marker is appended last in both
// cases. The two channel parameters are unused here.
func (tr *TransformerCount) countGrouped(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	// Ordinary record: update the tally for its group.
	if !inrecAndContext.EndOfStream {
		key, values, ok := inrecAndContext.Record.GetSelectedValuesAndJoined(
			tr.groupByFieldNames,
		)
		if !ok {
			// Current record does not have the specified fields; ignore it.
			return
		}

		if tr.groupedCounts.Has(key) {
			previous := tr.groupedCounts.Get(key).(int64)
			tr.groupedCounts.Put(key, previous+1)
			return
		}

		// First sighting of this group: start its count and remember the
		// field values so they can be written out at end of stream.
		tr.groupedCounts.Put(key, int64(1))
		tr.groupingValues.Put(key, values)
		return
	}

	// End of stream: emit the results.
	ctx := &inrecAndContext.Context

	if tr.showCountsOnly {
		// Only the number of distinct groups is wanted.
		summary := mlrval.NewMlrmapAsRecord()
		summary.PutCopy(tr.outputFieldName, mlrval.FromInt(tr.groupedCounts.FieldCount))
		outputRecordsAndContexts.PushBack(types.NewRecordAndContext(summary, ctx))
	} else {
		for entry := tr.groupedCounts.Head; entry != nil; entry = entry.Next {
			outrec := mlrval.NewMlrmapAsRecord()

			// Example:
			// * Suppose group-by fields are a,b.
			// * Record has a=foo,b=bar
			// * Grouping key is "foo,bar"
			// * Grouping values for key is ["foo", "bar"]
			// Here we populate a record with "a=foo,b=bar".
			valuesForKey := tr.groupingValues.Get(entry.Key).([]*mlrval.Mlrval)
			for i, value := range valuesForKey {
				outrec.PutCopy(tr.groupByFieldNames[i], value)
			}

			groupCount := entry.Value.(int64)
			outrec.PutCopy(tr.outputFieldName, mlrval.FromInt(groupCount))

			outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, ctx))
		}
	}

	outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
}