mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Port histogram verb from C to Go
This commit is contained in:
parent
ca320d8475
commit
d48226feb7
9 changed files with 478 additions and 78 deletions
|
|
@ -14,7 +14,7 @@ import (
|
|||
|
||||
// ----------------------------------------------------------------
|
||||
func main() {
|
||||
runtime.GOMAXPROCS(4) // Seems reasonable these days
|
||||
runtime.GOMAXPROCS(4) // Seems reasonable these days
|
||||
debug.SetGCPercent(500) // Empirical: See README-profiling.md
|
||||
|
||||
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
|
|
|||
|
|
@ -1,71 +1,71 @@
|
|||
|
||||
mlr --opprint histogram -f x,y --lo 0 --hi 1 --nbins 20 ./reg-test/input/small
|
||||
bin_lo bin_hi x_count y_count
|
||||
0.000000 0.050000 1 0
|
||||
0.050000 0.100000 0 0
|
||||
0.100000 0.150000 0 1
|
||||
0.150000 0.200000 0 1
|
||||
0.200000 0.250000 1 0
|
||||
0.250000 0.300000 0 0
|
||||
0.300000 0.350000 1 1
|
||||
0.350000 0.400000 1 0
|
||||
0.400000 0.450000 0 0
|
||||
0.450000 0.500000 0 1
|
||||
0.500000 0.550000 2 1
|
||||
0.550000 0.600000 2 0
|
||||
0.600000 0.650000 1 0
|
||||
0.650000 0.700000 0 0
|
||||
0.700000 0.750000 0 2
|
||||
0.750000 0.800000 1 0
|
||||
0.800000 0.850000 0 0
|
||||
0.850000 0.900000 0 1
|
||||
0.900000 0.950000 0 0
|
||||
0.950000 1.000000 0 2
|
||||
bin_lo bin_hi x_count y_count
|
||||
0 0.05 1 0
|
||||
0.05 0.1 0 0
|
||||
0.1 0.15 0 1
|
||||
0.15 0.2 0 1
|
||||
0.2 0.25 1 0
|
||||
0.25 0.3 0 0
|
||||
0.3 0.35 1 1
|
||||
0.35 0.4 1 0
|
||||
0.4 0.45 0 0
|
||||
0.45 0.5 0 1
|
||||
0.5 0.55 2 1
|
||||
0.55 0.6 2 0
|
||||
0.6 0.65 1 0
|
||||
0.65 0.7 0 0
|
||||
0.7 0.75 0 2
|
||||
0.75 0.8 1 0
|
||||
0.8 0.85 0 0
|
||||
0.85 0.9 0 1
|
||||
0.9 0.95 0 0
|
||||
0.95 1 0 2
|
||||
|
||||
mlr --opprint histogram -f x,y --lo 0 --hi 1 --nbins 20 -o foo_ ./reg-test/input/small
|
||||
foo_bin_lo foo_bin_hi foo_x_count foo_y_count
|
||||
0.000000 0.050000 1 0
|
||||
0.050000 0.100000 0 0
|
||||
0.100000 0.150000 0 1
|
||||
0.150000 0.200000 0 1
|
||||
0.200000 0.250000 1 0
|
||||
0.250000 0.300000 0 0
|
||||
0.300000 0.350000 1 1
|
||||
0.350000 0.400000 1 0
|
||||
0.400000 0.450000 0 0
|
||||
0.450000 0.500000 0 1
|
||||
0.500000 0.550000 2 1
|
||||
0.550000 0.600000 2 0
|
||||
0.600000 0.650000 1 0
|
||||
0.650000 0.700000 0 0
|
||||
0.700000 0.750000 0 2
|
||||
0.750000 0.800000 1 0
|
||||
0.800000 0.850000 0 0
|
||||
0.850000 0.900000 0 1
|
||||
0.900000 0.950000 0 0
|
||||
0.950000 1.000000 0 2
|
||||
0 0.05 1 0
|
||||
0.05 0.1 0 0
|
||||
0.1 0.15 0 1
|
||||
0.15 0.2 0 1
|
||||
0.2 0.25 1 0
|
||||
0.25 0.3 0 0
|
||||
0.3 0.35 1 1
|
||||
0.35 0.4 1 0
|
||||
0.4 0.45 0 0
|
||||
0.45 0.5 0 1
|
||||
0.5 0.55 2 1
|
||||
0.55 0.6 2 0
|
||||
0.6 0.65 1 0
|
||||
0.65 0.7 0 0
|
||||
0.7 0.75 0 2
|
||||
0.75 0.8 1 0
|
||||
0.8 0.85 0 0
|
||||
0.85 0.9 0 1
|
||||
0.9 0.95 0 0
|
||||
0.95 1 0 2
|
||||
|
||||
mlr --opprint histogram --nbins 9 --auto -f x,y ./reg-test/input/ints.dkvp
|
||||
bin_lo bin_hi x_count y_count
|
||||
0.000000 1.000000 8 1
|
||||
1.000000 2.000000 2 2
|
||||
2.000000 3.000000 5 5
|
||||
3.000000 4.000000 4 1
|
||||
4.000000 5.000000 3 2
|
||||
5.000000 6.000000 1 4
|
||||
6.000000 7.000000 3 4
|
||||
7.000000 8.000000 2 4
|
||||
8.000000 9.000000 2 7
|
||||
bin_lo bin_hi x_count y_count
|
||||
0 1 8 1
|
||||
1 2 2 2
|
||||
2 3 5 5
|
||||
3 4 4 1
|
||||
4 5 3 2
|
||||
5 6 1 4
|
||||
6 7 3 4
|
||||
7 8 2 4
|
||||
8 9 2 7
|
||||
|
||||
mlr --opprint histogram --nbins 9 --auto -f x,y -o foo_ ./reg-test/input/ints.dkvp
|
||||
foo_bin_lo bin_hi foo_x_count foo_y_count
|
||||
0.000000 1.000000 8 1
|
||||
1.000000 2.000000 2 2
|
||||
2.000000 3.000000 5 5
|
||||
3.000000 4.000000 4 1
|
||||
4.000000 5.000000 3 2
|
||||
5.000000 6.000000 1 4
|
||||
6.000000 7.000000 3 4
|
||||
7.000000 8.000000 2 4
|
||||
8.000000 9.000000 2 7
|
||||
foo_bin_lo foo_bin_hi foo_x_count foo_y_count
|
||||
0 1 8 1
|
||||
1 2 2 2
|
||||
2 3 5 5
|
||||
3 4 4 1
|
||||
4 5 3 2
|
||||
5 6 1 4
|
||||
6 7 3 4
|
||||
7 8 2 4
|
||||
8 9 2 7
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ var MAPPER_LOOKUP_TABLE = []transforming.TransformerSetup{
|
|||
transformers.GroupLikeSetup,
|
||||
transformers.HavingFieldsSetup,
|
||||
transformers.HeadSetup,
|
||||
transformers.HistogramSetup,
|
||||
transformers.JoinSetup,
|
||||
transformers.JSONParseSetup,
|
||||
transformers.JSONStringifySetup,
|
||||
|
|
|
|||
|
|
@ -54,3 +54,19 @@ func VerbGetIntArgOrDie(verb string, opt string, args []string, pargi *int, argc
|
|||
}
|
||||
return retval
|
||||
}
|
||||
|
||||
// E.g. with ["-n", "10.3"], makes sure there is something in the "10.3"
|
||||
// position, scans it as float, and returns it.
|
||||
func VerbGetFloatArgOrDie(verb string, opt string, args []string, pargi *int, argc int) float64 {
|
||||
flag := args[*pargi]
|
||||
stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
|
||||
retval, err := strconv.ParseFloat(stringArg, 64)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr,
|
||||
"%s %s: could not scan flag \"%s\" argument \"%s\" as float.\n",
|
||||
lib.MlrExeName(), verb, flag, stringArg,
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
return retval
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,9 +100,9 @@ func TryIntFromString(input string) (int, bool) {
|
|||
}
|
||||
|
||||
func TryFloat64FromString(input string) (float64, bool) {
|
||||
ival, err := strconv.ParseFloat(input, 64)
|
||||
fval, err := strconv.ParseFloat(input, 64)
|
||||
if err == nil {
|
||||
return ival, true
|
||||
return fval, true
|
||||
} else {
|
||||
return 0, false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ func transformerCatParseCLI(
|
|||
// Parse local flags
|
||||
doCounters := false
|
||||
counterFieldName := ""
|
||||
groupByFieldNames := ""
|
||||
var groupByFieldNames []string = nil
|
||||
|
||||
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
||||
opt := args[argi]
|
||||
|
|
@ -74,7 +74,7 @@ func transformerCatParseCLI(
|
|||
counterFieldName = cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "-g" {
|
||||
groupByFieldNames = cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
|
||||
groupByFieldNames = cliutil.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else {
|
||||
transformerCatUsage(os.Stderr, true, 1)
|
||||
|
|
@ -92,8 +92,8 @@ func transformerCatParseCLI(
|
|||
|
||||
// ----------------------------------------------------------------
|
||||
type TransformerCat struct {
|
||||
doCounters bool
|
||||
groupByFieldNameList []string
|
||||
doCounters bool
|
||||
groupByFieldNames []string
|
||||
|
||||
counter int
|
||||
countsByGroup map[string]int
|
||||
|
|
@ -106,27 +106,25 @@ type TransformerCat struct {
|
|||
func NewTransformerCat(
|
||||
doCounters bool,
|
||||
counterFieldName string,
|
||||
groupByFieldNames string,
|
||||
groupByFieldNames []string,
|
||||
) (*TransformerCat, error) {
|
||||
|
||||
groupByFieldNameList := lib.SplitString(groupByFieldNames, ",")
|
||||
|
||||
if counterFieldName != "" {
|
||||
doCounters = true
|
||||
}
|
||||
|
||||
this := &TransformerCat{
|
||||
doCounters: doCounters,
|
||||
groupByFieldNameList: groupByFieldNameList,
|
||||
counter: 0,
|
||||
countsByGroup: make(map[string]int),
|
||||
counterFieldName: counterFieldName,
|
||||
doCounters: doCounters,
|
||||
groupByFieldNames: groupByFieldNames,
|
||||
counter: 0,
|
||||
countsByGroup: make(map[string]int),
|
||||
counterFieldName: counterFieldName,
|
||||
}
|
||||
|
||||
if !doCounters {
|
||||
this.recordTransformerFunc = this.simpleCat
|
||||
} else {
|
||||
if groupByFieldNames == "" {
|
||||
if groupByFieldNames == nil {
|
||||
this.recordTransformerFunc = this.countersUngrouped
|
||||
} else {
|
||||
this.recordTransformerFunc = this.countersGrouped
|
||||
|
|
@ -175,7 +173,7 @@ func (this *TransformerCat) countersGrouped(
|
|||
if !inrecAndContext.EndOfStream {
|
||||
inrec := inrecAndContext.Record
|
||||
|
||||
groupingKey, ok := inrec.GetSelectedValuesJoined(this.groupByFieldNameList)
|
||||
groupingKey, ok := inrec.GetSelectedValuesJoined(this.groupByFieldNames)
|
||||
var counter int = 0
|
||||
if !ok {
|
||||
// Treat as unkeyed
|
||||
|
|
|
|||
366
go/src/transformers/histogram.go
Normal file
366
go/src/transformers/histogram.go
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
package transformers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameHistogram = "histogram"
|
||||
|
||||
var HistogramSetup = transforming.TransformerSetup{
|
||||
Verb: verbNameHistogram,
|
||||
UsageFunc: transformerHistogramUsage,
|
||||
ParseCLIFunc: transformerHistogramParseCLI,
|
||||
IgnoresInput: false,
|
||||
}
|
||||
|
||||
func transformerHistogramUsage(
|
||||
o *os.File,
|
||||
doExit bool,
|
||||
exitCode int,
|
||||
) {
|
||||
argv0 := lib.MlrExeName()
|
||||
verb := verbNameHistogram
|
||||
fmt.Fprintf(o, "Just a histogram. Input values < lo or > hi are not counted.\n")
|
||||
fmt.Fprintf(o, "Usage: %s %s [options]\n", argv0, verb)
|
||||
fmt.Fprintf(o, "-f {a,b,c} Value-field names for histogram counts\n")
|
||||
fmt.Fprintf(o, "--lo {lo} Histogram low value\n")
|
||||
fmt.Fprintf(o, "--hi {hi} Histogram high value\n")
|
||||
fmt.Fprintf(o, "--nbins {n} Number of histogram bins\n")
|
||||
fmt.Fprintf(o, "--auto Automatically computes limits, ignoring --lo and --hi.\n")
|
||||
fmt.Fprintf(o, " Holds all values in memory before producing any output.\n")
|
||||
fmt.Fprintf(o, "-o {prefix} Prefix for output field name. Default: no prefix.\n")
|
||||
fmt.Fprintf(o, "-h|--help Show this message.\n")
|
||||
|
||||
if doExit {
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
}
|
||||
|
||||
func transformerHistogramParseCLI(
|
||||
pargi *int,
|
||||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TReaderOptions,
|
||||
__ *cliutil.TWriterOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
verb := args[argi]
|
||||
argi++
|
||||
|
||||
// Parse local flags
|
||||
var valueFieldNames []string = nil
|
||||
lo := 0.0
|
||||
nbins := 0
|
||||
hi := 0.0
|
||||
doAuto := false
|
||||
outputPrefix := ""
|
||||
|
||||
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
||||
opt := args[argi]
|
||||
if !strings.HasPrefix(opt, "-") {
|
||||
break // No more flag options to process
|
||||
}
|
||||
argi++
|
||||
|
||||
if opt == "-h" || opt == "--help" {
|
||||
transformerHistogramUsage(os.Stdout, true, 0)
|
||||
|
||||
} else if opt == "-f" {
|
||||
valueFieldNames = cliutil.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "--lo" {
|
||||
lo = cliutil.VerbGetFloatArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "--nbins" {
|
||||
nbins = cliutil.VerbGetIntArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "--hi" {
|
||||
hi = cliutil.VerbGetFloatArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "--auto" {
|
||||
doAuto = true
|
||||
|
||||
} else if opt == "-o" {
|
||||
outputPrefix = cliutil.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else {
|
||||
transformerHistogramUsage(os.Stderr, true, 1)
|
||||
}
|
||||
}
|
||||
|
||||
if valueFieldNames == nil {
|
||||
transformerHistogramUsage(os.Stderr, true, 1)
|
||||
}
|
||||
|
||||
if nbins == 0 {
|
||||
transformerHistogramUsage(os.Stderr, true, 1)
|
||||
}
|
||||
|
||||
if lo == hi && !doAuto {
|
||||
transformerHistogramUsage(os.Stderr, true, 1)
|
||||
}
|
||||
|
||||
transformer, _ := NewTransformerHistogram(
|
||||
valueFieldNames,
|
||||
lo,
|
||||
nbins,
|
||||
hi,
|
||||
doAuto,
|
||||
outputPrefix,
|
||||
)
|
||||
|
||||
*pargi = argi
|
||||
return transformer
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const histogramVectorInitialSize = 1024
|
||||
|
||||
type TransformerHistogram struct {
|
||||
valueFieldNames []string
|
||||
lo float64
|
||||
nbins int
|
||||
hi float64
|
||||
mul float64
|
||||
|
||||
countsByField map[string][]int
|
||||
vectorsByFieldName map[string][]float64 // For auto-mode
|
||||
outputPrefix string
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func NewTransformerHistogram(
|
||||
valueFieldNames []string,
|
||||
lo float64,
|
||||
nbins int,
|
||||
hi float64,
|
||||
doAuto bool,
|
||||
outputPrefix string,
|
||||
) (*TransformerHistogram, error) {
|
||||
|
||||
countsByField := make(map[string][]int)
|
||||
for _, valueFieldName := range valueFieldNames {
|
||||
countsByField[valueFieldName] = make([]int, nbins)
|
||||
for i := 0; i < nbins; i++ {
|
||||
countsByField[valueFieldName][i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
this := &TransformerHistogram{
|
||||
valueFieldNames: valueFieldNames,
|
||||
countsByField: countsByField,
|
||||
outputPrefix: outputPrefix,
|
||||
nbins: nbins,
|
||||
}
|
||||
|
||||
if !doAuto {
|
||||
this.recordTransformerFunc = this.transformNonAuto
|
||||
this.lo = lo
|
||||
this.hi = hi
|
||||
this.mul = float64(nbins) / (hi - lo)
|
||||
} else {
|
||||
this.vectorsByFieldName = make(map[string][]float64)
|
||||
for _, valueFieldName := range valueFieldNames {
|
||||
this.vectorsByFieldName[valueFieldName] = make([]float64, 0, histogramVectorInitialSize)
|
||||
}
|
||||
|
||||
this.recordTransformerFunc = this.transformAuto
|
||||
}
|
||||
|
||||
return this, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func (this *TransformerHistogram) Transform(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputChannel chan<- *types.RecordAndContext,
|
||||
) {
|
||||
this.recordTransformerFunc(inrecAndContext, outputChannel)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func (this *TransformerHistogram) transformNonAuto(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputChannel chan<- *types.RecordAndContext,
|
||||
) {
|
||||
if !inrecAndContext.EndOfStream {
|
||||
this.ingestNonAuto(inrecAndContext)
|
||||
} else {
|
||||
this.emitNonAuto(&inrecAndContext.Context, outputChannel)
|
||||
outputChannel <- inrecAndContext // end-of-stream marker
|
||||
}
|
||||
}
|
||||
|
||||
func (this *TransformerHistogram) ingestNonAuto(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
) {
|
||||
inrec := inrecAndContext.Record
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
stringValue := inrec.Get(valueFieldName)
|
||||
if stringValue != nil {
|
||||
floatValue, ok := stringValue.GetNumericToFloatValue()
|
||||
if !ok {
|
||||
fmt.Fprintf(
|
||||
os.Stderr,
|
||||
"%s %s: cannot parse \"%s\" as float.\n",
|
||||
lib.MlrExeName(), verbNameHistogram, stringValue.String(),
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
if (floatValue >= this.lo) && (floatValue < this.hi) {
|
||||
idx := int((floatValue - this.lo) * this.mul)
|
||||
this.countsByField[valueFieldName][idx]++
|
||||
} else if floatValue == this.hi {
|
||||
idx := this.nbins - 1
|
||||
this.countsByField[valueFieldName][idx]++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (this *TransformerHistogram) emitNonAuto(
|
||||
endOfStreamContext *types.Context,
|
||||
outputChannel chan<- *types.RecordAndContext,
|
||||
) {
|
||||
countFieldNames := make(map[string]string)
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
countFieldNames[valueFieldName] = this.outputPrefix + valueFieldName + "_count"
|
||||
}
|
||||
for i := 0; i < this.nbins; i++ {
|
||||
outrec := types.NewMlrmapAsRecord()
|
||||
|
||||
outrec.PutReference(
|
||||
this.outputPrefix+"bin_lo",
|
||||
types.MlrvalPointerFromFloat64((this.lo+float64(i))/this.mul),
|
||||
)
|
||||
outrec.PutReference(
|
||||
this.outputPrefix+"bin_hi",
|
||||
types.MlrvalPointerFromFloat64((this.lo+float64(i+1))/this.mul),
|
||||
)
|
||||
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
outrec.PutReference(
|
||||
countFieldNames[valueFieldName],
|
||||
types.MlrvalPointerFromInt(this.countsByField[valueFieldName][i]),
|
||||
)
|
||||
}
|
||||
|
||||
outputChannel <- types.NewRecordAndContext(outrec, endOfStreamContext)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func (this *TransformerHistogram) transformAuto(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
outputChannel chan<- *types.RecordAndContext,
|
||||
) {
|
||||
if !inrecAndContext.EndOfStream {
|
||||
this.ingestAuto(inrecAndContext)
|
||||
} else {
|
||||
this.emitAuto(&inrecAndContext.Context, outputChannel)
|
||||
outputChannel <- inrecAndContext // end-of-stream marker
|
||||
}
|
||||
}
|
||||
|
||||
func (this *TransformerHistogram) ingestAuto(
|
||||
inrecAndContext *types.RecordAndContext,
|
||||
) {
|
||||
inrec := inrecAndContext.Record
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
mvalue := inrec.Get(valueFieldName)
|
||||
if mvalue != nil {
|
||||
value := mvalue.GetNumericToFloatValueOrDie()
|
||||
this.vectorsByFieldName[valueFieldName] = append(this.vectorsByFieldName[valueFieldName], value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (this *TransformerHistogram) emitAuto(
|
||||
endOfStreamContext *types.Context,
|
||||
outputChannel chan<- *types.RecordAndContext,
|
||||
) {
|
||||
haveLoHi := false
|
||||
lo := 0.0
|
||||
hi := 1.0
|
||||
nbins := this.nbins
|
||||
|
||||
// Limits pass
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
vector := this.vectorsByFieldName[valueFieldName]
|
||||
n := len(vector)
|
||||
for i := 0; i < n; i++ {
|
||||
value := vector[i]
|
||||
if haveLoHi {
|
||||
if lo > value {
|
||||
lo = value
|
||||
}
|
||||
if hi < value {
|
||||
hi = value
|
||||
}
|
||||
} else {
|
||||
lo = value
|
||||
hi = value
|
||||
haveLoHi = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Binning pass
|
||||
mul := float64(nbins) / (hi - lo)
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
vector := this.vectorsByFieldName[valueFieldName]
|
||||
counts := this.countsByField[valueFieldName]
|
||||
lib.InternalCodingErrorIf(counts == nil)
|
||||
n := len(vector)
|
||||
for i := 0; i < n; i++ {
|
||||
value := vector[i]
|
||||
if (value >= lo) && (value < hi) {
|
||||
idx := int(((value - lo) * mul))
|
||||
counts[idx]++
|
||||
} else if value == hi {
|
||||
idx := nbins - 1
|
||||
counts[idx]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emission pass
|
||||
countFieldNames := make(map[string]string)
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
countFieldNames[valueFieldName] = this.outputPrefix + valueFieldName + "_count"
|
||||
}
|
||||
|
||||
for i := 0; i < nbins; i++ {
|
||||
outrec := types.NewMlrmapAsRecord()
|
||||
|
||||
outrec.PutReference(
|
||||
this.outputPrefix+"bin_lo",
|
||||
types.MlrvalPointerFromFloat64((lo+float64(i))/mul),
|
||||
)
|
||||
outrec.PutReference(
|
||||
this.outputPrefix+"bin_hi",
|
||||
types.MlrvalPointerFromFloat64((lo+float64(i+1))/mul),
|
||||
)
|
||||
|
||||
for _, valueFieldName := range this.valueFieldNames {
|
||||
outrec.PutReference(
|
||||
countFieldNames[valueFieldName],
|
||||
types.MlrvalPointerFromInt(this.countsByField[valueFieldName][i]),
|
||||
)
|
||||
}
|
||||
|
||||
outputChannel <- types.NewRecordAndContext(outrec, endOfStreamContext)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
package types
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"miller/src/lib"
|
||||
|
|
@ -132,6 +134,19 @@ func (this *Mlrval) GetNumericToFloatValue() (floatValue float64, isFloat bool)
|
|||
}
|
||||
}
|
||||
|
||||
func (this *Mlrval) GetNumericToFloatValueOrDie() (floatValue float64) {
|
||||
floatValue, ok := this.GetNumericToFloatValue()
|
||||
if !ok {
|
||||
fmt.Fprintf(
|
||||
os.Stderr,
|
||||
"%s: couldn't parse \"%s\" as number.",
|
||||
lib.MlrExeName(), this.String(),
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
return floatValue
|
||||
}
|
||||
|
||||
func (this *Mlrval) GetBoolValue() (boolValue bool, isBool bool) {
|
||||
if this.mvtype == MT_BOOL {
|
||||
return this.boolval, true
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
================================================================
|
||||
TOP OF LIST:
|
||||
|
||||
* audit
|
||||
groupByFieldNames = cliutil.VerbGetStringArgOrDie
|
||||
-> cliutil.VerbGetStringArrayArgOrDie
|
||||
|
||||
* regexes
|
||||
o finish stats1 -r
|
||||
o regex captures ...
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue