From 77811a47882618821c79bf755f1926651101fcb0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 23 Jan 2022 23:03:46 -0500 Subject: [PATCH] Sliding window averages (#894) * todo * Neaten existing DSL sketch * rebase on #893, and sketch * code-complete * build artifacts for previous commit * replace - with _ in shift and slwin --- docs/src/manpage.md | 10 +- docs/src/manpage.txt | 10 +- docs/src/reference-verbs.md | 8 +- internal/pkg/transformers/step.go | 270 ++++++++++++++++++++++-------- man/manpage.txt | 10 +- man/mlr.1 | 12 +- test/cases/cli-help/0001/expout | 8 +- test/cases/verb-step/0011/cmd | 2 +- test/cases/verb-step/0012/cmd | 2 +- test/cases/verb-step/0013/cmd | 2 +- test/cases/verb-step/0014/cmd | 2 +- test/cases/verb-step/0015/cmd | 2 +- test/cases/verb-step/0016/cmd | 2 +- test/cases/verb-step/0017/cmd | 1 + test/cases/verb-step/0017/experr | 0 test/cases/verb-step/0017/expout | 11 ++ test/cases/verb-step/0018/cmd | 1 + test/cases/verb-step/0018/experr | 0 test/cases/verb-step/0018/expout | 11 ++ test/cases/verb-step/0019/cmd | 1 + test/cases/verb-step/0019/experr | 0 test/cases/verb-step/0019/expout | 11 ++ test/cases/verb-step/0020/cmd | 1 + test/cases/verb-step/0020/experr | 0 test/cases/verb-step/0020/expout | 11 ++ test/cases/verb-step/0021/cmd | 1 + test/cases/verb-step/0021/experr | 0 test/cases/verb-step/0021/expout | 11 ++ test/cases/verb-step/0022/cmd | 1 + test/cases/verb-step/0022/experr | 0 test/cases/verb-step/0022/expout | 10 ++ test/cases/verb-step/0023/cmd | 1 + test/cases/verb-step/0023/experr | 0 test/cases/verb-step/0023/expout | 10 ++ test/input/window.mlr | 20 ++- test/input/window2.mlr | 5 + todo.txt | 12 +- 37 files changed, 351 insertions(+), 108 deletions(-) create mode 100644 test/cases/verb-step/0017/cmd create mode 100644 test/cases/verb-step/0017/experr create mode 100644 test/cases/verb-step/0017/expout create mode 100644 test/cases/verb-step/0018/cmd create mode 100644 test/cases/verb-step/0018/experr create mode 
100644 test/cases/verb-step/0018/expout create mode 100644 test/cases/verb-step/0019/cmd create mode 100644 test/cases/verb-step/0019/experr create mode 100644 test/cases/verb-step/0019/expout create mode 100644 test/cases/verb-step/0020/cmd create mode 100644 test/cases/verb-step/0020/experr create mode 100644 test/cases/verb-step/0020/expout create mode 100644 test/cases/verb-step/0021/cmd create mode 100644 test/cases/verb-step/0021/experr create mode 100644 test/cases/verb-step/0021/expout create mode 100644 test/cases/verb-step/0022/cmd create mode 100644 test/cases/verb-step/0022/experr create mode 100644 test/cases/verb-step/0022/expout create mode 100644 test/cases/verb-step/0023/cmd create mode 100644 test/cases/verb-step/0023/experr create mode 100644 test/cases/verb-step/0023/expout diff --git a/docs/src/manpage.md b/docs/src/manpage.md index eb35c6611..f365dd707 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1834,9 +1834,10 @@ VERBS from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin_7_2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -1859,6 +1860,7 @@ VERBS mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin_9_0,slwin_0_9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average @@ -3088,5 +3090,5 @@ SEE ALSO - 2022-01-23 MILLER(1) + 2022-01-24 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 62d3f38a6..5ac3ea50c 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1813,9 +1813,10 @@ VERBS from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin_7_2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -1838,6 +1839,7 @@ VERBS mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin_9_0,slwin_0_9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average @@ -3067,4 +3069,4 @@ SEE ALSO - 2022-01-23 MILLER(1) + 2022-01-24 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 4d738c70a..22223df94 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3289,9 +3289,10 @@ Options: from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin_7_2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -3314,6 +3315,7 @@ Examples: mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin-9-0,slwin-0-9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average diff --git a/internal/pkg/transformers/step.go b/internal/pkg/transformers/step.go index d4d716988..b174381aa 100644 --- a/internal/pkg/transformers/step.go +++ b/internal/pkg/transformers/step.go @@ -29,15 +29,15 @@ // circle,23,3 // // This is (rather, was) straightforward until we added the ability to do *forward* operations such -// as shift-lead. Namely: +// as shift_lead. Namely: // -// * If the stepper is shift-lead then output lags input by one, e.g. we emit the 10th record only +// * If the stepper is shift_lead then output lags input by one, e.g. we emit the 10th record only // after seeing the 11th. Likewise, for sliding-window average with look-forward of 4, we emit the // 10th record only after seeing the 14th. More generally, if there are multiple steppers // specified with -a, then the delay is the max of each stepper's look-forward. // // * Then we need to produce output at the end of the record stream -- e.g. if there are only 20 -// records and we're doing shift-lead, then we'd normally emit the 20th record only when the 21st +// records and we're doing shift_lead, then we'd normally emit the 20th record only when the 21st // is received -- but there isn't one. And we can't use a simple next-is-nil rule for the last // record received in the group-by case. 
For example, if a given record has shape=square and we're // grouping by shape, we don't know a priori where in the record stream the next record with @@ -46,16 +46,16 @@ // * If we keep a simple hashmap from grouping key to delayed records and process that at end of // record stream, since Go hashmaps don't preserve insertion order, we'd have non-deterministic // output ordering which would frustrate users and would also break automated regression tests. -// For example, doing shift-lead with the above sample data, the last square and circle record +// For example, doing shift_lead with the above sample data, the last square and circle record // could appear in either order. // // * For these reasons we have an ordered hashmap -- basically a mashup of hashmap and doubly linked // list -- of all "window" objects per grouping-key. // // * The window object is just the current record along with previous/next records as required by a -// given stepper. The shift-lag stepper keeps the previous and current record; when the 10th +// given stepper. The shift_lag stepper keeps the previous and current record; when the 10th // record is ingested, the previous is the 9th, and it emits the 10th record with a value from the -// 9th. The shift-lead stepper has a current and next. When the 11th record is ingested, the +// 9th. The shift_lead stepper has a current and next. When the 11th record is ingested, the // 'current' is the 10th record and the 'next' is the 11th, and it emits the 10th record with a // value from the 11th. // @@ -99,7 +99,7 @@ func transformerStepUsage( doExit bool, exitCode int, ) { - fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameStep) + fmt.Fprintf(o, "Usage: mlr %s [options]\n", verbNameStep) fmt.Fprintf(o, "Computes values dependent on earlier/later records, optionally grouped by category.\n") fmt.Fprintf(o, "Options:\n") @@ -120,7 +120,7 @@ func transformerStepUsage( fmt.Fprintf(o, "-d {x,y,z} Weights for EWMA. 
1 means current sample gets all weight (no\n") fmt.Fprintf(o, " smoothing), near under under 1 is light smoothing, near over 0 is\n") fmt.Fprintf(o, " heavy smoothing. Multiple weights may be specified, e.g.\n") - fmt.Fprintf(o, " \"%s %s -a ewma -f sys_load -d 0.01,0.1,0.9\". Default if omitted\n", "mlr", verbNameStep) + fmt.Fprintf(o, " \"mlr %s -a ewma -f sys_load -d 0.01,0.1,0.9\". Default if omitted\n", verbNameStep) fmt.Fprintf(o, " is \"-d %s\".\n", DEFAULT_STRING_ALPHA) fmt.Fprintf(o, "-o {a,b,c} Custom suffixes for EWMA output fields. If omitted, these default to\n") @@ -130,11 +130,12 @@ func transformerStepUsage( fmt.Fprintf(o, "\n") fmt.Fprintf(o, "Examples:\n") - fmt.Fprintf(o, " %s %s -a rsum -f request_size\n", "mlr", verbNameStep) - fmt.Fprintf(o, " %s %s -a delta -f request_size -g hostname\n", "mlr", verbNameStep) - fmt.Fprintf(o, " %s %s -a ewma -d 0.1,0.9 -f x,y\n", "mlr", verbNameStep) - fmt.Fprintf(o, " %s %s -a ewma -d 0.1,0.9 -o smooth,rough -f x,y\n", "mlr", verbNameStep) - fmt.Fprintf(o, " %s %s -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name\n", "mlr", verbNameStep) + fmt.Fprintf(o, " mlr %s -a rsum -f request_size\n", verbNameStep) + fmt.Fprintf(o, " mlr %s -a delta -f request_size -g hostname\n", verbNameStep) + fmt.Fprintf(o, " mlr %s -a ewma -d 0.1,0.9 -f x,y\n", verbNameStep) + fmt.Fprintf(o, " mlr %s -a ewma -d 0.1,0.9 -o smooth,rough -f x,y\n", verbNameStep) + fmt.Fprintf(o, " mlr %s -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name\n", verbNameStep) + fmt.Fprintf(o, " mlr %s -a slwin-9-0,slwin-0-9 -f x\n", verbNameStep) fmt.Fprintf(o, "\n") fmt.Fprintf(o, "Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or\n") @@ -243,8 +244,8 @@ type tStepLogEntry struct { recordAndContext *types.RecordAndContext windowKeeper *utils.TWindowKeeper // Map from value field name to stepper name to stepper. E.g. 
with 'mlr step -g a,b -f x,y -a - // shift-lag,shift-lead', value field names are 'x' and 'y', and stepper names are 'shift-lag' - // and 'shift-lead'. + // shift_lag,shift_lead', value field names are 'x' and 'y', and stepper names are 'shift_lag' + // and 'shift_lead'. steppers map[string]map[string]tStepper } @@ -365,7 +366,7 @@ func (tr *TransformerStep) Transform( } else { // As described in comments at the top of this file: process through all delayed-input - // records for shift-lead, forward-sliding-window, etc. steppers. + // records for shift_lead, forward-sliding-window, etc. steppers. for pe := tr.log.Head; pe != nil; pe = pe.Next { logEntry := pe.Value.(*tStepLogEntry) // Shift by one -- if 'current' is the 9th record and 'next' is 10th, and there's no @@ -380,7 +381,7 @@ func (tr *TransformerStep) Transform( } // handleRecord processes records received before the end of the record stream is seen. -// The records emitted here are the ones we can emit now. For example, with shift-lead, if the most +// The records emitted here are the ones we can emit now. For example, with shift_lead, if the most // recent input record is the 11th, then here we're emitting the 10th. At EOS, we'll drain any // delayed-input records in the order in which they were received. func (tr *TransformerStep) handleRecord( @@ -465,7 +466,7 @@ func (tr *TransformerStep) handleRecord( } // handleDrainRecord processes records received after the end of the record stream is seen. The -// records emitted here are the ones we couldn't emit before. For example, with shift-lead, if the +// records emitted here are the ones we couldn't emit before. For example, with shift_lead, if the // most recent input record is the 11th, then before EOS we emitted the 10th. Here, we'll drain any // delayed-input records in the order in which they were received. 
func (tr *TransformerStep) handleDrainRecord( @@ -494,14 +495,14 @@ func (tr *TransformerStep) handleDrainRecord( for _, stepperInput := range tr.stepperInputs { stepper, present := accFieldToAccState[stepperInput.name] lib.InternalCodingErrorIf(!present) - lib.InternalCodingErrorIf(windowKeeper.Get(0) == nil) stepper.process(windowKeeper) } } - lib.InternalCodingErrorIf(windowKeeper.Get(0) == nil) - outrecAndContext := windowKeeper.Get(0).(*types.RecordAndContext) - outputRecordsAndContexts.PushBack(outrecAndContext) + if windowKeeper.Get(0) != nil { + outrecAndContext := windowKeeper.Get(0).(*types.RecordAndContext) + outputRecordsAndContexts.PushBack(outrecAndContext) + } } // insertToLog remembers a delayed-input record so we can process it in the order it was received, @@ -521,7 +522,7 @@ func (tr *TransformerStep) insertToLog( }) } -// removeFromLog shifts records out of the log. For example, with shift-lead, we only have +// removeFromLog shifts records out of the log. For example, with shift_lead, we only have // look-forward of 1, so the log will only have one record per grouping key. 
func (tr *TransformerStep) removeFromLog( recordAndContext *types.RecordAndContext, @@ -546,7 +547,12 @@ type tStepperInputFromName func( stepperName string, ) *tStepperInput +type tOwnsPrefix func( + stepperName string, +) bool + type tStepperAllocator func( + stepperInput *tStepperInput, inputFieldName string, stringAlphas []string, ewmaSuffixes []string, @@ -564,6 +570,8 @@ type tStepper interface { type tStepperLookup struct { name string + nameIsVariable bool + ownsPrefix tOwnsPrefix stepperInputFromName tStepperInputFromName stepperAllocator tStepperAllocator desc string @@ -571,58 +579,66 @@ type tStepperLookup struct { var STEPPER_LOOKUP_TABLE = []tStepperLookup{ { - "counter", - stepperCounterInputFromName, - stepperCounterAlloc, - "Count instances of field(s) between successive records", + name: "counter", + stepperInputFromName: stepperCounterInputFromName, + stepperAllocator: stepperCounterAlloc, + desc: "Count instances of field(s) between successive records", }, { - "delta", - stepperDeltaInputFromName, - stepperDeltaAlloc, - "Compute differences in field(s) between successive records", + name: "delta", + stepperInputFromName: stepperDeltaInputFromName, + stepperAllocator: stepperDeltaAlloc, + desc: "Compute differences in field(s) between successive records", }, { - "ewma", - stepperEWMAInputFromName, - stepperEWMAAlloc, - "Exponentially weighted moving average over successive records", + name: "ewma", + stepperInputFromName: stepperEWMAInputFromName, + stepperAllocator: stepperEWMAAlloc, + desc: "Exponentially weighted moving average over successive records", }, { - "from-first", - stepperFromFirstInputFromName, - stepperFromFirstAlloc, - "Compute differences in field(s) from first record", + name: "from-first", + stepperInputFromName: stepperFromFirstInputFromName, + stepperAllocator: stepperFromFirstAlloc, + desc: "Compute differences in field(s) from first record", }, { - "ratio", - stepperRatioInputFromName, - stepperRatioAlloc, - "Compute 
ratios in field(s) between successive records", + name: "ratio", + stepperInputFromName: stepperRatioInputFromName, + stepperAllocator: stepperRatioAlloc, + desc: "Compute ratios in field(s) between successive records", }, { - "rsum", - stepperRsumInputFromName, - stepperRsumAlloc, - "Compute running sums of field(s) between successive records", + name: "rsum", + stepperInputFromName: stepperRsumInputFromName, + stepperAllocator: stepperRsumAlloc, + desc: "Compute running sums of field(s) between successive records", }, { - "shift", - stepperShiftInputFromName, - stepperShiftAlloc, - "Alias for shift-lag", + name: "shift", + stepperInputFromName: stepperShiftInputFromName, + stepperAllocator: stepperShiftAlloc, + desc: "Alias for shift_lag", }, { - "shift-lag", - stepperShiftLagInputFromName, - stepperShiftLagAlloc, - "Include value(s) in field(s) from the previous record, if any", + name: "shift_lag", + stepperInputFromName: stepperShiftLagInputFromName, + stepperAllocator: stepperShiftLagAlloc, + desc: "Include value(s) in field(s) from the previous record, if any", }, { - "shift-lead", - stepperShiftLeadInputFromName, - stepperShiftLeadAlloc, - "Include value(s) in field(s) from the next record, if any", + name: "shift_lead", + stepperInputFromName: stepperShiftLeadInputFromName, + stepperAllocator: stepperShiftLeadAlloc, + desc: "Include value(s) in field(s) from the next record, if any", + }, + { + name: "slwin", + nameIsVariable: true, + ownsPrefix: stepperSlwintOwnsPrefix, + stepperInputFromName: stepperSlwinInputFromName, + stepperAllocator: stepperSlwinAlloc, + desc: "Sliding-window averages over m records back and n forward. E.g. 
slwin-7-2 for 7 back and 2 forward.", }, } @@ -630,8 +646,15 @@ func stepperInputFromName( name string, ) *tStepperInput { for _, stepperLookup := range STEPPER_LOOKUP_TABLE { - if stepperLookup.name == name { - return stepperLookup.stepperInputFromName(name) + if stepperLookup.nameIsVariable { + stepperInput := stepperLookup.stepperInputFromName(name) + if stepperInput != nil { + return stepperInput + } + } else { + if stepperLookup.name == name { + return stepperLookup.stepperInputFromName(name) + } } } return nil @@ -644,12 +667,24 @@ func allocateStepper( ewmaSuffixes []string, ) tStepper { for _, stepperLookup := range STEPPER_LOOKUP_TABLE { - if stepperLookup.name == stepperInput.name { - return stepperLookup.stepperAllocator( - inputFieldName, - stringAlphas, - ewmaSuffixes, - ) + if stepperLookup.nameIsVariable { + if stepperLookup.ownsPrefix(stepperInput.name) { + return stepperLookup.stepperAllocator( + stepperInput, + inputFieldName, + stringAlphas, + ewmaSuffixes, + ) + } + } else { + if stepperLookup.name == stepperInput.name { + return stepperLookup.stepperAllocator( + stepperInput, + inputFieldName, + stringAlphas, + ewmaSuffixes, + ) + } } } return nil @@ -675,6 +710,7 @@ func stepperDeltaInputFromName( } func stepperDeltaAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -742,6 +778,7 @@ func stepperShiftLagInputFromName( } func stepperShiftAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -753,6 +790,7 @@ func stepperShiftAlloc( } func stepperShiftLagAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -805,6 +843,7 @@ func stepperShiftLeadInputFromName( } func stepperShiftLeadAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -856,6 +895,7 @@ func stepperFromFirstInputFromName( } func stepperFromFirstAlloc( + stepperInput *tStepperInput, 
inputFieldName string, _unused1 []string, _unused2 []string, @@ -904,6 +944,7 @@ func stepperRatioInputFromName( } func stepperRatioAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -961,6 +1002,7 @@ func stepperRsumInputFromName( } func stepperRsumAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -1009,6 +1051,7 @@ func stepperCounterInputFromName( } func stepperCounterAlloc( + stepperInput *tStepperInput, inputFieldName string, _unused1 []string, _unused2 []string, @@ -1062,6 +1105,7 @@ func stepperEWMAInputFromName( } func stepperEWMAAlloc( + stepperInput *tStepperInput, inputFieldName string, stringAlphas []string, ewmaSuffixes []string, @@ -1138,3 +1182,95 @@ func (stepper *tStepperEWMA) process( } } } + +// ================================================================ +type tStepperSlwin struct { + inputFieldName string + numRecordsBackward int + numRecordsForward int + outputFieldName string +} + +func stepperSlwintOwnsPrefix( + stepperName string, +) bool { + return strings.HasPrefix(stepperName, "slwin") +} + +func stepperSlwinInputFromName( + stepperName string, +) *tStepperInput { + var numRecordsBackward, numRecordsForward int + n, err := fmt.Sscanf(stepperName, "slwin_%d_%d", &numRecordsBackward, &numRecordsForward) + if n == 2 && err == nil { + if numRecordsBackward < 0 || numRecordsForward < 0 { + fmt.Fprintf( + os.Stderr, + "mlr %s: stepper needed non-negative num-backward & num-forward in %s.\n", + verbNameStep, + stepperName, + ) + os.Exit(1) + } + return &tStepperInput{ + name: stepperName, + numRecordsBackward: numRecordsBackward, // doesn't use record-windowing; retains its own accumulators + numRecordsForward: numRecordsForward, + } + } else { + return nil + } +} + +func stepperSlwinAlloc( + stepperInput *tStepperInput, + inputFieldName string, + _unused1 []string, + _unused2 []string, +) tStepper { + nb := stepperInput.numRecordsBackward 
+ nf := stepperInput.numRecordsForward + return &tStepperSlwin{ + inputFieldName: inputFieldName, + outputFieldName: fmt.Sprintf("%s_%d_%d", inputFieldName, nb, nf), + numRecordsBackward: nb, + numRecordsForward: nf, + } +} + +func (stepper *tStepperSlwin) process( + windowKeeper *utils.TWindowKeeper, +) { + count := 0 + sum := mlrval.FromFloat(0.0) + for i := -stepper.numRecordsBackward; i <= stepper.numRecordsForward; i++ { + irac := windowKeeper.Get(i) + if irac == nil { + continue + } + rac := irac.(*types.RecordAndContext) + rec := rac.Record + val := rec.Get(stepper.inputFieldName) + if val.IsVoid() { + continue + } + sum = bifs.BIF_plus_binary(sum, val) + count++ + } + + icur := windowKeeper.Get(0) + if icur == nil { + return + } + currac := icur.(*types.RecordAndContext) + currec := currac.Record + + if count == 0 { + currec.PutCopy(stepper.outputFieldName, mlrval.VOID) + } else { + currec.PutReference( + stepper.outputFieldName, + bifs.BIF_divide(sum, mlrval.FromInt(count)), + ) + } +} diff --git a/man/manpage.txt b/man/manpage.txt index 62d3f38a6..5ac3ea50c 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1813,9 +1813,10 @@ VERBS from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin-7-2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -1838,6 +1839,7 @@ VERBS mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin_9_0,slwin_0_9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average @@ -3067,4 +3069,4 @@ SEE ALSO - 2022-01-23 MILLER(1) + 2022-01-24 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index e40a79860..f241604d3 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2022-01-23 +.\" Date: 2022-01-24 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2022-01-23" "\ \&" "\ \&" +.TH "MILLER" "1" "2022-01-24" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2282,9 +2282,10 @@ Options: from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin_7_2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -2307,6 +2308,7 @@ Examples: mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin-9-0,slwin-0-9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 0cf5dbc8b..02ffc0235 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -1030,9 +1030,10 @@ Options: from-first Compute differences in field(s) from first record ratio Compute ratios in field(s) between successive records rsum Compute running sums of field(s) between successive records - shift Alias for shift-lag - shift-lag Include value(s) in field(s) from the previous record, if any - shift-lead Include value(s) in field(s) from the next record, if any + shift Alias for shift_lag + shift_lag Include value(s) in field(s) from the previous record, if any + shift_lead Include value(s) in field(s) from the next record, if any + slwin Sliding-window averages over m records back and n forward. E.g. slwin-7-2 for 7 back and 2 forward. 
-f {a,b,c} Value-field names on which to compute statistics -g {d,e,f} Optional group-by-field names @@ -1055,6 +1056,7 @@ Examples: mlr step -a ewma -d 0.1,0.9 -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y mlr step -a ewma -d 0.1,0.9 -o smooth,rough -f x,y -g group_name + mlr step -a slwin-9-0,slwin-0-9 -f x Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average diff --git a/test/cases/verb-step/0011/cmd b/test/cases/verb-step/0011/cmd index 5db891f3b..18800637d 100644 --- a/test/cases/verb-step/0011/cmd +++ b/test/cases/verb-step/0011/cmd @@ -1 +1 @@ -mlr --opprint --from test/input/abixy step -a shift-lag -f i +mlr --opprint --from test/input/abixy step -a shift_lag -f i diff --git a/test/cases/verb-step/0012/cmd b/test/cases/verb-step/0012/cmd index 6b3cf913a..32ce93745 100644 --- a/test/cases/verb-step/0012/cmd +++ b/test/cases/verb-step/0012/cmd @@ -1 +1 @@ -mlr --opprint --from test/input/abixy step -a shift-lead -f i +mlr --opprint --from test/input/abixy step -a shift_lead -f i diff --git a/test/cases/verb-step/0013/cmd b/test/cases/verb-step/0013/cmd index 36da8ed39..a401023e0 100644 --- a/test/cases/verb-step/0013/cmd +++ b/test/cases/verb-step/0013/cmd @@ -1 +1 @@ -mlr --opprint --from test/input/abixy step -a shift-lag,shift-lead -f i +mlr --opprint --from test/input/abixy step -a shift_lag,shift_lead -f i diff --git a/test/cases/verb-step/0014/cmd b/test/cases/verb-step/0014/cmd index 3b41e69a0..1e051e5f3 100644 --- a/test/cases/verb-step/0014/cmd +++ b/test/cases/verb-step/0014/cmd @@ -1 +1 @@ -mlr --opprint --from test/input/abixy step -a shift-lag -f i -g a then sort -f a +mlr --opprint --from test/input/abixy step -a shift_lag -f i -g a then sort -f a diff --git a/test/cases/verb-step/0015/cmd b/test/cases/verb-step/0015/cmd index 843fd66a2..c40b25c96 100644 --- a/test/cases/verb-step/0015/cmd +++ b/test/cases/verb-step/0015/cmd @@ -1 +1 
@@ -mlr --opprint --from test/input/abixy step -a shift-lead -f i -g a then sort -f a +mlr --opprint --from test/input/abixy step -a shift_lead -f i -g a then sort -f a diff --git a/test/cases/verb-step/0016/cmd b/test/cases/verb-step/0016/cmd index 5f1691591..19dc6432e 100644 --- a/test/cases/verb-step/0016/cmd +++ b/test/cases/verb-step/0016/cmd @@ -1 +1 @@ -mlr --opprint --from test/input/abixy step -a shift-lag,shift-lead -f i -g a then sort -f a +mlr --opprint --from test/input/abixy step -a shift_lag,shift_lead -f i -g a then sort -f a diff --git a/test/cases/verb-step/0017/cmd b/test/cases/verb-step/0017/cmd new file mode 100644 index 000000000..9af642f2b --- /dev/null +++ b/test/cases/verb-step/0017/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a slwin_1_0 -f i diff --git a/test/cases/verb-step/0017/experr b/test/cases/verb-step/0017/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0017/expout b/test/cases/verb-step/0017/expout new file mode 100644 index 000000000..4c2ca8069 --- /dev/null +++ b/test/cases/verb-step/0017/expout @@ -0,0 +1,11 @@ +a b i x y i_1_0 +pan pan 1 0.3467901443380824 0.7268028627434533 1 +eks pan 2 0.7586799647899636 0.5221511083334797 1.5 +wye wye 3 0.20460330576630303 0.33831852551664776 2.5 +eks wye 4 0.38139939387114097 0.13418874328430463 3.5 +wye pan 5 0.5732889198020006 0.8636244699032729 4.5 +zee pan 6 0.5271261600918548 0.49322128674835697 5.5 +eks zee 7 0.6117840605678454 0.1878849191181694 6.5 +zee wye 8 0.5985540091064224 0.976181385699006 7.5 +hat wye 9 0.03144187646093577 0.7495507603507059 8.5 +pan wye 10 0.5026260055412137 0.9526183602969864 9.5 diff --git a/test/cases/verb-step/0018/cmd b/test/cases/verb-step/0018/cmd new file mode 100644 index 000000000..33c32a286 --- /dev/null +++ b/test/cases/verb-step/0018/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a slwin_0_1 -f i diff --git a/test/cases/verb-step/0018/experr 
b/test/cases/verb-step/0018/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0018/expout b/test/cases/verb-step/0018/expout new file mode 100644 index 000000000..cd85eff80 --- /dev/null +++ b/test/cases/verb-step/0018/expout @@ -0,0 +1,11 @@ +a b i x y i_0_1 +pan pan 1 0.3467901443380824 0.7268028627434533 1.5 +eks pan 2 0.7586799647899636 0.5221511083334797 2.5 +wye wye 3 0.20460330576630303 0.33831852551664776 3.5 +eks wye 4 0.38139939387114097 0.13418874328430463 4.5 +wye pan 5 0.5732889198020006 0.8636244699032729 5.5 +zee pan 6 0.5271261600918548 0.49322128674835697 6.5 +eks zee 7 0.6117840605678454 0.1878849191181694 7.5 +zee wye 8 0.5985540091064224 0.976181385699006 8.5 +hat wye 9 0.03144187646093577 0.7495507603507059 9.5 +pan wye 10 0.5026260055412137 0.9526183602969864 10 diff --git a/test/cases/verb-step/0019/cmd b/test/cases/verb-step/0019/cmd new file mode 100644 index 000000000..79e489c31 --- /dev/null +++ b/test/cases/verb-step/0019/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a shift_lag,slwin_0_3 -f i diff --git a/test/cases/verb-step/0019/experr b/test/cases/verb-step/0019/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0019/expout b/test/cases/verb-step/0019/expout new file mode 100644 index 000000000..60de1cd88 --- /dev/null +++ b/test/cases/verb-step/0019/expout @@ -0,0 +1,11 @@ +a b i x y i_shift_lag i_0_3 +pan pan 1 0.3467901443380824 0.7268028627434533 - 2.5 +eks pan 2 0.7586799647899636 0.5221511083334797 1 3.5 +wye wye 3 0.20460330576630303 0.33831852551664776 2 4.5 +eks wye 4 0.38139939387114097 0.13418874328430463 3 5.5 +wye pan 5 0.5732889198020006 0.8636244699032729 4 6.5 +zee pan 6 0.5271261600918548 0.49322128674835697 5 7.5 +eks zee 7 0.6117840605678454 0.1878849191181694 6 8.5 +zee wye 8 0.5985540091064224 0.976181385699006 7 9 +hat wye 9 0.03144187646093577 0.7495507603507059 8 9.5 +pan wye 10 0.5026260055412137 0.9526183602969864 9 
10 diff --git a/test/cases/verb-step/0020/cmd b/test/cases/verb-step/0020/cmd new file mode 100644 index 000000000..38d7bba6b --- /dev/null +++ b/test/cases/verb-step/0020/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a slwin_2_1 -f i -g a then sort -f a diff --git a/test/cases/verb-step/0020/experr b/test/cases/verb-step/0020/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0020/expout b/test/cases/verb-step/0020/expout new file mode 100644 index 000000000..0d9a40591 --- /dev/null +++ b/test/cases/verb-step/0020/expout @@ -0,0 +1,11 @@ +a b i x y i_2_1 +eks pan 2 0.7586799647899636 0.5221511083334797 3 +eks wye 4 0.38139939387114097 0.13418874328430463 4.333333333333333 +eks zee 7 0.6117840605678454 0.1878849191181694 4.333333333333333 +hat wye 9 0.03144187646093577 0.7495507603507059 9 +pan pan 1 0.3467901443380824 0.7268028627434533 5.5 +pan wye 10 0.5026260055412137 0.9526183602969864 5.5 +wye wye 3 0.20460330576630303 0.33831852551664776 4 +wye pan 5 0.5732889198020006 0.8636244699032729 4 +zee pan 6 0.5271261600918548 0.49322128674835697 7 +zee wye 8 0.5985540091064224 0.976181385699006 7 diff --git a/test/cases/verb-step/0021/cmd b/test/cases/verb-step/0021/cmd new file mode 100644 index 000000000..258723715 --- /dev/null +++ b/test/cases/verb-step/0021/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a shift_lead,slwin_2_1 -f i -g a then sort -f a diff --git a/test/cases/verb-step/0021/experr b/test/cases/verb-step/0021/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0021/expout b/test/cases/verb-step/0021/expout new file mode 100644 index 000000000..d0fb367ec --- /dev/null +++ b/test/cases/verb-step/0021/expout @@ -0,0 +1,11 @@ +a b i x y i_shift_lead i_2_1 +eks pan 2 0.7586799647899636 0.5221511083334797 4 3 +eks wye 4 0.38139939387114097 0.13418874328430463 7 4.333333333333333 +eks zee 7 0.6117840605678454 0.1878849191181694 - 4.333333333333333 
+hat wye 9 0.03144187646093577 0.7495507603507059 - 9 +pan pan 1 0.3467901443380824 0.7268028627434533 10 5.5 +pan wye 10 0.5026260055412137 0.9526183602969864 - 5.5 +wye wye 3 0.20460330576630303 0.33831852551664776 5 4 +wye pan 5 0.5732889198020006 0.8636244699032729 - 4 +zee pan 6 0.5271261600918548 0.49322128674835697 8 7 +zee wye 8 0.5985540091064224 0.976181385699006 - 7 diff --git a/test/cases/verb-step/0022/cmd b/test/cases/verb-step/0022/cmd new file mode 100644 index 000000000..73cc1c511 --- /dev/null +++ b/test/cases/verb-step/0022/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a slwin_2_0,slwin_1_1,slwin_0_2 -f i -g a then sort -f a diff --git a/test/cases/verb-step/0022/experr b/test/cases/verb-step/0022/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-step/0022/expout b/test/cases/verb-step/0022/expout new file mode 100644 index 000000000..b5f968f16 --- /dev/null +++ b/test/cases/verb-step/0022/expout @@ -0,0 +1,10 @@ +a b i x y i_2_0 i_1_1 i_0_2 +eks pan 2 0.7586799647899636 0.5221511083334797 2 3 4.333333333333333 +eks wye 4 0.38139939387114097 0.13418874328430463 3 4.333333333333333 5.5 +eks zee 7 0.6117840605678454 0.1878849191181694 4.333333333333333 5.5 7 +pan pan 1 0.3467901443380824 0.7268028627434533 1 5.5 5.5 +pan wye 10 0.5026260055412137 0.9526183602969864 5.5 5.5 10 +wye wye 3 0.20460330576630303 0.33831852551664776 3 4 4 +wye pan 5 0.5732889198020006 0.8636244699032729 4 4 5 +zee pan 6 0.5271261600918548 0.49322128674835697 6 7 7 +zee wye 8 0.5985540091064224 0.976181385699006 7 7 8 diff --git a/test/cases/verb-step/0023/cmd b/test/cases/verb-step/0023/cmd new file mode 100644 index 000000000..73cc1c511 --- /dev/null +++ b/test/cases/verb-step/0023/cmd @@ -0,0 +1 @@ +mlr --opprint --from test/input/abixy step -a slwin_2_0,slwin_1_1,slwin_0_2 -f i -g a then sort -f a diff --git a/test/cases/verb-step/0023/experr b/test/cases/verb-step/0023/experr new file mode 100644 index 
000000000..e69de29bb diff --git a/test/cases/verb-step/0023/expout b/test/cases/verb-step/0023/expout new file mode 100644 index 000000000..b5f968f16 --- /dev/null +++ b/test/cases/verb-step/0023/expout @@ -0,0 +1,10 @@ +a b i x y i_2_0 i_1_1 i_0_2 +eks pan 2 0.7586799647899636 0.5221511083334797 2 3 4.333333333333333 +eks wye 4 0.38139939387114097 0.13418874328430463 3 4.333333333333333 5.5 +eks zee 7 0.6117840605678454 0.1878849191181694 4.333333333333333 5.5 7 +pan pan 1 0.3467901443380824 0.7268028627434533 1 5.5 5.5 +pan wye 10 0.5026260055412137 0.9526183602969864 5.5 5.5 10 +wye wye 3 0.20460330576630303 0.33831852551664776 3 4 4 +wye pan 5 0.5732889198020006 0.8636244699032729 4 4 5 +zee pan 6 0.5271261600918548 0.49322128674835697 6 7 7 +zee wye 8 0.5985540091064224 0.976181385699006 7 7 8 diff --git a/test/input/window.mlr b/test/input/window.mlr index d53bf2411..4c4d38ebc 100644 --- a/test/input/window.mlr +++ b/test/input/window.mlr @@ -1,3 +1,7 @@ +# ================================================================ +# Sliding average with window over n previous rows and current row. +# ================================================================ + begin { # INPUT PARAMETERS # They can do 'mlr put -s window_size=4 -s input_field_names=x,y ...' @@ -7,17 +11,17 @@ begin { # In Miller 6 (Go port) we'll have arrays and you'll be able to do # @input_field_names = ["x", "y"]. if (is_absent(@input_field_names)) { - @input_field_names = splitnv("x,y", ",") + @input_field_names = splitax("x,y", ",") } else { - @input_field_names = splitnv(@input_field_names, ",") + @input_field_names = splitax(@input_field_names, ",") } # INITIALIZATION @output_field_names = {}; - for (_, name in @input_field_names) { + for (name in @input_field_names) { @output_field_names[name] = name . 
"_avg"; } - for (_, name in @input_field_names) { + for (name in @input_field_names) { for (i = 0; i < @window_size; i+=1) { @windows[name][i] = 0; } @@ -26,19 +30,19 @@ begin { } # Slide the windows -for (_, name in @input_field_names) { +for (name in @input_field_names) { for (i = 1; i < @window_size; i+=1) { @windows[name][i-1] = @windows[name][i]; } } # Update the windows with new data -for (_, name in @input_field_names) { +for (name in @input_field_names) { @windows[name][@window_size - 1] = $[name]; } # Compute the averages sums = {}; -for (_, name in @input_field_names) { +for (name in @input_field_names) { for (i = 0; i < @window_size; i+=1) { sums[name] += @windows[name][i]; } @@ -47,6 +51,6 @@ denominator = @window_size; if (NR < @window_size) { denominator = NR } -for (_, name in @input_field_names) { +for (name in @input_field_names) { $[@output_field_names[name]] = sums[name] / denominator; } diff --git a/test/input/window2.mlr b/test/input/window2.mlr index 3cc824f94..a55b8d97b 100644 --- a/test/input/window2.mlr +++ b/test/input/window2.mlr @@ -1,3 +1,8 @@ +# ================================================================ +# Sliding average with window over m previous rows, current row, and +# n subsequent rows. +# ================================================================ + begin { # Input parameters # They can do 'mlr put -s input_field_names=x,y ...' diff --git a/todo.txt b/todo.txt index ac007f6b1..7f0239615 100644 --- a/todo.txt +++ b/todo.txt @@ -29,8 +29,10 @@ a-b.go -> a_b.go renamer PR ---------------------------------------------------------------- ! sliding window / moving average - o port u/window*.mlr from mlrc to mlr (actually, fix mlr of course) - o sliding-window averages into mapper step (C + Go) + i https://github.com/johnkerl/miller/issues/362 + i $ ~/git/johnkerl/scripts-math/stats/cump 0.9 3 100 > ccump.dat + $ pgr -nc -n -p -ms 5 ccump.dat & + o new example entry, with ccump and pgr ! 
make a lag-by-n and lead-by-n @@ -97,11 +99,11 @@ a-b.go -> a_b.go renamer PR ================================================================ UX +* ?xyz and ??xyz in repl, for :help and :help find respectively + ! bnf fix for '[[' ']]' etc -- make it a nesting of singles. since otherwise no '[[3,4]]' literals :( ! broadly rethink os.Exit, especially as affecting mlr repl -* ?xyz and ??xyz in repl, for :help and :help find respectively - * consider expanding '(error)' to have more useful error-text * sync-print option; or (yuck) another xprint variant; or ...; emph dump/eprint * strptime w/ ...00.Z -> error @@ -183,6 +185,8 @@ w contact re https://jsonlines.org/on_the_web/ =============================================================== TESTING +! ./mlr vs mlr ... + ! pos/neg 0x/0b/0o UTs * RT ngrams.sh -v -o 1 one-word-list.txt