Add mlr step -a rprod for running products (#1228)

* mlr step -a rprod

* artifacts from `make dev`

* codespell
This commit is contained in:
John Kerl 2023-03-12 13:32:23 -04:00 committed by GitHub
parent 748a908b7a
commit bfc8ab5ce2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 120 additions and 41 deletions

View file

@ -63,7 +63,7 @@ regression-test: build
# ----------------------------------------------------------------
# Formatting
# go fmt ./... finds experimental C files which we want to ignore.
fmt:
fmt format:
-go fmt ./cmd/...
-go fmt ./internal/pkg/...
-go fmt ./regression_test.go

View file

@ -1933,6 +1933,7 @@ MILLER(1) MILLER(1)
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any
@ -3337,5 +3338,5 @@ MILLER(1) MILLER(1)
2023-03-06 MILLER(1)
2023-03-12 MILLER(1)
</pre>

View file

@ -1912,6 +1912,7 @@ MILLER(1) MILLER(1)
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any
@ -3316,4 +3317,4 @@ MILLER(1) MILLER(1)
2023-03-06 MILLER(1)
2023-03-12 MILLER(1)

View file

@ -261,4 +261,4 @@ Miller has the verbs
[`json-stringify`](reference-verbs.md#json-stringify), and the DSL functions
[`json_parse`](reference-dsl-builtin-functions.md#json_parse) and
[`json_stringify`](reference-dsl-builtin-functions.md#json_stringify).
In some other lannguages these are called `json_decode` and `json_encode`.
In some other languages these are called `json_decode` and `json_encode`.

View file

@ -219,4 +219,4 @@ Miller has the verbs
[`json-stringify`](reference-verbs.md#json-stringify), and the DSL functions
[`json_parse`](reference-dsl-builtin-functions.md#json_parse) and
[`json_stringify`](reference-dsl-builtin-functions.md#json_stringify).
In some other lannguages these are called `json_decode` and `json_encode`.
In some other languages these are called `json_decode` and `json_encode`.

View file

@ -20,14 +20,14 @@ Verbs are the building blocks of how you can use Miller to process your data.
When you type
<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint sort -n quanity then head -n 4 example.csv</b>
<b>mlr --icsv --opprint sort -n quantity then head -n 4 example.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
color shape flag k index quantity rate
yellow triangle true 1 11 43.6498 9.8870
red square true 2 15 79.2778 0.0130
red circle true 3 16 13.8103 2.9010
red square false 4 48 77.5542 7.4670
color shape flag k index quantity rate
red circle true 3 16 13.8103 2.9010
yellow triangle true 1 11 43.6498 9.8870
yellow circle true 9 87 63.5058 8.3350
yellow circle true 8 73 63.9785 4.2370
</pre>
the `sort` and `head` bits are _verbs_. See the [Miller command
@ -3404,6 +3404,7 @@ Options:
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any

View file

@ -4,7 +4,7 @@ Verbs are the building blocks of how you can use Miller to process your data.
When you type
GENMD-RUN-COMMAND
mlr --icsv --opprint sort -n quanity then head -n 4 example.csv
mlr --icsv --opprint sort -n quantity then head -n 4 example.csv
GENMD-EOF
the `sort` and `head` bits are _verbs_. See the [Miller command

View file

@ -90,7 +90,7 @@ func processToStdout(
//
// I could have implemented this with a single construction of the transformers
// and having each transformers implement a Reset() method. However, having
// effectively two initalizers per transformers -- constructor and reset method
// effectively two initializers per transformers -- constructor and reset method
// -- I'd surely miss some logic somewhere. With in-place mode being a less
// frequently used code path, this would likely lead to latent bugs. So this
// approach leads to greater code stability.

View file

@ -604,6 +604,12 @@ var STEPPER_LOOKUP_TABLE = []tStepperLookup{
stepperAllocator: stepperRatioAlloc,
desc: "Compute ratios in field(s) between successive records",
},
{
name: "rprod",
stepperInputFromName: stepperRprodInputFromName,
stepperAllocator: stepperRprodAlloc,
desc: "Compute running products of field(s) between successive records",
},
{
name: "rsum",
stepperInputFromName: stepperRsumInputFromName,
@ -980,6 +986,55 @@ func (stepper *tStepperRatio) process(
currec.PutCopy(stepper.outputFieldName, ratio.Copy())
}
// ================================================================
// tStepperRprod is the stepper behind "step -a rprod": it carries a running
// product of one input field and writes that product to a companion output
// field on each record it processes.
type tStepperRprod struct {
	// rprod is the product accumulated so far.
	rprod *mlrval.Mlrval

	// inputFieldName is the field whose values are multiplied together.
	inputFieldName string

	// outputFieldName is the field that receives the running product.
	outputFieldName string
}
// stepperRprodInputFromName builds the tStepperInput descriptor for the rprod
// stepper. It requests zero records backward and zero forward: the running
// product is held in the stepper's own rprod field rather than being
// recomputed from a window of neighboring records.
func stepperRprodInputFromName(
	stepperName string,
) *tStepperInput {
	input := tStepperInput{
		name:               stepperName,
		numRecordsBackward: 0,
		numRecordsForward:  0,
	}
	return &input
}
// stepperRprodAlloc constructs an rprod stepper for the given input field.
// The output field is named by appending "_rprod" to the input field name.
// stepperInput and the two string-slice arguments are not used by this
// stepper; they are present only so the signature matches the other stepper
// allocators.
func stepperRprodAlloc(
	stepperInput *tStepperInput,
	inputFieldName string,
	_unused1 []string,
	_unused2 []string,
) tStepper {
	stepper := &tStepperRprod{
		inputFieldName:  inputFieldName,
		outputFieldName: inputFieldName + "_rprod",
	}
	// Start from the multiplicative identity so that the first non-empty
	// value becomes the first running product.
	stepper.rprod = mlrval.FromInt(1)
	return stepper
}
// process updates the running product from the entry at index 0 of the window
// (treated here as the current record) and stores the result in that record's
// output field.
//
// If the window has no entry at index 0, nothing happens. If the input field's
// value is void, the output field is set to void and the accumulated product
// is left unchanged, so later records continue from the previous product.
func (stepper *tStepperRprod) process(
	windowKeeper *utils.TWindowKeeper,
) {
	entry := windowKeeper.Get(0)
	if entry == nil {
		return
	}
	record := entry.(*types.RecordAndContext).Record
	factor := record.Get(stepper.inputFieldName)

	// Void input: pass the void through without touching the accumulator.
	if factor.IsVoid() {
		record.PutCopy(stepper.outputFieldName, mlrval.VOID)
		return
	}

	// Fold the current value into the product. PutCopy stores a copy, so the
	// record does not alias the stepper's accumulator.
	stepper.rprod = bifs.BIF_times(factor, stepper.rprod)
	record.PutCopy(stepper.outputFieldName, stepper.rprod)
}
// ================================================================
type tStepperRsum struct {
rsum *mlrval.Mlrval

View file

@ -1912,6 +1912,7 @@ MILLER(1) MILLER(1)
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any
@ -3316,4 +3317,4 @@ MILLER(1) MILLER(1)
2023-03-06 MILLER(1)
2023-03-12 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2023-03-06
.\" Date: 2023-03-12
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2023-03-06" "\ \&" "\ \&"
.TH "MILLER" "1" "2023-03-12" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -2407,6 +2407,7 @@ Options:
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any

View file

@ -1110,6 +1110,7 @@ Options:
ewma Exponentially weighted moving average over successive records
from-first Compute differences in field(s) from first record
ratio Compute ratios in field(s) between successive records
rprod Compute running products of field(s) between successive records
rsum Compute running sums of field(s) between successive records
shift Alias for shift_lag
shift_lag Include value(s) in field(s) from the previous record, if any

View file

@ -1 +1 @@
mlr --opprint step -a rsum,shift,delta,counter -f x,y test/input/abixy
mlr --opprint step -a rsum,rprod,shift,delta,counter -f x,y test/input/abixy

View file

@ -1,11 +1,11 @@
a b i x y x_rsum x_shift x_delta x_counter y_rsum y_shift y_delta y_counter
pan pan 1 0.34679014 0.72680286 0.34679014 - 0 1 0.72680286 - 0 1
eks pan 2 0.75867996 0.52215111 1.10547011 0.34679014 0.41188982 2 1.24895397 0.72680286 -0.20465175 2
wye wye 3 0.20460331 0.33831853 1.31007341 0.75867996 -0.55407666 3 1.58727250 0.52215111 -0.18383258 3
eks wye 4 0.38139939 0.13418874 1.69147281 0.20460331 0.17679609 4 1.72146124 0.33831853 -0.20412978 4
wye pan 5 0.57328892 0.86362447 2.26476173 0.38139939 0.19188953 5 2.58508571 0.13418874 0.72943573 5
zee pan 6 0.52712616 0.49322129 2.79188789 0.57328892 -0.04616276 6 3.07830700 0.86362447 -0.37040318 6
eks zee 7 0.61178406 0.18788492 3.40367195 0.52712616 0.08465790 7 3.26619192 0.49322129 -0.30533637 7
zee wye 8 0.59855401 0.97618139 4.00222596 0.61178406 -0.01323005 8 4.24237330 0.18788492 0.78829647 8
hat wye 9 0.03144188 0.74955076 4.03366783 0.59855401 -0.56711213 9 4.99192406 0.97618139 -0.22663063 9
pan wye 10 0.50262601 0.95261836 4.53629384 0.03144188 0.47118413 10 5.94454242 0.74955076 0.20306760 10
a b i x y x_rsum x_rprod x_shift x_delta x_counter y_rsum y_rprod y_shift y_delta y_counter
pan pan 1 0.34679014 0.72680286 0.34679014 0.34679014 - 0 1 0.72680286 0.72680286 - 0 1
eks pan 2 0.75867996 0.52215111 1.10547011 0.26310273 0.34679014 0.41188982 2 1.24895397 0.37950092 0.72680286 -0.20465175 2
wye wye 3 0.20460331 0.33831853 1.31007341 0.05383169 0.75867996 -0.55407666 3 1.58727250 0.12839219 0.52215111 -0.18383258 3
eks wye 4 0.38139939 0.13418874 1.69147281 0.02053137 0.20460331 0.17679609 4 1.72146124 0.01722879 0.33831853 -0.20412978 4
wye pan 5 0.57328892 0.86362447 2.26476173 0.01177041 0.38139939 0.19188953 5 2.58508571 0.01487920 0.13418874 0.72943573 5
zee pan 6 0.52712616 0.49322129 2.79188789 0.00620449 0.57328892 -0.04616276 6 3.07830700 0.00733874 0.86362447 -0.37040318 6
eks zee 7 0.61178406 0.18788492 3.40367195 0.00379581 0.52712616 0.08465790 7 3.26619192 0.00137884 0.49322129 -0.30533637 7
zee wye 8 0.59855401 0.97618139 4.00222596 0.00227200 0.61178406 -0.01323005 8 4.24237330 0.00134600 0.18788492 0.78829647 8
hat wye 9 0.03144188 0.74955076 4.03366783 0.00007144 0.59855401 -0.56711213 9 4.99192406 0.00100889 0.97618139 -0.22663063 9
pan wye 10 0.50262601 0.95261836 4.53629384 0.00003591 0.03144188 0.47118413 10 5.94454242 0.00096109 0.74955076 0.20306760 10

View file

@ -1 +1 @@
mlr --opprint step -a rsum,shift,delta,counter -f x,y -g a test/input/abixy
mlr --opprint step -a rsum,rprod,shift,delta,counter -f x,y -g a test/input/abixy

View file

@ -1,11 +1,11 @@
a b i x y x_rsum x_shift x_delta x_counter y_rsum y_shift y_delta y_counter
pan pan 1 0.34679014 0.72680286 0.34679014 - 0 1 0.72680286 - 0 1
eks pan 2 0.75867996 0.52215111 0.75867996 - 0 1 0.52215111 - 0 1
wye wye 3 0.20460331 0.33831853 0.20460331 - 0 1 0.33831853 - 0 1
eks wye 4 0.38139939 0.13418874 1.14007936 0.75867996 -0.37728057 2 0.65633985 0.52215111 -0.38796237 2
wye pan 5 0.57328892 0.86362447 0.77789223 0.20460331 0.36868561 2 1.20194300 0.33831853 0.52530594 2
zee pan 6 0.52712616 0.49322129 0.52712616 - 0 1 0.49322129 - 0 1
eks zee 7 0.61178406 0.18788492 1.75186342 0.38139939 0.23038467 3 0.84422477 0.13418874 0.05369618 3
zee wye 8 0.59855401 0.97618139 1.12568017 0.52712616 0.07142785 2 1.46940267 0.49322129 0.48296010 2
hat wye 9 0.03144188 0.74955076 0.03144188 - 0 1 0.74955076 - 0 1
pan wye 10 0.50262601 0.95261836 0.84941615 0.34679014 0.15583586 2 1.67942122 0.72680286 0.22581550 2
a b i x y x_rsum x_rprod x_shift x_delta x_counter y_rsum y_rprod y_shift y_delta y_counter
pan pan 1 0.34679014 0.72680286 0.34679014 0.34679014 - 0 1 0.72680286 0.72680286 - 0 1
eks pan 2 0.75867996 0.52215111 0.75867996 0.75867996 - 0 1 0.52215111 0.52215111 - 0 1
wye wye 3 0.20460331 0.33831853 0.20460331 0.20460331 - 0 1 0.33831853 0.33831853 - 0 1
eks wye 4 0.38139939 0.13418874 1.14007936 0.28936008 0.75867996 -0.37728057 2 0.65633985 0.07006680 0.52215111 -0.38796237 2
wye pan 5 0.57328892 0.86362447 0.77789223 0.11729681 0.20460331 0.36868561 2 1.20194300 0.29218016 0.33831853 0.52530594 2
zee pan 6 0.52712616 0.49322129 0.52712616 0.52712616 - 0 1 0.49322129 0.49322129 - 0 1
eks zee 7 0.61178406 0.18788492 1.75186342 0.17702588 0.38139939 0.23038467 3 0.84422477 0.01316450 0.13418874 0.05369618 3
zee wye 8 0.59855401 0.97618139 1.12568017 0.31551348 0.52712616 0.07142785 2 1.46940267 0.48147344 0.49322129 0.48296010 2
hat wye 9 0.03144188 0.74955076 0.03144188 0.03144188 - 0 1 0.74955076 0.74955076 - 0 1
pan wye 10 0.50262601 0.95261836 0.84941615 0.17430575 0.34679014 0.15583586 2 1.67942122 0.69236575 0.72680286 0.22581550 2

View file

@ -1 +1 @@
mlr --ojson step -a rsum,shift,delta,counter -f x,y test/input/abixy-het
mlr --ojson step -a rsum,rprod,shift,delta,counter -f x,y test/input/abixy-het

View file

@ -6,10 +6,12 @@
"x": 0.34679014,
"y": 0.72680286,
"x_rsum": 0.34679014,
"x_rprod": 0.34679014,
"x_shift": "",
"x_delta": 0,
"x_counter": 1,
"y_rsum": 0.72680286,
"y_rprod": 0.72680286,
"y_shift": "",
"y_delta": 0,
"y_counter": 1
@ -21,10 +23,12 @@
"x": 0.75867996,
"y": 0.52215111,
"x_rsum": 1.10547011,
"x_rprod": 0.26310273,
"x_shift": 0.34679014,
"x_delta": 0.41188982,
"x_counter": 2,
"y_rsum": 1.24895397,
"y_rprod": 0.37950092,
"y_shift": 0.72680286,
"y_delta": -0.20465175,
"y_counter": 2
@ -36,10 +40,12 @@
"x": 0.20460331,
"y": 0.33831853,
"x_rsum": 1.31007341,
"x_rprod": 0.05383169,
"x_shift": 0.75867996,
"x_delta": -0.55407666,
"x_counter": 3,
"y_rsum": 1.58727250,
"y_rprod": 0.12839219,
"y_shift": 0.52215111,
"y_delta": -0.18383258,
"y_counter": 3
@ -51,10 +57,12 @@
"x": 0.38139939,
"y": 0.13418874,
"x_rsum": 1.69147281,
"x_rprod": 0.02053137,
"x_shift": 0.20460331,
"x_delta": 0.17679609,
"x_counter": 4,
"y_rsum": 1.72146124,
"y_rprod": 0.01722879,
"y_shift": 0.33831853,
"y_delta": -0.20412978,
"y_counter": 4
@ -66,6 +74,7 @@
"xxx": 0.57328892,
"y": 0.86362447,
"y_rsum": 2.58508571,
"y_rprod": 0.01487920,
"y_shift": 0.13418874,
"y_delta": 0.72943573,
"y_counter": 5
@ -77,10 +86,12 @@
"x": 0.52712616,
"y": 0.49322129,
"x_rsum": 2.21859897,
"x_rprod": 0.01082262,
"x_shift": "",
"x_delta": 0,
"x_counter": 5,
"y_rsum": 3.07830700,
"y_rprod": 0.00733874,
"y_shift": 0.86362447,
"y_delta": -0.37040318,
"y_counter": 6
@ -92,10 +103,12 @@
"x": 0.61178406,
"y": 0.18788492,
"x_rsum": 2.83038303,
"x_rprod": 0.00662111,
"x_shift": 0.52712616,
"x_delta": 0.08465790,
"x_counter": 6,
"y_rsum": 3.26619192,
"y_rprod": 0.00137884,
"y_shift": 0.49322129,
"y_delta": -0.30533637,
"y_counter": 7
@ -107,6 +120,7 @@
"x": 0.59855401,
"yyy": 0.97618139,
"x_rsum": 3.42893704,
"x_rprod": 0.00396309,
"x_shift": 0.61178406,
"x_delta": -0.01323005,
"x_counter": 7
@ -118,10 +132,12 @@
"x": 0.03144188,
"y": 0.74955076,
"x_rsum": 3.46037891,
"x_rprod": 0.00012461,
"x_shift": 0.59855401,
"x_delta": -0.56711213,
"x_counter": 8,
"y_rsum": 4.01574268,
"y_rprod": 0.00103351,
"y_shift": "",
"y_delta": 0,
"y_counter": 8
@ -133,10 +149,12 @@
"x": 0.50262601,
"y": 0.95261836,
"x_rsum": 3.96300492,
"x_rprod": 0.00006263,
"x_shift": 0.03144188,
"x_delta": 0.47118413,
"x_counter": 9,
"y_rsum": 4.96836104,
"y_rprod": 0.00098454,
"y_shift": 0.74955076,
"y_delta": 0.20306760,
"y_counter": 9