miller/test/input/test-percentiles.mlr
John Kerl d341cc6dd3
DSL functions for summary stats over arrays / maps (#1364)
* DSL stats functions [WIP]

* refactor

* move percentile computation to bifs module; iterate

* mode and antimode

* percentile iterate

* percentile sketching

* neaten

* unit-test iterate

* unify old & new min & max functions

* unit-test cases

* code-dedupe between mode and antimode

* make mode/antimode ties deterministic via first-found-wins rule

* online help strings for new stats DSL functions

* artifacts from `make dev`

* help info on how min/max now recurse into collections

* artifacts from `make dev`

* typofix
2023-08-26 16:02:30 -04:00

44 lines
1.4 KiB
Text

begin {
  # Two out-of-stream collections, filled once per record by the main block:
  # @m is a map and @a is an array, so the same stats can be run over each.
  #
  # The field being summarized is named by @field, which is NOT set here. It
  # must be supplied in the script invocation, via put -s field=namegoeshere.
  # Doing it that way lets this one script be reused to test percentiles over
  # various field names/types.
  @m = {};
  @a = [];
}
# For each record, store the value of the field named by @field under the
# current record number NR, in both the map @m and the array @a.
@m[NR] = $[@field];
@a[NR] = $[@field];
end {
  # Percentile levels shared by every percentiles() call below.
  ps = [0,1,10,25,50,75,90,99,100];

  # Option maps for the three non-default percentiles() variants under test.
  interp_opts = {"interpolate_linearly":true};
  array_opts = {"output_array_not_map":true};
  interp_array_opts = {
    "interpolate_linearly": true,
    "output_array_not_map":true,
  };

  # The same set of stats functions is applied twice: keys prefixed "a_" take
  # the array @a as input, keys prefixed "m_" take the map @m.
  outputs = {
    "a_min":    min(@a),
    "a_max":    max(@a),
    "a_minlen": minlen(@a),
    "a_maxlen": maxlen(@a),
    "a_median": median(@a),
    "a_ps":     percentiles(@a, ps),
    "a_psi":    percentiles(@a, ps, interp_opts),
    "a_psa":    percentiles(@a, ps, array_opts),
    "a_psia":   percentiles(@a, ps, interp_array_opts),

    "m_min":    min(@m),
    "m_max":    max(@m),
    "m_minlen": minlen(@m),
    "m_maxlen": maxlen(@m),
    "m_median": median(@m),
    "m_ps":     percentiles(@m, ps),
    "m_psi":    percentiles(@m, ps, interp_opts),
    "m_psa":    percentiles(@m, ps, array_opts),
    "m_psia":   percentiles(@m, ps, interp_array_opts),
  };

  # Emit all collected results as the script's only output.
  emit outputs;
}