DSL functions for summary stats over arrays / maps (#1364)

* DSL stats functions [WIP]

* refactor

* move percentile computation to bifs module; iterate

* mode and antimode

* percentile iterate

* percentile sketching

* neaten

* unit-test iterate

* unify old & new min & max functions

* unit-test cases

* code-dedupe between mode and antimode

* make mode/antimode ties deterministic via first-found-wins rule

* online help strings for new stats DSL functions

* artifacts from `make dev`

* help info on how min/max now recurse into collections

* artifacts from `make dev`

* typofix
This commit is contained in:
John Kerl 2023-08-26 16:02:30 -04:00 committed by GitHub
parent 392b34fd04
commit d341cc6dd3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
93 changed files with 3731 additions and 361 deletions

View file

@ -0,0 +1 @@
mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr

View file

@ -0,0 +1,20 @@
count_0 (error)
count_0_type error
count_null (error)
count_null_type error
count_empty_array 0
count_empty_array_type int
count_array_1 1
count_array_1_type int
count_array_3 3
count_array_3_type int
count_array_nested 3
count_array_nested_type int
count_empty_map 0
count_empty_map_type int
count_map_1 1
count_map_1_type int
count_map_3 3
count_map_3_type int
count_map_nested 3
count_map_nested_type int

View file

@ -0,0 +1,26 @@
# Regression-test case for the count() DSL function.
# Calls count() on a scalar, a null, an unassigned local, several arrays and
# several maps, then emits every result next to its typeof() so that both the
# value and the type are pinned by the expected output.
end {
outputs = {};
# Non-collection arguments: the accompanying expected output shows (error)
# with type "error" for both of these.
outputs["count_0"] = count(0);
outputs["count_null"] = count(null);
# nonesuch is never assigned anywhere in this script. The accompanying
# expected output has no count_nonesuch entry -- presumably the result is
# absent and so this assignment is skipped; confirm against Miller's
# absent-value rules.
outputs["count_nonesuch"] = count(nonesuch);
# Arrays: empty, one element, three elements.
outputs["count_empty_array"] = count([]);
outputs["count_array_1"] = count([7]);
outputs["count_array_3"] = count([7,8,9]);
# The inner array is a single element: the expected count is 3, not 4.
outputs["count_array_nested"] = count([7,[80,90],9]);
# Maps: the same shapes as the array cases; the expected counts match.
outputs["count_empty_map"] = count({});
outputs["count_map_1"] = count({ "a" : 7} );
outputs["count_map_3"] = count({ "a" : 7, "b" : 8, "c" : 9 } );
outputs["count_map_nested"] = count({ "a" : 7, "b" : [80,90], "c" : 9 });
typed_outputs = {};
# For each result, store the value under its own key and then the type name
# under "<key>_type". The order of these two assignments fixes the order of
# the fields in the emitted record.
for (k, v in outputs) {
typed_outputs[k] = v;
typed_outputs[k."_type"] = typeof(v);
}
# Emit all value/type pairs as one record.
emit typed_outputs;
}

View file

@ -0,0 +1 @@
mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr

View file

@ -0,0 +1,32 @@
distinct_count_0 (error)
distinct_count_0_type error
distinct_count_null (error)
distinct_count_null_type error
distinct_count_empty_array 0
distinct_count_empty_array_type int
distinct_count_array_1 1
distinct_count_array_1_type int
distinct_count_array_3a 3
distinct_count_array_3a_type int
distinct_count_array_3b 2
distinct_count_array_3b_type int
distinct_count_array_3c 1
distinct_count_array_3c_type int
distinct_count_array_3d 1
distinct_count_array_3d_type int
distinct_count_array_nested 2
distinct_count_array_nested_type int
distinct_count_empty_map 0
distinct_count_empty_map_type int
distinct_count_map_1 1
distinct_count_map_1_type int
distinct_count_map_3a 3
distinct_count_map_3a_type int
distinct_count_map_3b 2
distinct_count_map_3b_type int
distinct_count_map_3c 1
distinct_count_map_3c_type int
distinct_count_map_3d 1
distinct_count_map_3d_type int
distinct_count_map_nested 2
distinct_count_map_nested_type int

View file

@ -0,0 +1,32 @@
# Regression-test case for the distinct_count() DSL function.
# Calls distinct_count() on a scalar, a null, an unassigned local, several
# arrays and several maps, then emits every result next to its typeof().
end {
outputs = {};
# Non-collection arguments: the accompanying expected output shows (error)
# with type "error" for both of these.
outputs["distinct_count_0"] = distinct_count(0);
outputs["distinct_count_null"] = distinct_count(null);
# nonesuch is never assigned anywhere in this script. The accompanying
# expected output has no distinct_count_nonesuch entry -- presumably the
# result is absent and so this assignment is skipped; confirm.
outputs["distinct_count_nonesuch"] = distinct_count(nonesuch);
outputs["distinct_count_empty_array"] = distinct_count([]);
outputs["distinct_count_array_1"] = distinct_count([7]);
# 3a..3c: three-element arrays with 3, 2 and 1 distinct values respectively.
outputs["distinct_count_array_3a"] = distinct_count([7,8,9]);
outputs["distinct_count_array_3b"] = distinct_count([7,7,9]);
outputs["distinct_count_array_3c"] = distinct_count([7,7,7]);
# 3d: repeated nulls; the expected result is 1.
outputs["distinct_count_array_3d"] = distinct_count([null,null,null]);
# The expected result is 2: the scalar 7 and the one-element array [7] are
# treated as different values.
outputs["distinct_count_array_nested"] = distinct_count([7,[7],7]);
# Maps: the same value patterns as the array cases; only the values vary,
# and the expected results match the array results.
outputs["distinct_count_empty_map"] = distinct_count({});
outputs["distinct_count_map_1"] = distinct_count({ "a" : 7} );
outputs["distinct_count_map_3a"] = distinct_count({ "a" : 7, "b" : 8, "c" : 9 } );
outputs["distinct_count_map_3b"] = distinct_count({ "a" : 7, "b" : 7, "c" : 9 } );
outputs["distinct_count_map_3c"] = distinct_count({ "a" : 7, "b" : 7, "c" : 7 } );
outputs["distinct_count_map_3d"] = distinct_count({ "a" : null, "b" : null, "c" : null } );
outputs["distinct_count_map_nested"] = distinct_count({ "a" : 7, "b" : [7], "c" : 7 });
typed_outputs = {};
# For each result, store the value under its own key and then the type name
# under "<key>_type". The order of these two assignments fixes the order of
# the fields in the emitted record.
for (k, v in outputs) {
typed_outputs[k] = v;
typed_outputs[k."_type"] = typeof(v);
}
# Emit all value/type pairs as one record.
emit typed_outputs;
}

View file

@ -0,0 +1 @@
mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr

View file

View file

@ -0,0 +1,24 @@
mode_0 (error)
mode_0_type error
mode_null (error)
mode_null_type error
mode_empty_array
mode_empty_array_type empty
mode_array_1 7
mode_array_1_type int
mode_array_3a 7
mode_array_3a_type int
mode_array_3b 7
mode_array_3b_type int
mode_array_nested 9
mode_array_nested_type int
mode_empty_map
mode_empty_map_type empty
mode_map_1 7
mode_map_1_type int
mode_map_3a 7
mode_map_3a_type int
mode_map_3b 7
mode_map_3b_type int
mode_map_nested 9
mode_map_nested_type int

View file

@ -0,0 +1,28 @@
# Regression-test case for the mode() DSL function.
# Calls mode() on a scalar, a null, an unassigned local, several arrays and
# several maps, then emits every result next to its typeof().
end {
outputs = {};
# Non-collection arguments: the accompanying expected output shows (error)
# with type "error" for both of these.
outputs["mode_0"] = mode(0);
outputs["mode_null"] = mode(null);
# nonesuch is never assigned anywhere in this script. The accompanying
# expected output has no mode_nonesuch entry -- presumably the result is
# absent and so this assignment is skipped; confirm.
outputs["mode_nonesuch"] = mode(nonesuch);
# Empty collection: the expected output is an empty value of type "empty".
outputs["mode_empty_array"] = mode([]);
outputs["mode_array_1"] = mode([7]);
# 3a: a three-way tie. The expected result is 7, the first element seen,
# so element order matters here.
outputs["mode_array_3a"] = mode([7,8,9]);
# 3b: 7 occurs twice and wins outright.
outputs["mode_array_3b"] = mode([7,8,7]);
# The expected result is 9: the six 8s sit inside one nested array, which
# counts as a single element, while 9 appears three times at top level.
outputs["mode_array_nested"] = mode([7,[8,8,8,8,8,8],9,9,9]);
# Maps: the same value patterns as the array cases, and the expected
# results match the array results.
outputs["mode_empty_map"] = mode({});
outputs["mode_map_1"] = mode({ "a" : 7} );
outputs["mode_map_3a"] = mode({ "a" : 7, "b" : 8, "c" : 9 } );
outputs["mode_map_3b"] = mode({ "a" : 7, "b" : 8, "c" : 7 } );
outputs["mode_map_nested"] = mode({ "a" : 7, "b" : [8,8,8,8,8,8], "c" : 9, "d": 9, "e": 9 });
typed_outputs = {};
# For each result, store the value under its own key and then the type name
# under "<key>_type". The order of these two assignments fixes the order of
# the fields in the emitted record.
for (k, v in outputs) {
typed_outputs[k] = v;
typed_outputs[k."_type"] = typeof(v);
}
# Emit all value/type pairs as one record.
emit typed_outputs;
}

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy head -n 0 then put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 0,
"a_sum": 0,
"a_sum2": 0,
"a_sum3": 0,
"a_sum4": 0,
"a_mean": "",
"a_var": "",
"a_stddev": "",
"a_meaneb": "",
"a_skewness": "",
"a_kurtosis": "",
"m_count": 0,
"m_sum": 0,
"m_sum2": 0,
"m_sum3": 0,
"m_sum4": 0,
"m_mean": "",
"m_var": "",
"m_stddev": "",
"m_meaneb": "",
"m_skewness": "",
"m_kurtosis": ""
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy head -n 1 then put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 1,
"a_sum": 1,
"a_sum2": 1,
"a_sum3": 1,
"a_sum4": 1,
"a_mean": 1,
"a_var": "",
"a_stddev": "",
"a_meaneb": "",
"a_skewness": "",
"a_kurtosis": "",
"m_count": 1,
"m_sum": 1,
"m_sum2": 1,
"m_sum3": 1,
"m_sum4": 1,
"m_mean": 1,
"m_var": "",
"m_stddev": "",
"m_meaneb": "",
"m_skewness": "",
"m_kurtosis": ""
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy head -n 2 then put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 2,
"a_sum": 3,
"a_sum2": 5,
"a_sum3": 9,
"a_sum4": 17,
"a_mean": 1.500000,
"a_var": 0.500000,
"a_stddev": 0.707107,
"a_meaneb": 0.500000,
"a_skewness": 0.000000,
"a_kurtosis": -2.000000,
"m_count": 2,
"m_sum": 3,
"m_sum2": 5,
"m_sum3": 9,
"m_sum4": 17,
"m_mean": 1.500000,
"m_var": 0.500000,
"m_stddev": 0.707107,
"m_meaneb": 0.500000,
"m_skewness": 0.000000,
"m_kurtosis": -2.000000
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy head -n 3 then put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 3,
"a_sum": 6,
"a_sum2": 14,
"a_sum3": 36,
"a_sum4": 98,
"a_mean": 2,
"a_var": 1.000000,
"a_stddev": 1.000000,
"a_meaneb": 0.577350,
"a_skewness": 0.000000,
"a_kurtosis": -1.500000,
"m_count": 3,
"m_sum": 6,
"m_sum2": 14,
"m_sum3": 36,
"m_sum4": 98,
"m_mean": 2,
"m_var": 1.000000,
"m_stddev": 1.000000,
"m_meaneb": 0.577350,
"m_skewness": 0.000000,
"m_kurtosis": -1.500000
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy head -n 4 then put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 4,
"a_sum": 10,
"a_sum2": 30,
"a_sum3": 100,
"a_sum4": 354,
"a_mean": 2.500000,
"a_var": 1.666667,
"a_stddev": 1.290994,
"a_meaneb": 0.645497,
"a_skewness": 0.000000,
"a_kurtosis": -1.360000,
"m_count": 4,
"m_sum": 10,
"m_sum2": 30,
"m_sum3": 100,
"m_sum4": 354,
"m_mean": 2.500000,
"m_var": 1.666667,
"m_stddev": 1.290994,
"m_meaneb": 0.645497,
"m_skewness": 0.000000,
"m_kurtosis": -1.360000
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --from test/input/abixy put -q -f test/input/test-moments.mlr

View file

@ -0,0 +1,26 @@
[
{
"a_count": 10,
"a_sum": 55,
"a_sum2": 385,
"a_sum3": 3025,
"a_sum4": 25333,
"a_mean": 5.500000,
"a_var": 9.166667,
"a_stddev": 3.027650,
"a_meaneb": 0.957427,
"a_skewness": 0.000000,
"a_kurtosis": -1.224242,
"m_count": 10,
"m_sum": 55,
"m_sum2": 385,
"m_sum3": 3025,
"m_sum4": 25333,
"m_mean": 5.500000,
"m_var": 9.166667,
"m_stddev": 3.027650,
"m_meaneb": 0.957427,
"m_skewness": 0.000000,
"m_kurtosis": -1.224242
}
]

View file

@ -0,0 +1 @@
mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr

View file

@ -0,0 +1,20 @@
null_count_0 (error)
null_count_0_type error
null_count_null (error)
null_count_null_type error
null_count_empty_array 0
null_count_empty_array_type int
null_count_array_1 0
null_count_array_1_type int
null_count_array_2 0
null_count_array_2_type int
null_count_array_3 2
null_count_array_3_type int
null_count_empty_map 0
null_count_empty_map_type int
null_count_map_1 0
null_count_map_1_type int
null_count_map_2 0
null_count_map_2_type int
null_count_map_3 2
null_count_map_3_type int

View file

@ -0,0 +1,28 @@
# Regression-test case for the null_count() DSL function.
# Calls null_count() on a scalar, a null, an unassigned local, several arrays
# and several maps, then emits every result next to its typeof().
end {
outputs = {};
# Only empty string and JSON-null count as nulls
# Non-collection arguments: the accompanying expected output shows (error)
# with type "error" for both of these.
outputs["null_count_0"] = null_count(0);
outputs["null_count_null"] = null_count(null);
# nonesuch is never assigned anywhere in this script. The accompanying
# expected output has no null_count_nonesuch entry -- presumably the result
# is absent and so this assignment is skipped; confirm.
outputs["null_count_nonesuch"] = null_count(nonesuch);
# Collections holding no empty/null values: the expected results are all 0.
outputs["null_count_empty_array"] = null_count([]);
outputs["null_count_array_1"] = null_count([7]);
outputs["null_count_array_2"] = null_count([7,8]);
# The expected result is 2: the empty string and null are counted, and the
# unassigned nonesuch is not.
outputs["null_count_array_3"] = null_count(["",null,nonesuch]);
# Maps: the same value patterns as the array cases, and the expected
# results match the array results.
outputs["null_count_empty_map"] = null_count({});
outputs["null_count_map_1"] = null_count({ "a" : 7});
outputs["null_count_map_2"] = null_count({ "a" : 7, "b" : 8 });
outputs["null_count_map_3"] = null_count({ "a" : "", "b" : null, "c" : nonesuch });
typed_outputs = {};
# For each result, store the value under its own key and then the type name
# under "<key>_type". The order of these two assignments fixes the order of
# the fields in the emitted record.
for (k, v in outputs) {
typed_outputs[k] = v;
typed_outputs[k."_type"] = typeof(v);
}
# Emit all value/type pairs as one record.
emit typed_outputs;
}

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "",
"a_max": "",
"a_minlen": "",
"a_maxlen": "",
"a_median": "",
"a_ps": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"a_psi": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"a_psa": ["", "", "", "", "", "", "", "", ""],
"a_psia": ["", "", "", "", "", "", "", "", ""],
"m_min": "",
"m_max": "",
"m_minlen": "",
"m_maxlen": "",
"m_median": "",
"m_ps": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"m_psi": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"m_psa": ["", "", "", "", "", "", "", "", ""],
"m_psia": ["", "", "", "", "", "", "", "", ""]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "pan",
"a_max": "pan",
"a_minlen": 3,
"a_maxlen": 3,
"a_median": "pan",
"a_ps": {
"0": "pan",
"1": "pan",
"10": "pan",
"25": "pan",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"a_psi": {
"0": "pan",
"1": "pan",
"10": "pan",
"25": "pan",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"a_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
"a_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
"m_min": "pan",
"m_max": "pan",
"m_minlen": 3,
"m_maxlen": 3,
"m_median": "pan",
"m_ps": {
"0": "pan",
"1": "pan",
"10": "pan",
"25": "pan",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"m_psi": {
"0": "pan",
"1": "pan",
"10": "pan",
"25": "pan",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"m_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
"m_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "eks",
"a_max": "pan",
"a_minlen": 3,
"a_maxlen": 3,
"a_median": "pan",
"a_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"a_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "pan"
},
"a_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
"a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"],
"m_min": "eks",
"m_max": "pan",
"m_minlen": 3,
"m_maxlen": 3,
"m_median": "pan",
"m_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "pan",
"90": "pan",
"99": "pan",
"100": "pan"
},
"m_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "pan"
},
"m_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
"m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "eks",
"a_max": "wye",
"a_minlen": 3,
"a_maxlen": 3,
"a_median": "pan",
"a_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "wye",
"90": "wye",
"99": "wye",
"100": "wye"
},
"a_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "wye"
},
"a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
"a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
"m_min": "eks",
"m_max": "wye",
"m_minlen": 3,
"m_maxlen": 3,
"m_median": "pan",
"m_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "wye",
"90": "wye",
"99": "wye",
"100": "wye"
},
"m_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "wye"
},
"m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
"m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "eks",
"a_max": "wye",
"a_minlen": 3,
"a_maxlen": 3,
"a_median": "pan",
"a_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "wye",
"90": "wye",
"99": "wye",
"100": "wye"
},
"a_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "wye"
},
"a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
"a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
"m_min": "eks",
"m_max": "wye",
"m_minlen": 3,
"m_maxlen": 3,
"m_median": "pan",
"m_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "eks",
"50": "pan",
"75": "wye",
"90": "wye",
"99": "wye",
"100": "wye"
},
"m_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "wye"
},
"m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
"m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=a

View file

@ -0,0 +1,62 @@
[
{
"a_min": "eks",
"a_max": "zee",
"a_minlen": 3,
"a_maxlen": 3,
"a_median": "pan",
"a_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "hat",
"50": "pan",
"75": "wye",
"90": "zee",
"99": "zee",
"100": "zee"
},
"a_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "zee"
},
"a_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
"a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"],
"m_min": "eks",
"m_max": "zee",
"m_minlen": 3,
"m_maxlen": 3,
"m_median": "pan",
"m_ps": {
"0": "eks",
"1": "eks",
"10": "eks",
"25": "hat",
"50": "pan",
"75": "wye",
"90": "zee",
"99": "zee",
"100": "zee"
},
"m_psi": {
"0": (error),
"1": (error),
"10": (error),
"25": (error),
"50": (error),
"75": (error),
"90": (error),
"99": (error),
"100": "zee"
},
"m_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
"m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": "",
"a_max": "",
"a_minlen": "",
"a_maxlen": "",
"a_median": "",
"a_ps": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"a_psi": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"a_psa": ["", "", "", "", "", "", "", "", ""],
"a_psia": ["", "", "", "", "", "", "", "", ""],
"m_min": "",
"m_max": "",
"m_minlen": "",
"m_maxlen": "",
"m_median": "",
"m_ps": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"m_psi": {
"0": "",
"1": "",
"10": "",
"25": "",
"50": "",
"75": "",
"90": "",
"99": "",
"100": ""
},
"m_psa": ["", "", "", "", "", "", "", "", ""],
"m_psia": ["", "", "", "", "", "", "", "", ""]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": 1,
"a_max": 1,
"a_minlen": 1,
"a_maxlen": 1,
"a_median": 1,
"a_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 1,
"75": 1,
"90": 1,
"99": 1,
"100": 1
},
"a_psi": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 1,
"75": 1,
"90": 1,
"99": 1,
"100": 1
},
"a_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
"a_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1],
"m_min": 1,
"m_max": 1,
"m_minlen": 1,
"m_maxlen": 1,
"m_median": 1,
"m_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 1,
"75": 1,
"90": 1,
"99": 1,
"100": 1
},
"m_psi": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 1,
"75": 1,
"90": 1,
"99": 1,
"100": 1
},
"m_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
"m_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": 1,
"a_max": 2,
"a_minlen": 1,
"a_maxlen": 1,
"a_median": 2,
"a_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 2,
"75": 2,
"90": 2,
"99": 2,
"100": 2
},
"a_psi": {
"0": 1.000000,
"1": 1.010000,
"10": 1.100000,
"25": 1.250000,
"50": 1.500000,
"75": 1.750000,
"90": 1.900000,
"99": 1.990000,
"100": 2
},
"a_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
"a_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2],
"m_min": 1,
"m_max": 2,
"m_minlen": 1,
"m_maxlen": 1,
"m_median": 2,
"m_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 2,
"75": 2,
"90": 2,
"99": 2,
"100": 2
},
"m_psi": {
"0": 1.000000,
"1": 1.010000,
"10": 1.100000,
"25": 1.250000,
"50": 1.500000,
"75": 1.750000,
"90": 1.900000,
"99": 1.990000,
"100": 2
},
"m_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
"m_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": 1,
"a_max": 3,
"a_minlen": 1,
"a_maxlen": 1,
"a_median": 2,
"a_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 2,
"75": 3,
"90": 3,
"99": 3,
"100": 3
},
"a_psi": {
"0": 1.000000,
"1": 1.020000,
"10": 1.200000,
"25": 1.500000,
"50": 2.000000,
"75": 2.500000,
"90": 2.800000,
"99": 2.980000,
"100": 3
},
"a_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
"a_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3],
"m_min": 1,
"m_max": 3,
"m_minlen": 1,
"m_maxlen": 1,
"m_median": 2,
"m_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 1,
"50": 2,
"75": 3,
"90": 3,
"99": 3,
"100": 3
},
"m_psi": {
"0": 1.000000,
"1": 1.020000,
"10": 1.200000,
"25": 1.500000,
"50": 2.000000,
"75": 2.500000,
"90": 2.800000,
"99": 2.980000,
"100": 3
},
"m_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
"m_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": 1,
"a_max": 4,
"a_minlen": 1,
"a_maxlen": 1,
"a_median": 3,
"a_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 2,
"50": 3,
"75": 4,
"90": 4,
"99": 4,
"100": 4
},
"a_psi": {
"0": 1.000000,
"1": 1.030000,
"10": 1.300000,
"25": 1.750000,
"50": 2.500000,
"75": 3.250000,
"90": 3.700000,
"99": 3.970000,
"100": 4
},
"a_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
"a_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4],
"m_min": 1,
"m_max": 4,
"m_minlen": 1,
"m_maxlen": 1,
"m_median": 3,
"m_ps": {
"0": 1,
"1": 1,
"10": 1,
"25": 2,
"50": 3,
"75": 4,
"90": 4,
"99": 4,
"100": 4
},
"m_psi": {
"0": 1.000000,
"1": 1.030000,
"10": 1.300000,
"25": 1.750000,
"50": 2.500000,
"75": 3.250000,
"90": 3.700000,
"99": 3.970000,
"100": 4
},
"m_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
"m_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4]
}
]

View file

@ -0,0 +1 @@
mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=i

View file

@ -0,0 +1,62 @@
[
{
"a_min": 1,
"a_max": 10000,
"a_minlen": 1,
"a_maxlen": 5,
"a_median": 5001,
"a_ps": {
"0": 1,
"1": 101,
"10": 1001,
"25": 2501,
"50": 5001,
"75": 7501,
"90": 9001,
"99": 9901,
"100": 10000
},
"a_psi": {
"0": 1.000000,
"1": 100.990000,
"10": 1000.900000,
"25": 2500.750000,
"50": 5000.500000,
"75": 7500.250000,
"90": 9000.100000,
"99": 9900.010000,
"100": 10000
},
"a_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
"a_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000],
"m_min": 1,
"m_max": 10000,
"m_minlen": 1,
"m_maxlen": 5,
"m_median": 5001,
"m_ps": {
"0": 1,
"1": 101,
"10": 1001,
"25": 2501,
"50": 5001,
"75": 7501,
"90": 9001,
"99": 9901,
"100": 10000
},
"m_psi": {
"0": 1.000000,
"1": 100.990000,
"10": 1000.900000,
"25": 2500.750000,
"50": 5000.500000,
"75": 7500.250000,
"90": 9000.100000,
"99": 9900.010000,
"100": 10000
},
"m_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
"m_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000]
}
]

View file

@ -0,0 +1 @@
Coverage via unit-test framework, not regression-test framework

View file

@ -0,0 +1,39 @@
# Shared test script for the moment-style stats DSL functions (count, sum,
# sum2, sum3, sum4, mean, variance, stddev, meaneb, skewness, kurtosis).
# It collects one field from every record into both an array and a map, then
# computes the same statistics over each so the two can be compared.
begin {
# Accumulators: @a is an array, @m is a map; both are filled per record below.
@a = [];
@m = {};
# Name of the input field to collect; fixed to "i" in this script.
@field = "i";
}
# Main block: store the field's value under the current record number NR
# in both accumulators.
@a[NR] = $[@field];
@m[NR] = $[@field];
end {
# Keys prefixed a_ are computed from the array, keys prefixed m_ from the map.
# The key order in this literal is the field order of the emitted record.
outputs = {
"a_count": count(@a),
"a_sum": sum(@a),
"a_sum2": sum2(@a),
"a_sum3": sum3(@a),
"a_sum4": sum4(@a),
"a_mean": mean(@a),
"a_var": variance(@a),
"a_stddev": stddev(@a),
"a_meaneb": meaneb(@a),
"a_skewness": skewness(@a),
"a_kurtosis": kurtosis(@a),
"m_count": count(@m),
"m_sum": sum(@m),
"m_sum2": sum2(@m),
"m_sum3": sum3(@m),
"m_sum4": sum4(@m),
"m_mean": mean(@m),
"m_var": variance(@m),
"m_stddev": stddev(@m),
"m_meaneb": meaneb(@m),
"m_skewness": skewness(@m),
"m_kurtosis": kurtosis(@m),
};
# Emit all statistics as one record.
emit outputs;
}

View file

@ -0,0 +1,44 @@
# Shared test script for min, max, minlen, maxlen, median and percentiles.
# It collects one field from every record into both an array and a map, then
# computes the same statistics over each so the two can be compared.
begin {
# Accumulators: @a is an array, @m is a map; both are filled per record below.
@a = [];
@m = {};
# @field must be given by put -s field=namegoeshere in the script invocation.
# This lets us test percentiles over various field names/types while re-using
# this same script.
}
# Main block: store the selected field's value under the current record
# number NR in both accumulators.
@a[NR] = $[@field];
@m[NR] = $[@field];
end {
# Keys prefixed a_ are computed from the array, keys prefixed m_ from the map.
# Suffixes: _ps  = percentiles() with no options map;
#           _psi = with interpolate_linearly set;
#           _psa = with output_array_not_map set;
#           _psia = with both options set.
# The key order in this literal is the field order of the emitted record.
outputs = {
"a_min": min(@a),
"a_max": max(@a),
"a_minlen": minlen(@a),
"a_maxlen": maxlen(@a),
"a_median": median(@a),
"a_ps": percentiles(@a, [0,1,10,25,50,75,90,99,100]),
"a_psi": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),
"a_psa": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),
"a_psia": percentiles(@a, [0,1,10,25,50,75,90,99,100], {
"interpolate_linearly": true,
"output_array_not_map":true,
}),
"m_min": min(@m),
"m_max": max(@m),
"m_minlen": minlen(@m),
"m_maxlen": maxlen(@m),
"m_median": median(@m),
"m_ps": percentiles(@m, [0,1,10,25,50,75,90,99,100]),
"m_psi": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),
"m_psa": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),
"m_psia": percentiles(@m, [0,1,10,25,50,75,90,99,100], {
"interpolate_linearly": true,
"output_array_not_map":true,
}),
};
# Emit all statistics as one record.
emit outputs;
}