mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 18:25:45 +00:00
54 lines
1.3 KiB
Markdown
54 lines
1.3 KiB
Markdown
# Statistics examples
|
|
|
|
## Computing interquartile ranges
|
|
|
|
For one or more specified field names, simply compute p25 and p75, then write the IQR as the difference of p75 and p25:
|
|
|
|
GENMD_RUN_COMMAND
|
|
mlr --oxtab stats1 -f x -a p25,p75 \
|
|
then put '$x_iqr = $x_p75 - $x_p25' \
|
|
data/medium
|
|
GENMD_EOF
|
|
|
|
For wildcarded field names, first compute p25 and p75, then loop over the resulting field names containing `_p25`, writing each IQR as the difference of the matching `_p75` and `_p25` fields:
|
|
|
|
GENMD_RUN_COMMAND
|
|
mlr --oxtab stats1 --fr '[i-z]' -a p25,p75 \
|
|
then put 'for (k,v in $*) {
|
|
if (k =~ "(.*)_p25") {
|
|
$["\1_iqr"] = $["\1_p75"] - $["\1_p25"]
|
|
}
|
|
}' \
|
|
data/medium
|
|
GENMD_EOF
|
|
|
|
## Computing weighted means
|
|
|
|
A weighted mean might be more elegantly implemented as an option within the `stats1` verb. Meanwhile, it's expressible within the DSL:
|
|
|
|
GENMD_RUN_COMMAND
|
|
mlr --from data/medium put -q '
|
|
# Using the y field for weighting in this example
|
|
weight = $y;
|
|
|
|
# Grouping by the a field and averaging the i field in this example
|
|
@sumwx[$a] += weight * $i;
|
|
@sumw[$a] += weight;
|
|
|
|
@sumx[$a] += $i;
|
|
@sumn[$a] += 1;
|
|
|
|
end {
|
|
map wmean = {};
|
|
map mean = {};
|
|
for (a in @sumwx) {
|
|
wmean[a] = @sumwx[a] / @sumw[a]
|
|
}
|
|
for (a in @sumx) {
|
|
mean[a] = @sumx[a] / @sumn[a]
|
|
}
|
|
#emit wmean, "a";
|
|
#emit mean, "a";
|
|
emit (wmean, mean), "a";
|
|
}'
|
|
GENMD_EOF
|