From 077fc3702dd617c33bf03afa9704f64e068ee37f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 16:41:37 -0400 Subject: [PATCH] more doc-neatens for percentiles on-line help --- Makefile | 3 ++- docs/src/reference-dsl-builtin-functions.md | 26 +++++++++++-------- .../pkg/dsl/cst/builtin_function_manager.go | 15 ++++++++--- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index b5d7a6760..60c7ce578 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,8 @@ dev: make -C docs @echo DONE -docs: +docs: build + make -C docs/src forcebuild make -C docs # ---------------------------------------------------------------- diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 34a420f0d..718e78e49 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1086,8 +1086,7 @@ percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" percentiles (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. 
Examples: -Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort -the input before computing percentiles: +Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -1096,36 +1095,41 @@ Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] -Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces -,error on string inputs: +Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } -The percentiles function always sorts its inputs before computing percentiles. If you know your input -is already sorted -- see also the sort_collection function -- then computation will be faster on -large input if you pass in "array_is_sorted": +The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, "ais":true}) diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index ef474345a..2ea781c98 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1187,28 +1187,37 @@ is normally distributed.`, ``, ` percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`, ``, - `The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted":`, + `The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"):`, ``, ` x = [6,5,9,10,4,3]`, - ` percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect`, + ` percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect`, ` x = sort_collection(x)`, - ` percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct`, + ` percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct`, ``, `You can also leverage this feature to compute percentiles on a sort of your choosing. For example:`, ``, ` Non-sorted input:`, + ``, ` x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")`, ` x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]`, + ``, ` Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99:`, + ``, ` percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]`, + ``, ` With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:`, + ``, ` percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`, + ``, ` With default sorting done outside percentiles, the same:`, + ``, ` x = sort(x) # or x = sort_collection(x)`, ` x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]`, ` percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]`, ` percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`, + ``, ` Now sorting by word 
length, "loquaciously" is longest and hence is the p99:`, + ``, ` x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )`, ` x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]`, ` percentiles(x, [50, 99], {"oa":true, "ais":true})`,