From e158e1c616f9ee343b94255207c8b9347969e86e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 4 Jun 2023 16:38:18 -0400 Subject: [PATCH 001/456] post-6.8.0 --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- internal/pkg/version/version.go | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index d1d2e3e3d..725906a36 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -50,7 +50,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.8.0. + manpage documents mlr 6.8.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index f5952e8e0..6a726e413 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.8.0. + manpage documents mlr 6.8.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/internal/pkg/version/version.go b/internal/pkg/version/version.go index f36ee4fcf..3072110fa 100644 --- a/internal/pkg/version/version.go +++ b/internal/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. 
-var STRING string = "6.8.0" +var STRING string = "6.8.0-dev" diff --git a/man/manpage.txt b/man/manpage.txt index f5952e8e0..6a726e413 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.8.0. + manpage documents mlr 6.8.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index 51ee6b081..18909ad91 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.8.0. +a special case.) This manpage documents mlr 6.8.0-dev. 
.SH "EXAMPLES" .sp From 21fb5f9cd6c57a845bc5edd934d7e38ade04a00e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 4 Jun 2023 17:12:57 -0400 Subject: [PATCH 002/456] release 6.8.0 docs --- docs/src/release-docs.md | 1 + docs/src/release-docs.md.in | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index de6e3ad5a..f2542f3eb 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -23,6 +23,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| +6.8.0 | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0) | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) | 6.7.0 | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0) | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) | 6.6.0 | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0) | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) | 6.5.0 | [Miller 6.5.0](https://miller.readthedocs.io/en/6.5.0) | [Bugfixes and memory-reduction optimizations](https://github.com/johnkerl/miller/releases/tag/v6.5.0) | diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index 933527921..d67d59161 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -7,6 +7,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| +6.8.0 | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0) | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) | 6.7.0 | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0) | [New 
leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) | 6.6.0 | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0) | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) | 6.5.0 | [Miller 6.5.0](https://miller.readthedocs.io/en/6.5.0) | [Bugfixes and memory-reduction optimizations](https://github.com/johnkerl/miller/releases/tag/v6.5.0) | From ab4705ab7a45c0629d41c280d02ad472ee3fd8b1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 4 Jun 2023 17:53:42 -0400 Subject: [PATCH 003/456] Update readthedocs notes in the how-to-release page (#1308) --- docs/src/how-to-release.md | 11 +++++++++-- docs/src/how-to-release.md.in | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md index bce2f83a9..4c7e97628 100644 --- a/docs/src/how-to-release.md +++ b/docs/src/how-to-release.md @@ -45,9 +45,16 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. * Then mark the release as public. -* Check the release-specific docs: +* Build the release-specific docs: - * Look at [https://miller.readthedocs.io](https://miller.readthedocs.io) for new-version docs, after a few minutes' propagation time. + * Note: the GitHub release above created a tag `v6.3.0` which is correct. Here we'll create a branch named `6.3.0` which is also correct. + * Create a branch `6.3.0` (not `v6.3.0`). Locally: `git checkout -b 6.3.0`, then `git push`. + * Edit `docs/mkdocs.yml`, replacing "Miller Dev Documentation" with "Miller 6.3.0 Documentation". Commit and push. 
+ * At the Miller Read the Docs admin page, [https://readthedocs.org/projects/miller](https://readthedocs.org/projects/miller), in the Versions tab, scroll down to _Activate a version_, then activate 6.3.0. + * In the Admin tab, in Advanced Settings, set the Default Version and Default Branch both to 6.3.0. Scroll to the end of the page and poke Save. + * In the Builds tab, if they're not already building, build 6.3.0 as well as latest. + * Verify that [https://miller.readthedocs.io/en/6.3.0](https://miller.readthedocs.io/en/6.3.0) now exists. + * Verify that [https://miller.readthedocs.io/en/latest](https://miller.readthedocs.io/en/latest) (with hard page-reload) shows _Miller 6.3.0 Documentation_ in the upper left of the doc pages. * Notify: diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in index eab84fb23..ad26704cf 100644 --- a/docs/src/how-to-release.md.in +++ b/docs/src/how-to-release.md.in @@ -29,9 +29,16 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. * Then mark the release as public. -* Check the release-specific docs: +* Build the release-specific docs: - * Look at [https://miller.readthedocs.io](https://miller.readthedocs.io) for new-version docs, after a few minutes' propagation time. + * Note: the GitHub release above created a tag `v6.3.0` which is correct. Here we'll create a branch named `6.3.0` which is also correct. + * Create a branch `6.3.0` (not `v6.3.0`). Locally: `git checkout -b 6.3.0`, then `git push`. + * Edit `docs/mkdocs.yml`, replacing "Miller Dev Documentation" with "Miller 6.3.0 Documentation". Commit and push. 
+ * At the Miller Read the Docs admin page, [https://readthedocs.org/projects/miller](https://readthedocs.org/projects/miller), in the Versions tab, scroll down to _Activate a version_, then activate 6.3.0. + * In the Admin tab, in Advanced Settings, set the Default Version and Default Branch both to 6.3.0. Scroll to the end of the page and poke Save. + * In the Builds tab, if they're not already building, build 6.3.0 as well as latest. + * Verify that [https://miller.readthedocs.io/en/6.3.0](https://miller.readthedocs.io/en/6.3.0) now exists. + * Verify that [https://miller.readthedocs.io/en/latest](https://miller.readthedocs.io/en/latest) (with hard page-reload) shows _Miller 6.3.0 Documentation_ in the upper left of the doc pages. * Notify: From 4050f566fa0e5aac55f055649368c068e6815a49 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 4 Jun 2023 18:01:03 -0400 Subject: [PATCH 004/456] fix mis-spelling for head docs --- docs/src/release-docs.md | 2 +- docs/src/release-docs.md.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index f2542f3eb..868e25337 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -17,7 +17,7 @@ Quick links: # Documents for releases If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- -the page [https://miller.readthedocs.io](https://miller.readthedocs.io) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). +the page [https://miller.readthedocs.io/en/main](https://miller.readthedocs.io/en/main) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). 
If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, without the `-dev` suffix, you're likely using a Miller executable from a package manager -- please see below for the documentation for Miller as of the release you're using. diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index d67d59161..b98058192 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -1,7 +1,7 @@ # Documents for releases If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- -the page [https://miller.readthedocs.io](https://miller.readthedocs.io) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). +the page [https://miller.readthedocs.io/en/main](https://miller.readthedocs.io/en/main) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, without the `-dev` suffix, you're likely using a Miller executable from a package manager -- please see below for the documentation for Miller as of the release you're using. 
From c5ceb20a4e6887e535269d5ce2b140ee12bfbd0d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 6 Jun 2023 00:18:51 -0400 Subject: [PATCH 005/456] Fix `mlr grep` docs re OFS/OPS (#1309) * Fix `mlr grep` doc re OFS/OPS * make-dev artifacts --- docs/src/manpage.md | 16 ++++++++-------- docs/src/manpage.txt | 16 ++++++++-------- docs/src/reference-verbs.md | 14 +++++++------- internal/pkg/transformers/grep.go | 14 +++++++------- man/manpage.txt | 16 ++++++++-------- man/mlr.1 | 18 +++++++++--------- test/cases/cli-help/0001/expout | 14 +++++++------- 7 files changed, 54 insertions(+), 54 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 725906a36..3b15b2493 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1210,13 +1210,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". 
Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -3354,5 +3354,5 @@ MILLER(1) MILLER(1) - 2023-06-04 MILLER(1) + 2023-06-06 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 6a726e413..8db971e79 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1189,13 +1189,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 
1mgroup-by0m @@ -3333,4 +3333,4 @@ MILLER(1) MILLER(1) - 2023-06-04 MILLER(1) + 2023-06-06 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index be11f8903..27463e333 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1355,13 +1355,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." diff --git a/internal/pkg/transformers/grep.go b/internal/pkg/transformers/grep.go index 6e692364a..5f3f217fd 100644 --- a/internal/pkg/transformers/grep.go +++ b/internal/pkg/transformers/grep.go @@ -36,13 +36,13 @@ func transformerGrepUsage( fmt.Fprintf(o, `Note that "%s filter" is more powerful, but requires you to know field names. 
By contrast, "%s grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "%s --odkvp ... | grep ... | %s --idkvp ..." `, "mlr", "mlr", "mlr", "mlr") } diff --git a/man/manpage.txt b/man/manpage.txt index 6a726e413..8db971e79 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1189,13 +1189,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. 
In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -3333,4 +3333,4 @@ MILLER(1) MILLER(1) - 2023-06-04 MILLER(1) + 2023-06-06 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 18909ad91..0fa01052d 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-06-04 +.\" Date: 2023-06-06 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-06-04" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-06-06" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1480,13 +1480,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. 
It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." .fi .if n \{\ diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 45dc38c63..a03731513 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -353,13 +353,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". 
Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." ================================================================ From d5c03e8a8b943de788ebed7a0afee15df1e58a4c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 08:27:54 -0400 Subject: [PATCH 006/456] Bump actions/checkout from 3.5.2 to 3.5.3 (#1319) Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.2 to 3.5.3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8e5e7e5ab8b370d6c329ec480221332ada57f0ab...c85c95e3d7251135ab7dc9ce3241c5835cc595a9) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index e65c0dc21..9fc41f704 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 56fac440b..f5d68e119 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index e119563ba..0faa6a934 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 - name: Set up Go uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 726ba0f51..b920ff19f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out 
code into the Go module directory - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 with: fetch-depth: 0 From adeab1153ba1144a2a0305db423cd28f48818f35 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 08:28:12 -0400 Subject: [PATCH 007/456] Bump github/codeql-action from 2.3.6 to 2.13.4 (#1318) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.3.6 to 2.13.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/83f0fe6c4988d98a455712a27f0255212bba9bd4...cdcdbb579706841c47f7063dda365e292e5cad7a) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 9fc41f704..c26776f08 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@83f0fe6c4988d98a455712a27f0255212bba9bd4 + uses: github/codeql-action/init@cdcdbb579706841c47f7063dda365e292e5cad7a with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@83f0fe6c4988d98a455712a27f0255212bba9bd4 + uses: github/codeql-action/autobuild@cdcdbb579706841c47f7063dda365e292e5cad7a # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@83f0fe6c4988d98a455712a27f0255212bba9bd4 + uses: github/codeql-action/analyze@cdcdbb579706841c47f7063dda365e292e5cad7a From be68f5fc902bc031caa6d9ae9e1e1808796c26ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 09:40:20 -0400 Subject: [PATCH 008/456] Bump golang.org/x/term from 0.8.0 to 0.9.0 (#1321) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.8.0 to 0.9.0. - [Commits](https://github.com/golang/term/compare/v0.8.0...v0.9.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 45d3e144a..68e0489bb 100644 --- a/go.mod +++ b/go.mod @@ -25,8 +25,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.8.0 - golang.org/x/term v0.8.0 + golang.org/x/sys v0.9.0 + golang.org/x/term v0.9.0 golang.org/x/text v0.9.0 ) diff --git a/go.sum b/go.sum index 8c5fe4b45..268bee40a 100644 --- a/go.sum +++ b/go.sum @@ -38,10 +38,10 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= +golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= +golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 2086c154fd53cac6c7c6ac853fc3d6e7c404f214 Mon Sep 17 00:00:00 2001 From: 
"dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 09:41:22 -0400 Subject: [PATCH 009/456] Bump goreleaser/goreleaser-action from 4.2.0 to 4.3.0 (#1320) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 4.2.0 to 4.3.0. - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/f82d6c1c344bcacabba2c841718984797f664a6b...336e29918d653399e599bfca99fadc1d7ffbc9f7) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b920ff19f..44b46b8b1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@f82d6c1c344bcacabba2c841718984797f664a6b + uses: goreleaser/goreleaser-action@336e29918d653399e599bfca99fadc1d7ffbc9f7 #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From 4c0731d395eb82140cea981670de691c3d62ef15 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 10:01:02 -0400 Subject: [PATCH 010/456] Bump golang.org/x/text from 0.9.0 to 0.10.0 (#1322) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.9.0 to 0.10.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.9.0...v0.10.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 68e0489bb..6ef5e3c59 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.9.0 golang.org/x/term v0.9.0 - golang.org/x/text v0.9.0 + golang.org/x/text v0.10.0 ) require ( diff --git a/go.sum b/go.sum index 268bee40a..aca9a90a6 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= +golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From d72ef826fb5ecc41a4cde0d0fcb2402082b83ca1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 24 Jun 2023 17:05:15 -0400 Subject: [PATCH 011/456] Add DSL functions for integer nanoseconds since the epoch (#1326) * DSL functions for 64-bit nano-epoch timestamps * strfntime * nsec2gmt; move sec/nsec pairs adjacent to one another * update on-line help * artifacts from `make dev` * unit-test files --- docs/src/manpage.md | 126 ++++++-- docs/src/manpage.txt | 126 ++++++-- 
docs/src/online-help.md | 11 + docs/src/reference-dsl-builtin-functions.md | 116 +++++++- docs/src/reference-verbs.md | 14 +- internal/pkg/bifs/datetime.go | 281 +++++++++++++++--- .../pkg/dsl/cst/builtin_function_manager.go | 175 ++++++++++- internal/pkg/lib/time.go | 94 +++++- internal/pkg/lib/time_test.go | 101 +++++++ man/manpage.txt | 126 ++++++-- man/mlr.1 | 200 +++++++++++-- .../dsl-gmt-date-time-functions/0005n/cmd | 1 + .../dsl-gmt-date-time-functions/0005n/experr | 0 .../dsl-gmt-date-time-functions/0005n/expout | 29 ++ .../dsl-gmt-date-time-functions/0005n/mlr | 1 + .../dsl-gmt-date-time-functions/0011n/cmd | 1 + .../dsl-gmt-date-time-functions/0011n/experr | 0 .../dsl-gmt-date-time-functions/0011n/expout | 29 ++ .../dsl-gmt-date-time-functions/0011n/mlr | 1 + .../0019n/0019/cmd | 1 + .../0019n/0019/experr | 0 .../0019n/0019/expout | 29 ++ .../0019n/0019/mlr | 1 + .../dsl-gmt-date-time-functions/0019n/cmd | 1 + .../dsl-gmt-date-time-functions/0019n/experr | 0 .../dsl-gmt-date-time-functions/0019n/expout | 29 ++ .../dsl-gmt-date-time-functions/0019n/mlr | 1 + .../dsl-gmt-date-time-functions/0020n/cmd | 1 + .../dsl-gmt-date-time-functions/0020n/experr | 0 .../dsl-gmt-date-time-functions/0020n/expout | 10 + .../dsl-gmt-date-time-functions/0020n/mlr | 12 + .../dsl-local-date-time-functions/0003n/cmd | 1 + .../dsl-local-date-time-functions/0003n/env | 1 + .../0003n/experr | 0 .../0003n/expout | 11 + .../dsl-local-date-time-functions/0003n/input | 10 + .../dsl-local-date-time-functions/0003n/mlr | 3 + .../dsl-local-date-time-functions/0004n/cmd | 1 + .../dsl-local-date-time-functions/0004n/env | 1 + .../0004n/experr | 0 .../0004n/expout | 11 + .../dsl-local-date-time-functions/0004n/input | 10 + .../dsl-local-date-time-functions/0004n/mlr | 3 + .../local-with-tzs-nsec/cmd | 1 + .../local-with-tzs-nsec/experr | 0 .../local-with-tzs-nsec/expout | 23 ++ .../local-with-tzs-nsec/mlr | 39 +++ .../strfntime-istanbul/cmd | 1 + .../strfntime-istanbul/experr | 0 
.../strfntime-istanbul/expout | 12 + .../strfntime-sao_paulo/cmd | 1 + .../strfntime-sao_paulo/experr | 0 .../strfntime-sao_paulo/expout | 12 + .../strfntime-utc/cmd | 1 + .../strfntime-utc/experr | 0 .../strfntime-utc/expout | 12 + .../strfntime_local-istanbul/cmd | 1 + .../strfntime_local-istanbul/experr | 0 .../strfntime_local-istanbul/expout | 11 + .../strfntime_local-sao_paulo/cmd | 1 + .../strfntime_local-sao_paulo/experr | 0 .../strfntime_local-sao_paulo/expout | 11 + .../strfntime_local-utc/cmd | 1 + .../strfntime_local-utc/experr | 0 .../strfntime_local-utc/expout | 11 + .../strpntime-istanbul/cmd | 1 + .../strpntime-istanbul/experr | 0 .../strpntime-istanbul/expout | 9 + .../strpntime-j/cmd | 1 + .../strpntime-j/experr | 0 .../strpntime-j/expout | 2 + .../strpntime-j/mlr | 4 + .../strpntime-sao_paulo/cmd | 1 + .../strpntime-sao_paulo/experr | 0 .../strpntime-sao_paulo/expout | 9 + .../strpntime-utc/cmd | 1 + .../strpntime-utc/experr | 0 .../strpntime-utc/expout | 9 + .../strpntime-z/cmd | 1 + .../strpntime-z/experr | 0 .../strpntime-z/expout | 15 + .../strpntime-z/mlr | 17 ++ .../strpntime_local-istanbul/cmd | 1 + .../strpntime_local-istanbul/experr | 0 .../strpntime_local-istanbul/expout | 4 + .../strpntime_local-sao_paulo/cmd | 1 + .../strpntime_local-sao_paulo/experr | 0 .../strpntime_local-sao_paulo/expout | 4 + .../strpntime_local-utc/cmd | 1 + .../strpntime_local-utc/experr | 0 .../strpntime_local-utc/expout | 4 + test/cases/dsl-sec2gmt/0001n/cmd | 1 + test/cases/dsl-sec2gmt/0001n/experr | 0 test/cases/dsl-sec2gmt/0001n/expout | 11 + test/cases/dsl-sec2gmt/0002n/cmd | 1 + test/cases/dsl-sec2gmt/0002n/experr | 0 test/cases/dsl-sec2gmt/0002n/expout | 11 + test/cases/dsl-sec2gmt/0003n/cmd | 1 + test/cases/dsl-sec2gmt/0003n/experr | 0 test/cases/dsl-sec2gmt/0003n/expout | 11 + test/cases/dsl-sec2gmt/0004n/cmd | 1 + test/cases/dsl-sec2gmt/0004n/experr | 0 test/cases/dsl-sec2gmt/0004n/expout | 11 + test/cases/help/0014/expout | 18 ++ 
test/cases/help/0016/expout | 8 + test/cases/repl-help/0014/expout | 18 ++ test/cases/repl-help/0016/expout | 8 + test/input/gmt2nsec | 29 ++ test/input/strfntime-tz.mlr | 17 ++ test/input/strfntime_local-tz.mlr | 17 ++ test/input/strpntime-tz.mlr | 14 + test/input/strpntime_local-tz.mlr | 9 + 112 files changed, 1805 insertions(+), 173 deletions(-) create mode 100644 internal/pkg/lib/time_test.go create mode 100644 test/cases/dsl-gmt-date-time-functions/0005n/cmd create mode 100644 test/cases/dsl-gmt-date-time-functions/0005n/experr create mode 100644 test/cases/dsl-gmt-date-time-functions/0005n/expout create mode 100644 test/cases/dsl-gmt-date-time-functions/0005n/mlr create mode 100644 test/cases/dsl-gmt-date-time-functions/0011n/cmd create mode 100644 test/cases/dsl-gmt-date-time-functions/0011n/experr create mode 100644 test/cases/dsl-gmt-date-time-functions/0011n/expout create mode 100644 test/cases/dsl-gmt-date-time-functions/0011n/mlr create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/0019/cmd create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/0019/experr create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/0019/expout create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/0019/mlr create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/cmd create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/experr create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/expout create mode 100644 test/cases/dsl-gmt-date-time-functions/0019n/mlr create mode 100644 test/cases/dsl-gmt-date-time-functions/0020n/cmd create mode 100644 test/cases/dsl-gmt-date-time-functions/0020n/experr create mode 100644 test/cases/dsl-gmt-date-time-functions/0020n/expout create mode 100644 test/cases/dsl-gmt-date-time-functions/0020n/mlr create mode 100644 test/cases/dsl-local-date-time-functions/0003n/cmd create mode 100644 test/cases/dsl-local-date-time-functions/0003n/env create mode 100644 
test/cases/dsl-local-date-time-functions/0003n/experr create mode 100644 test/cases/dsl-local-date-time-functions/0003n/expout create mode 100644 test/cases/dsl-local-date-time-functions/0003n/input create mode 100644 test/cases/dsl-local-date-time-functions/0003n/mlr create mode 100644 test/cases/dsl-local-date-time-functions/0004n/cmd create mode 100644 test/cases/dsl-local-date-time-functions/0004n/env create mode 100644 test/cases/dsl-local-date-time-functions/0004n/experr create mode 100644 test/cases/dsl-local-date-time-functions/0004n/expout create mode 100644 test/cases/dsl-local-date-time-functions/0004n/input create mode 100644 test/cases/dsl-local-date-time-functions/0004n/mlr create mode 100644 test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/cmd create mode 100644 test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/experr create mode 100644 test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/expout create mode 100644 test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/mlr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-istanbul/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-istanbul/experr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/experr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-utc/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-utc/experr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime-utc/expout create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/experr create mode 100644 
test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/expout create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/experr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/expout create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-utc/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-utc/experr create mode 100644 test/cases/dsl-local-date-time-functions/strfntime_local-utc/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-istanbul/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-istanbul/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-istanbul/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-j/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-j/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-j/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-j/mlr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-utc/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-utc/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-utc/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-z/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-z/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-z/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime-z/mlr create mode 100644 
test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/expout create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-utc/cmd create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-utc/experr create mode 100644 test/cases/dsl-local-date-time-functions/strpntime_local-utc/expout create mode 100644 test/cases/dsl-sec2gmt/0001n/cmd create mode 100644 test/cases/dsl-sec2gmt/0001n/experr create mode 100644 test/cases/dsl-sec2gmt/0001n/expout create mode 100644 test/cases/dsl-sec2gmt/0002n/cmd create mode 100644 test/cases/dsl-sec2gmt/0002n/experr create mode 100644 test/cases/dsl-sec2gmt/0002n/expout create mode 100644 test/cases/dsl-sec2gmt/0003n/cmd create mode 100644 test/cases/dsl-sec2gmt/0003n/experr create mode 100644 test/cases/dsl-sec2gmt/0003n/expout create mode 100644 test/cases/dsl-sec2gmt/0004n/cmd create mode 100644 test/cases/dsl-sec2gmt/0004n/experr create mode 100644 test/cases/dsl-sec2gmt/0004n/expout create mode 100644 test/input/gmt2nsec create mode 100644 test/input/strfntime-tz.mlr create mode 100644 test/input/strfntime_local-tz.mlr create mode 100644 test/input/strpntime-tz.mlr create mode 100644 test/input/strpntime_local-tz.mlr diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 3b15b2493..2f884002f 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -211,21 +211,23 @@ MILLER(1) MILLER(1) capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten 
float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname index - int invqnorm is_absent is_array is_bool is_boolean is_empty is_empty_map - is_error is_float is_int is_map is_nan is_nonempty_map is_not_array - is_not_empty is_not_map is_not_null is_null is_numeric is_present is_string - joink joinkv joinv json_parse json_stringify latin1_to_utf8 leafcount leftpad - length localtime2gmt localtime2sec log log10 log1p logifit lstrip madd mapdiff - mapexcept mapselect mapsum max md5 mexp min mmul msub os pow qnorm reduce - regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt - sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 - sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub - strftime strftime_local string strip strlen strptime strptime_local sub substr - substr0 substr1 system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx uptime urand urand32 urandelement urandint - urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- ./ / // - < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec + hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty + is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present + is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min + mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms + sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 + sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt + ssub strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sysntime system systime systimeint tan tanh tolower toupper truncate typeof + unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- + ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -1210,13 +1212,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - OFS "," and OPS "=", and matching the resulting line against the regex specified - here. 
In particular, the regex is not applied to the input stream: if you have - CSV with header line "x,y,z" and data line "1,2,3" then the regex will be - matched, not against either of these lines, but against the DKVP line - "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, - and this command is intended to be merely a keystroke-saver. To get all the - features of system grep, you can do + command-line-specified ORS/OFS/OPS, and matching the resulting line against the + regex specified here. In particular, the regex is not applied to the input + stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the + regex will be matched, not against either of these lines, but against the DKVP + line "x=1,y=2,z=3". Furthermore, not all the options to system grep are + supported, and this command is intended to be merely a keystroke-saver. To get + all the features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -2353,6 +2355,11 @@ MILLER(1) MILLER(1) gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" + 1mgmt2nsec0m + (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch. + Example: + gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000 + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: @@ -2518,6 +2525,12 @@ MILLER(1) MILLER(1) localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" + 1mlocaltime2nsec0m + (class=time #args=1,2) Parses local timestamp as integer nanoseconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. 
+ Examples: + localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul" + localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000" + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: @@ -2572,6 +2585,32 @@ MILLER(1) MILLER(1) 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) + 1mnsec2gmt0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. + Examples: + nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" + + 1mnsec2gmtdate0m + (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. + Example: + sec2gmtdate(1440768801700000000) = "2015-08-28". + + 1mnsec2localdate0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. + Examples: + nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" + nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" + + 1mnsec2localtime0m + (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part + Examples: + nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mos0m (class=system #args=0) Returns the operating-system name as a string. @@ -2725,6 +2764,21 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstrfntime0m + (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. + Examples: + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z" + + 1mstrfntime_local0m + (class=time #args=2,3) Like strfntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300" + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. Examples: @@ -2747,16 +2801,32 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. + 1mstrpntime0m + (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. + Examples: + strpntime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801000000000 + strpntime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801345000000 + strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000 + strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000 + + 1mstrpntime_local0m + (class=time #args=2,3) Like strpntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strpntime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000 + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000 strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 - strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 + strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200 1mstrptime_local0m - (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. + (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul" @@ -2781,6 +2851,9 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msysntime0m + (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. @@ -2827,6 +2900,9 @@ MILLER(1) MILLER(1) unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. 
+ 1mupntime0m + (class=time #args=0) Returns the time in 64-bit nanoseconds since the current Miller program was started. + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. @@ -3354,5 +3430,5 @@ MILLER(1) MILLER(1) - 2023-06-06 MILLER(1) + 2023-06-24 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 8db971e79..abb828010 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -190,21 +190,23 @@ MILLER(1) MILLER(1) capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname index - int invqnorm is_absent is_array is_bool is_boolean is_empty is_empty_map - is_error is_float is_int is_map is_nan is_nonempty_map is_not_array - is_not_empty is_not_map is_not_null is_null is_numeric is_present is_string - joink joinkv joinv json_parse json_stringify latin1_to_utf8 leafcount leftpad - length localtime2gmt localtime2sec log log10 log1p logifit lstrip madd mapdiff - mapexcept mapselect mapsum max md5 mexp min mmul msub os pow qnorm reduce - regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt - sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 - sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub - strftime strftime_local string strip strlen strptime strptime_local sub substr - substr0 substr1 system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx uptime urand urand32 urandelement urandint - urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- ./ / // - < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec + hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty + is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present + is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min + mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms + sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 + sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt + ssub strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sysntime system systime systimeint tan tanh tolower toupper truncate typeof + unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- + ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -1189,13 +1191,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - OFS "," and OPS "=", and matching the resulting line against the regex specified - here. 
In particular, the regex is not applied to the input stream: if you have - CSV with header line "x,y,z" and data line "1,2,3" then the regex will be - matched, not against either of these lines, but against the DKVP line - "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, - and this command is intended to be merely a keystroke-saver. To get all the - features of system grep, you can do + command-line-specified ORS/OFS/OPS, and matching the resulting line against the + regex specified here. In particular, the regex is not applied to the input + stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the + regex will be matched, not against either of these lines, but against the DKVP + line "x=1,y=2,z=3". Furthermore, not all the options to system grep are + supported, and this command is intended to be merely a keystroke-saver. To get + all the features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -2332,6 +2334,11 @@ MILLER(1) MILLER(1) gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" + 1mgmt2nsec0m + (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch. + Example: + gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000 + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: @@ -2497,6 +2504,12 @@ MILLER(1) MILLER(1) localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" + 1mlocaltime2nsec0m + (class=time #args=1,2) Parses local timestamp as integer nanoseconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. 
+ Examples: + localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul" + localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000" + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: @@ -2551,6 +2564,32 @@ MILLER(1) MILLER(1) 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) + 1mnsec2gmt0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. + Examples: + nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" + + 1mnsec2gmtdate0m + (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. + Example: + sec2gmtdate(1440768801700000000) = "2015-08-28". + + 1mnsec2localdate0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. + Examples: + nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" + nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" + + 1mnsec2localtime0m + (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part + Examples: + nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mos0m (class=system #args=0) Returns the operating-system name as a string. @@ -2704,6 +2743,21 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstrfntime0m + (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. + Examples: + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z" + + 1mstrfntime_local0m + (class=time #args=2,3) Like strfntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300" + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. Examples: @@ -2726,16 +2780,32 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. + 1mstrpntime0m + (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. + Examples: + strpntime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801000000000 + strpntime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801345000000 + strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000 + strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000 + + 1mstrpntime_local0m + (class=time #args=2,3) Like strpntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strpntime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000 + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000 strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 - strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 + strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200 1mstrptime_local0m - (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. + (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul" @@ -2760,6 +2830,9 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msysntime0m + (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. @@ -2806,6 +2879,9 @@ MILLER(1) MILLER(1) unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. 
+ 1mupntime0m + (class=time #args=0) Returns the time in 64-bit nanoseconds since the current Miller program was started. + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. @@ -3333,4 +3409,4 @@ MILLER(1) MILLER(1) - 2023-06-06 MILLER(1) + 2023-06-24 MILLER(1) diff --git a/docs/src/online-help.md b/docs/src/online-help.md index fc6cda292..8318c4f0a 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -143,6 +143,9 @@ gmt2localtime (class=time #args=1,2) Convert from a GMT-time string to a local- Examples: gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" +gmt2nsec (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch. +Example: +gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000 gmt2sec (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 @@ -150,6 +153,14 @@ localtime2gmt (class=time #args=1,2) Convert from a local-time string to a GMT- Examples: localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" +nsec2gmt (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +nsec2gmtdate (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +sec2gmtdate(1440768801700000000) = "2015-08-28". sec2gmt (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. 
Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index ae8f5d517..7bdb1d5bf 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -76,7 +76,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
-* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2sec](#localtime2sec), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strftime](#strftime), [strftime_local](#strftime_local), [strptime](#strptime), [strptime_local](#strptime_local), [systime](#systime), [systimeint](#systimeint), [uptime](#uptime). +* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). 
* [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), [is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). ## Arithmetic functions @@ -1267,6 +1267,14 @@ gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" +### gmt2nsec +
+gmt2nsec  (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch.
+Example:
+gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000
+
+ + ### gmt2sec
 gmt2sec  (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch.
@@ -1296,6 +1304,15 @@ localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z"
 
+### localtime2nsec +
+localtime2nsec  (class=time #args=1,2) Parses local timestamp as integer nanoseconds since the epoch. Consults $TZ environment variable, unless second argument is supplied.
+Examples:
+localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul"
+localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000
+
+ + ### localtime2sec
 localtime2sec  (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied.
@@ -1305,6 +1322,44 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 
+### nsec2gmt +
+nsec2gmt  (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part.
+Examples:
+nsec2gmt(1234567890000000000)    = "2009-02-13T23:31:30Z"
+nsec2gmt(1234567890123456789)    = "2009-02-13T23:31:30Z"
+nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z"
+
+ + +### nsec2gmtdate +
+nsec2gmtdate  (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is.
+Example:
+nsec2gmtdate(1440768801700000000) = "2015-08-28".
+
+ + +### nsec2localdate +
+nsec2localdate  (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied.
+Examples:
+nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul"
+nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28"
+
+ + +### nsec2localtime +
+nsec2localtime  (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part.
+Examples:
+nsec2localtime(1234567890000000000)    = "2009-02-14 01:31:30"        with TZ="Asia/Istanbul"
+nsec2localtime(1234567890123456789)    = "2009-02-14 01:31:30"        with TZ="Asia/Istanbul"
+nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul"
+nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456"
+
+ + ### sec2dhms
 sec2dhms  (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s"
@@ -1355,6 +1410,27 @@ sec2localtime(1234567890.123456, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.1234
 
+### strfntime +
+strfntime  (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strfntime_local.
+Examples:
+strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ")  = "2015-08-28T13:33:21Z"
+strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z"
+strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z"
+
+ + +### strfntime_local +
+strfntime_local  (class=time #args=2,3) Like strfntime but consults the $TZ environment variable to get local time zone.
+Examples:
+strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z")  = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul"
+strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul"
+strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300"
+strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300"
+
+ + ### strftime
 strftime  (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local.
@@ -1374,6 +1450,28 @@ strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-0
 
+### strpntime +
+strpntime  (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local.
+Examples:
+strpntime("2015-08-28T13:33:21Z",      "%Y-%m-%dT%H:%M:%SZ")   = 1440768801000000000
+strpntime("2015-08-28T13:33:21.345Z",  "%Y-%m-%dT%H:%M:%SZ")   = 1440768801345000000
+strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000
+strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000
+
+ + +### strpntime_local +
+strpntime_local  (class=time #args=2,3) Like strpntime but consults the $TZ environment variable to get local time zone.
+Examples:
+strpntime_local("2015-08-28T13:33:21Z",    "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul"
+strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul"
+strpntime_local("2015-08-28 13:33:21",     "%Y-%m-%d %H:%M:%S")  = 1440758001000000000 with TZ="Asia/Istanbul"
+strpntime_local("2015-08-28 13:33:21",     "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000
+
+ + ### strptime
 strptime  (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local.
@@ -1381,13 +1479,13 @@ Examples:
 strptime("2015-08-28T13:33:21Z",      "%Y-%m-%dT%H:%M:%SZ")   = 1440768801.000000
 strptime("2015-08-28T13:33:21.345Z",  "%Y-%m-%dT%H:%M:%SZ")   = 1440768801.345000
 strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400
-strptime("1970-01-01 00:00:00 EET",   "%Y-%m-%d %H:%M:%S %Z") = -7200
+strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200
 
### strptime_local
-strptime_local  (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone.
+strptime_local  (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone.
 Examples:
 strptime_local("2015-08-28T13:33:21Z",    "%Y-%m-%dT%H:%M:%SZ") = 1440758001     with TZ="Asia/Istanbul"
 strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul"
@@ -1396,6 +1494,12 @@ strptime_local("2015-08-28 13:33:21",     "%Y-%m-%d %H:%M:%S", "Asia/Istanbul")
 
+### sysntime +
+sysntime  (class=time #args=0) Returns the system time in 64-bit integer nanoseconds since the epoch.
+
+ + ### systime
 systime  (class=time #args=0) Returns the system time in floating-point seconds since the epoch.
@@ -1408,6 +1512,12 @@ systimeint  (class=time #args=0) Returns the system time in integer seconds sinc
 
+### upntime +
+upntime  (class=time #args=0) Returns the time in 64-bit integer nanoseconds since the current Miller program was started.
+
+ + ### uptime
 uptime  (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started.
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 27463e333..be11f8903 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -1355,13 +1355,13 @@ Options:
 Note that "mlr filter" is more powerful, but requires you to know field names.
 By contrast, "mlr grep" allows you to regex-match the entire record. It does this
 by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using
-OFS "," and OPS "=", and matching the resulting line against the regex specified
-here. In particular, the regex is not applied to the input stream: if you have
-CSV with header line "x,y,z" and data line "1,2,3" then the regex will be
-matched, not against either of these lines, but against the DKVP line
-"x=1,y=2,z=3".  Furthermore, not all the options to system grep are supported,
-and this command is intended to be merely a keystroke-saver. To get all the
-features of system grep, you can do
+command-line-specified ORS/OFS/OPS, and matching the resulting line against the
+regex specified here. In particular, the regex is not applied to the input
+stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the
+regex will be matched, not against either of these lines, but against the DKVP
+line "x=1,y=2,z=3".  Furthermore, not all the options to system grep are
+supported, and this command is intended to be merely a keystroke-saver. To get
+all the features of system grep, you can do
   "mlr --odkvp ... | grep ... | mlr --idkvp ..."
 
diff --git a/internal/pkg/bifs/datetime.go b/internal/pkg/bifs/datetime.go index d389556ee..47e908b16 100644 --- a/internal/pkg/bifs/datetime.go +++ b/internal/pkg/bifs/datetime.go @@ -28,16 +28,27 @@ func BIF_systimeint() *mlrval.Mlrval { return mlrval.FromInt(time.Now().Unix()) } +func BIF_sysntime() *mlrval.Mlrval { + return mlrval.FromInt(time.Now().UnixNano()) +} + var startTime float64 +var startNTime int64 func init() { startTime = float64(time.Now().UnixNano()) / 1.0e9 + startNTime = time.Now().UnixNano() } func BIF_uptime() *mlrval.Mlrval { return mlrval.FromFloat( float64(time.Now().UnixNano())/1.0e9 - startTime, ) } +func BIF_upntime() *mlrval.Mlrval { + return mlrval.FromInt( + time.Now().UnixNano() - startNTime, + ) +} // ================================================================ @@ -46,75 +57,125 @@ func BIF_sec2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !isNumeric { return input1 } - numDecimalPlaces := 0 - return mlrval.FromString(lib.Sec2GMT(floatValue, numDecimalPlaces)) } +func BIF_nsec2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { + intValue, ok := input1.GetIntValue() + if !ok { + return mlrval.ERROR + } + numDecimalPlaces := 0 + return mlrval.FromString(lib.Nsec2GMT(intValue, numDecimalPlaces)) +} + func BIF_sec2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { floatValue, isNumeric := input1.GetNumericToFloatValue() if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() if !isInt { return mlrval.ERROR } - return mlrval.FromString(lib.Sec2GMT(floatValue, int(numDecimalPlaces))) } +func BIF_nsec2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + intValue, ok := input1.GetIntValue() + if !ok { + return input1 + } + numDecimalPlaces, isInt := input2.GetIntValue() + if !isInt { + return mlrval.ERROR + } + return mlrval.FromString(lib.Nsec2GMT(intValue, int(numDecimalPlaces))) +} + func BIF_sec2localtime_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { floatValue, isNumeric := 
input1.GetNumericToFloatValue() if !isNumeric { return input1 } - numDecimalPlaces := 0 - return mlrval.FromString(lib.Sec2LocalTime(floatValue, numDecimalPlaces)) } +func BIF_nsec2localtime_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { + intValue, ok := input1.GetIntValue() + if !ok { + return input1 + } + numDecimalPlaces := 0 + return mlrval.FromString(lib.Nsec2LocalTime(intValue, numDecimalPlaces)) +} + func BIF_sec2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { floatValue, isNumeric := input1.GetNumericToFloatValue() if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() if !isInt { return mlrval.ERROR } - return mlrval.FromString(lib.Sec2LocalTime(floatValue, int(numDecimalPlaces))) } +func BIF_nsec2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + intValue, ok := input1.GetIntValue() + if !ok { + return input1 + } + numDecimalPlaces, isInt := input2.GetIntValue() + if !isInt { + return mlrval.ERROR + } + return mlrval.FromString(lib.Nsec2LocalTime(intValue, int(numDecimalPlaces))) +} + func BIF_sec2localtime_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { floatValue, isNumeric := input1.GetNumericToFloatValue() if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() if !isInt { return mlrval.ERROR } - locationString, isString := input3.GetStringValue() if !isString { return mlrval.ERROR } - location, err := time.LoadLocation(locationString) if err != nil { return mlrval.ERROR } - return mlrval.FromString(lib.Sec2LocationTime(floatValue, int(numDecimalPlaces), location)) } +func BIF_nsec2localtime_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + intValue, isNumeric := input1.GetIntValue() + if !isNumeric { + return input1 + } + numDecimalPlaces, isInt := input2.GetIntValue() + if !isInt { + return mlrval.ERROR + } + locationString, isString := input3.GetStringValue() + if !isString { + return mlrval.ERROR + } + location, err := 
time.LoadLocation(locationString) + if err != nil { + return mlrval.ERROR + } + return mlrval.FromString(lib.Nsec2LocationTime(intValue, int(numDecimalPlaces), location)) +} + func BIF_sec2gmtdate(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsNumeric() { return input1 @@ -122,6 +183,13 @@ func BIF_sec2gmtdate(input1 *mlrval.Mlrval) *mlrval.Mlrval { return BIF_strftime(input1, ptr_YMD_FORMAT) } +func BIF_nsec2gmtdate(input1 *mlrval.Mlrval) *mlrval.Mlrval { + if !input1.IsNumeric() { + return input1 + } + return BIF_strfntime(input1, ptr_YMD_FORMAT) +} + func BIF_sec2localdate_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsNumeric() { return input1 @@ -129,6 +197,13 @@ func BIF_sec2localdate_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { return BIF_strftime_local_binary(input1, ptr_YMD_FORMAT) } +func BIF_nsec2localdate_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { + if !input1.IsNumeric() { + return input1 + } + return BIF_strfntime_local_binary(input1, ptr_YMD_FORMAT) +} + func BIF_sec2localdate_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsNumeric() { return input1 @@ -136,34 +211,40 @@ func BIF_sec2localdate_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return BIF_strftime_local_ternary(input1, ptr_YMD_FORMAT, input2) } -// ---------------------------------------------------------------- +func BIF_nsec2localdate_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + if !input1.IsNumeric() { + return input1 + } + return BIF_strfntime_local_ternary(input1, ptr_YMD_FORMAT, input2) +} +// ---------------------------------------------------------------- func BIF_localtime2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } - return BIF_sec2gmt_unary(BIF_localtime2sec_unary(input1)) + return BIF_nsec2gmt_unary(BIF_localtime2nsec_unary(input1)) } func BIF_localtime2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } - return 
BIF_sec2gmt_unary(BIF_localtime2sec_binary(input1, input2)) + return BIF_nsec2gmt_unary(BIF_localtime2nsec_binary(input1, input2)) } func BIF_gmt2localtime_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } - return BIF_sec2localtime_unary(BIF_gmt2sec(input1)) + return BIF_nsec2localtime_unary(BIF_gmt2nsec(input1)) } func BIF_gmt2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } - return BIF_sec2localtime_ternary(BIF_gmt2sec(input1), mlrval.FromInt(0), input2) + return BIF_nsec2localtime_ternary(BIF_gmt2nsec(input1), mlrval.FromInt(0), input2) } // ================================================================ @@ -176,10 +257,18 @@ func BIF_strftime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return strftimeHelper(input1, input2, false, nil) } +func BIF_strfntime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return strfntimeHelper(input1, input2, false, nil) +} + func BIF_strftime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return strftimeHelper(input1, input2, true, nil) } +func BIF_strfntime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return strfntimeHelper(input1, input2, true, nil) +} + func BIF_strftime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { locationString, isString := input3.GetStringValue() if !isString { @@ -194,6 +283,18 @@ func BIF_strftime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.M return strftimeHelper(input1, input2, true, location) } +func BIF_strfntime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + locationString, isString := input3.GetStringValue() + if !isString { + return mlrval.ERROR + } + location, err := time.LoadLocation(locationString) + if err != nil { + return mlrval.ERROR + } + return strfntimeHelper(input1, input2, true, location) +} + func strftimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location 
*time.Location) *mlrval.Mlrval { if input1.IsVoid() { return input1 @@ -236,6 +337,48 @@ func strftimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time. return mlrval.FromString(outputString) } +func strfntimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time.Location) *mlrval.Mlrval { + if input1.IsVoid() { + return input1 + } + epochNanoseconds, ok := input1.GetIntValue() + if !ok { + return mlrval.ERROR + } + if !input2.IsString() { + return mlrval.ERROR + } + + // Convert argument1 from float seconds since the epoch to a Go time. + var inputTime time.Time + if doLocal { + if location != nil { + inputTime = lib.EpochNanosecondsToLocationTime(epochNanoseconds, location) + } else { + inputTime = lib.EpochNanosecondsToLocalTime(epochNanoseconds) + } + } else { + inputTime = lib.EpochNanosecondsToGMT(epochNanoseconds) + } + + // Convert argument 2 to a strfntime format string. + // + // Miller fractional-second formats are like "%6S", and were so in the C + // implementation. However, in the strfntime package we're using in the Go + // port, extension-formats are only a single byte so we need to rewrite + // them to "%6". + formatString := extensionRegex.ReplaceAllString(input2.AcquireStringValue(), "$1") + + formatter, err := strftime.New(formatString, strftimeExtensions) + if err != nil { + return mlrval.ERROR + } + + outputString := formatter.FormatString(inputTime) + + return mlrval.FromString(outputString) +} + // ---------------------------------------------------------------- // This is support for %1S .. %9S in format strings, using github.com/lestrrat-go/strftime. @@ -301,6 +444,14 @@ func init() { // Argument 1 is formatted date string like "2021-03-04 02:59:50". // Argument 2 is format string like "%Y-%m-%d %H:%M:%S". 
func BIF_strptime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, input2, false, false) +} + +func BIF_strpntime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, input2, false, true) +} + +func bif_strptime_unary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNanoseconds bool) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } @@ -310,15 +461,53 @@ func BIF_strptime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { timeString := input1.AcquireStringValue() formatString := input2.AcquireStringValue() - t, err := strptime.Parse(timeString, formatString) + var t time.Time + var err error + if doLocal { + t, err = strptime.ParseLocal(timeString, formatString) + } else { + t, err = strptime.Parse(timeString, formatString) + } if err != nil { return mlrval.ERROR } - return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) + if produceNanoseconds { + return mlrval.FromInt(t.UnixNano()) + } else { + return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) + } } +// Argument 1 is formatted date string like "2021-03-04T02:59:50Z". +func BIF_gmt2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, ptr_ISO8601_TIME_FORMAT, false, false) +} + +// Argument 1 is formatted date string like "2021-03-04T02:59:50Z". 
+func BIF_gmt2nsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, ptr_ISO8601_TIME_FORMAT, false, true) +} + +func BIF_localtime2sec_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, ptr_ISO8601_LOCAL_TIME_FORMAT, true, false) +} + +func BIF_localtime2nsec_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_unary_aux(input1, ptr_ISO8601_LOCAL_TIME_FORMAT, true, true) +} + +// ---------------------------------------------------------------- + func BIF_strptime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_binary_aux(input1, input2, true, false) +} + +func BIF_strpntime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_binary_aux(input1, input2, true, true) +} + +func bif_strptime_binary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNanoseconds bool) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } @@ -328,15 +517,43 @@ func BIF_strptime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { timeString := input1.AcquireStringValue() formatString := input2.AcquireStringValue() - t, err := strptime.ParseLocal(timeString, formatString) + var t time.Time + var err error + if doLocal { + t, err = strptime.ParseLocal(timeString, formatString) + } else { + t, err = strptime.Parse(timeString, formatString) + } if err != nil { return mlrval.ERROR } - return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) + if produceNanoseconds { + return mlrval.FromInt(t.UnixNano()) + } else { + return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) + } } +// ---------------------------------------------------------------- + func BIF_strptime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_local_ternary_aux(input1, input2, input3, false) +} + +func BIF_strpntime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + return 
bif_strptime_local_ternary_aux(input1, input2, input3, true) +} + +func BIF_localtime2sec_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_local_ternary_aux(input1, ptr_ISO8601_LOCAL_TIME_FORMAT, input2, false) +} + +func BIF_localtime2nsec_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_strptime_local_ternary_aux(input1, ptr_ISO8601_LOCAL_TIME_FORMAT, input2, true) +} + +func bif_strptime_local_ternary_aux(input1, input2, input3 *mlrval.Mlrval, produceNanoseconds bool) *mlrval.Mlrval { if !input1.IsString() { return mlrval.ERROR } @@ -356,26 +573,14 @@ func BIF_strptime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.M return mlrval.ERROR } - // TODO: use location - t, err := strptime.ParseLocation(timeString, formatString, location) if err != nil { return mlrval.ERROR } - return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) -} - -// ================================================================ -// Argument 1 is formatted date string like "2021-03-04T02:59:50Z". 
-func BIF_gmt2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return BIF_strptime(input1, ptr_ISO8601_TIME_FORMAT) -} - -func BIF_localtime2sec_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return BIF_strptime_local_binary(input1, ptr_ISO8601_LOCAL_TIME_FORMAT) -} - -func BIF_localtime2sec_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return BIF_strptime_local_ternary(input1, ptr_ISO8601_LOCAL_TIME_FORMAT, input2) + if produceNanoseconds { + return mlrval.FromInt(t.UnixNano()) + } else { + return mlrval.FromFloat(float64(t.UnixNano()) / 1.0e9) + } } diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 6f616baa4..876fcdb29 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -971,6 +971,16 @@ is normally distributed.`, unaryFunc: bifs.BIF_gmt2sec, }, + { + name: "gmt2nsec", + class: FUNC_CLASS_TIME, + help: `Parses GMT timestamp as integer nanoseconds since the epoch.`, + examples: []string{ + `gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000`, + }, + unaryFunc: bifs.BIF_gmt2nsec, + }, + { name: "localtime2sec", class: FUNC_CLASS_TIME, @@ -980,12 +990,25 @@ unless second argument is supplied.`, `localtime2sec("2001-02-03 04:05:06") = 981165906 with TZ="Asia/Istanbul"`, `localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"`, }, - // TODO: help-string unaryFunc: bifs.BIF_localtime2sec_unary, binaryFunc: bifs.BIF_localtime2sec_binary, hasMultipleArities: true, }, + { + name: "localtime2nsec", + class: FUNC_CLASS_TIME, + help: `Parses local timestamp as integer nanoseconds since the epoch. 
Consults $TZ environment variable, +unless second argument is supplied.`, + examples: []string{ + `localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul"`, + `localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000`, + }, + unaryFunc: bifs.BIF_localtime2nsec_unary, + binaryFunc: bifs.BIF_localtime2nsec_binary, + hasMultipleArities: true, + }, + { name: "sec2gmt", class: FUNC_CLASS_TIME, @@ -1001,6 +1024,21 @@ argument n, includes n decimal places for the seconds part.`, hasMultipleArities: true, }, + { + name: "nsec2gmt", + class: FUNC_CLASS_TIME, + help: `Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer +argument n, includes n decimal places for the seconds part.`, + examples: []string{ + `nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z"`, + `nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z"`, + `nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z"`, + }, + unaryFunc: bifs.BIF_nsec2gmt_unary, + binaryFunc: bifs.BIF_nsec2gmt_binary, + hasMultipleArities: true, + }, + { name: "sec2localtime", class: FUNC_CLASS_TIME, @@ -1019,6 +1057,24 @@ includes n decimal places for the seconds part`, hasMultipleArities: true, }, + { + name: "nsec2localtime", + class: FUNC_CLASS_TIME, + help: `Formats integer nanoseconds since epoch as local timestamp. Consults $TZ +environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, +includes n decimal places for the seconds part`, + examples: []string{ + `nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul"`, + `nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul"`, + `nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul"`, + `nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456"`, + }, + unaryFunc: bifs.BIF_nsec2localtime_unary, + binaryFunc: bifs.BIF_nsec2localtime_binary, + ternaryFunc: bifs.BIF_nsec2localtime_ternary, + hasMultipleArities: true, + }, + { name: "sec2gmtdate", class: FUNC_CLASS_TIME, @@ -1030,6 +1086,17 @@ Leaves non-numbers as-is.`, unaryFunc: bifs.BIF_sec2gmtdate, }, + { + name: "nsec2gmtdate", + class: FUNC_CLASS_TIME, + help: `Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. +Leaves non-numbers as-is.`, + examples: []string{ + `nsec2gmtdate(1440768801700000000) = "2015-08-28".`, + }, + unaryFunc: bifs.BIF_nsec2gmtdate, + }, + { name: "sec2localdate", class: FUNC_CLASS_TIME, @@ -1044,6 +1111,20 @@ Leaves non-numbers as-is. Consults $TZ environment variable unless second argume hasMultipleArities: true, }, + { + name: "nsec2localdate", + class: FUNC_CLASS_TIME, + help: `Formats integer nanoseconds since epoch as local timestamp with year-month-date. +Leaves non-numbers as-is. 
Consults $TZ environment variable unless second argument is supplied.`, + examples: []string{ + `nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul"`, + `nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28"`, + }, + unaryFunc: bifs.BIF_nsec2localdate_unary, + binaryFunc: bifs.BIF_nsec2localdate_binary, + hasMultipleArities: true, + }, + { name: "localtime2gmt", class: FUNC_CLASS_TIME, @@ -1088,17 +1169,19 @@ See also strftime_local.`, }, { - name: "strptime", + name: "strfntime", class: FUNC_CLASS_TIME, - help: `strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local.`, + help: `Formats integer nanoseconds since the epoch as timestamp. Format strings are as at +https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" +through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no +decimal places.) See also ` + lib.DOC_URL + `/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). 
+See also strfntime_local.`, examples: []string{ - `strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000`, - `strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000`, - `strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400`, - `strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200`, + `strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z"`, + `strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z"`, + `strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z"`, }, - - binaryFunc: bifs.BIF_strptime, + binaryFunc: bifs.BIF_strfntime, }, { @@ -1115,10 +1198,51 @@ See also strftime_local.`, hasMultipleArities: true, }, + { + name: "strfntime_local", + class: FUNC_CLASS_TIME, + help: `Like strfntime but consults the $TZ environment variable to get local time zone.`, + examples: []string{ + `strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul"`, + `strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul"`, + `strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300"`, + `strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300"`, + }, + binaryFunc: bifs.BIF_strfntime_local_binary, + ternaryFunc: bifs.BIF_strfntime_local_ternary, + hasMultipleArities: true, + }, + + { + name: "strptime", + class: FUNC_CLASS_TIME, + help: `strptime: Parses timestamp as floating-point seconds since the epoch. 
See also strptime_local.`, + examples: []string{ + `strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000`, + `strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000`, + `strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400`, + `strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200`, + }, + binaryFunc: bifs.BIF_strptime, + }, + + { + name: "strpntime", + class: FUNC_CLASS_TIME, + help: `strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local.`, + examples: []string{ + `strpntime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801000000000`, + `strpntime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801345000000`, + `strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000`, + `strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000`, + }, + binaryFunc: bifs.BIF_strpntime, + }, + { name: "strptime_local", class: FUNC_CLASS_TIME, - help: `Like strftime but consults the $TZ environment variable to get local time zone.`, + help: `Like strptime but consults the $TZ environment variable to get local time zone.`, examples: []string{ `strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul"`, `strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul"`, @@ -1132,6 +1256,23 @@ See also strftime_local.`, hasMultipleArities: true, }, + { + name: "strpntime_local", + class: FUNC_CLASS_TIME, + help: `Like strpntime but consults the $TZ environment variable to get local time zone.`, + examples: []string{ + `strpntime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul"`, + `strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul"`, + `strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001000000000 
with TZ="Asia/Istanbul"`, + `strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000`, + // TODO: fix parse error on decimal part + //`strpntime_local("2015-08-28 13:33:21.345","%Y-%m-%d %H:%M:%S") = 1440758001.345`, + }, + binaryFunc: bifs.BIF_strpntime_local_binary, + ternaryFunc: bifs.BIF_strpntime_local_ternary, + hasMultipleArities: true, + }, + { name: "dhms2fsec", class: FUNC_CLASS_TIME, @@ -1195,6 +1336,13 @@ See also strftime_local.`, zaryFunc: bifs.BIF_systime, }, + { + name: "sysntime", + class: FUNC_CLASS_TIME, + help: "Returns the system time in 64-bit nanoseconds since the epoch.", + zaryFunc: bifs.BIF_sysntime, + }, + { name: "systimeint", class: FUNC_CLASS_TIME, @@ -1209,6 +1357,13 @@ See also strftime_local.`, zaryFunc: bifs.BIF_uptime, }, + { + name: "upntime", + class: FUNC_CLASS_TIME, + help: "Returns the time in 64-bit nanoseconds since the current Miller program was started.", + zaryFunc: bifs.BIF_upntime, + }, + // ---------------------------------------------------------------- // FUNC_CLASS_TYPING diff --git a/internal/pkg/lib/time.go b/internal/pkg/lib/time.go index 94c31c327..4fa6818c5 100644 --- a/internal/pkg/lib/time.go +++ b/internal/pkg/lib/time.go @@ -2,7 +2,6 @@ package lib import ( "fmt" - "math" "os" "time" ) @@ -26,33 +25,67 @@ func SetTZFromEnv() error { } func Sec2GMT(epochSeconds float64, numDecimalPlaces int) string { - return sec2Time(epochSeconds, numDecimalPlaces, false, nil) + return secToFormattedTime(epochSeconds, numDecimalPlaces, false, nil) } + +func Nsec2GMT(epochNanoseconds int64, numDecimalPlaces int) string { + return nsecToFormattedTime(epochNanoseconds, numDecimalPlaces, false, nil) +} + func Sec2LocalTime(epochSeconds float64, numDecimalPlaces int) string { - return sec2Time(epochSeconds, numDecimalPlaces, true, nil) + return secToFormattedTime(epochSeconds, numDecimalPlaces, true, nil) +} + +func Nsec2LocalTime(epochNanoseconds int64, numDecimalPlaces int) string { 
+ return nsecToFormattedTime(epochNanoseconds, numDecimalPlaces, true, nil) } func Sec2LocationTime(epochSeconds float64, numDecimalPlaces int, location *time.Location) string { - return sec2Time(epochSeconds, numDecimalPlaces, true, location) + return secToFormattedTime(epochSeconds, numDecimalPlaces, true, location) } -// sec2Time is for DSL functions sec2gmt and sec2localtime. If doLocal is +func Nsec2LocationTime(epochNanoseconds int64, numDecimalPlaces int, location *time.Location) string { + return nsecToFormattedTime(epochNanoseconds, numDecimalPlaces, true, location) +} + +// secToFormattedTime is for DSL functions sec2gmt and sec2localtime. If doLocal is // false, use UTC. Else if location is nil, use $TZ environment variable. Else // use the specified location. -func sec2Time(epochSeconds float64, numDecimalPlaces int, doLocal bool, location *time.Location) string { - if numDecimalPlaces > 9 { - numDecimalPlaces = 9 - } - +func secToFormattedTime(epochSeconds float64, numDecimalPlaces int, doLocal bool, location *time.Location) string { intPart := int64(epochSeconds) fractionalPart := epochSeconds - float64(intPart) if fractionalPart < 0 { intPart -= 1 fractionalPart += 1.0 } - decimalPart := int64(fractionalPart * math.Pow(10.0, float64(numDecimalPlaces))) - t := time.Unix(intPart, 0) + t := time.Unix(intPart, int64(fractionalPart*1e9)) + return goTimeToFormattedTime(t, numDecimalPlaces, doLocal, location) +} + +// nsecToFormattedTime is for DSL functions nsec2gmt and nsec2localtime. If doLocal is +// false, use UTC. Else if location is nil, use $TZ environment variable. Else +// use the specified location. 
+func nsecToFormattedTime(epochNanoseconds int64, numDecimalPlaces int, doLocal bool, location *time.Location) string { + t := time.Unix(epochNanoseconds/1000000000, epochNanoseconds%1000000000) + return goTimeToFormattedTime(t, numDecimalPlaces, doLocal, location) +} + +// This is how much to divide nanoseconds by to get a desired number of decimal places +var nsToFracDivisors = []int{ + /* 0 */ 0, /* unused */ + /* 1 */ 100000000, + /* 2 */ 10000000, + /* 3 */ 1000000, + /* 4 */ 100000, + /* 5 */ 10000, + /* 6 */ 1000, + /* 7 */ 100, + /* 8 */ 10, + /* 9 */ 1, +} + +func goTimeToFormattedTime(t time.Time, numDecimalPlaces int, doLocal bool, location *time.Location) string { if doLocal { if location != nil { t = t.In(location) @@ -70,6 +103,12 @@ func sec2Time(epochSeconds float64, numDecimalPlaces int, doLocal bool, location mm := t.Minute() ss := t.Second() + if numDecimalPlaces < 0 { + numDecimalPlaces = 0 + } else if numDecimalPlaces > 9 { + numDecimalPlaces = 9 + } + if numDecimalPlaces == 0 { if doLocal { return fmt.Sprintf( @@ -81,14 +120,15 @@ func sec2Time(epochSeconds float64, numDecimalPlaces int, doLocal bool, location YYYY, MM, DD, hh, mm, ss) } } else { + fractionalPart := t.Nanosecond() / nsToFracDivisors[numDecimalPlaces] if doLocal { return fmt.Sprintf( "%04d-%02d-%02d %02d:%02d:%02d.%0*d", - YYYY, MM, DD, hh, mm, ss, numDecimalPlaces, decimalPart) + YYYY, MM, DD, hh, mm, ss, numDecimalPlaces, fractionalPart) } else { return fmt.Sprintf( "%04d-%02d-%02dT%02d:%02d:%02d.%0*dZ", - YYYY, MM, DD, hh, mm, ss, numDecimalPlaces, decimalPart) + YYYY, MM, DD, hh, mm, ss, numDecimalPlaces, fractionalPart) } } } @@ -97,14 +137,26 @@ func EpochSecondsToGMT(epochSeconds float64) time.Time { return epochSecondsToTime(epochSeconds, false, nil) } +func EpochNanosecondsToGMT(epochNanoseconds int64) time.Time { + return epochNanosecondsToTime(epochNanoseconds, false, nil) +} + func EpochSecondsToLocalTime(epochSeconds float64) time.Time { return 
epochSecondsToTime(epochSeconds, true, nil) } +func EpochNanosecondsToLocalTime(epochNanoseconds int64) time.Time { + return epochNanosecondsToTime(epochNanoseconds, true, nil) +} + func EpochSecondsToLocationTime(epochSeconds float64, location *time.Location) time.Time { return epochSecondsToTime(epochSeconds, true, location) } +func EpochNanosecondsToLocationTime(epochNanoseconds int64, location *time.Location) time.Time { + return epochNanosecondsToTime(epochNanoseconds, true, location) +} + func epochSecondsToTime(epochSeconds float64, doLocal bool, location *time.Location) time.Time { intPart := int64(epochSeconds) fractionalPart := epochSeconds - float64(intPart) @@ -119,3 +171,17 @@ func epochSecondsToTime(epochSeconds float64, doLocal bool, location *time.Locat return time.Unix(intPart, decimalPart).UTC() } } + +func epochNanosecondsToTime(epochNanoseconds int64, doLocal bool, location *time.Location) time.Time { + intPart := epochNanoseconds / 1000000000 + fractionalPart := epochNanoseconds % 1000000000 + if doLocal { + if location == nil { + return time.Unix(intPart, fractionalPart).Local() + } else { + return time.Unix(intPart, fractionalPart).In(location) + } + } else { + return time.Unix(intPart, fractionalPart).UTC() + } +} diff --git a/internal/pkg/lib/time_test.go b/internal/pkg/lib/time_test.go new file mode 100644 index 000000000..f2f2c6690 --- /dev/null +++ b/internal/pkg/lib/time_test.go @@ -0,0 +1,101 @@ +// ================================================================ +// Most Miller tests (thousands of them) are command-line-driven via +// mlr regtest. Here are some cases needing special focus. 
+// ================================================================ + +package lib + +import ( + "time" + + "github.com/stretchr/testify/assert" + "testing" +) + +// ---------------------------------------------------------------- +type tDataForSec2GMT struct { + epochSeconds float64 + numDecimalPlaces int + expectedOutput string +} + +var dataForSec2GMT = []tDataForSec2GMT{ + {0.0, 0, "1970-01-01T00:00:00Z"}, + {0.0, 6, "1970-01-01T00:00:00.000000Z"}, + {1.0, 6, "1970-01-01T00:00:01.000000Z"}, + {123456789.25, 3, "1973-11-29T21:33:09.250Z"}, +} + +func TestSec2GMT(t *testing.T) { + for _, entry := range dataForSec2GMT { + assert.Equal(t, entry.expectedOutput, Sec2GMT(entry.epochSeconds, entry.numDecimalPlaces)) + } +} + +// ---------------------------------------------------------------- +type tDataForNsec2GMT struct { + epochNanoseconds int64 + numDecimalPlaces int + expectedOutput string +} + +var dataForNsec2GMT = []tDataForNsec2GMT{ + {0, 0, "1970-01-01T00:00:00Z"}, + {0, 6, "1970-01-01T00:00:00.000000Z"}, + {946684800123456789, 0, "2000-01-01T00:00:00Z"}, + {946684800123456789, 1, "2000-01-01T00:00:00.1Z"}, + {946684800123456789, 2, "2000-01-01T00:00:00.12Z"}, + {946684800123456789, 3, "2000-01-01T00:00:00.123Z"}, + {946684800123456789, 4, "2000-01-01T00:00:00.1234Z"}, + {946684800123456789, 5, "2000-01-01T00:00:00.12345Z"}, + {946684800123456789, 6, "2000-01-01T00:00:00.123456Z"}, + {946684800123456789, 7, "2000-01-01T00:00:00.1234567Z"}, + {946684800123456789, 8, "2000-01-01T00:00:00.12345678Z"}, + {946684800123456789, 9, "2000-01-01T00:00:00.123456789Z"}, +} + +func TestNsec2GMT(t *testing.T) { + for _, entry := range dataForNsec2GMT { + actualOutput := Nsec2GMT(entry.epochNanoseconds, entry.numDecimalPlaces) + assert.Equal(t, entry.expectedOutput, actualOutput) + } +} + +// ---------------------------------------------------------------- +type tDataForEpochSecondsToGMT struct { + epochSeconds float64 + expectedOutput time.Time +} + +var 
dataForEpochSecondsToGMT = []tDataForEpochSecondsToGMT{ + {0.0, time.Unix(0, 0).UTC()}, + {1.25, time.Unix(1, 250000000).UTC()}, + {123456789.25, time.Unix(123456789, 250000000).UTC()}, +} + +func TestEpochSecondsToGMT(t *testing.T) { + for _, entry := range dataForEpochSecondsToGMT { + assert.Equal(t, entry.expectedOutput, EpochSecondsToGMT(entry.epochSeconds)) + } +} + +// ---------------------------------------------------------------- +type tDataForEpochNanosecondsToGMT struct { + epochNanoseconds int64 + expectedOutput time.Time +} + +var dataForEpochNanosecondsToGMT = []tDataForEpochNanosecondsToGMT{ + {0, time.Unix(0, 0).UTC()}, + {1000000000, time.Unix(1, 0).UTC()}, + {1200000000, time.Unix(1, 200000000).UTC()}, + {-1000000000, time.Unix(-1, 0).UTC()}, + {-1200000000, time.Unix(-1, -200000000).UTC()}, + {123456789250000047, time.Unix(123456789, 250000047).UTC()}, +} + +func TestEpochNanosecondsToGMT(t *testing.T) { + for _, entry := range dataForEpochNanosecondsToGMT { + assert.Equal(t, entry.expectedOutput, EpochNanosecondsToGMT(entry.epochNanoseconds)) + } +} diff --git a/man/manpage.txt b/man/manpage.txt index 8db971e79..abb828010 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -190,21 +190,23 @@ MILLER(1) MILLER(1) capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname index - int invqnorm is_absent is_array is_bool is_boolean is_empty is_empty_map - is_error is_float is_int is_map is_nan is_nonempty_map is_not_array - is_not_empty is_not_map is_not_null is_null is_numeric is_present is_string - joink joinkv joinv json_parse json_stringify latin1_to_utf8 leafcount leftpad - length localtime2gmt localtime2sec log log10 log1p logifit lstrip madd mapdiff - mapexcept mapselect mapsum max md5 mexp min mmul msub os pow 
qnorm reduce - regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt - sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 - sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub - strftime strftime_local string strip strlen strptime strptime_local sub substr - substr0 substr1 system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx uptime urand urand32 urandelement urandint - urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- ./ / // - < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec + hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty + is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present + is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min + mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms + sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 + sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt + ssub strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sysntime system systime systimeint tan tanh tolower toupper truncate typeof + unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- + ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -1189,13 +1191,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - OFS "," and OPS "=", and matching the resulting line against the regex specified - here. In particular, the regex is not applied to the input stream: if you have - CSV with header line "x,y,z" and data line "1,2,3" then the regex will be - matched, not against either of these lines, but against the DKVP line - "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, - and this command is intended to be merely a keystroke-saver. To get all the - features of system grep, you can do + command-line-specified ORS/OFS/OPS, and matching the resulting line against the + regex specified here. In particular, the regex is not applied to the input + stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the + regex will be matched, not against either of these lines, but against the DKVP + line "x=1,y=2,z=3". Furthermore, not all the options to system grep are + supported, and this command is intended to be merely a keystroke-saver. To get + all the features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -2332,6 +2334,11 @@ MILLER(1) MILLER(1) gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" + 1mgmt2nsec0m + (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch. + Example: + gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000 + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. 
Example: @@ -2497,6 +2504,12 @@ MILLER(1) MILLER(1) localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" + 1mlocaltime2nsec0m + (class=time #args=1,2) Parses local timestamp as integer nanoseconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. + Examples: + localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul" + localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000" + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: @@ -2551,6 +2564,32 @@ MILLER(1) MILLER(1) 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) + 1mnsec2gmt0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. + Examples: + nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" + nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" + + 1mnsec2gmtdate0m + (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. + Example: + sec2gmtdate(1440768801700000000) = "2015-08-28". + + 1mnsec2localdate0m + (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. + Examples: + nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" + nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" + + 1mnsec2localtime0m + (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. 
Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part + Examples: + nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" + nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mos0m (class=system #args=0) Returns the operating-system name as a string. @@ -2704,6 +2743,21 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstrfntime0m + (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. + Examples: + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z" + strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z" + + 1mstrfntime_local0m + (class=time #args=2,3) Like strfntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300" + strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300" + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. Examples: @@ -2726,16 +2780,32 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. + 1mstrpntime0m + (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. + Examples: + strpntime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801000000000 + strpntime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801345000000 + strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000 + strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000 + + 1mstrpntime_local0m + (class=time #args=2,3) Like strpntime but consults the $TZ environment variable to get local time zone. 
+ Examples: + strpntime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001000000000 with TZ="Asia/Istanbul" + strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000 + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000 strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 - strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 + strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200 1mstrptime_local0m - (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. + (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul" @@ -2760,6 +2830,9 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msysntime0m + (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. @@ -2806,6 +2879,9 @@ MILLER(1) MILLER(1) unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. 
+ 1mupntime0m + (class=time #args=0) Returns the time in 64-bit nanoseconds since the current Miller program was started. + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. @@ -3333,4 +3409,4 @@ MILLER(1) MILLER(1) - 2023-06-06 MILLER(1) + 2023-06-24 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 0fa01052d..63f502b2a 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-06-06 +.\" Date: 2023-06-24 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-06-06" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-06-24" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -237,21 +237,23 @@ asserting_present asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values -gmt2localtime gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname index -int invqnorm is_absent is_array is_bool is_boolean is_empty is_empty_map -is_error is_float is_int is_map is_nan is_nonempty_map is_not_array -is_not_empty is_not_map is_not_null is_null is_numeric is_present is_string -joink joinkv joinv json_parse json_stringify latin1_to_utf8 leafcount leftpad -length localtime2gmt localtime2sec log log10 log1p logifit lstrip madd mapdiff -mapexcept mapselect mapsum max md5 mexp min mmul msub os pow qnorm reduce -regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt -sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 -sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub -strftime strftime_local string 
strip strlen strptime strptime_local sub substr -substr0 substr1 system systime systimeint tan tanh tolower toupper truncate -typeof unflatten unformat unformatx uptime urand urand32 urandelement urandint -urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- ./ / // -< << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ +gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec +hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty +is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map +is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present +is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 +leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 +log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min +mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm +reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms +sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 +sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt +ssub strfntime strfntime_local strftime strftime_local string strip strlen +strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 +sysntime system systime systimeint tan tanh tolower toupper truncate typeof +unflatten unformat unformatx upntime uptime urand urand32 urandelement +urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- +\&./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ .fi .if n \{\ .RE @@ -1480,13 +1482,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -OFS "," and OPS "=", and matching the resulting line against the regex specified -here. 
In particular, the regex is not applied to the input stream: if you have -CSV with header line "x,y,z" and data line "1,2,3" then the regex will be -matched, not against either of these lines, but against the DKVP line -"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, -and this command is intended to be merely a keystroke-saver. To get all the -features of system grep, you can do +command-line-specified ORS/OFS/OPS, and matching the resulting line against the +regex specified here. In particular, the regex is not applied to the input +stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the +regex will be matched, not against either of these lines, but against the DKVP +line "x=1,y=2,z=3". Furthermore, not all the options to system grep are +supported, and this command is intended to be merely a keystroke-saver. To get +all the features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." .fi .if n \{\ @@ -3269,6 +3271,17 @@ gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" .fi .if n \{\ .RE +.SS "gmt2nsec" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1) Parses GMT timestamp as integer nanoseconds since the epoch. +Example: +gmt2nsec("2001-02-03T04:05:06Z") = 981173106000000000 +.fi +.if n \{\ +.RE .SS "gmt2sec" .if n \{\ .RS 0 @@ -3680,6 +3693,18 @@ localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" .fi .if n \{\ .RE +.SS "localtime2nsec" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1,2) Parses local timestamp as integer nanoseconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. 
+Examples: +localtime2nsec("2001-02-03 04:05:06") = 981165906000000000 with TZ="Asia/Istanbul" +localtime2nsec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906000000000 +.fi +.if n \{\ +.RE .SS "localtime2sec" .if n \{\ .RS 0 @@ -3836,6 +3861,56 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906" .fi .if n \{\ .RE +.SS "nsec2gmt" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +.fi +.if n \{\ +.RE +.SS "nsec2gmtdate" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +nsec2gmtdate(1440768801700000000) = "2015-08-28". +.fi +.if n \{\ +.RE +.SS "nsec2localdate" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. +Examples: +nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" +nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" +.fi +.if n \{\ +.RE +.SS "nsec2localtime" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" +.fi +.if n \{\ +.RE .SS "os" .if n \{\ .RS 0 @@ -4181,6 +4256,33 @@ ssub("abc.def", ".", "X") gives "abcXdef" .fi .if n \{\ .RE +.SS "strfntime" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strfntime_local. +Examples: +strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" +strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.123Z" +strfntime(1440768801123456789,"%Y-%m-%dT%H:%M:%6SZ") = "2015-08-28T13:33:21.123456Z" +.fi +.if n \{\ +.RE +.SS "strfntime_local" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=2,3) Like strfntime but consults the $TZ environment variable to get local time zone. 
+Examples: +strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" +strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.123 +0300" with TZ="Asia/Istanbul" +strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123 +0300" +strfntime_local(1440768801123456789, "%Y-%m-%d %H:%M:%9S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.123456789 +0300" +.fi +.if n \{\ +.RE .SS "strftime" .if n \{\ .RS 0 @@ -4233,6 +4335,34 @@ strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-0 .fi .if n \{\ .RE +.SS "strpntime" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. +Examples: +strpntime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801000000000 +strpntime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801345000000 +strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400000000000 +strpntime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200000000000 +.fi +.if n \{\ +.RE +.SS "strpntime_local" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=2,3) Like strpntime but consults the $TZ environment variable to get local time zone. 
+Examples: +strpntime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001000000000 with TZ="Asia/Istanbul" +strpntime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001345000000 with TZ="Asia/Istanbul" +strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001000000000 with TZ="Asia/Istanbul" +strpntime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001000000000 +.fi +.if n \{\ +.RE .SS "strptime" .if n \{\ .RS 0 @@ -4243,7 +4373,7 @@ Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 strptime("2015-08-28T13:33:21.345Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000 strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 -strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 +strptime("1970-01-01 00:00:00 +0200", "%Y-%m-%d %H:%M:%S %z") = -7200 .fi .if n \{\ .RE @@ -4252,7 +4382,7 @@ strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 .RS 0 .\} .nf - (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. + (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440758001.345 with TZ="Asia/Istanbul" @@ -4303,6 +4433,15 @@ sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\e1") gives "prefix4529:name .fi .if n \{\ .RE +.SS "sysntime" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. +.fi +.if n \{\ +.RE .SS "system" .if n \{\ .RS 0 @@ -4421,6 +4560,15 @@ is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. .fi .if n \{\ .RE +.SS "upntime" +.if n \{\ +.RS 0 +.\} +.nf + (class=time #args=0) Returns the time in 64-bit nanoseconds since the current Miller program was started. 
+.fi +.if n \{\ +.RE .SS "uptime" .if n \{\ .RS 0 diff --git a/test/cases/dsl-gmt-date-time-functions/0005n/cmd b/test/cases/dsl-gmt-date-time-functions/0005n/cmd new file mode 100644 index 000000000..6d58b7d24 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0005n/cmd @@ -0,0 +1 @@ +mlr --csvlite put -f ${CASEDIR}/mlr test/input/gmt2nsec diff --git a/test/cases/dsl-gmt-date-time-functions/0005n/experr b/test/cases/dsl-gmt-date-time-functions/0005n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-gmt-date-time-functions/0005n/expout b/test/cases/dsl-gmt-date-time-functions/0005n/expout new file mode 100644 index 000000000..c523dc29e --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0005n/expout @@ -0,0 +1,29 @@ +gmt,sec +1970-01-01T00:00:00Z,0 +1970-01-01T00:00:00.Z,(error) +1970-01-01T00:00:01Z,1000000000 +1970-01-01T00:00:01.0Z,1000000000 +1970-01-01T00:00:10Z,10000000000 +1970-01-01T00:00:10.00Z,10000000000 +1970-01-01T00:01:40Z,100000000000 +1970-01-01T00:01:40.1Z,100100000000 +1970-01-01T00:16:40Z,1000000000000 +1970-01-01T00:16:40.12Z,1000120000000 +1970-01-01T02:46:40Z,10000000000000 +1970-01-01T02:46:40.123Z,10000123000000 +1970-01-02T03:46:40Z,100000000000000 +1970-01-02T03:46:40.1234Z,100000123400000 +1970-01-12T13:46:40Z,1000000000000000 +1970-01-12T13:46:40.12345Z,1000000123450000 +1970-04-26T17:46:40Z,10000000000000000 +1970-04-26T17:46:40.123456Z,10000000123456000 +1973-03-03T09:46:40Z,100000000000000000 +1973-03-03T09:46:40.1234567Z,100000000123456700 +2001-09-09T01:46:40Z,1000000000000000000 +2001-09-09T01:46:40.12345678Z,1000000000123456780 +2015-05-19T11:49:40Z,1432036180000000000 +2015-05-19T11:49:40.123456789Z,1432036180123456789 +2017-07-14T02:40:00Z,1500000000000000000 +2017-07-14T02:40:00.999Z,1500000000999000000 +2033-05-18T03:33:20Z,2000000000000000000 +2033-05-18T03:33:20.999999Z,2000000000999999000 diff --git a/test/cases/dsl-gmt-date-time-functions/0005n/mlr 
b/test/cases/dsl-gmt-date-time-functions/0005n/mlr new file mode 100644 index 000000000..797ae3bd1 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0005n/mlr @@ -0,0 +1 @@ +$sec = gmt2nsec($gmt) diff --git a/test/cases/dsl-gmt-date-time-functions/0011n/cmd b/test/cases/dsl-gmt-date-time-functions/0011n/cmd new file mode 100644 index 000000000..f8d98687e --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0011n/cmd @@ -0,0 +1 @@ +mlr --icsv --opprint put -f ${CASEDIR}/mlr test/input/gmt2nsec diff --git a/test/cases/dsl-gmt-date-time-functions/0011n/experr b/test/cases/dsl-gmt-date-time-functions/0011n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-gmt-date-time-functions/0011n/expout b/test/cases/dsl-gmt-date-time-functions/0011n/expout new file mode 100644 index 000000000..6318a0f2f --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0011n/expout @@ -0,0 +1,29 @@ +gmt sec +1970-01-01T00:00:00Z 0 +1970-01-01T00:00:00.Z (error) +1970-01-01T00:00:01Z 1000000000 +1970-01-01T00:00:01.0Z 1000000000 +1970-01-01T00:00:10Z 10000000000 +1970-01-01T00:00:10.00Z 10000000000 +1970-01-01T00:01:40Z 100000000000 +1970-01-01T00:01:40.1Z 100100000000 +1970-01-01T00:16:40Z 1000000000000 +1970-01-01T00:16:40.12Z 1000120000000 +1970-01-01T02:46:40Z 10000000000000 +1970-01-01T02:46:40.123Z 10000123000000 +1970-01-02T03:46:40Z 100000000000000 +1970-01-02T03:46:40.1234Z 100000123400000 +1970-01-12T13:46:40Z 1000000000000000 +1970-01-12T13:46:40.12345Z 1000000123450000 +1970-04-26T17:46:40Z 10000000000000000 +1970-04-26T17:46:40.123456Z 10000000123456000 +1973-03-03T09:46:40Z 100000000000000000 +1973-03-03T09:46:40.1234567Z 100000000123456700 +2001-09-09T01:46:40Z 1000000000000000000 +2001-09-09T01:46:40.12345678Z 1000000000123456780 +2015-05-19T11:49:40Z 1432036180000000000 +2015-05-19T11:49:40.123456789Z 1432036180123456789 +2017-07-14T02:40:00Z 1500000000000000000 +2017-07-14T02:40:00.999Z 1500000000999000000 
+2033-05-18T03:33:20Z 2000000000000000000 +2033-05-18T03:33:20.999999Z 2000000000999999000 diff --git a/test/cases/dsl-gmt-date-time-functions/0011n/mlr b/test/cases/dsl-gmt-date-time-functions/0011n/mlr new file mode 100644 index 000000000..c35ad3dd3 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0011n/mlr @@ -0,0 +1 @@ +$sec = strpntime($gmt, "%Y-%m-%dT%H:%M:%SZ") diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/0019/cmd b/test/cases/dsl-gmt-date-time-functions/0019n/0019/cmd new file mode 100644 index 000000000..a5997d665 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/0019/cmd @@ -0,0 +1 @@ +mlr --icsv --opprint put -f ${CASEDIR}/mlr test/input/gmt2sec diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/0019/experr b/test/cases/dsl-gmt-date-time-functions/0019n/0019/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/0019/expout b/test/cases/dsl-gmt-date-time-functions/0019n/0019/expout new file mode 100644 index 000000000..2f4f196c0 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/0019/expout @@ -0,0 +1,29 @@ +gmt sec +1970-01-01T00:00:00Z 0.00000000 +1970-01-01T00:00:00.Z (error) +1970-01-01T00:00:01Z 1.00000000 +1970-01-01T00:00:01.0Z 1.00000000 +1970-01-01T00:00:10Z 10.00000000 +1970-01-01T00:00:10.00Z 10.00000000 +1970-01-01T00:01:40Z 100.00000000 +1970-01-01T00:01:40.1Z 100.10000000 +1970-01-01T00:16:40Z 1000.00000000 +1970-01-01T00:16:40.12Z 1000.12000000 +1970-01-01T02:46:40Z 10000.00000000 +1970-01-01T02:46:40.123Z 10000.12300000 +1970-01-02T03:46:40Z 100000.00000000 +1970-01-02T03:46:40.1234Z 100000.12340000 +1970-01-12T13:46:40Z 1000000.00000000 +1970-01-12T13:46:40.12345Z 1000000.12345000 +1970-04-26T17:46:40Z 10000000.00000000 +1970-04-26T17:46:40.123456Z 10000000.12345600 +1973-03-03T09:46:40Z 100000000.00000000 +1973-03-03T09:46:40.1234567Z 100000000.12345670 +2001-09-09T01:46:40Z 1000000000.00000000 
+2001-09-09T01:46:40.12345678Z 1000000000.12345672 +2015-05-19T11:49:40Z 1432036180.00000000 +2015-05-19T11:49:40.123456789Z 1432036180.12345672 +2017-07-14T02:40:00Z 1500000000.00000000 +2017-07-14T02:40:00.999Z 1500000000.99900007 +2033-05-18T03:33:20Z 2000000000.00000000 +2033-05-18T03:33:20.999999Z 2000000000.99999905 diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/0019/mlr b/test/cases/dsl-gmt-date-time-functions/0019n/0019/mlr new file mode 100644 index 000000000..6391a7e85 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/0019/mlr @@ -0,0 +1 @@ +$sec = strptime($gmt, "%FT%TZ") diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/cmd b/test/cases/dsl-gmt-date-time-functions/0019n/cmd new file mode 100644 index 000000000..f8d98687e --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/cmd @@ -0,0 +1 @@ +mlr --icsv --opprint put -f ${CASEDIR}/mlr test/input/gmt2nsec diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/experr b/test/cases/dsl-gmt-date-time-functions/0019n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/expout b/test/cases/dsl-gmt-date-time-functions/0019n/expout new file mode 100644 index 000000000..6318a0f2f --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/expout @@ -0,0 +1,29 @@ +gmt sec +1970-01-01T00:00:00Z 0 +1970-01-01T00:00:00.Z (error) +1970-01-01T00:00:01Z 1000000000 +1970-01-01T00:00:01.0Z 1000000000 +1970-01-01T00:00:10Z 10000000000 +1970-01-01T00:00:10.00Z 10000000000 +1970-01-01T00:01:40Z 100000000000 +1970-01-01T00:01:40.1Z 100100000000 +1970-01-01T00:16:40Z 1000000000000 +1970-01-01T00:16:40.12Z 1000120000000 +1970-01-01T02:46:40Z 10000000000000 +1970-01-01T02:46:40.123Z 10000123000000 +1970-01-02T03:46:40Z 100000000000000 +1970-01-02T03:46:40.1234Z 100000123400000 +1970-01-12T13:46:40Z 1000000000000000 +1970-01-12T13:46:40.12345Z 1000000123450000 +1970-04-26T17:46:40Z 10000000000000000 
+1970-04-26T17:46:40.123456Z 10000000123456000 +1973-03-03T09:46:40Z 100000000000000000 +1973-03-03T09:46:40.1234567Z 100000000123456700 +2001-09-09T01:46:40Z 1000000000000000000 +2001-09-09T01:46:40.12345678Z 1000000000123456780 +2015-05-19T11:49:40Z 1432036180000000000 +2015-05-19T11:49:40.123456789Z 1432036180123456789 +2017-07-14T02:40:00Z 1500000000000000000 +2017-07-14T02:40:00.999Z 1500000000999000000 +2033-05-18T03:33:20Z 2000000000000000000 +2033-05-18T03:33:20.999999Z 2000000000999999000 diff --git a/test/cases/dsl-gmt-date-time-functions/0019n/mlr b/test/cases/dsl-gmt-date-time-functions/0019n/mlr new file mode 100644 index 000000000..26e34300a --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0019n/mlr @@ -0,0 +1 @@ +$sec = strpntime($gmt, "%FT%TZ") diff --git a/test/cases/dsl-gmt-date-time-functions/0020n/cmd b/test/cases/dsl-gmt-date-time-functions/0020n/cmd new file mode 100644 index 000000000..6add080d4 --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0020n/cmd @@ -0,0 +1 @@ +mlr -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-gmt-date-time-functions/0020n/experr b/test/cases/dsl-gmt-date-time-functions/0020n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-gmt-date-time-functions/0020n/expout b/test/cases/dsl-gmt-date-time-functions/0020n/expout new file mode 100644 index 000000000..ce9d672ee --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0020n/expout @@ -0,0 +1,10 @@ +0 +0 +14400000000000 +0 +0 +31276800000000000 +80430000000000 +138000000000000 +(error) +31536000123456000.00000000 diff --git a/test/cases/dsl-gmt-date-time-functions/0020n/mlr b/test/cases/dsl-gmt-date-time-functions/0020n/mlr new file mode 100644 index 000000000..a59a0f29f --- /dev/null +++ b/test/cases/dsl-gmt-date-time-functions/0020n/mlr @@ -0,0 +1,12 @@ +end { + print strpntime("1970-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"); + print strpntime("1970-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"); + print 
strpntime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z"); + print strpntime("1970-01-01%00:00:00Z", "%Y-%m-%d%%%H:%M:%SZ"); + print strpntime("1970-01-01T00:00:00Z", "%FT%TZ"); + print strpntime("1970:363", "%Y:%j"); + print strpntime("1970-01-01 10:20:30 PM", "%F %r"); + print strpntime("01/02/70 14:20", "%D %R"); + print strpntime("01/02/70 14:20", "%D %X"); # no such format code + print fmtnum(strpntime("1971-01-01T00:00:00.123456Z", "%Y-%m-%dT%H:%M:%S.%fZ"), "%.6f"); +} diff --git a/test/cases/dsl-local-date-time-functions/0003n/cmd b/test/cases/dsl-local-date-time-functions/0003n/cmd new file mode 100644 index 000000000..11d677b75 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0003n/cmd @@ -0,0 +1 @@ +mlr --opprint --from ${CASEDIR}/input put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-local-date-time-functions/0003n/env b/test/cases/dsl-local-date-time-functions/0003n/env new file mode 100644 index 000000000..416a800e9 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0003n/env @@ -0,0 +1 @@ +TZ=America/Sao_Paulo diff --git a/test/cases/dsl-local-date-time-functions/0003n/experr b/test/cases/dsl-local-date-time-functions/0003n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/0003n/expout b/test/cases/dsl-local-date-time-functions/0003n/expout new file mode 100644 index 000000000..9c75c2640 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0003n/expout @@ -0,0 +1,11 @@ +a b c tz +2017-02-18 23:00:00 1487466000000000000 2017-02-18 23:00:00 America/Sao_Paulo +2017-02-18 23:59:59 1487469599000000000 2017-02-18 23:59:59 America/Sao_Paulo +2017-02-19 00:00:00 1487473200000000000 2017-02-19 00:00:00 America/Sao_Paulo +2017-02-19 00:30:00 1487475000000000000 2017-02-19 00:30:00 America/Sao_Paulo +2017-02-19 01:00:00 1487476800000000000 2017-02-19 01:00:00 America/Sao_Paulo +2017-10-14 23:00:00 1508032800000000000 2017-10-14 23:00:00 America/Sao_Paulo +2017-10-14 
23:59:59 1508036399000000000 2017-10-14 23:59:59 America/Sao_Paulo +2017-10-15 00:00:00 1508032800000000000 2017-10-14 23:00:00 America/Sao_Paulo +2017-10-15 00:30:00 1508034600000000000 2017-10-14 23:30:00 America/Sao_Paulo +2017-10-15 01:00:00 1508036400000000000 2017-10-15 01:00:00 America/Sao_Paulo diff --git a/test/cases/dsl-local-date-time-functions/0003n/input b/test/cases/dsl-local-date-time-functions/0003n/input new file mode 100644 index 000000000..da8ccab3e --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0003n/input @@ -0,0 +1,10 @@ +a=2017-02-18 23:00:00 +a=2017-02-18 23:59:59 +a=2017-02-19 00:00:00 +a=2017-02-19 00:30:00 +a=2017-02-19 01:00:00 +a=2017-10-14 23:00:00 +a=2017-10-14 23:59:59 +a=2017-10-15 00:00:00 +a=2017-10-15 00:30:00 +a=2017-10-15 01:00:00 diff --git a/test/cases/dsl-local-date-time-functions/0003n/mlr b/test/cases/dsl-local-date-time-functions/0003n/mlr new file mode 100644 index 000000000..182e8b915 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0003n/mlr @@ -0,0 +1,3 @@ +$b = strpntime_local($a, "%Y-%m-%d %H:%M:%S"); +$c = strfntime_local($b, "%Y-%m-%d %H:%M:%S"); +$tz = ENV["TZ"]; diff --git a/test/cases/dsl-local-date-time-functions/0004n/cmd b/test/cases/dsl-local-date-time-functions/0004n/cmd new file mode 100644 index 000000000..11d677b75 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0004n/cmd @@ -0,0 +1 @@ +mlr --opprint --from ${CASEDIR}/input put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-local-date-time-functions/0004n/env b/test/cases/dsl-local-date-time-functions/0004n/env new file mode 100644 index 000000000..416a800e9 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0004n/env @@ -0,0 +1 @@ +TZ=America/Sao_Paulo diff --git a/test/cases/dsl-local-date-time-functions/0004n/experr b/test/cases/dsl-local-date-time-functions/0004n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/0004n/expout 
b/test/cases/dsl-local-date-time-functions/0004n/expout new file mode 100644 index 000000000..9c75c2640 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0004n/expout @@ -0,0 +1,11 @@ +a b c tz +2017-02-18 23:00:00 1487466000000000000 2017-02-18 23:00:00 America/Sao_Paulo +2017-02-18 23:59:59 1487469599000000000 2017-02-18 23:59:59 America/Sao_Paulo +2017-02-19 00:00:00 1487473200000000000 2017-02-19 00:00:00 America/Sao_Paulo +2017-02-19 00:30:00 1487475000000000000 2017-02-19 00:30:00 America/Sao_Paulo +2017-02-19 01:00:00 1487476800000000000 2017-02-19 01:00:00 America/Sao_Paulo +2017-10-14 23:00:00 1508032800000000000 2017-10-14 23:00:00 America/Sao_Paulo +2017-10-14 23:59:59 1508036399000000000 2017-10-14 23:59:59 America/Sao_Paulo +2017-10-15 00:00:00 1508032800000000000 2017-10-14 23:00:00 America/Sao_Paulo +2017-10-15 00:30:00 1508034600000000000 2017-10-14 23:30:00 America/Sao_Paulo +2017-10-15 01:00:00 1508036400000000000 2017-10-15 01:00:00 America/Sao_Paulo diff --git a/test/cases/dsl-local-date-time-functions/0004n/input b/test/cases/dsl-local-date-time-functions/0004n/input new file mode 100644 index 000000000..da8ccab3e --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0004n/input @@ -0,0 +1,10 @@ +a=2017-02-18 23:00:00 +a=2017-02-18 23:59:59 +a=2017-02-19 00:00:00 +a=2017-02-19 00:30:00 +a=2017-02-19 01:00:00 +a=2017-10-14 23:00:00 +a=2017-10-14 23:59:59 +a=2017-10-15 00:00:00 +a=2017-10-15 00:30:00 +a=2017-10-15 01:00:00 diff --git a/test/cases/dsl-local-date-time-functions/0004n/mlr b/test/cases/dsl-local-date-time-functions/0004n/mlr new file mode 100644 index 000000000..182e8b915 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/0004n/mlr @@ -0,0 +1,3 @@ +$b = strpntime_local($a, "%Y-%m-%d %H:%M:%S"); +$c = strfntime_local($b, "%Y-%m-%d %H:%M:%S"); +$tz = ENV["TZ"]; diff --git a/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/cmd b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/cmd new 
file mode 100644 index 000000000..6add080d4 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/cmd @@ -0,0 +1 @@ +mlr -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/experr b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/expout b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/expout new file mode 100644 index 000000000..5825285ff --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/expout @@ -0,0 +1,23 @@ +1969-12-31 21:00:00 +1970-01-01 02:00:00 + +1969-12-31 +1970-01-01 + +10800000000000 +-7200000000000 + +10800000000000 +-7200000000000 + +10800000000000 +-7200000000000 + +1969-12-31 21:00:00 +1970-01-01 02:00:00 + +1969-12-31 21:00:00 +1970-01-01 02:00:00 + +1970-01-01T03:00:00Z +1969-12-31T22:00:00Z diff --git a/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/mlr b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/mlr new file mode 100644 index 000000000..67a5bdde3 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/local-with-tzs-nsec/mlr @@ -0,0 +1,39 @@ +end { + sao = "America/Sao_Paulo"; + ist = "Asia/Istanbul"; + iso = "%Y-%m-%dT%H:%M:%SZ"; + loc = "%Y-%m-%d %H:%M:%S"; + zeg = "1970-01-01T00:00:00Z"; + zel = "1970-01-01 00:00:00"; + + print(nsec2localtime(0, 0, sao)); + print(nsec2localtime(0, 0, ist)); + print; + + print(nsec2localdate(0, sao)); + print(nsec2localdate(0, ist)); + print; + + print(localtime2nsec(zel, sao)); + print(localtime2nsec(zel, ist)); + print; + + print(localtime2nsec(zel, sao)); + print(localtime2nsec(zel, ist)); + print; + + print(strpntime_local(zel, loc, sao)); + print(strpntime_local(zel, loc, ist)); + print; + + print(strfntime_local(0, loc, sao)); + print(strfntime_local(0, loc, ist)); + print; + + print(gmt2localtime(zeg, sao)); + 
print(gmt2localtime(zeg, ist)); + print; + + print(localtime2gmt(zel, sao)); + print(localtime2gmt(zel, ist)); +} diff --git a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/cmd b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/cmd new file mode 100644 index 000000000..65600b063 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/cmd @@ -0,0 +1 @@ +mlr --tz Asia/Istanbul -n put -f test/input/strfntime-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/experr b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout new file mode 100644 index 000000000..057ff69bd --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout @@ -0,0 +1,12 @@ +---------------------------------------------------------------- TIMEZONE +TZ is Asia/Istanbul +---------------------------------------------------------------- STRFNTIME +1970-01-01T00:00:00Z +1970-01-01 00:00:00 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 diff --git a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/cmd b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/cmd new file mode 100644 index 000000000..d7c102c1d --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/cmd @@ -0,0 +1 @@ +mlr --tz America/Sao_Paulo -n put -f test/input/strfntime-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/experr b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout 
b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout new file mode 100644 index 000000000..36a591e67 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout @@ -0,0 +1,12 @@ +---------------------------------------------------------------- TIMEZONE +TZ is America/Sao_Paulo +---------------------------------------------------------------- STRFNTIME +1970-01-01T00:00:00Z +1970-01-01 00:00:00 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 diff --git a/test/cases/dsl-local-date-time-functions/strfntime-utc/cmd b/test/cases/dsl-local-date-time-functions/strfntime-utc/cmd new file mode 100644 index 000000000..8518d6688 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-utc/cmd @@ -0,0 +1 @@ +mlr --tz UTC -n put -f test/input/strfntime-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime-utc/experr b/test/cases/dsl-local-date-time-functions/strfntime-utc/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime-utc/expout b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout new file mode 100644 index 000000000..62b9c1fe6 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout @@ -0,0 +1,12 @@ +---------------------------------------------------------------- TIMEZONE +TZ is UTC +---------------------------------------------------------------- STRFNTIME +1970-01-01T00:00:00Z +1970-01-01 00:00:00 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/cmd b/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/cmd new file mode 100644 index 
000000000..565748aea --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/cmd @@ -0,0 +1 @@ +mlr --tz Asia/Istanbul -n put -f test/input/strfntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/experr b/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/expout b/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/expout new file mode 100644 index 000000000..a8147319e --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-istanbul/expout @@ -0,0 +1,11 @@ +---------------------------------------------------------------- TIMEZONE +TZ is Asia/Istanbul +---------------------------------------------------------------- STRFNTIME_LOCAL +1970-01-01 02:00:00 +1970-01-01 02:00:00.000 +1970-01-01 02:00:00 EET +1970-01-01 02:00:00 +0200 +1970-01-01 02:00:00 EET +1970-01-01 02:00:00 +0200 +1970-01-01 02:00:00 EET +1970-01-01 02:00:00 +0200 diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/cmd b/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/cmd new file mode 100644 index 000000000..73b3d5155 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/cmd @@ -0,0 +1 @@ +mlr --tz America/Sao_Paulo -n put -f test/input/strfntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/experr b/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/expout new file mode 100644 index 000000000..6df103333 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-sao_paulo/expout 
@@ -0,0 +1,11 @@ +---------------------------------------------------------------- TIMEZONE +TZ is America/Sao_Paulo +---------------------------------------------------------------- STRFNTIME_LOCAL +1969-12-31 21:00:00 +1969-12-31 21:00:00.000 +1969-12-31 21:00:00 -03 +1969-12-31 21:00:00 -0300 +1969-12-31 21:00:00 -03 +1969-12-31 21:00:00 -0300 +1969-12-31 21:00:00 -03 +1969-12-31 21:00:00 -0300 diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-utc/cmd b/test/cases/dsl-local-date-time-functions/strfntime_local-utc/cmd new file mode 100644 index 000000000..ec9ec60a8 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-utc/cmd @@ -0,0 +1 @@ +mlr --tz UTC -n put -f test/input/strfntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-utc/experr b/test/cases/dsl-local-date-time-functions/strfntime_local-utc/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strfntime_local-utc/expout b/test/cases/dsl-local-date-time-functions/strfntime_local-utc/expout new file mode 100644 index 000000000..675ea2587 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strfntime_local-utc/expout @@ -0,0 +1,11 @@ +---------------------------------------------------------------- TIMEZONE +TZ is UTC +---------------------------------------------------------------- STRFNTIME_LOCAL +1970-01-01 00:00:00 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 +1970-01-01 00:00:00 UTC +1970-01-01 00:00:00 +0000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-istanbul/cmd b/test/cases/dsl-local-date-time-functions/strpntime-istanbul/cmd new file mode 100644 index 000000000..fc3ffbdd2 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-istanbul/cmd @@ -0,0 +1 @@ +mlr --tz Asia/Istanbul -n put -f test/input/strpntime-tz.mlr diff --git 
a/test/cases/dsl-local-date-time-functions/strpntime-istanbul/experr b/test/cases/dsl-local-date-time-functions/strpntime-istanbul/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strpntime-istanbul/expout new file mode 100644 index 000000000..5291615b8 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-istanbul/expout @@ -0,0 +1,9 @@ +---------------------------------------------------------------- TIMEZONE +TZ is Asia/Istanbul +---------------------------------------------------------------- STRPNTIME +0 +345000000 +345000000 +345000000 +14400345000000 +-14399655000000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-j/cmd b/test/cases/dsl-local-date-time-functions/strpntime-j/cmd new file mode 100644 index 000000000..e281b3385 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-j/cmd @@ -0,0 +1 @@ +mlr --tz UTC -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime-j/experr b/test/cases/dsl-local-date-time-functions/strpntime-j/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime-j/expout b/test/cases/dsl-local-date-time-functions/strpntime-j/expout new file mode 100644 index 000000000..7d7dd945c --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-j/expout @@ -0,0 +1,2 @@ +2021-01-01 +2021-12-29 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-j/mlr b/test/cases/dsl-local-date-time-functions/strpntime-j/mlr new file mode 100644 index 000000000..3551856d9 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-j/mlr @@ -0,0 +1,4 @@ +end { + print strfntime(strpntime("001 2021", "%j %Y"),"%Y-%m-%d"); + print strfntime(strpntime("363 2021", "%j %Y"),"%Y-%m-%d"); +} diff --git 
a/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/cmd b/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/cmd new file mode 100644 index 000000000..d6e2caf95 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/cmd @@ -0,0 +1 @@ +mlr --tz America/Sao_Paulo -n put -f test/input/strpntime-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/experr b/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/expout new file mode 100644 index 000000000..80fd08f7b --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-sao_paulo/expout @@ -0,0 +1,9 @@ +---------------------------------------------------------------- TIMEZONE +TZ is America/Sao_Paulo +---------------------------------------------------------------- STRPNTIME +0 +345000000 +345000000 +345000000 +14400345000000 +-14399655000000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-utc/cmd b/test/cases/dsl-local-date-time-functions/strpntime-utc/cmd new file mode 100644 index 000000000..28e7f9deb --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-utc/cmd @@ -0,0 +1 @@ +mlr --tz UTC -n put -f test/input/strpntime-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime-utc/experr b/test/cases/dsl-local-date-time-functions/strpntime-utc/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime-utc/expout b/test/cases/dsl-local-date-time-functions/strpntime-utc/expout new file mode 100644 index 000000000..c83b55187 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-utc/expout @@ -0,0 +1,9 @@ +---------------------------------------------------------------- TIMEZONE +TZ is UTC 
+---------------------------------------------------------------- STRPNTIME +0 +345000000 +345000000 +345000000 +14400345000000 +-14399655000000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-z/cmd b/test/cases/dsl-local-date-time-functions/strpntime-z/cmd new file mode 100644 index 000000000..6add080d4 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-z/cmd @@ -0,0 +1 @@ +mlr -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime-z/experr b/test/cases/dsl-local-date-time-functions/strpntime-z/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime-z/expout b/test/cases/dsl-local-date-time-functions/strpntime-z/expout new file mode 100644 index 000000000..29d23fe0d --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-z/expout @@ -0,0 +1,15 @@ + +TZ UTC +intime 1970-01-01T00:00:00-0400 +parsed 14400000000000 +formatted 1970-01-01T04:00:00+0000 + +TZ America/Sao_Paulo +intime 1970-01-01T00:00:00-0400 +parsed 14400000000000 +formatted 1970-01-01T04:00:00+0000 + +TZ Asia/Istanbul +intime 1970-01-01T00:00:00-0400 +parsed 14400000000000 +formatted 1970-01-01T04:00:00+0000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime-z/mlr b/test/cases/dsl-local-date-time-functions/strpntime-z/mlr new file mode 100644 index 000000000..d14ed4c39 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime-z/mlr @@ -0,0 +1,17 @@ +end { + tzs = ["UTC", "America/Sao_Paulo", "Asia/Istanbul"]; + + for (tz in tzs) { + ENV["TZ"] = tz; + # Expect these to not vary with $TZ since we are using %z + intime = "1970-01-01T00:00:00-0400"; + parsed = strpntime(intime, "%Y-%m-%dT%H:%M:%S%z"); + formatted = strfntime(parsed, "%Y-%m-%dT%H:%M:%S%z"); + + print; + print "TZ ", tz; + print "intime ", intime; + print "parsed ", parsed; + print "formatted", formatted; + } +} diff --git 
a/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/cmd b/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/cmd new file mode 100644 index 000000000..972b0d54a --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/cmd @@ -0,0 +1 @@ +mlr --tz Asia/Istanbul -n put -f test/input/strpntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/experr b/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/expout b/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/expout new file mode 100644 index 000000000..2e34ce4ae --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-istanbul/expout @@ -0,0 +1,4 @@ +---------------------------------------------------------------- TIMEZONE +TZ is Asia/Istanbul +---------------------------------------------------------------- STRPNTIME_LOCAL +-7200000000000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/cmd b/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/cmd new file mode 100644 index 000000000..25733cc0d --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/cmd @@ -0,0 +1 @@ +mlr --tz America/Sao_Paulo -n put -f test/input/strpntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/experr b/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/expout new file mode 100644 index 000000000..53115b27f --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-sao_paulo/expout @@ -0,0 +1,4 @@ 
+---------------------------------------------------------------- TIMEZONE +TZ is America/Sao_Paulo +---------------------------------------------------------------- STRPNTIME_LOCAL +10800000000000 diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-utc/cmd b/test/cases/dsl-local-date-time-functions/strpntime_local-utc/cmd new file mode 100644 index 000000000..bbd8e93e8 --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-utc/cmd @@ -0,0 +1 @@ +mlr --tz UTC -n put -f test/input/strpntime_local-tz.mlr diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-utc/experr b/test/cases/dsl-local-date-time-functions/strpntime_local-utc/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-local-date-time-functions/strpntime_local-utc/expout b/test/cases/dsl-local-date-time-functions/strpntime_local-utc/expout new file mode 100644 index 000000000..2db059d0b --- /dev/null +++ b/test/cases/dsl-local-date-time-functions/strpntime_local-utc/expout @@ -0,0 +1,4 @@ +---------------------------------------------------------------- TIMEZONE +TZ is UTC +---------------------------------------------------------------- STRPNTIME_LOCAL +0 diff --git a/test/cases/dsl-sec2gmt/0001n/cmd b/test/cases/dsl-sec2gmt/0001n/cmd new file mode 100644 index 000000000..566159173 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0001n/cmd @@ -0,0 +1 @@ +mlr --from test/input/ten.dkvp --opprint put '$z=nsec2gmt($i)' diff --git a/test/cases/dsl-sec2gmt/0001n/experr b/test/cases/dsl-sec2gmt/0001n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-sec2gmt/0001n/expout b/test/cases/dsl-sec2gmt/0001n/expout new file mode 100644 index 000000000..148459888 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0001n/expout @@ -0,0 +1,11 @@ +a b i x y z +pan pan 1 0.34679014 0.72680286 1970-01-01T00:00:00Z +eks pan 2 0.75867996 -0.52215111 1970-01-01T00:00:00Z +wye wye 3 0.20460331 0.33831853 1970-01-01T00:00:00Z 
+eks wye 4 0.38139939 -0.13418874 1970-01-01T00:00:00Z +wye pan 5 0.57328892 0.86362447 1970-01-01T00:00:00Z +zee pan 6 0.52712616 -0.49322129 1970-01-01T00:00:00Z +eks zee 7 0.61178406 0.18788492 1970-01-01T00:00:00Z +zee wye 8 0.59855401 0.97618139 1970-01-01T00:00:00Z +hat wye 9 0.03144188 -0.74955076 1970-01-01T00:00:00Z +pan wye 10 0.50262601 0.95261836 1970-01-01T00:00:00Z diff --git a/test/cases/dsl-sec2gmt/0002n/cmd b/test/cases/dsl-sec2gmt/0002n/cmd new file mode 100644 index 000000000..c2c143a72 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0002n/cmd @@ -0,0 +1 @@ +mlr --from test/input/ten.dkvp --opprint put '$z=nsec2gmt($i, $i-1)' diff --git a/test/cases/dsl-sec2gmt/0002n/experr b/test/cases/dsl-sec2gmt/0002n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-sec2gmt/0002n/expout b/test/cases/dsl-sec2gmt/0002n/expout new file mode 100644 index 000000000..ea79cc3e3 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0002n/expout @@ -0,0 +1,11 @@ +a b i x y z +pan pan 1 0.34679014 0.72680286 1970-01-01T00:00:00Z +eks pan 2 0.75867996 -0.52215111 1970-01-01T00:00:00.0Z +wye wye 3 0.20460331 0.33831853 1970-01-01T00:00:00.00Z +eks wye 4 0.38139939 -0.13418874 1970-01-01T00:00:00.000Z +wye pan 5 0.57328892 0.86362447 1970-01-01T00:00:00.0000Z +zee pan 6 0.52712616 -0.49322129 1970-01-01T00:00:00.00000Z +eks zee 7 0.61178406 0.18788492 1970-01-01T00:00:00.000000Z +zee wye 8 0.59855401 0.97618139 1970-01-01T00:00:00.0000000Z +hat wye 9 0.03144188 -0.74955076 1970-01-01T00:00:00.00000000Z +pan wye 10 0.50262601 0.95261836 1970-01-01T00:00:00.000000010Z diff --git a/test/cases/dsl-sec2gmt/0003n/cmd b/test/cases/dsl-sec2gmt/0003n/cmd new file mode 100644 index 000000000..7b1b46e13 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0003n/cmd @@ -0,0 +1 @@ +mlr --from test/input/ten.dkvp --opprint put '$z=nsec2gmt($i * 1000000000 * 123456789)' diff --git a/test/cases/dsl-sec2gmt/0003n/experr b/test/cases/dsl-sec2gmt/0003n/experr new file mode 100644 
index 000000000..e69de29bb diff --git a/test/cases/dsl-sec2gmt/0003n/expout b/test/cases/dsl-sec2gmt/0003n/expout new file mode 100644 index 000000000..ce09f20de --- /dev/null +++ b/test/cases/dsl-sec2gmt/0003n/expout @@ -0,0 +1,11 @@ +a b i x y z +pan pan 1 0.34679014 0.72680286 1973-11-29T21:33:09Z +eks pan 2 0.75867996 -0.52215111 1977-10-28T19:06:18Z +wye wye 3 0.20460331 0.33831853 1981-09-26T16:39:27Z +eks wye 4 0.38139939 -0.13418874 1985-08-25T14:12:36Z +wye pan 5 0.57328892 0.86362447 1989-07-24T11:45:45Z +zee pan 6 0.52712616 -0.49322129 1993-06-22T09:18:54Z +eks zee 7 0.61178406 0.18788492 1997-05-21T06:52:03Z +zee wye 8 0.59855401 0.97618139 2001-04-19T04:25:12Z +hat wye 9 0.03144188 -0.74955076 2005-03-18T01:58:21Z +pan wye 10 0.50262601 0.95261836 2009-02-13T23:31:30Z diff --git a/test/cases/dsl-sec2gmt/0004n/cmd b/test/cases/dsl-sec2gmt/0004n/cmd new file mode 100644 index 000000000..6d9a0cf22 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0004n/cmd @@ -0,0 +1 @@ +mlr --from test/input/ten.dkvp --opprint put '$z=nsec2gmt($i * 1000000000 + 123456789,$i-1)' diff --git a/test/cases/dsl-sec2gmt/0004n/experr b/test/cases/dsl-sec2gmt/0004n/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-sec2gmt/0004n/expout b/test/cases/dsl-sec2gmt/0004n/expout new file mode 100644 index 000000000..bbfcc0f89 --- /dev/null +++ b/test/cases/dsl-sec2gmt/0004n/expout @@ -0,0 +1,11 @@ +a b i x y z +pan pan 1 0.34679014 0.72680286 1970-01-01T00:00:01Z +eks pan 2 0.75867996 -0.52215111 1970-01-01T00:00:02.1Z +wye wye 3 0.20460331 0.33831853 1970-01-01T00:00:03.12Z +eks wye 4 0.38139939 -0.13418874 1970-01-01T00:00:04.123Z +wye pan 5 0.57328892 0.86362447 1970-01-01T00:00:05.1234Z +zee pan 6 0.52712616 -0.49322129 1970-01-01T00:00:06.12345Z +eks zee 7 0.61178406 0.18788492 1970-01-01T00:00:07.123456Z +zee wye 8 0.59855401 0.97618139 1970-01-01T00:00:08.1234567Z +hat wye 9 0.03144188 -0.74955076 1970-01-01T00:00:09.12345678Z +pan wye 10 0.50262601 
0.95261836 1970-01-01T00:00:10.123456789Z diff --git a/test/cases/help/0014/expout b/test/cases/help/0014/expout index 97977a3db..c0f1a0c10 100644 --- a/test/cases/help/0014/expout +++ b/test/cases/help/0014/expout @@ -22,6 +22,24 @@ Options: -h|--help Show this message. fsec2dhms (class=time #args=1) Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s" fsec2hms (class=time #args=1) Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000" +nsec2gmt (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +nsec2gmtdate (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +sec2gmtdate(1440768801700000000) = "2015-08-28". +nsec2localdate (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. +Examples: +nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" +nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" +nsec2localtime (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part +Examples: +nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" sec2dhms (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s" sec2gmt (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. Examples: diff --git a/test/cases/help/0016/expout b/test/cases/help/0016/expout index 87aa76721..eb9d95c77 100644 --- a/test/cases/help/0016/expout +++ b/test/cases/help/0016/expout @@ -20,6 +20,14 @@ Options: --micros Input numbers are treated as microseconds since the epoch. --nanos Input numbers are treated as nanoseconds since the epoch. -h|--help Show this message. +nsec2gmt (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +nsec2gmtdate (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +sec2gmtdate(1440768801700000000) = "2015-08-28". sec2gmt (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. 
Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" diff --git a/test/cases/repl-help/0014/expout b/test/cases/repl-help/0014/expout index a710a4f26..8e1c88ce0 100644 --- a/test/cases/repl-help/0014/expout +++ b/test/cases/repl-help/0014/expout @@ -1,5 +1,23 @@ fsec2dhms (class=time #args=1) Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s" fsec2hms (class=time #args=1) Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000" +nsec2gmt (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +nsec2gmtdate (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +sec2gmtdate(1440768801700000000) = "2015-08-28". +nsec2localdate (class=time #args=1,2) Formats integer nanoseconds since epoch as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. +Examples: +nsec2localdate(1440768801700000000) = "2015-08-28" with TZ="Asia/Istanbul" +nsec2localdate(1440768801700000000, "Asia/Istanbul") = "2015-08-28" +nsec2localtime (class=time #args=1,2,3) Formats integer nanoseconds since epoch as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part +Examples: +nsec2localtime(1234567890000000000) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" +nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" sec2dhms (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s" sec2gmt (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. Examples: diff --git a/test/cases/repl-help/0016/expout b/test/cases/repl-help/0016/expout index ad1e26c87..ca42bd62d 100644 --- a/test/cases/repl-help/0016/expout +++ b/test/cases/repl-help/0016/expout @@ -1,3 +1,11 @@ +nsec2gmt (class=time #args=1,2) Formats integer nanoseconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. +Examples: +nsec2gmt(1234567890000000000) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789) = "2009-02-13T23:31:30Z" +nsec2gmt(1234567890123456789, 6) = "2009-02-13T23:31:30.123456Z" +nsec2gmtdate (class=time #args=1) Formats integer nanoseconds since epoch as GMT timestamp with year-month-date. Leaves non-numbers as-is. +Example: +sec2gmtdate(1440768801700000000) = "2015-08-28". sec2gmt (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. 
Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" diff --git a/test/input/gmt2nsec b/test/input/gmt2nsec new file mode 100644 index 000000000..f63c71714 --- /dev/null +++ b/test/input/gmt2nsec @@ -0,0 +1,29 @@ +gmt +1970-01-01T00:00:00Z +1970-01-01T00:00:00.Z +1970-01-01T00:00:01Z +1970-01-01T00:00:01.0Z +1970-01-01T00:00:10Z +1970-01-01T00:00:10.00Z +1970-01-01T00:01:40Z +1970-01-01T00:01:40.1Z +1970-01-01T00:16:40Z +1970-01-01T00:16:40.12Z +1970-01-01T02:46:40Z +1970-01-01T02:46:40.123Z +1970-01-02T03:46:40Z +1970-01-02T03:46:40.1234Z +1970-01-12T13:46:40Z +1970-01-12T13:46:40.12345Z +1970-04-26T17:46:40Z +1970-04-26T17:46:40.123456Z +1973-03-03T09:46:40Z +1973-03-03T09:46:40.1234567Z +2001-09-09T01:46:40Z +2001-09-09T01:46:40.12345678Z +2015-05-19T11:49:40Z +2015-05-19T11:49:40.123456789Z +2017-07-14T02:40:00Z +2017-07-14T02:40:00.999Z +2033-05-18T03:33:20Z +2033-05-18T03:33:20.999999Z diff --git a/test/input/strfntime-tz.mlr b/test/input/strfntime-tz.mlr new file mode 100644 index 000000000..45f507617 --- /dev/null +++ b/test/input/strfntime-tz.mlr @@ -0,0 +1,17 @@ +end { + + print "---------------------------------------------------------------- TIMEZONE"; + print "TZ is", ENV["TZ"]; + + print "---------------------------------------------------------------- STRFNTIME"; + print strfntime(123456, "%Y-%m-%dT%H:%M:%SZ"); + print strfntime(0, "%Y-%m-%d %H:%M:%S"); + print strfntime(0, "%Y-%m-%d %H:%M:%3S"); + print strfntime(0, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime(0, "%Y-%m-%d %H:%M:%S %z"); + print strfntime(123456, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime(123456, "%Y-%m-%d %H:%M:%S %z"); + print strfntime(0, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime(0, "%Y-%m-%d %H:%M:%S %z"); + +} diff --git a/test/input/strfntime_local-tz.mlr b/test/input/strfntime_local-tz.mlr new file mode 100644 index 000000000..2e903b163 --- /dev/null +++ b/test/input/strfntime_local-tz.mlr @@ -0,0 +1,17 @@ +end { + + print 
"---------------------------------------------------------------- TIMEZONE"; + print "TZ is", ENV["TZ"]; + + print "---------------------------------------------------------------- STRFNTIME_LOCAL"; + print strfntime_local(0, "%Y-%m-%d %H:%M:%S"); + print strfntime_local(0, "%Y-%m-%d %H:%M:%3S"); + + print strfntime_local(0, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime_local(0, "%Y-%m-%d %H:%M:%S %z"); + print strfntime_local(123456, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime_local(123456, "%Y-%m-%d %H:%M:%S %z"); + print strfntime_local(0, "%Y-%m-%d %H:%M:%S %Z"); + print strfntime_local(0, "%Y-%m-%d %H:%M:%S %z"); + +} diff --git a/test/input/strpntime-tz.mlr b/test/input/strpntime-tz.mlr new file mode 100644 index 000000000..a4d7ea7b4 --- /dev/null +++ b/test/input/strpntime-tz.mlr @@ -0,0 +1,14 @@ +end { + + print "---------------------------------------------------------------- TIMEZONE"; + print "TZ is", ENV["TZ"]; + + print "---------------------------------------------------------------- STRPNTIME"; + print strpntime("1970-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"); + print strpntime("1970-01-01T00:00:00.345Z", "%Y-%m-%dT%H:%M:%SZ"); + print strpntime("1970-01-01T00:00:00.345 UTC", "%Y-%m-%dT%H:%M:%S %Z"); + print strpntime("1970-01-01T00:00:00.345 EST", "%Y-%m-%dT%H:%M:%S %Z"); + print strpntime("1970-01-01T00:00:00.345 -0400", "%Y-%m-%dT%H:%M:%S %z"); + print strpntime("1970-01-01T00:00:00.345 +0400", "%Y-%m-%dT%H:%M:%S %z"); + +} diff --git a/test/input/strpntime_local-tz.mlr b/test/input/strpntime_local-tz.mlr new file mode 100644 index 000000000..c34636bb3 --- /dev/null +++ b/test/input/strpntime_local-tz.mlr @@ -0,0 +1,9 @@ +end { + + print "---------------------------------------------------------------- TIMEZONE"; + print "TZ is", ENV["TZ"]; + + print "---------------------------------------------------------------- STRPNTIME_LOCAL"; + print strpntime_local("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"); + +} From dff2206b62305dc806626680e97fdb83b26735a1 
Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Jun 2023 15:40:06 -0400 Subject: [PATCH 012/456] todo --- todo.txt | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/todo.txt b/todo.txt index 77d6dc69c..eea98acf3 100644 --- a/todo.txt +++ b/todo.txt @@ -1,41 +1,19 @@ =============================================================== -* 404 -- what broke? - https://github.com/johnkerl/miller/pull/757/files -* https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation -* https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/ - -RELEASES -* 6.4.0 ideas: - - ! -r splits for rename, merge-fields, cut - ! summing up empty data - ! emitv2 - ! csv check for empty field names - ! strmatch - ! transposed output - - o mrpl exits ... - o opt-in type-infers for inf, true, etc - o #982 '.' ',' etc - o extended field accessors for #763 and #948 (positional & json.nested) - o strict mode - o awk-like exit - o unsparsify -f CSV by default -- ? into CSV record-writer -- ? caveat that record 1 controls all ... - o mlr split -- needs an example page along with the tee DSL function - - some mlr merge somehow -- ? would need a verb-API refactor - - ? zlen csv? - ? datediff et al. - ? rank - ? YAML - ? #908 inferencing options - ? gogll +* 1050 mlr check w/ empty csv column name +* 283 strmatch DSL function +* 440 strict mode +* 1128 bash/zsh autocompletions +* 1025 emitv2 +* 1082 summary/type +* 1105 too many open files +* opt-in type-infers for inf, true, etc +* 982 '.' ',' etc +* extended field accessors for #763 and #948 (positional & json.nested) +* awk-like exit +* mrpl exits ... ================================================================ -FEATURES - ----------------------------------------------------------------- STRICT MODE ? 
re silent zero-pass for-loops on non-collections: @@ -328,8 +306,6 @@ w contact re https://jsonlines.org/on_the_web/ =============================================================== TESTING -! ./mlr vs mlr ... - ! pos/neg 0x/0b/0o UTs * RT ngrams.sh -v -o 1 one-word-list.txt From 3e5c3e239845b3be25ce4c2b1a02597681795832 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Jun 2023 19:12:26 -0400 Subject: [PATCH 013/456] Add empty-key check to `mlr check` (#1330) * Add empty-key check to `mlr check` * Update `mlr check --help` * Update to on-line help --- docs/src/manpage.md | 21 ++++++++++-------- docs/src/manpage.txt | 21 ++++++++++-------- docs/src/reference-verbs.md | 19 +++++++++------- internal/pkg/transformers/check.go | 33 +++++++++++++++++++++++++--- man/manpage.txt | 21 ++++++++++-------- man/mlr.1 | 23 ++++++++++--------- test/cases/cli-help/0001/expout | 6 ++++- test/cases/verb-check/0001/cmd | 1 + test/cases/verb-check/0001/experr | 0 test/cases/verb-check/0001/expout | 0 test/cases/verb-check/0001/input.csv | 3 +++ test/cases/verb-check/0002/cmd | 1 + test/cases/verb-check/0002/experr | 1 + test/cases/verb-check/0002/expout | 0 test/cases/verb-check/0002/input.csv | 3 +++ 15 files changed, 104 insertions(+), 49 deletions(-) create mode 100644 test/cases/verb-check/0001/cmd create mode 100644 test/cases/verb-check/0001/experr create mode 100644 test/cases/verb-check/0001/expout create mode 100644 test/cases/verb-check/0001/input.csv create mode 100644 test/cases/verb-check/0002/cmd create mode 100644 test/cases/verb-check/0002/experr create mode 100644 test/cases/verb-check/0002/expout create mode 100644 test/cases/verb-check/0002/input.csv diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 2f884002f..c458367be 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -936,8 +936,11 @@ MILLER(1) MILLER(1) 1mcheck0m Usage: mlr check [options] - Consumes records without printing any output. 
+ Consumes records without printing any output, Useful for doing a well-formatted check on input data. + with the exception that warnings are printed to stderr. + Current checks are: + * If any key is the empty string Options: -h|--help Show this message. @@ -1212,13 +1215,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -3430,5 +3433,5 @@ MILLER(1) MILLER(1) - 2023-06-24 MILLER(1) + 2023-06-25 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index abb828010..4aad07549 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -915,8 +915,11 @@ MILLER(1) MILLER(1) 1mcheck0m Usage: mlr check [options] - Consumes records without printing any output. 
+ Consumes records without printing any output, Useful for doing a well-formatted check on input data. + with the exception that warnings are printed to stderr. + Current checks are: + * If any key is the empty string Options: -h|--help Show this message. @@ -1191,13 +1194,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -3409,4 +3412,4 @@ MILLER(1) MILLER(1) - 2023-06-24 MILLER(1) + 2023-06-25 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index be11f8903..e34c9f788 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -376,8 +376,11 @@ n a b i x y
 Usage: mlr check [options]
-Consumes records without printing any output.
+Consumes records without printing any output,
 Useful for doing a well-formatted check on input data.
+with the exception that warnings are printed to stderr.
+Current checks are:
+* If any key is the empty string
 Options:
 -h|--help Show this message.
 
@@ -1355,13 +1358,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 
diff --git a/internal/pkg/transformers/check.go b/internal/pkg/transformers/check.go index c313abd4e..0301dad7d 100644 --- a/internal/pkg/transformers/check.go +++ b/internal/pkg/transformers/check.go @@ -24,8 +24,12 @@ func transformerCheckUsage( o *os.File, ) { fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameCheck) - fmt.Fprintf(o, "Consumes records without printing any output.\n") + fmt.Fprintf(o, "Consumes records without printing any output,\n") fmt.Fprintf(o, "Useful for doing a well-formatted check on input data.\n") + fmt.Fprintf(o, "with the exception that warnings are printed to stderr.\n") + fmt.Fprintf(o, "Current checks are:\n") + fmt.Fprintf(o, "* Data are parseable\n") + fmt.Fprintf(o, "* If any key is the empty string\n") fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -79,10 +83,13 @@ func transformerCheckParseCLI( // ---------------------------------------------------------------- type TransformerCheck struct { // stateless + messagedReEmptyKey map[string]bool } func NewTransformerCheck() (*TransformerCheck, error) { - return &TransformerCheck{}, nil + return &TransformerCheck{ + messagedReEmptyKey: make(map[string]bool), + }, nil } func (tr *TransformerCheck) Transform( @@ -92,7 +99,27 @@ func (tr *TransformerCheck) Transform( outputDownstreamDoneChannel chan<- bool, ) { HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - if inrecAndContext.EndOfStream { + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + for pe := inrec.Head; pe != nil; pe = pe.Next { + if pe.Key == "" { + context := inrecAndContext.Context + + // Most Miller users are CSV users. And for CSV this will be an error on + // *every* record, or none -- so let's not print this multiple times. 
+ if tr.messagedReEmptyKey[context.FILENAME] { + continue + } + + message := fmt.Sprintf( + "mlr: warning: empty-string key at filename %s record number %d", + context.FILENAME, context.NR, + ) + fmt.Fprintln(os.Stderr, message) + tr.messagedReEmptyKey[context.FILENAME] = true + } + } + } else { outputRecordsAndContexts.PushBack(inrecAndContext) } } diff --git a/man/manpage.txt b/man/manpage.txt index abb828010..4aad07549 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -915,8 +915,11 @@ MILLER(1) MILLER(1) 1mcheck0m Usage: mlr check [options] - Consumes records without printing any output. + Consumes records without printing any output, Useful for doing a well-formatted check on input data. + with the exception that warnings are printed to stderr. + Current checks are: + * If any key is the empty string Options: -h|--help Show this message. @@ -1191,13 +1194,13 @@ MILLER(1) MILLER(1) Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using - command-line-specified ORS/OFS/OPS, and matching the resulting line against the - regex specified here. In particular, the regex is not applied to the input - stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the - regex will be matched, not against either of these lines, but against the DKVP - line "x=1,y=2,z=3". Furthermore, not all the options to system grep are - supported, and this command is intended to be merely a keystroke-saver. To get - all the features of system grep, you can do + OFS "," and OPS "=", and matching the resulting line against the regex specified + here. In particular, the regex is not applied to the input stream: if you have + CSV with header line "x,y,z" and data line "1,2,3" then the regex will be + matched, not against either of these lines, but against the DKVP line + "x=1,y=2,z=3". 
Furthermore, not all the options to system grep are supported, + and this command is intended to be merely a keystroke-saver. To get all the + features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 1mgroup-by0m @@ -3409,4 +3412,4 @@ MILLER(1) MILLER(1) - 2023-06-24 MILLER(1) + 2023-06-25 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 63f502b2a..a0bd1c531 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-06-24 +.\" Date: 2023-06-25 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-06-24" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-06-25" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1122,8 +1122,11 @@ Options: .\} .nf Usage: mlr check [options] -Consumes records without printing any output. +Consumes records without printing any output, Useful for doing a well-formatted check on input data. +with the exception that warnings are printed to stderr. +Current checks are: +* If any key is the empty string Options: -h|--help Show this message. .fi @@ -1482,13 +1485,13 @@ Options: Note that "mlr filter" is more powerful, but requires you to know field names. By contrast, "mlr grep" allows you to regex-match the entire record. It does this by formatting each record in memory as DKVP (or NIDX, if -a is supplied), using -command-line-specified ORS/OFS/OPS, and matching the resulting line against the -regex specified here. In particular, the regex is not applied to the input -stream: if you have CSV with header line "x,y,z" and data line "1,2,3" then the -regex will be matched, not against either of these lines, but against the DKVP -line "x=1,y=2,z=3". Furthermore, not all the options to system grep are -supported, and this command is intended to be merely a keystroke-saver. 
To get -all the features of system grep, you can do +OFS "," and OPS "=", and matching the resulting line against the regex specified +here. In particular, the regex is not applied to the input stream: if you have +CSV with header line "x,y,z" and data line "1,2,3" then the regex will be +matched, not against either of these lines, but against the DKVP line +"x=1,y=2,z=3". Furthermore, not all the options to system grep are supported, +and this command is intended to be merely a keystroke-saver. To get all the +features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." .fi .if n \{\ diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index a03731513..398f42702 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -63,8 +63,12 @@ Options: ================================================================ check Usage: mlr check [options] -Consumes records without printing any output. +Consumes records without printing any output, Useful for doing a well-formatted check on input data. +with the exception that warnings are printed to stderr. +Current checks are: +* Data are parseable +* If any key is the empty string Options: -h|--help Show this message. 
diff --git a/test/cases/verb-check/0001/cmd b/test/cases/verb-check/0001/cmd new file mode 100644 index 000000000..c8a6cee73 --- /dev/null +++ b/test/cases/verb-check/0001/cmd @@ -0,0 +1 @@ +mlr --csv check ${CASEDIR}/input.csv diff --git a/test/cases/verb-check/0001/experr b/test/cases/verb-check/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-check/0001/expout b/test/cases/verb-check/0001/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-check/0001/input.csv b/test/cases/verb-check/0001/input.csv new file mode 100644 index 000000000..88700c714 --- /dev/null +++ b/test/cases/verb-check/0001/input.csv @@ -0,0 +1,3 @@ +a,b,c +1,2,3 +4,5,6 diff --git a/test/cases/verb-check/0002/cmd b/test/cases/verb-check/0002/cmd new file mode 100644 index 000000000..c8a6cee73 --- /dev/null +++ b/test/cases/verb-check/0002/cmd @@ -0,0 +1 @@ +mlr --csv check ${CASEDIR}/input.csv diff --git a/test/cases/verb-check/0002/experr b/test/cases/verb-check/0002/experr new file mode 100644 index 000000000..3f1a47b50 --- /dev/null +++ b/test/cases/verb-check/0002/experr @@ -0,0 +1 @@ +mlr: warning: empty-string key at filename test/cases/verb-check/0002/input.csv record number 1 diff --git a/test/cases/verb-check/0002/expout b/test/cases/verb-check/0002/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-check/0002/input.csv b/test/cases/verb-check/0002/input.csv new file mode 100644 index 000000000..47c5605f6 --- /dev/null +++ b/test/cases/verb-check/0002/input.csv @@ -0,0 +1,3 @@ +a,,c +1,2,3 +4,5,6 From 3baebea7a3e33efe9d0d6c41fffef105e2535ad3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 2 Jul 2023 15:49:41 -0400 Subject: [PATCH 014/456] Add `%N` and `%O` for `strfntime` (#1334) * Add `%N` and `%O` for strfntime * Unit-test mods * artifacts from `make dev` --- docs/src/data-diving-examples.md | 46 +++++++++---------- docs/src/manpage.md | 3 +- docs/src/manpage.txt | 3 +- 
docs/src/reference-dsl-time.md | 8 +++- docs/src/reference-dsl-time.md.in | 6 ++- docs/src/reference-verbs.md | 39 ++++++++-------- docs/src/two-pass-algorithms.md | 4 +- internal/pkg/bifs/datetime.go | 14 ++++++ man/manpage.txt | 3 +- man/mlr.1 | 5 +- .../strfntime-istanbul/expout | 2 + .../strfntime-sao_paulo/expout | 2 + .../strfntime-utc/expout | 2 + .../strftime-istanbul/expout | 2 + .../strftime-sao_paulo/expout | 2 + .../strftime-utc/expout | 2 + test/input/strfntime-tz.mlr | 2 + test/input/strftime-tz.mlr | 2 + 18 files changed, 96 insertions(+), 51 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index c458367be..a6220789a 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -940,6 +940,7 @@ MILLER(1) MILLER(1) Useful for doing a well-formatted check on input data. with the exception that warnings are printed to stderr. Current checks are: + * Data are parseable * If any key is the empty string Options: -h|--help Show this message. @@ -3433,5 +3434,5 @@ MILLER(1) MILLER(1) - 2023-06-25 MILLER(1) + 2023-07-02 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 4aad07549..f5f2c3090 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -919,6 +919,7 @@ MILLER(1) MILLER(1) Useful for doing a well-formatted check on input data. with the exception that warnings are printed to stderr. Current checks are: + * Data are parseable * If any key is the empty string Options: -h|--help Show this message. @@ -3412,4 +3413,4 @@ MILLER(1) MILLER(1) - 2023-06-25 MILLER(1) + 2023-07-02 MILLER(1) diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index 680057fb1..d7f0111a3 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -246,7 +246,7 @@ Notes: * For `strftime`, this is thanks to [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime), with a Miller-specific modification for fractional seconds. * For `strftime`, this is thanks to [https://github.com/pbnjay/strptime](https://github.com/pbnjay/strptime), with Miller-specific modifications. 
-Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime): +Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%N`, and `%O` which are Miller-specific additions: | Pattern | Description | |---------|-------------| @@ -269,6 +269,8 @@ Available format strings for `strftime`, taken directly from [https://github.com | `%M` | the minute as a decimal number (00-59) | | `%m` | the month as a decimal number (01-12) | | `%n` | a newline | +| `%N` | zero-padded nanoseconds | +| `%O` | non-zero-padded nanoseconds | | `%p` | national representation of either "ante meridiem" (a.m.) or "post meridiem" (p.m.) as appropriate. | | `%R` | equivalent to `%H:%M` | | `%r` | equivalent to `%I:%M:%S %p` | @@ -317,11 +319,15 @@ Examples: mlr -n put 'end { print strftime(0, "%Y-%m-%dT%H:%M:%SZ"); print strftime(0, "%FT%TZ"); + print strfntime(123, "%N"); + print strfntime(123, "%O"); }'
 1970-01-01T00:00:00Z
 1970-01-01T00:00:00Z
+000000123
+123
 
diff --git a/docs/src/reference-dsl-time.md.in b/docs/src/reference-dsl-time.md.in
index 565715b77..9259abb14 100644
--- a/docs/src/reference-dsl-time.md.in
+++ b/docs/src/reference-dsl-time.md.in
@@ -178,7 +178,7 @@ Notes:
   * For `strftime`, this is thanks to [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime), with a Miller-specific modification for fractional seconds.
   * For `strftime`, this is thanks to [https://github.com/pbnjay/strptime](https://github.com/pbnjay/strptime), with Miller-specific modifications.
 
-Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime):
+Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%N`, and `%O` which are Miller-specific additions:
 
 | Pattern | Description |
 |---------|-------------|
@@ -201,6 +201,8 @@ Available format strings for `strftime`, taken directly from [https://github.com
 | `%M` | the minute as a decimal number (00-59) |
 | `%m` | the month as a decimal number (01-12) |
 | `%n` | a newline |
+| `%N` | zero-padded nanoseconds |
+| `%O` | non-zero-padded nanoseconds |
 | `%p` | national representation of either "ante meridiem" (a.m.) or "post meridiem" (p.m.) as appropriate. |
 | `%R` | equivalent to `%H:%M` |
 | `%r` | equivalent to `%I:%M:%S %p` |
@@ -249,6 +251,8 @@ GENMD-RUN-COMMAND
 mlr -n put 'end {
   print strftime(0, "%Y-%m-%dT%H:%M:%SZ");
   print strftime(0, "%FT%TZ");
+  print strfntime(123, "%N");
+  print strfntime(123, "%O");
 }'
 GENMD-EOF
 
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index e34c9f788..6607ad219 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -380,6 +380,7 @@ Consumes records without printing any output,
 Useful for doing a well-formatted check on input data.
 with the exception that warnings are printed to stderr.
 Current checks are:
+* Data are parseable
 * If any key is the empty string
 Options:
 -h|--help Show this message.
@@ -3306,14 +3307,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3322,12 +3323,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3413,11 +3414,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3645,9 +3646,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/internal/pkg/bifs/datetime.go b/internal/pkg/bifs/datetime.go
index 47e908b16..f94da527d 100644
--- a/internal/pkg/bifs/datetime.go
+++ b/internal/pkg/bifs/datetime.go
@@ -425,6 +425,18 @@ func init() {
 	appender9 := strftime.AppendFunc(func(b []byte, t time.Time) []byte {
 		return specificationHelper(b, t, "%09d", 1)
 	})
+	appenderN := strftime.AppendFunc(func(b []byte, t time.Time) []byte {
+		nanos := int(t.Nanosecond())
+		s := fmt.Sprintf("%09d", nanos)
+		//return append(b, []byte(s))
+		return append(b, s...)
+	})
+	appenderO := strftime.AppendFunc(func(b []byte, t time.Time) []byte {
+		nanos := int(t.Nanosecond())
+		s := fmt.Sprintf("%d", nanos)
+		//return append(b, []byte(s))
+		return append(b, s...)
+	})
 
 	ss := strftime.NewSpecificationSet()
 	ss.Set('1', appender1)
@@ -436,6 +448,8 @@ func init() {
 	ss.Set('7', appender7)
 	ss.Set('8', appender8)
 	ss.Set('9', appender9)
+	ss.Set('N', appenderN)
+	ss.Set('O', appenderO)
 
 	strftimeExtensions = strftime.WithSpecificationSet(ss)
 }
diff --git a/man/manpage.txt b/man/manpage.txt
index 4aad07549..f5f2c3090 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -919,6 +919,7 @@ MILLER(1)                                                            MILLER(1)
        Useful for doing a well-formatted check on input data.
        with the exception that warnings are printed to stderr.
        Current checks are:
+       * Data are parseable
        * If any key is the empty string
        Options:
        -h|--help Show this message.
@@ -3412,4 +3413,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-06-25                         MILLER(1)
+                                  2023-07-02                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index a0bd1c531..8b3fdb2e1 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-06-25
+.\"      Date: 2023-07-02
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-06-25" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-07-02" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1126,6 +1126,7 @@ Consumes records without printing any output,
 Useful for doing a well-formatted check on input data.
 with the exception that warnings are printed to stderr.
 Current checks are:
+* Data are parseable
 * If any key is the empty string
 Options:
 -h|--help Show this message.
diff --git a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout
index 057ff69bd..6e6a4997a 100644
--- a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout
+++ b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout
@@ -10,3 +10,5 @@ TZ is Asia/Istanbul
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 000123456
+00 123456
diff --git a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout
index 36a591e67..d87191609 100644
--- a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout
+++ b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout
@@ -10,3 +10,5 @@ TZ is America/Sao_Paulo
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 000123456
+00 123456
diff --git a/test/cases/dsl-local-date-time-functions/strfntime-utc/expout b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout
index 62b9c1fe6..b6ea47b6f 100644
--- a/test/cases/dsl-local-date-time-functions/strfntime-utc/expout
+++ b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout
@@ -10,3 +10,5 @@ TZ is UTC
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 000123456
+00 123456
diff --git a/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout
index b87bc3d3a..6dc7ae793 100644
--- a/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout
+++ b/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout
@@ -10,3 +10,5 @@ TZ is Asia/Istanbul
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 123456000
+00 123456000
diff --git a/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout
index addbf579f..af2973066 100644
--- a/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout
+++ b/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout
@@ -10,3 +10,5 @@ TZ is America/Sao_Paulo
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 123456000
+00 123456000
diff --git a/test/cases/dsl-local-date-time-functions/strftime-utc/expout b/test/cases/dsl-local-date-time-functions/strftime-utc/expout
index d47ca354f..ba9190df5 100644
--- a/test/cases/dsl-local-date-time-functions/strftime-utc/expout
+++ b/test/cases/dsl-local-date-time-functions/strftime-utc/expout
@@ -10,3 +10,5 @@ TZ is UTC
 1970-01-01 00:00:00 +0000
 1970-01-01 00:00:00 UTC
 1970-01-01 00:00:00 +0000
+00 123456000
+00 123456000
diff --git a/test/input/strfntime-tz.mlr b/test/input/strfntime-tz.mlr
index 45f507617..0fc2dd72b 100644
--- a/test/input/strfntime-tz.mlr
+++ b/test/input/strfntime-tz.mlr
@@ -13,5 +13,7 @@ end {
   print strfntime(123456, "%Y-%m-%d %H:%M:%S %z");
   print strfntime(0,      "%Y-%m-%d %H:%M:%S %Z");
   print strfntime(0,      "%Y-%m-%d %H:%M:%S %z");
+  print strfntime(123456, "%S %N");
+  print strfntime(123456, "%S %O");
 
 }
diff --git a/test/input/strftime-tz.mlr b/test/input/strftime-tz.mlr
index f789f2fcc..fecb46b77 100644
--- a/test/input/strftime-tz.mlr
+++ b/test/input/strftime-tz.mlr
@@ -13,5 +13,7 @@ end {
   print strftime(0.123456, "%Y-%m-%d %H:%M:%S %z");
   print strftime(0.0,      "%Y-%m-%d %H:%M:%S %Z");
   print strftime(0.0,      "%Y-%m-%d %H:%M:%S %z");
+  print strftime(0.123456, "%S %N");
+  print strftime(0.123456, "%S %O");
 
 }

From b30aceae36ae893482bbe959e152e5fb1f8f50b7 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Tue, 4 Jul 2023 17:00:02 -0400
Subject: [PATCH 015/456] Add `%s` format specifier for `strftime` (#1335)

---
 docs/src/manpage.md                                         | 2 +-
 docs/src/manpage.txt                                        | 2 +-
 docs/src/reference-dsl-time.md                              | 3 ++-
 docs/src/reference-dsl-time.md.in                           | 3 ++-
 internal/pkg/bifs/datetime.go                               | 6 ++++++
 man/manpage.txt                                             | 2 +-
 man/mlr.1                                                   | 4 ++--
 .../dsl-local-date-time-functions/strfntime-istanbul/expout | 2 +-
 .../strfntime-sao_paulo/expout                              | 2 +-
 .../dsl-local-date-time-functions/strfntime-utc/expout      | 2 +-
 .../dsl-local-date-time-functions/strftime-istanbul/expout  | 2 +-
 .../dsl-local-date-time-functions/strftime-sao_paulo/expout | 2 +-
 .../cases/dsl-local-date-time-functions/strftime-utc/expout | 2 +-
 test/input/strfntime-tz.mlr                                 | 2 +-
 test/input/strftime-tz.mlr                                  | 2 +-
 15 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index a6220789a..60b9f783e 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -3434,5 +3434,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-07-02                         MILLER(1)
+                                  2023-07-04                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index f5f2c3090..749124fd1 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3413,4 +3413,4 @@ MILLER(1) MILLER(1) - 2023-07-02 MILLER(1) + 2023-07-04 MILLER(1) diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index d7f0111a3..867bc8dc1 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -246,7 +246,7 @@ Notes: * For `strftime`, this is thanks to [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime), with a Miller-specific modification for fractional seconds. * For `strftime`, this is thanks to [https://github.com/pbnjay/strptime](https://github.com/pbnjay/strptime), with Miller-specific modifications. -Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%N`, and `%O` which are Miller-specific additions: +Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%s`, `%N`, and `%O` which are Miller-specific additions: | Pattern | Description | |---------|-------------| @@ -274,6 +274,7 @@ Available format strings for `strftime`, taken directly from [https://github.com | `%p` | national representation of either "ante meridiem" (a.m.) or "post meridiem" (p.m.) as appropriate. 
| | `%R` | equivalent to `%H:%M` | | `%r` | equivalent to `%I:%M:%S %p` | +| `%s` | integer seconds since the epoch | | `%S` | the second as a decimal number (00-60) | | `%1S`, ..., `%9S` | the second as a decimal number (00-60) with 1..9 decimal places, respectively | | `%T` | equivalent to `%H:%M:%S` | diff --git a/docs/src/reference-dsl-time.md.in b/docs/src/reference-dsl-time.md.in index 9259abb14..e2e02c397 100644 --- a/docs/src/reference-dsl-time.md.in +++ b/docs/src/reference-dsl-time.md.in @@ -178,7 +178,7 @@ Notes: * For `strftime`, this is thanks to [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime), with a Miller-specific modification for fractional seconds. * For `strftime`, this is thanks to [https://github.com/pbnjay/strptime](https://github.com/pbnjay/strptime), with Miller-specific modifications. -Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%N`, and `%O` which are Miller-specific additions: +Available format strings for `strftime`, taken directly from [https://github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) except for `%1..%9`, `%s`, `%N`, and `%O` which are Miller-specific additions: | Pattern | Description | |---------|-------------| @@ -206,6 +206,7 @@ Available format strings for `strftime`, taken directly from [https://github.com | `%p` | national representation of either "ante meridiem" (a.m.) or "post meridiem" (p.m.) as appropriate. 
| | `%R` | equivalent to `%H:%M` | | `%r` | equivalent to `%I:%M:%S %p` | +| `%s` | integer seconds since the epoch | | `%S` | the second as a decimal number (00-60) | | `%1S`, ..., `%9S` | the second as a decimal number (00-60) with 1..9 decimal places, respectively | | `%T` | equivalent to `%H:%M:%S` | diff --git a/internal/pkg/bifs/datetime.go b/internal/pkg/bifs/datetime.go index f94da527d..9ab5d3031 100644 --- a/internal/pkg/bifs/datetime.go +++ b/internal/pkg/bifs/datetime.go @@ -437,6 +437,11 @@ func init() { //return append(b, []byte(s)) return append(b, s...) }) + appenderS := strftime.AppendFunc(func(b []byte, t time.Time) []byte { + epochSeconds := t.Unix() + s := fmt.Sprintf("%d", epochSeconds) + return append(b, s...) + }) ss := strftime.NewSpecificationSet() ss.Set('1', appender1) @@ -450,6 +455,7 @@ func init() { ss.Set('9', appender9) ss.Set('N', appenderN) ss.Set('O', appenderO) + ss.Set('s', appenderS) strftimeExtensions = strftime.WithSpecificationSet(ss) } diff --git a/man/manpage.txt b/man/manpage.txt index f5f2c3090..749124fd1 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3413,4 +3413,4 @@ MILLER(1) MILLER(1) - 2023-07-02 MILLER(1) + 2023-07-04 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 8b3fdb2e1..8a6079eff 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-07-02 +.\" Date: 2023-07-04 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-07-02" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-07-04" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout index 6e6a4997a..585e2c8ac 100644 --- 
a/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout +++ b/test/cases/dsl-local-date-time-functions/strfntime-istanbul/expout @@ -10,5 +10,5 @@ TZ is Asia/Istanbul 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 000123456 +0 000123456 00 123456 diff --git a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout index d87191609..19ad3c083 100644 --- a/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout +++ b/test/cases/dsl-local-date-time-functions/strfntime-sao_paulo/expout @@ -10,5 +10,5 @@ TZ is America/Sao_Paulo 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 000123456 +0 000123456 00 123456 diff --git a/test/cases/dsl-local-date-time-functions/strfntime-utc/expout b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout index b6ea47b6f..926c6da41 100644 --- a/test/cases/dsl-local-date-time-functions/strfntime-utc/expout +++ b/test/cases/dsl-local-date-time-functions/strfntime-utc/expout @@ -10,5 +10,5 @@ TZ is UTC 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 000123456 +0 000123456 00 123456 diff --git a/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout b/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout index 6dc7ae793..06c3473f5 100644 --- a/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout +++ b/test/cases/dsl-local-date-time-functions/strftime-istanbul/expout @@ -10,5 +10,5 @@ TZ is Asia/Istanbul 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 123456000 +0 123456000 00 123456000 diff --git a/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout b/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout index af2973066..66d9b8480 100644 --- a/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout +++ 
b/test/cases/dsl-local-date-time-functions/strftime-sao_paulo/expout @@ -10,5 +10,5 @@ TZ is America/Sao_Paulo 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 123456000 +0 123456000 00 123456000 diff --git a/test/cases/dsl-local-date-time-functions/strftime-utc/expout b/test/cases/dsl-local-date-time-functions/strftime-utc/expout index ba9190df5..a8b422e80 100644 --- a/test/cases/dsl-local-date-time-functions/strftime-utc/expout +++ b/test/cases/dsl-local-date-time-functions/strftime-utc/expout @@ -10,5 +10,5 @@ TZ is UTC 1970-01-01 00:00:00 +0000 1970-01-01 00:00:00 UTC 1970-01-01 00:00:00 +0000 -00 123456000 +0 123456000 00 123456000 diff --git a/test/input/strfntime-tz.mlr b/test/input/strfntime-tz.mlr index 0fc2dd72b..06eeded4e 100644 --- a/test/input/strfntime-tz.mlr +++ b/test/input/strfntime-tz.mlr @@ -13,7 +13,7 @@ end { print strfntime(123456, "%Y-%m-%d %H:%M:%S %z"); print strfntime(0, "%Y-%m-%d %H:%M:%S %Z"); print strfntime(0, "%Y-%m-%d %H:%M:%S %z"); - print strfntime(123456, "%S %N"); + print strfntime(123456, "%s %N"); print strfntime(123456, "%S %O"); } diff --git a/test/input/strftime-tz.mlr b/test/input/strftime-tz.mlr index fecb46b77..f36a5ab20 100644 --- a/test/input/strftime-tz.mlr +++ b/test/input/strftime-tz.mlr @@ -13,7 +13,7 @@ end { print strftime(0.123456, "%Y-%m-%d %H:%M:%S %z"); print strftime(0.0, "%Y-%m-%d %H:%M:%S %Z"); print strftime(0.0, "%Y-%m-%d %H:%M:%S %z"); - print strftime(0.123456, "%S %N"); + print strftime(0.123456, "%s %N"); print strftime(0.123456, "%S %O"); } From ff658202143d2a5724c803146fdfa9bb34ea10db Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 07:36:09 -0400 Subject: [PATCH 016/456] Bump golang.org/x/text from 0.10.0 to 0.11.0 (#1337) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.10.0 to 0.11.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.10.0...v0.11.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6ef5e3c59..de615ec6e 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.9.0 golang.org/x/term v0.9.0 - golang.org/x/text v0.10.0 + golang.org/x/text v0.11.0 ) require ( diff --git a/go.sum b/go.sum index aca9a90a6..33a4c64df 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= -golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= -golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 1f69807836e1999a7f493f4571641590580d6a50 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 09:52:53 -0400 Subject: [PATCH 017/456] Bump 
golang.org/x/sys from 0.9.0 to 0.10.0 (#1336) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.9.0 to 0.10.0. - [Commits](https://github.com/golang/sys/compare/v0.9.0...v0.10.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index de615ec6e..536b6ddbc 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.9.0 + golang.org/x/sys v0.10.0 golang.org/x/term v0.9.0 golang.org/x/text v0.11.0 ) diff --git a/go.sum b/go.sum index 33a4c64df..d1aa86faa 100644 --- a/go.sum +++ b/go.sum @@ -38,8 +38,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= -golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= From 3e23153aac86b71fbc29de799a2585ded0c21df5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jul 2023 08:40:13 -0400 Subject: [PATCH 018/456] Bump golang.org/x/term from 0.9.0 to 0.10.0 (#1338) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.9.0 to 0.10.0. - [Commits](https://github.com/golang/term/compare/v0.9.0...v0.10.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 536b6ddbc..c03e8ed3d 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.10.0 - golang.org/x/term v0.9.0 + golang.org/x/term v0.10.0 golang.org/x/text v0.11.0 ) diff --git a/go.sum b/go.sum index d1aa86faa..17575027f 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= -golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= +golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 7aa94835287ed6336c4038df7b1edac8c29f0160 Mon Sep 17 00:00:00 
2001 From: Benson Muite Date: Wed, 12 Jul 2023 15:27:23 +0300 Subject: [PATCH 019/456] Update Fedora link (#1339) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c6e7a7cd..c1a1977b4 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ There's a good chance you can get Miller pre-built for your system: [![Ubuntu](https://img.shields.io/badge/distros-ubuntu-db4923.svg)](https://launchpad.net/ubuntu/+source/miller) [![Ubuntu 16.04 LTS](https://img.shields.io/badge/distros-ubuntu1604lts-db4923.svg)](https://launchpad.net/ubuntu/xenial/+package/miller) -[![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://apps.fedoraproject.org/packages/miller) +[![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://packages.fedoraproject.org/pkgs/miller/miller/) [![Debian](https://img.shields.io/badge/distros-debian-c70036.svg)](https://packages.debian.org/stable/miller) [![Gentoo](https://img.shields.io/badge/distros-gentoo-4e4371.svg)](https://packages.gentoo.org/packages/sys-apps/miller) From ad10d16f4edce062a4d389848b86742959bc2f78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 07:57:37 -0400 Subject: [PATCH 020/456] Bump golang.org/x/sys from 0.10.0 to 0.11.0 (#1347) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.10.0 to 0.11.0. - [Commits](https://github.com/golang/sys/compare/v0.10.0...v0.11.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c03e8ed3d..0bbf522f6 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.10.0 + golang.org/x/sys v0.11.0 golang.org/x/term v0.10.0 golang.org/x/text v0.11.0 ) diff --git a/go.sum b/go.sum index 17575027f..0789b74c7 100644 --- a/go.sum +++ b/go.sum @@ -38,8 +38,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= -golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= From f409aa4fd206d8f1dbb0322a1d7888148681965b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 11:42:13 -0400 Subject: [PATCH 021/456] Bump golang.org/x/text from 0.11.0 to 0.12.0 (#1349) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.11.0 to 0.12.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.11.0...v0.12.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0bbf522f6..46893eb5d 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.11.0 golang.org/x/term v0.10.0 - golang.org/x/text v0.11.0 + golang.org/x/text v0.12.0 ) require ( diff --git a/go.sum b/go.sum index 0789b74c7..84c50d04f 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= -golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= -golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= +golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From fcd201d147235bd56ce53f8977c22d8dd935e4ee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 08:09:08 -0400 Subject: [PATCH 022/456] Bump 
actions/setup-go from 4.0.1 to 4.1.0 (#1351) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4.0.1 to 4.1.0. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/fac708d6674e30b6ba41289acaab6d4b75aa0753...93397bea11091df50f3d7e59dc26a7711a8bcfbe) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0faa6a934..1d44a7b35 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 - name: Set up Go - uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 + uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe with: go-version: 1.18 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 44b46b8b1..767fcd373 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 + uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe with: go-version: ${{ env.GO_VERSION }} id: go From 52db2bf4222131bd521c6e31fbce2396246e44e3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 9 Aug 2023 10:50:26 -0400 Subject: [PATCH 023/456] Small typos in documentation of `mlr nest` (#1352) * Typofix in `nest` documentation * update test/cases/cli-help * artifacts from `make dev` --- docs/src/manpage.md | 6 +++--- docs/src/manpage.txt | 6 +++--- docs/src/reference-verbs.md | 4 ++-- 
internal/pkg/transformers/nest.go | 4 ++-- man/manpage.txt | 6 +++--- man/mlr.1 | 8 ++++---- test/cases/cli-help/0001/expout | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 60b9f783e..f2588aeb3 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1442,8 +1442,8 @@ MILLER(1) MILLER(1) -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. - --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. Examples: @@ -3434,5 +3434,5 @@ MILLER(1) MILLER(1) - 2023-07-04 MILLER(1) + 2023-08-09 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 749124fd1..a2690e7bb 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1421,8 +1421,8 @@ MILLER(1) MILLER(1) -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. - --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. 
Examples: @@ -3413,4 +3413,4 @@ MILLER(1) MILLER(1) - 2023-07-04 MILLER(1) + 2023-08-09 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 6607ad219..998900ddf 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -2188,8 +2188,8 @@ Options: -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. - --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. Examples: diff --git a/internal/pkg/transformers/nest.go b/internal/pkg/transformers/nest.go index 29a034989..c8e98d486 100644 --- a/internal/pkg/transformers/nest.go +++ b/internal/pkg/transformers/nest.go @@ -40,8 +40,8 @@ func transformerNestUsage( fmt.Fprintf(o, " -f {field name} Required.\n") fmt.Fprintf(o, " --nested-fs {string} Defaults to \";\". Field separator for nested values.\n") fmt.Fprintf(o, " --nested-ps {string} Defaults to \":\". 
Pair separator for nested key-value pairs.\n") - fmt.Fprintf(o, " --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string}\n") - fmt.Fprintf(o, " --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string}\n") + fmt.Fprintf(o, " --evar {string} Shorthand for --explode --values --across-records --nested-fs {string}\n") + fmt.Fprintf(o, " --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string}\n") fmt.Fprintf(o, "Please use \"%s --usage-separator-options\" for information on specifying separators.\n", argv0) diff --git a/man/manpage.txt b/man/manpage.txt index 749124fd1..a2690e7bb 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1421,8 +1421,8 @@ MILLER(1) MILLER(1) -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. - --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. 
Examples: @@ -3413,4 +3413,4 @@ MILLER(1) MILLER(1) - 2023-07-04 MILLER(1) + 2023-08-09 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 8a6079eff..0f43ac245 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-07-04 +.\" Date: 2023-08-09 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-07-04" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-09" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1796,8 +1796,8 @@ Options: -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. - --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. Examples: diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 398f42702..d6f70fe41 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -597,8 +597,8 @@ Options: -f {field name} Required. --nested-fs {string} Defaults to ";". Field separator for nested values. --nested-ps {string} Defaults to ":". Pair separator for nested key-value pairs. 
- --evar {string} Shorthand for --explode --values ---across-records --nested-fs {string} - --ivar {string} Shorthand for --implode --values ---across-records --nested-fs {string} + --evar {string} Shorthand for --explode --values --across-records --nested-fs {string} + --ivar {string} Shorthand for --implode --values --across-records --nested-fs {string} Please use "mlr --usage-separator-options" for information on specifying separators. Examples: From e62a09e9b9cca8b91e3794c5d63e20c9dc7a434f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Aug 2023 00:53:57 -0400 Subject: [PATCH 024/456] Bump goreleaser/goreleaser-action from 4.3.0 to 4.4.0 (#1354) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 4.3.0 to 4.4.0. - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/336e29918d653399e599bfca99fadc1d7ffbc9f7...3fa32b8bb5620a2c1afe798654bbad59f9da4906) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 767fcd373..9a04cf966 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@336e29918d653399e599bfca99fadc1d7ffbc9f7 + uses: goreleaser/goreleaser-action@3fa32b8bb5620a2c1afe798654bbad59f9da4906 #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From c1572f4787f6b30d18908474beb0345a97375b96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 19 Aug 2023 12:22:54 -0400 Subject: [PATCH 025/456] Bump golang.org/x/term from 0.10.0 to 0.11.0 (#1348) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.10.0 to 0.11.0. - [Commits](https://github.com/golang/term/compare/v0.10.0...v0.11.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 46893eb5d..b9e11f7eb 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.11.0 - golang.org/x/term v0.10.0 + golang.org/x/term v0.11.0 golang.org/x/text v0.12.0 ) diff --git a/go.sum b/go.sum index 84c50d04f..e896c8f4e 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= -golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= +golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0= +golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU= golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 8b22708c274dc95270133aebb6efdce73aab7846 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 13:32:09 -0400 Subject: [PATCH 026/456] Support comments in `mlr -s` files (#1359) * Support comments in `mlr -s` files * doc mods * artifacts from `make dev` * neaten --- docs/src/data-diving-examples.md | 46 +++++++++++++------------- docs/src/example-mlr-s-script | 2 +- docs/src/manpage.md | 7 +++- docs/src/manpage.txt | 7 +++- docs/src/reference-main-flag-list.md | 1 + docs/src/reference-verbs.md | 38 
++++++++++----------- docs/src/scripting.md | 3 +- docs/src/scripting.md.in | 1 + docs/src/two-pass-algorithms.md | 4 +-- internal/pkg/cli/option_parse.go | 11 ++++++ internal/pkg/climain/mlrcli_shebang.go | 18 +++++++--- man/manpage.txt | 7 +++- man/mlr.1 | 9 +++-- 13 files changed, 99 insertions(+), 55 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193..100716ec2 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/example-mlr-s-script b/docs/src/example-mlr-s-script index 7b9cdb972..50e3f5db4 100755 --- a/docs/src/example-mlr-s-script +++ b/docs/src/example-mlr-s-script @@ -1,5 +1,5 @@ #!/usr/bin/env mlr -s --c2p -filter '$quantity != 20' +filter '$quantity != 20' # Here is a comment then count-distinct -f shape then fraction -f count diff --git a/docs/src/manpage.md b/docs/src/manpage.md index f2588aeb3..74d7c6c9a 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -572,6 +572,11 @@ MILLER(1) MILLER(1) to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/. + --s-no-comment-strip {file name} + Take command-line flags from file name, like -s, but + with no comment-stripping. For more information + please see + https://miller.readthedocs.io/en/latest/scripting/. --seed {n} with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`. --tz {timezone} Specify timezone, overriding `$TZ` environment @@ -3434,5 +3439,5 @@ MILLER(1) MILLER(1) - 2023-08-09 MILLER(1) + 2023-08-19 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a2690e7bb..8d79e4f60 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -551,6 +551,11 @@ MILLER(1) MILLER(1) to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/. + --s-no-comment-strip {file name} + Take command-line flags from file name, like -s, but + with no comment-stripping. For more information + please see + https://miller.readthedocs.io/en/latest/scripting/. --seed {n} with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`. 
--tz {timezone} Specify timezone, overriding `$TZ` environment @@ -3413,4 +3418,4 @@ MILLER(1) MILLER(1) - 2023-08-09 MILLER(1) + 2023-08-19 MILLER(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 24e2cbc38..b07c0847e 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -281,6 +281,7 @@ These are flags which don't fit into any other category. * `--ofmtf {n}`: Use --ofmtf 6 as shorthand for --ofmt %.6f, etc. * `--ofmtg {n}`: Use --ofmtg 6 as shorthand for --ofmt %.6g, etc. * `--records-per-batch {n}`: This is an internal parameter for maximum number of records in a batch size. Normally this does not need to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/. +* `--s-no-comment-strip {file name}`: Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see https://miller.readthedocs.io/en/latest/scripting/. * `--seed {n}`: with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`. * `--tz {timezone}`: Specify timezone, overriding `$TZ` environment variable (if any). * `-I`: Process files in-place. For each file name on the command line, output is written to a temp file in the same directory, which is then renamed over the original. Each file is processed in isolation: if the output format is CSV, CSV headers will be present in each output file, statistics are only over each file's own records; and so on. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 998900ddf..a9abbcfe5 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3307,14 +3307,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3323,12 +3323,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3414,11 +3414,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3646,9 +3646,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/scripting.md b/docs/src/scripting.md index 29cac3fb7..71c6b22a0 100644 --- a/docs/src/scripting.md +++ b/docs/src/scripting.md @@ -137,7 +137,7 @@ Here instead of putting `#!/bin/bash` on the first line, we can put `mlr` direct
 #!/usr/bin/env mlr -s
 --c2p
-filter '$quantity != 20'
+filter '$quantity != 20' # Here is a comment
 then count-distinct -f shape
 then fraction -f count
 
@@ -149,6 +149,7 @@ Points: * You leave off the initial `mlr` since that's present on line 1. * You don't need all the backslashing for line-continuations. * You don't need the explicit `--` or `"$@"`. +* All text from `#` to end of line is stripped out. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`. Then you can do diff --git a/docs/src/scripting.md.in b/docs/src/scripting.md.in index 0e4afc9ac..3234c9398 100644 --- a/docs/src/scripting.md.in +++ b/docs/src/scripting.md.in @@ -67,6 +67,7 @@ Points: * You leave off the initial `mlr` since that's present on line 1. * You don't need all the backslashing for line-continuations. * You don't need the explicit `--` or `"$@"`. +* All text from `#` to end of line is stripped out. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`. Then you can do diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go
index cb01c2741..034f3a690 100644
--- a/internal/pkg/cli/option_parse.go
+++ b/internal/pkg/cli/option_parse.go
@@ -2988,5 +2988,16 @@ has its own overhead.`,
 				*pargi += 2
 			},
 		},
+
+		{
+			name: "--s-no-comment-strip",
+			arg:  "{file name}",
+			help: `Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see ` +
+				lib.DOC_URL + `/en/latest/scripting/.`,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				// Already handled in main(). Nothing to do here except to accept this as valid syntax.
+				*pargi += 2
+			},
+		},
 	},
 }
diff --git a/internal/pkg/climain/mlrcli_shebang.go b/internal/pkg/climain/mlrcli_shebang.go
index 99811e6a8..e465ed2b7 100644
--- a/internal/pkg/climain/mlrcli_shebang.go
+++ b/internal/pkg/climain/mlrcli_shebang.go
@@ -3,6 +3,7 @@ package climain
 import (
 	"fmt"
 	"io/ioutil"
+	"regexp"
 	"strings"
 
 	"github.com/johnkerl/miller/internal/pkg/lib"
@@ -25,10 +26,16 @@ import (
 // * This is how shebang lines work
 // * There are Miller verbs with -s flags and we don't want to disrupt their behavior.
 func maybeInterpolateDashS(args []string) ([]string, error) {
+	stripComments := true
+
 	if len(args) < 2 {
 		return args, nil
 	}
-	if args[1] != "-s" { // Normal case
+	if args[1] == "-s" {
+		stripComments = true
+	} else if args[1] == "--s-no-comment-strip" {
+		stripComments = false
+	} else { // Normal case
 		return args, nil
 	}
 	if len(args) < 3 {
@@ -59,9 +66,12 @@ func maybeInterpolateDashS(args []string) ([]string, error) {
 		}
 	}
 
-	// TODO: maybe support comment lines deeper within the script-file.
-	// Make sure they're /^[\s]+#/ since we don't want to disrupt a "#" within
-	// strings which are not actually comment characters.
+	if stripComments {
+		re := regexp.MustCompile(`#.*`)
+		for i, _ := range lines {
+			lines[i] = re.ReplaceAllString(lines[i], "")
+		}
+	}
 
 	// Re-join lines to strings, and pass off to a shell-parser to split into
 	// an args[]-style array.
diff --git a/man/manpage.txt b/man/manpage.txt
index a2690e7bb..8d79e4f60 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -551,6 +551,11 @@ MILLER(1)                                                            MILLER(1)
                                 to be modified, except when input is from `tail -f`.
                                 See also
                                 https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
+       --s-no-comment-strip {file name}
+                                Take command-line flags from file name, like -s, but
+                                with no comment-stripping. For more information
+                                please see
+                                https://miller.readthedocs.io/en/latest/scripting/.
        --seed {n}               with `n` of the form `12345678` or `0xcafefeed`. For
                                 `put`/`filter` `urand`, `urandint`, and `urand32`.
        --tz {timezone}          Specify timezone, overriding `$TZ` environment
@@ -3413,4 +3418,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-09                         MILLER(1)
+                                  2023-08-19                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 0f43ac245..1d25bb6c0 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-09
+.\"      Date: 2023-08-19
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-09" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-19" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -670,6 +670,11 @@ These are flags which don't fit into any other category.
                          to be modified, except when input is from `tail -f`.
                          See also
                          https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
+--s-no-comment-strip {file name}
+                         Take command-line flags from file name, like -s, but
+                         with no comment-stripping. For more information
+                         please see
+                         https://miller.readthedocs.io/en/latest/scripting/.
 --seed {n}               with `n` of the form `12345678` or `0xcafefeed`. For
                          `put`/`filter` `urand`, `urandint`, and `urand32`.
 --tz {timezone}          Specify timezone, overriding `$TZ` environment

From d4a3bf99b22cda273d551f9424db579836e8846a Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 15:22:59 -0400
Subject: [PATCH 027/456] Support ZSTD compression in-process (#1360)

* Support ZSTD compression in-process

* doc mods

* unit-test cases

* doc-gen artifacts
---
 docs/src/customization.md                     |   2 +-
 docs/src/customization.md.in                  |   2 +-
 docs/src/glossary.md                          |   5 +++
 docs/src/glossary.md.in                       |   5 +++
 docs/src/manpage.md                           |   8 +++--
 docs/src/manpage.txt                          |   8 +++--
 docs/src/new-in-miller-6.md                   |   2 +-
 docs/src/new-in-miller-6.md.in                |   2 +-
 docs/src/reference-main-compressed-data.md    |  12 +++----
 docs/src/reference-main-compressed-data.md.in |  12 +++----
 docs/src/reference-main-flag-list.md          |   6 ++--
 go.mod                                        |   1 +
 go.sum                                        |   2 ++
 internal/pkg/cli/option_parse.go              |  24 +++++++++++--
 internal/pkg/lib/file_readers.go              |  33 ++++++++++++++++++
 man/manpage.txt                               |   8 +++--
 man/mlr.1                                     |   8 +++--
 test/cases/io-compressed-input/0014/cmd       |   1 +
 test/cases/io-compressed-input/0014/experr    |   0
 test/cases/io-compressed-input/0014/expout    |   5 +++
 test/cases/io-compressed-input/0015/cmd       |   1 +
 test/cases/io-compressed-input/0015/experr    |   0
 test/cases/io-compressed-input/0015/expout    |   5 +++
 test/cases/io-compressed-input/0016/cmd       |   1 +
 test/cases/io-compressed-input/0016/experr    |   0
 test/cases/io-compressed-input/0016/expout    |   5 +++
 test/input/medium.zst                         | Bin 0 -> 957 bytes
 27 files changed, 130 insertions(+), 28 deletions(-)
 create mode 100644 test/cases/io-compressed-input/0014/cmd
 create mode 100644 test/cases/io-compressed-input/0014/experr
 create mode 100644 test/cases/io-compressed-input/0014/expout
 create mode 100644 test/cases/io-compressed-input/0015/cmd
 create mode 100644 test/cases/io-compressed-input/0015/experr
 create mode 100644 test/cases/io-compressed-input/0015/expout
 create mode 100644 test/cases/io-compressed-input/0016/cmd
 create mode 100644 test/cases/io-compressed-input/0016/experr
 create mode 100644 test/cases/io-compressed-input/0016/expout
 create mode 100644 test/input/medium.zst

diff --git a/docs/src/customization.md b/docs/src/customization.md
index 5a787ad4f..cbc69928f 100644
--- a/docs/src/customization.md
+++ b/docs/src/customization.md
@@ -50,7 +50,7 @@ and the `--csv` part will automatically be understood. If you do want to process
 
 * You can include any command-line flags, except the "terminal" ones such as `--help`.
 
-* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line.
 
diff --git a/docs/src/customization.md.in b/docs/src/customization.md.in
index 9a1d2894b..00367b2f7 100644
--- a/docs/src/customization.md.in
+++ b/docs/src/customization.md.in
@@ -34,7 +34,7 @@ and the `--csv` part will automatically be understood. If you do want to process
 
 * You can include any command-line flags, except the "terminal" ones such as `--help`.
 
-* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line.
 
diff --git a/docs/src/glossary.md b/docs/src/glossary.md
index bb731297b..774975c41 100644
--- a/docs/src/glossary.md
+++ b/docs/src/glossary.md
@@ -905,3 +905,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on
 
 A [data-compression format supported by Miller](reference-main-compressed-data.md).
 Files compressed using ZLIB compression normally end in `.z`.
+
+## ZSTD / .zst
+
+A [data-compression format supported by Miller](reference-main-compressed-data.md).
+Files compressed using ZSTD compression normally end in `.zst`.
diff --git a/docs/src/glossary.md.in b/docs/src/glossary.md.in
index 7e03b7d11..b8eb8f417 100644
--- a/docs/src/glossary.md.in
+++ b/docs/src/glossary.md.in
@@ -889,3 +889,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on
 
 A [data-compression format supported by Miller](reference-main-compressed-data.md).
 Files compressed using ZLIB compression normally end in `.z`.
+
+## ZSTD / .zst
+
+A [data-compression format supported by Miller](reference-main-compressed-data.md).
+Files compressed using ZSTD compression normally end in `.zst`.
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 74d7c6c9a..aad8a4f50 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -262,7 +262,7 @@ MILLER(1)                                                            MILLER(1)
        Miller offers a few different ways to handle reading data files
             which have been compressed.
 
-       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
        * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
        Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -285,7 +285,7 @@ MILLER(1)                                                            MILLER(1)
 
        Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
        decisions that might have been made based on the file suffix. Likewise,
-       `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+       `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
        --bz2in                  Uncompress bzip2 within the Miller process. Done by
                                 default if file ends in `.bz2`.
@@ -302,6 +302,8 @@ MILLER(1)                                                            MILLER(1)
                                 `.mlrrc`.
        --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                                 `.mlrrc`.
+       --prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                                in `.mlrrc`.
        --prepipex {decompression command}
                                 Like `--prepipe` with one exception: doesn't insert
                                 `<` between command and filename at runtime. Useful
@@ -310,6 +312,8 @@ MILLER(1)                                                            MILLER(1)
                                 in `.mlrrc` to avoid unexpected code execution.
        --zin                    Uncompress zlib within the Miller process. Done by
                                 default if file ends in `.z`.
+       --zstdin                 Uncompress zstd within the Miller process. Done by
+                                default if file ends in `.zst`.
 
 1mCSV/TSV-ONLY FLAGS0m
        These are flags which are applicable to CSV format.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 8d79e4f60..1d5912853 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -241,7 +241,7 @@ MILLER(1)                                                            MILLER(1)
        Miller offers a few different ways to handle reading data files
             which have been compressed.
 
-       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
        * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
        Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -264,7 +264,7 @@ MILLER(1)                                                            MILLER(1)
 
        Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
        decisions that might have been made based on the file suffix. Likewise,
-       `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+       `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
        --bz2in                  Uncompress bzip2 within the Miller process. Done by
                                 default if file ends in `.bz2`.
@@ -281,6 +281,8 @@ MILLER(1)                                                            MILLER(1)
                                 `.mlrrc`.
        --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                                 `.mlrrc`.
+       --prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                                in `.mlrrc`.
        --prepipex {decompression command}
                                 Like `--prepipe` with one exception: doesn't insert
                                 `<` between command and filename at runtime. Useful
@@ -289,6 +291,8 @@ MILLER(1)                                                            MILLER(1)
                                 in `.mlrrc` to avoid unexpected code execution.
        --zin                    Uncompress zlib within the Miller process. Done by
                                 default if file ends in `.z`.
+       --zstdin                 Uncompress zstd within the Miller process. Done by
+                                default if file ends in `.zst`.
 
 1mCSV/TSV-ONLY FLAGS0m
        These are flags which are applicable to CSV format.
diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md
index 3170819c9..32633b6f8 100644
--- a/docs/src/new-in-miller-6.md
+++ b/docs/src/new-in-miller-6.md
@@ -143,7 +143,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe
 
 ### In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ### Support for reading web URLs
 
diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in
index 43ea44d90..c450a9622 100644
--- a/docs/src/new-in-miller-6.md.in
+++ b/docs/src/new-in-miller-6.md.in
@@ -125,7 +125,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe
 
 ### In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ### Support for reading web URLs
 
diff --git a/docs/src/reference-main-compressed-data.md b/docs/src/reference-main-compressed-data.md
index a54ed8026..729cf5bbc 100644
--- a/docs/src/reference-main-compressed-data.md
+++ b/docs/src/reference-main-compressed-data.md
@@ -16,13 +16,13 @@ Quick links:
 
 # Compressed data
 
-As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and
-ZLIB formats transparently, and in-process. And (as before Miller 6) you have a
+As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and
+ZSTD formats transparently, and in-process. And (as before Miller 6) you have a
 more general `--prepipe` option to support other decompression programs.
 
 ## Automatic detection on input
 
-If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension:
+If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 
 
 file gz-example.csv.gz
@@ -52,7 +52,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -94,7 +94,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -107,7 +107,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-compressed-data.md.in b/docs/src/reference-main-compressed-data.md.in
index b13e5e732..cbca6a3c3 100644
--- a/docs/src/reference-main-compressed-data.md.in
+++ b/docs/src/reference-main-compressed-data.md.in
@@ -1,12 +1,12 @@
 # Compressed data
 
-As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and
-ZLIB formats transparently, and in-process. And (as before Miller 6) you have a
+As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and
+ZSTD formats transparently, and in-process. And (as before Miller 6) you have a
 more general `--prepipe` option to support other decompression programs.
 
 ## Automatic detection on input
 
-If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension:
+If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 file gz-example.csv.gz
@@ -21,7 +21,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -50,7 +50,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -63,7 +63,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index b07c0847e..8e2daf9d0 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -72,7 +72,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -95,7 +95,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 
 **Flags:**
@@ -106,8 +106,10 @@ decisions that might have been made based on the file suffix. Likewise,
 * `--prepipe-bz2`: Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.
 * `--prepipe-gunzip`: Same as  `--prepipe gunzip`, except this is allowed in `.mlrrc`.
 * `--prepipe-zcat`: Same as  `--prepipe zcat`, except this is allowed in `.mlrrc`.
+* `--prepipe-zstdcat`: Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.
 * `--prepipex {decompression command}`: Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful for some commands like `unzip -qc` which don't read standard input.  Allowed at the command line, but not in `.mlrrc` to avoid unexpected code execution.
 * `--zin`: Uncompress zlib within the Miller process. Done by default if file ends in `.z`.
+* `--zstdin`: Uncompress zstd within the Miller process. Done by default if file ends in `.zst`.
 
 ## CSV/TSV-only flags
 
diff --git a/go.mod b/go.mod
index b9e11f7eb..2373dea14 100644
--- a/go.mod
+++ b/go.mod
@@ -34,6 +34,7 @@ require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/felixge/fgprof v0.9.3 // indirect
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
+	github.com/klauspost/compress v1.16.7 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index e896c8f4e..84593de57 100644
--- a/go.sum
+++ b/go.sum
@@ -15,6 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
+github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
+github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ=
diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go
index 034f3a690..0ee362f2b 100644
--- a/internal/pkg/cli/option_parse.go
+++ b/internal/pkg/cli/option_parse.go
@@ -2200,7 +2200,8 @@ func CompressedDataPrintInfo() {
 	fmt.Print(`Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + `
+* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + ` ` + "`--zstdin`" +
+		`
 * Decompression done outside the Miller process: ` + "`--prepipe`" + ` ` + "`--prepipex`" + `
 
 Using ` + "`--prepipe`" + ` and ` + "`--prepipex`" + ` you can specify an action to be
@@ -2223,7 +2224,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if ` + "`--prepipe`" + ` or ` + "`--prepipex`" + ` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified.
+` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + `/` + "`--zstdin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified.
 `)
 }
 
@@ -2278,6 +2279,16 @@ var CompressedDataFlagSection = FlagSection{
 			},
 		},
 
+		{
+			name: "--prepipe-zstdcat",
+			help: "Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.Prepipe = "zstdcat"
+				options.ReaderOptions.PrepipeIsRaw = false
+				*pargi += 1
+			},
+		},
+
 		{
 			name: "--prepipe-bz2",
 			help: "Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.",
@@ -2314,6 +2325,15 @@ var CompressedDataFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+
+		{
+			name: "--zstdin",
+			help: "Uncompress zstd within the Miller process. Done by default if file ends in `.zst`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.FileInputEncoding = lib.FileInputEncodingZstd
+				*pargi += 1
+			},
+		},
 	},
 }
 
diff --git a/internal/pkg/lib/file_readers.go b/internal/pkg/lib/file_readers.go
index fa42688ef..151120040 100644
--- a/internal/pkg/lib/file_readers.go
+++ b/internal/pkg/lib/file_readers.go
@@ -25,6 +25,7 @@ import (
 	"compress/gzip"
 	"compress/zlib"
 	"fmt"
+	"github.com/klauspost/compress/zstd"
 	"io"
 	"net/http"
 	"os"
@@ -38,6 +39,7 @@ const (
 	FileInputEncodingBzip2
 	FileInputEncodingGzip
 	FileInputEncodingZlib
+	FileInputEncodingZstd
 )
 
 // OpenFileForRead: If prepipe is non-empty, popens "{prepipe} < {filename}"
@@ -160,6 +162,8 @@ func openEncodedHandleForRead(
 		return gzip.NewReader(handle)
 	case FileInputEncodingZlib:
 		return zlib.NewReader(handle)
+	case FileInputEncodingZstd:
+		return NewZstdReadCloser(handle)
 	}
 
 	InternalCodingErrorIf(encoding != FileInputEncodingDefault)
@@ -173,6 +177,9 @@ func openEncodedHandleForRead(
 	if strings.HasSuffix(filename, ".z") {
 		return zlib.NewReader(handle)
 	}
+	if strings.HasSuffix(filename, ".zst") {
+		return NewZstdReadCloser(handle)
+	}
 
 	// Pass along os.Stdin or os.Open(filename)
 	return handle, nil
@@ -200,6 +207,32 @@ func (rc *BZip2ReadCloser) Close() error {
 	return rc.originalHandle.Close()
 }
 
+// ----------------------------------------------------------------
+// ZstdReadCloser adapts a zstd decoder plus its source handle to io.ReadCloser.
+type ZstdReadCloser struct {
+	originalHandle io.ReadCloser
+	zstdHandle     *zstd.Decoder
+}
+
+// NewZstdReadCloser wraps handle in a zstd decoder; Close also closes handle.
+func NewZstdReadCloser(handle io.ReadCloser) (*ZstdReadCloser, error) {
+	zstdHandle, err := zstd.NewReader(handle)
+	if err != nil {
+		return nil, err
+	}
+	return &ZstdReadCloser{originalHandle: handle, zstdHandle: zstdHandle}, nil
+}
+
+func (rc *ZstdReadCloser) Read(p []byte) (n int, err error) {
+	return rc.zstdHandle.Read(p)
+}
+
+// Close releases the decoder's resources, then closes the original handle.
+func (rc *ZstdReadCloser) Close() error {
+	rc.zstdHandle.Close()
+	return rc.originalHandle.Close()
+}
+
 // ----------------------------------------------------------------
 
 // IsEOF handles the following problem: reading past end of files opened with
diff --git a/man/manpage.txt b/man/manpage.txt
index 8d79e4f60..1d5912853 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -241,7 +241,7 @@ MILLER(1)                                                            MILLER(1)
        Miller offers a few different ways to handle reading data files
             which have been compressed.
 
-       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
        * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
        Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -264,7 +264,7 @@ MILLER(1)                                                            MILLER(1)
 
        Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
        decisions that might have been made based on the file suffix. Likewise,
-       `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+       `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
        --bz2in                  Uncompress bzip2 within the Miller process. Done by
                                 default if file ends in `.bz2`.
@@ -281,6 +281,8 @@ MILLER(1)                                                            MILLER(1)
                                 `.mlrrc`.
        --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                                 `.mlrrc`.
+       --prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                                in `.mlrrc`.
        --prepipex {decompression command}
                                 Like `--prepipe` with one exception: doesn't insert
                                 `<` between command and filename at runtime. Useful
@@ -289,6 +291,8 @@ MILLER(1)                                                            MILLER(1)
                                 in `.mlrrc` to avoid unexpected code execution.
        --zin                    Uncompress zlib within the Miller process. Done by
                                 default if file ends in `.z`.
+       --zstdin                 Uncompress zstd within the Miller process. Done by
+                                default if file ends in `.zst`.
 
 1mCSV/TSV-ONLY FLAGS0m
        These are flags which are applicable to CSV format.
diff --git a/man/mlr.1 b/man/mlr.1
index 1d25bb6c0..583b5dc11 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -304,7 +304,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -327,7 +327,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 --bz2in                  Uncompress bzip2 within the Miller process. Done by
                          default if file ends in `.bz2`.
@@ -344,6 +344,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          `.mlrrc`.
 --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                          `.mlrrc`.
+--prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                         in `.mlrrc`.
 --prepipex {decompression command}
                          Like `--prepipe` with one exception: doesn't insert
                          `<` between command and filename at runtime. Useful
@@ -352,6 +354,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          in `.mlrrc` to avoid unexpected code execution.
 --zin                    Uncompress zlib within the Miller process. Done by
                          default if file ends in `.z`.
+--zstdin                 Uncompress zstd within the Miller process. Done by
+                         default if file ends in `.zst`.
 .fi
 .if n \{\
 .RE
diff --git a/test/cases/io-compressed-input/0014/cmd b/test/cases/io-compressed-input/0014/cmd
new file mode 100644
index 000000000..f6141361e
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/cmd
@@ -0,0 +1 @@
+mlr count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0014/experr b/test/cases/io-compressed-input/0014/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-compressed-input/0014/expout b/test/cases/io-compressed-input/0014/expout
new file mode 100644
index 000000000..7dcf14212
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0015/cmd b/test/cases/io-compressed-input/0015/cmd
new file mode 100644
index 000000000..8a6e18c1e
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a < test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0015/experr b/test/cases/io-compressed-input/0015/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-compressed-input/0015/expout b/test/cases/io-compressed-input/0015/expout
new file mode 100644
index 000000000..7dcf14212
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0016/cmd b/test/cases/io-compressed-input/0016/cmd
new file mode 100644
index 000000000..7d38bc22a
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0016/experr b/test/cases/io-compressed-input/0016/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-compressed-input/0016/expout b/test/cases/io-compressed-input/0016/expout
new file mode 100644
index 000000000..7dcf14212
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/input/medium.zst b/test/input/medium.zst
new file mode 100644
index 0000000000000000000000000000000000000000..f7b5c9a0d729b4df5c16d7bd6767108b4eab3005
GIT binary patch
literal 957
zcmV;u148^LwJ-eyScM$`N>daH7@#T30iBn9E!->i4=+nfDN{;GwEE#G7_I=70FnT2
zg;<7WuTcjK0tyC#L{!PCqsbI4v5N3db#lpQtCfj{=9@+y5~kT?3I_&#v=Q9TB&U`
zDl(5Y#K9I00vH?=ASf6JQ{%Q2+ecn1RWz2W5&F=;fJ4DRXzzL`#311Sh=KwGlY&}%
zQ6nyZC?qgg;UItk0Yia;Awk;Y5Rz45UGv={l;P`kRq}bs5*p5fWPUp_8yfq;;YQ-+
z8bxu10|NsC4x{N?GXF!?NP?;->0DmQ%ZCL$G!UJ)loqv5K?5Cu=+Wj2XhuUOqRlhigtwW5%JQJfT~@(o!Px42*BY0x~U
zm&dd-G2B-D2#cwTi}(e_ip`N32R;VB|mlMy9>n9OyXk;c}|~bc$k_|l?^ynLovO0
f=>7aYPLKWGbLsdn?+i^K;A1?UQbidh3(Fx>)!MFg

literal 0
HcmV?d00001


From 793f52c470a12626ea6837ced1091cb80daa0b23 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 17:23:01 -0400
Subject: [PATCH 028/456] `sub`, `gsub`, and `ssub` verbs (#1361)

* sub, gsub, and ssub verbs

* doc mods

* content for verbs reference page

* test/cases/verb-sub-gsub-ssub/
---
 docs/src/manpage.md                           |  39 ++++-
 docs/src/manpage.txt                          |  39 ++++-
 docs/src/reference-verbs.md                   | 146 ++++++++++++++++
 docs/src/reference-verbs.md.in                |  42 +++++
 .../pkg/transformers/aaa_transformer_table.go |   3 +
 internal/pkg/transformers/gsub.go             | 157 ++++++++++++++++++
 internal/pkg/transformers/ssub.go             | 156 +++++++++++++++++
 internal/pkg/transformers/sub.go              | 157 ++++++++++++++++++
 man/manpage.txt                               |  39 ++++-
 man/mlr.1                                     |  57 ++++++-
 test/cases/cli-help/0001/expout               |  29 ++++
 test/cases/verb-case/x                        |  13 --
 test/cases/verb-sub-gsub-ssub/0001/cmd        |   1 +
 test/cases/verb-sub-gsub-ssub/0001/experr     |   0
 test/cases/verb-sub-gsub-ssub/0001/expout     |  11 ++
 test/cases/verb-sub-gsub-ssub/0002/cmd        |   1 +
 test/cases/verb-sub-gsub-ssub/0002/experr     |   0
 test/cases/verb-sub-gsub-ssub/0002/expout     |  11 ++
 test/cases/verb-sub-gsub-ssub/0003/cmd        |   1 +
 test/cases/verb-sub-gsub-ssub/0003/experr     |   0
 test/cases/verb-sub-gsub-ssub/0003/expout     |  11 ++
 test/cases/verb-sub-gsub-ssub/0004/cmd        |   1 +
 test/cases/verb-sub-gsub-ssub/0004/experr     |   0
 test/cases/verb-sub-gsub-ssub/0004/expout     |  11 ++
 24 files changed, 888 insertions(+), 37 deletions(-)
 create mode 100644 internal/pkg/transformers/gsub.go
 create mode 100644 internal/pkg/transformers/ssub.go
 create mode 100644 internal/pkg/transformers/sub.go
 delete mode 100644 test/cases/verb-case/x
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/expout

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index aad8a4f50..1a9ebea12 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -194,12 +194,13 @@ MILLER(1)                                                            MILLER(1)
 1mVERB LIST0m
        altkv bar bootstrap case cat check clean-whitespace count-distinct count
        count-similar cut decimate fill-down fill-empty filter flatten format-values
-       fraction gap grep group-by group-like having-fields head histogram json-parse
-       json-stringify join label latin1-to-utf8 least-frequent merge-fields
-       most-frequent nest nothing put regularize remove-empty-columns rename reorder
-       repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-       sort sort-within-records split stats1 stats2 step summary tac tail tee
-       template top utf8-to-latin1 unflatten uniq unspace unsparsify
+       fraction gap grep group-by group-like gsub having-fields head histogram
+       json-parse json-stringify join label latin1-to-utf8 least-frequent
+       merge-fields most-frequent nest nothing put regularize remove-empty-columns
+       rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+       unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1245,6 +1246,15 @@ MILLER(1)                                                            MILLER(1)
        Options:
        -h|--help Show this message.
 
+   1mgsub0m
+       Usage: mlr gsub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and handling multiple matches, like the `gsub` DSL function.
+       See also the `sub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mhaving-fields0m
        Usage: mlr having-fields [options]
        Conditionally passes through records depending on each record's field names.
@@ -1853,6 +1863,14 @@ MILLER(1)                                                            MILLER(1)
 
        See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+   1mssub0m
+       Usage: mlr ssub [options]
+       Replaces old string with new string in specified field(s), without regex support for
+       the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mstats10m
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -1990,6 +2008,15 @@ MILLER(1)                                                            MILLER(1)
        https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
        for more information on EWMA.
 
+   1msub0m
+       Usage: mlr sub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and not handling multiple matches, like the `sub` DSL function.
+       See also the `gsub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1msummary0m
        Usage: mlr summary [options]
        Show summary statistics about the input data.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 1d5912853..7372e3768 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -173,12 +173,13 @@ MILLER(1)                                                            MILLER(1)
 1mVERB LIST0m
        altkv bar bootstrap case cat check clean-whitespace count-distinct count
        count-similar cut decimate fill-down fill-empty filter flatten format-values
-       fraction gap grep group-by group-like having-fields head histogram json-parse
-       json-stringify join label latin1-to-utf8 least-frequent merge-fields
-       most-frequent nest nothing put regularize remove-empty-columns rename reorder
-       repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-       sort sort-within-records split stats1 stats2 step summary tac tail tee
-       template top utf8-to-latin1 unflatten uniq unspace unsparsify
+       fraction gap grep group-by group-like gsub having-fields head histogram
+       json-parse json-stringify join label latin1-to-utf8 least-frequent
+       merge-fields most-frequent nest nothing put regularize remove-empty-columns
+       rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+       unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1224,6 +1225,15 @@ MILLER(1)                                                            MILLER(1)
        Options:
        -h|--help Show this message.
 
+   1mgsub0m
+       Usage: mlr gsub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and handling multiple matches, like the `gsub` DSL function.
+       See also the `sub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mhaving-fields0m
        Usage: mlr having-fields [options]
        Conditionally passes through records depending on each record's field names.
@@ -1832,6 +1842,14 @@ MILLER(1)                                                            MILLER(1)
 
        See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+   1mssub0m
+       Usage: mlr ssub [options]
+       Replaces old string with new string in specified field(s), without regex support for
+       the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mstats10m
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -1969,6 +1987,15 @@ MILLER(1)                                                            MILLER(1)
        https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
        for more information on EWMA.
 
+   1msub0m
+       Usage: mlr sub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and not handling multiple matches, like the `sub` DSL function.
+       See also the `gsub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1msummary0m
        Usage: mlr summary [options]
        Show summary statistics about the input data.
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index a9abbcfe5..6e9fbb478 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -1447,6 +1447,55 @@ record_count resource
 150          /path/to/second/file
 
+## gsub + +
+mlr gsub -h
+
+
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ ## having-fields
@@ -3120,6 +3169,54 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
 See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+## ssub + +
+mlr ssub -h
+
+
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+oxample.csv yellow triangle true  1  11    43.6498  9.8870
+oxample.csv red    square   true  2  15    79.2778  0.0130
+oxample.csv red    circle   true  3  16    13.8103  2.9010
+oxample.csv red    square   false 4  48    77.5542  7.4670
+oxample.csv purple triangle false 5  51    81.2290  8.5910
+oxample.csv red    square   false 6  64    77.1991  9.5310
+oxample.csv purple triangle false 7  65    80.1405  5.8240
+oxample.csv yellow circle   true  8  73    63.9785  4.2370
+oxample.csv yellow circle   true  9  87    63.5058  8.3350
+oxample.csv purple square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+exampleocsv yellow triangle true  1  11    43.6498  9.8870
+exampleocsv red    square   true  2  15    79.2778  0.0130
+exampleocsv red    circle   true  3  16    13.8103  2.9010
+exampleocsv red    square   false 4  48    77.5542  7.4670
+exampleocsv purple triangle false 5  51    81.2290  8.5910
+exampleocsv red    square   false 6  64    77.1991  9.5310
+exampleocsv purple triangle false 7  65    80.1405  5.8240
+exampleocsv yellow circle   true  8  73    63.9785  4.2370
+exampleocsv yellow circle   true  9  87    63.5058  8.3350
+exampleocsv purple square   false 10 91    72.3735  8.2430
+
+ ## stats1
@@ -3574,6 +3671,55 @@ $ each 10 uptime | mlr -p step -a delta -f 11
 
 </pre>
 
+## sub
+
+<pre class="pre-highlight-in-pair">
+<b>mlr sub -h</b>
+</pre>
+<pre class="pre-non-highlight-in-pair">
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+</pre>
+
+<pre class="pre-highlight-in-pair">
+<b>mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X</b>
+</pre>
+<pre class="pre-non-highlight-in-pair">
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+</pre>
+
+<pre class="pre-highlight-in-pair">
+<b>mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X</b>
+</pre>
+<pre class="pre-non-highlight-in-pair">
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+</pre>
+
 ## summary
 
 <pre class="pre-highlight-in-pair">
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index 0ff0bd15d..44feda3de 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -487,6 +487,20 @@ GENMD-RUN-COMMAND
 mlr --opprint group-like data/het.dkvp
 GENMD-EOF
 
+## gsub
+
+GENMD-RUN-COMMAND
+mlr gsub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## having-fields
 
 GENMD-RUN-COMMAND
@@ -987,6 +1001,20 @@ GENMD-RUN-COMMAND
 mlr split --help
 GENMD-EOF
 
+## ssub
+
+GENMD-RUN-COMMAND
+mlr ssub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+GENMD-EOF
+
 ## stats1
 
 GENMD-RUN-COMMAND
@@ -1095,6 +1123,20 @@ Example deriving uptime-delta from system uptime:
 
 GENMD-INCLUDE-ESCAPED(data/ping-delta-example.txt)
 
+## sub
+
+GENMD-RUN-COMMAND
+mlr sub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## summary
 
 GENMD-RUN-COMMAND
diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go
index 60f490e0d..ece90a858 100644
--- a/internal/pkg/transformers/aaa_transformer_table.go
+++ b/internal/pkg/transformers/aaa_transformer_table.go
@@ -33,6 +33,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	GrepSetup,
 	GroupBySetup,
 	GroupLikeSetup,
+	GsubSetup,
 	HavingFieldsSetup,
 	HeadSetup,
 	HistogramSetup,
@@ -62,9 +63,11 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	SortSetup,
 	SortWithinRecordsSetup,
 	SplitSetup,
+	SsubSetup,
 	Stats1Setup,
 	Stats2Setup,
 	StepSetup,
+	SubSetup,
 	SummarySetup,
 	TacSetup,
 	TailSetup,
diff --git a/internal/pkg/transformers/gsub.go b/internal/pkg/transformers/gsub.go
new file mode 100644
index 000000000..550aeda5a
--- /dev/null
+++ b/internal/pkg/transformers/gsub.go
@@ -0,0 +1,157 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameGsub = "gsub" // verb name, used by the usage text and by GsubSetup
+
+var GsubSetup = TransformerSetup{ // ties the gsub verb name to its usage and CLI-parse functions
+	Verb:         verbNameGsub,
+	UsageFunc:    transformerGsubUsage,
+	ParseCLIFunc: transformerGsubParseCLI,
+	IgnoresInput: false,
+}
+
+// transformerGsubUsage writes the help text for the gsub verb to o: the usage
+// line first, then the fixed description and option lines, in order.
+func transformerGsubUsage(o *os.File) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub)
+	fmt.Fprint(o, "Replaces old string with new string in specified field(s), with regex support\n")
+	fmt.Fprint(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n")
+	fmt.Fprint(o, "See also the `sub` and `ssub` verbs.\n")
+	fmt.Fprint(o, "Options:\n")
+	fmt.Fprint(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprint(o, "-h|--help   Show this message.\n")
+}
+
+// transformerGsubParseCLI parses the gsub verb's portion of the mlr command
+// line, which starts at args[*pargi], and advances *pargi past everything it
+// consumed: the verb name, its flags, and two positional arguments (the old
+// text and the new text). A missing -f, an unknown flag, or fewer than two
+// positional arguments prints usage to stderr and exits with status 1.
+func transformerGsubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+	// args[*pargi] is the verb name itself; the flags begin right after it.
+	verb := args[*pargi]
+	argi := *pargi + 1
+
+	var fieldNames []string // remains nil unless -f is given
+
+	// Flag loop: argi moves by one for the flag itself, and the -f helper is
+	// handed &argi so it can move it further.
+	for argi < argc {
+		opt := args[argi]
+
+		// The first non-flag argument ends the flags. So does "--": all
+		// transformers must honor it so main-flags can follow verb-flags.
+		if !strings.HasPrefix(opt, "-") || opt == "--" {
+			break
+		}
+		argi++
+
+		switch opt {
+		case "-h", "--help":
+			transformerGsubUsage(os.Stdout)
+			os.Exit(0)
+
+		case "-f":
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+
+		default:
+			transformerGsubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	// The field list is mandatory.
+	if fieldNames == nil {
+		transformerGsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Both positional arguments must be present.
+	if argc-argi < 2 {
+		transformerGsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText, newText := args[argi], args[argi+1]
+
+	// Report how far parsing got, on both passes.
+	*pargi = argi + 2
+
+	// All transformers must do this for main command-line parsing.
+	if !doConstruct {
+		return nil
+	}
+
+	transformer, err := NewTransformerGsub(fieldNames, oldText, newText)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	return transformer
+}
+
+// TransformerGsub is the gsub verb's state: the fields to rewrite plus the two texts passed to bifs.BIF_gsub.
+type TransformerGsub struct {
+	fieldNames []string       // names of the fields to rewrite in each record
+	oldText    *mlrval.Mlrval // first positional argument, wrapped with mlrval.FromString
+	newText    *mlrval.Mlrval // second positional argument, wrapped with mlrval.FromString
+}
+
+// NewTransformerGsub builds a gsub transformer for fieldNames, wrapping the
+// old and new text with mlrval.FromString. The returned error is always nil.
+func NewTransformerGsub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerGsub, error) {
+	return &TransformerGsub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}, nil
+}
+
+// Transform runs bifs.BIF_gsub over each configured field found in the record,
+// then forwards the record. The end-of-stream marker is forwarded untouched.
+func (tr *TransformerGsub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+		return
+	}
+
+	record := inrecAndContext.Record
+	for _, name := range tr.fieldNames {
+		// A nil lookup result means there is nothing to rewrite for this name.
+		current := record.Get(name)
+		if current == nil {
+			continue
+		}
+		replaced := bifs.BIF_gsub(current, tr.oldText, tr.newText)
+		record.PutReference(name, replaced)
+	}
+	outputRecordsAndContexts.PushBack(inrecAndContext)
+}
diff --git a/internal/pkg/transformers/ssub.go b/internal/pkg/transformers/ssub.go
new file mode 100644
index 000000000..bd8e54247
--- /dev/null
+++ b/internal/pkg/transformers/ssub.go
@@ -0,0 +1,156 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameSsub = "ssub" // verb name, used by the usage text and by SsubSetup
+
+var SsubSetup = TransformerSetup{ // ties the ssub verb name to its usage and CLI-parse functions
+	Verb:         verbNameSsub,
+	UsageFunc:    transformerSsubUsage,
+	ParseCLIFunc: transformerSsubParseCLI,
+	IgnoresInput: false,
+}
+
+// transformerSsubUsage writes the help text for the ssub verb to o: the usage
+// line first, then the fixed description and option lines, in order.
+func transformerSsubUsage(o *os.File) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub)
+	fmt.Fprint(o, "Replaces old string with new string in specified field(s), without regex support for\n")
+	fmt.Fprint(o, "the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.\n")
+	fmt.Fprint(o, "Options:\n")
+	fmt.Fprint(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprint(o, "-h|--help   Show this message.\n")
+}
+
+// transformerSsubParseCLI parses the ssub verb's portion of the mlr command
+// line, which starts at args[*pargi], and advances *pargi past everything it
+// consumed: the verb name, its flags, and two positional arguments (the old
+// text and the new text). A missing -f, an unknown flag, or fewer than two
+// positional arguments prints usage to stderr and exits with status 1.
+func transformerSsubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+	// args[*pargi] is the verb name itself; the flags begin right after it.
+	verb := args[*pargi]
+	argi := *pargi + 1
+
+	var fieldNames []string // remains nil unless -f is given
+
+	// Flag loop: argi moves by one for the flag itself, and the -f helper is
+	// handed &argi so it can move it further.
+	for argi < argc {
+		opt := args[argi]
+
+		// The first non-flag argument ends the flags. So does "--": all
+		// transformers must honor it so main-flags can follow verb-flags.
+		if !strings.HasPrefix(opt, "-") || opt == "--" {
+			break
+		}
+		argi++
+
+		switch opt {
+		case "-h", "--help":
+			transformerSsubUsage(os.Stdout)
+			os.Exit(0)
+
+		case "-f":
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+
+		default:
+			transformerSsubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	// The field list is mandatory.
+	if fieldNames == nil {
+		transformerSsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Both positional arguments must be present.
+	if argc-argi < 2 {
+		transformerSsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText, newText := args[argi], args[argi+1]
+
+	// Report how far parsing got, on both passes.
+	*pargi = argi + 2
+
+	// All transformers must do this for main command-line parsing.
+	if !doConstruct {
+		return nil
+	}
+
+	transformer, err := NewTransformerSsub(fieldNames, oldText, newText)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	return transformer
+}
+
+// TransformerSsub is the ssub verb's state: the fields to rewrite plus the two texts passed to bifs.BIF_ssub.
+type TransformerSsub struct {
+	fieldNames []string       // names of the fields to rewrite in each record
+	oldText    *mlrval.Mlrval // first positional argument, wrapped with mlrval.FromString
+	newText    *mlrval.Mlrval // second positional argument, wrapped with mlrval.FromString
+}
+
+// NewTransformerSsub builds an ssub transformer for fieldNames, wrapping the
+// old and new text with mlrval.FromString. The returned error is always nil.
+func NewTransformerSsub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerSsub, error) {
+	return &TransformerSsub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}, nil
+}
+
+// Transform runs bifs.BIF_ssub over each configured field found in the record,
+// then forwards the record. The end-of-stream marker is forwarded untouched.
+func (tr *TransformerSsub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+		return
+	}
+
+	record := inrecAndContext.Record
+	for _, name := range tr.fieldNames {
+		// A nil lookup result means there is nothing to rewrite for this name.
+		current := record.Get(name)
+		if current == nil {
+			continue
+		}
+		replaced := bifs.BIF_ssub(current, tr.oldText, tr.newText)
+		record.PutReference(name, replaced)
+	}
+	outputRecordsAndContexts.PushBack(inrecAndContext)
+}
diff --git a/internal/pkg/transformers/sub.go b/internal/pkg/transformers/sub.go
new file mode 100644
index 000000000..eee778362
--- /dev/null
+++ b/internal/pkg/transformers/sub.go
@@ -0,0 +1,157 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameSub = "sub" // verb name, used by the usage text and by SubSetup
+
+var SubSetup = TransformerSetup{ // ties the sub verb name to its usage and CLI-parse functions
+	Verb:         verbNameSub,
+	UsageFunc:    transformerSubUsage,
+	ParseCLIFunc: transformerSubParseCLI,
+	IgnoresInput: false,
+}
+
+// transformerSubUsage writes the help text for the sub verb to o: the usage
+// line first, then the fixed description and option lines, in order.
+func transformerSubUsage(o *os.File) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub)
+	fmt.Fprint(o, "Replaces old string with new string in specified field(s), with regex support\n")
+	fmt.Fprint(o, "for the old string and not handling multiple matches, like the `sub` DSL function.\n")
+	fmt.Fprint(o, "See also the `gsub` and `ssub` verbs.\n")
+	fmt.Fprint(o, "Options:\n")
+	fmt.Fprint(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprint(o, "-h|--help   Show this message.\n")
+}
+
+// transformerSubParseCLI parses the sub verb's portion of the mlr command
+// line, which starts at args[*pargi], and advances *pargi past everything it
+// consumed: the verb name, its flags, and two positional arguments (the old
+// text and the new text). A missing -f, an unknown flag, or fewer than two
+// positional arguments prints usage to stderr and exits with status 1.
+func transformerSubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+	// args[*pargi] is the verb name itself; the flags begin right after it.
+	verb := args[*pargi]
+	argi := *pargi + 1
+
+	var fieldNames []string // remains nil unless -f is given
+
+	// Flag loop: argi moves by one for the flag itself, and the -f helper is
+	// handed &argi so it can move it further.
+	for argi < argc {
+		opt := args[argi]
+
+		// The first non-flag argument ends the flags. So does "--": all
+		// transformers must honor it so main-flags can follow verb-flags.
+		if !strings.HasPrefix(opt, "-") || opt == "--" {
+			break
+		}
+		argi++
+
+		switch opt {
+		case "-h", "--help":
+			transformerSubUsage(os.Stdout)
+			os.Exit(0)
+
+		case "-f":
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+
+		default:
+			transformerSubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	// The field list is mandatory.
+	if fieldNames == nil {
+		transformerSubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Both positional arguments must be present.
+	if argc-argi < 2 {
+		transformerSubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText, newText := args[argi], args[argi+1]
+
+	// Report how far parsing got, on both passes.
+	*pargi = argi + 2
+
+	// All transformers must do this for main command-line parsing.
+	if !doConstruct {
+		return nil
+	}
+
+	transformer, err := NewTransformerSub(fieldNames, oldText, newText)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	return transformer
+}
+
+// TransformerSub is the sub verb's state: the fields to rewrite plus the two texts passed to bifs.BIF_sub.
+type TransformerSub struct {
+	fieldNames []string       // names of the fields to rewrite in each record
+	oldText    *mlrval.Mlrval // first positional argument, wrapped with mlrval.FromString
+	newText    *mlrval.Mlrval // second positional argument, wrapped with mlrval.FromString
+}
+
+// NewTransformerSub builds a sub transformer for fieldNames, wrapping the
+// old and new text with mlrval.FromString. The returned error is always nil.
+func NewTransformerSub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerSub, error) {
+	return &TransformerSub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}, nil
+}
+
+// Transform runs bifs.BIF_sub over each configured field found in the record,
+// then forwards the record. The end-of-stream marker is forwarded untouched.
+func (tr *TransformerSub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+		return
+	}
+
+	record := inrecAndContext.Record
+	for _, name := range tr.fieldNames {
+		// A nil lookup result means there is nothing to rewrite for this name.
+		current := record.Get(name)
+		if current == nil {
+			continue
+		}
+		replaced := bifs.BIF_sub(current, tr.oldText, tr.newText)
+		record.PutReference(name, replaced)
+	}
+	outputRecordsAndContexts.PushBack(inrecAndContext)
+}
diff --git a/man/manpage.txt b/man/manpage.txt
index 1d5912853..7372e3768 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -173,12 +173,13 @@ MILLER(1)                                                            MILLER(1)
 1mVERB LIST0m
        altkv bar bootstrap case cat check clean-whitespace count-distinct count
        count-similar cut decimate fill-down fill-empty filter flatten format-values
-       fraction gap grep group-by group-like having-fields head histogram json-parse
-       json-stringify join label latin1-to-utf8 least-frequent merge-fields
-       most-frequent nest nothing put regularize remove-empty-columns rename reorder
-       repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-       sort sort-within-records split stats1 stats2 step summary tac tail tee
-       template top utf8-to-latin1 unflatten uniq unspace unsparsify
+       fraction gap grep group-by group-like gsub having-fields head histogram
+       json-parse json-stringify join label latin1-to-utf8 least-frequent
+       merge-fields most-frequent nest nothing put regularize remove-empty-columns
+       rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+       unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1224,6 +1225,15 @@ MILLER(1)                                                            MILLER(1)
        Options:
        -h|--help Show this message.
 
+   1mgsub0m
+       Usage: mlr gsub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and handling multiple matches, like the `gsub` DSL function.
+       See also the `sub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mhaving-fields0m
        Usage: mlr having-fields [options]
        Conditionally passes through records depending on each record's field names.
@@ -1832,6 +1842,14 @@ MILLER(1)                                                            MILLER(1)
 
        See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+   1mssub0m
+       Usage: mlr ssub [options]
+       Replaces old string with new string in specified field(s), without regex support for
+       the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mstats10m
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -1969,6 +1987,15 @@ MILLER(1)                                                            MILLER(1)
        https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
        for more information on EWMA.
 
+   1msub0m
+       Usage: mlr sub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and not handling multiple matches, like the `sub` DSL function.
+       See also the `gsub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1msummary0m
        Usage: mlr summary [options]
        Show summary statistics about the input data.
diff --git a/man/mlr.1 b/man/mlr.1
index 583b5dc11..4e1dc9ca3 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -214,12 +214,13 @@ for all things with "map" in their names.
 .nf
 altkv bar bootstrap case cat check clean-whitespace count-distinct count
 count-similar cut decimate fill-down fill-empty filter flatten format-values
-fraction gap grep group-by group-like having-fields head histogram json-parse
-json-stringify join label latin1-to-utf8 least-frequent merge-fields
-most-frequent nest nothing put regularize remove-empty-columns rename reorder
-repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-sort sort-within-records split stats1 stats2 step summary tac tail tee
-template top utf8-to-latin1 unflatten uniq unspace unsparsify
+fraction gap grep group-by group-like gsub having-fields head histogram
+json-parse json-stringify join label latin1-to-utf8 least-frequent
+merge-fields most-frequent nest nothing put regularize remove-empty-columns
+rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+unsparsify
 .fi
 .if n \{\
 .RE
@@ -1529,6 +1530,21 @@ Options:
 .fi
 .if n \{\
 .RE
+.SS "gsub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "having-fields"
 .if n \{\
 .RS 0
@@ -2311,6 +2327,20 @@ See also the "tee" DSL function which lets you do more ad-hoc customization.
 .fi
 .if n \{\
 .RE
+.SS "ssub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "stats1"
 .if n \{\
 .RS 0
@@ -2466,6 +2496,21 @@ for more information on EWMA.
 .fi
 .if n \{\
 .RE
+.SS "sub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "summary"
 .if n \{\
 .RS 0
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index d6f70fe41..55efea8ac 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -379,6 +379,16 @@ Outputs records in batches having identical field names.
 Options:
 -h|--help Show this message.
 
+================================================================
+gsub
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 having-fields
 Usage: mlr having-fields [options]
@@ -1016,6 +1026,15 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
 
 See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+================================================================
+ssub
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 stats1
 Usage: mlr stats1 [options]
@@ -1156,6 +1175,16 @@ Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter o
 https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
 for more information on EWMA.
 
+================================================================
+sub
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 summary
 Usage: mlr summary [options]
diff --git a/test/cases/verb-case/x b/test/cases/verb-case/x
deleted file mode 100644
index a24cc18bd..000000000
--- a/test/cases/verb-case/x
+++ /dev/null
@@ -1,13 +0,0 @@
-mkdir 0020; echo mlr --from test/input.cases-csv --c2j case -u > 0020/cmd
-mkdir 0021; echo mlr --from test/input.cases-csv --c2j case -l > 0021/cmd
-mkdir 0022; echo mlr --from test/input.cases-csv --c2j case -s > 0022/cmd
-mkdir 0023; echo mlr --from test/input.cases-csv --c2j case -t > 0023/cmd
-mkdir 0024; echo mlr --from test/input.cases-csv --c2j case -k -u > 0024/cmd
-mkdir 0025; echo mlr --from test/input.cases-csv --c2j case -k -l > 0025/cmd
-mkdir 0026; echo mlr --from test/input.cases-csv --c2j case -k -s > 0026/cmd
-mkdir 0027; echo mlr --from test/input.cases-csv --c2j case -k -t > 0027/cmd
-mkdir 0028; echo mlr --from test/input.cases-csv --c2j case -v -u > 0028/cmd
-mkdir 0029; echo mlr --from test/input.cases-csv --c2j case -v -l > 0029/cmd
-mkdir 0030; echo mlr --from test/input.cases-csv --c2j case -v -s > 0030/cmd
-mkdir 0031; echo mlr --from test/input.cases-csv --c2j case -v -t > 0031/cmd
-mkdir 0032; echo mlr --from test/input.cases-csv --c2j case -u apple,ball then case -l cat,dog > 0032/cmd
diff --git a/test/cases/verb-sub-gsub-ssub/0001/cmd b/test/cases/verb-sub-gsub-ssub/0001/cmd
new file mode 100644
index 000000000..7d4cec775
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0001/experr b/test/cases/verb-sub-gsub-ssub/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-sub-gsub-ssub/0001/expout b/test/cases/verb-sub-gsub-ssub/0001/expout
new file mode 100644
index 000000000..917c3f5ed
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0002/cmd b/test/cases/verb-sub-gsub-ssub/0002/cmd
new file mode 100644
index 000000000..f33200891
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy gsub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0002/experr b/test/cases/verb-sub-gsub-ssub/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-sub-gsub-ssub/0002/expout b/test/cases/verb-sub-gsub-ssub/0002/expout
new file mode 100644
index 000000000..49d53727b
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXX pan 6  0.52712616 0.49322129
+Xks zXX 7  0.61178406 0.18788492
+zXX wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0003/cmd b/test/cases/verb-sub-gsub-ssub/0003/cmd
new file mode 100644
index 000000000..ff6b15c4a
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b . X
diff --git a/test/cases/verb-sub-gsub-ssub/0003/experr b/test/cases/verb-sub-gsub-ssub/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-sub-gsub-ssub/0003/expout b/test/cases/verb-sub-gsub-ssub/0003/expout
new file mode 100644
index 000000000..a8b8e8643
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+Xan Xan 1  0.34679014 0.72680286
+Xks Xan 2  0.75867996 0.52215111
+Xye Xye 3  0.20460331 0.33831853
+Xks Xye 4  0.38139939 0.13418874
+Xye Xan 5  0.57328892 0.86362447
+Xee Xan 6  0.52712616 0.49322129
+Xks Xee 7  0.61178406 0.18788492
+Xee Xye 8  0.59855401 0.97618139
+Xat Xye 9  0.03144188 0.74955076
+Xan Xye 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0004/cmd b/test/cases/verb-sub-gsub-ssub/0004/cmd
new file mode 100644
index 000000000..8770d578d
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy ssub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0004/experr b/test/cases/verb-sub-gsub-ssub/0004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-sub-gsub-ssub/0004/expout b/test/cases/verb-sub-gsub-ssub/0004/expout
new file mode 100644
index 000000000..917c3f5ed
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836

From 9d1d2e07ca1314610c17db708a2b9cd4bf759bce Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 17:40:35 -0400
Subject: [PATCH 029/456] Do wildcard globbing on Windows (#1362)

* Glob wildcards on Windows

* test/cases/globbing/0001
---
 internal/pkg/platform/getargs_windows.go | 16 +++++++++++++++-
 test/cases/globbing/0001/a.csv           |  2 ++
 test/cases/globbing/0001/b.csv           |  2 ++
 test/cases/globbing/0001/cmd             |  1 +
 test/cases/globbing/0001/experr          |  0
 test/cases/globbing/0001/expout          |  3 +++
 6 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 test/cases/globbing/0001/a.csv
 create mode 100644 test/cases/globbing/0001/b.csv
 create mode 100644 test/cases/globbing/0001/cmd
 create mode 100644 test/cases/globbing/0001/experr
 create mode 100644 test/cases/globbing/0001/expout

diff --git a/internal/pkg/platform/getargs_windows.go b/internal/pkg/platform/getargs_windows.go
index 536a6288e..4349e4346 100644
--- a/internal/pkg/platform/getargs_windows.go
+++ b/internal/pkg/platform/getargs_windows.go
@@ -11,6 +11,7 @@ package platform
 import (
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
 
 	shellquote "github.com/kballard/go-shellquote"
@@ -76,7 +77,20 @@ func GetArgs() []string {
 		}
 	}
 	//printArgs(retargs, "NEW")
-	return retargs
+
+	globbed := make([]string, 0)
+	for i, _ := range retargs {
+		// Expand things like *.csv
+		matches, err := filepath.Glob(retargs[i])
+		if matches != nil && err == nil {
+			globbed = append(globbed, matches...)
+		} else {
+			globbed = append(globbed, retargs[i])
+		}
+	}
+	//printArgs(globbed, "NEW")
+
+	return globbed
 }
 
 // ----------------------------------------------------------------
diff --git a/test/cases/globbing/0001/a.csv b/test/cases/globbing/0001/a.csv
new file mode 100644
index 000000000..bfde6bfa0
--- /dev/null
+++ b/test/cases/globbing/0001/a.csv
@@ -0,0 +1,2 @@
+a,b,c
+1,2,3
diff --git a/test/cases/globbing/0001/b.csv b/test/cases/globbing/0001/b.csv
new file mode 100644
index 000000000..a9411aa9d
--- /dev/null
+++ b/test/cases/globbing/0001/b.csv
@@ -0,0 +1,2 @@
+a,b,c
+4,5,6
diff --git a/test/cases/globbing/0001/cmd b/test/cases/globbing/0001/cmd
new file mode 100644
index 000000000..a5eecc577
--- /dev/null
+++ b/test/cases/globbing/0001/cmd
@@ -0,0 +1 @@
+mlr --c2p cat ${CASEDIR}/*.csv
diff --git a/test/cases/globbing/0001/experr b/test/cases/globbing/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/globbing/0001/expout b/test/cases/globbing/0001/expout
new file mode 100644
index 000000000..d0c04ad13
--- /dev/null
+++ b/test/cases/globbing/0001/expout
@@ -0,0 +1,3 @@
+a b c
+1 2 3
+4 5 6

From 2107d520fa850fdc5cc45406c7ccd4cd8f62d09f Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 20 Aug 2023 12:20:15 -0400
Subject: [PATCH 030/456] Can't use ${field_name} if it contains UTF-8
 characters also encodeable as Latin-1 (#1363)

* unit-test data

* docgen

* windows unit-test accommodations
---
 docs/src/manpage.md                           |  2 +-
 docs/src/manpage.txt                          |  2 +-
 internal/pkg/parsing/lexer/lexer.go           |  7 ++++---
 internal/pkg/parsing/lexer/transitiontable.go | 12 ++++++++++++
 internal/pkg/parsing/mlr.bnf                  |  1 +
 man/manpage.txt                               |  2 +-
 man/mlr.1                                     |  4 ++--
 test/cases/dsl-utf8-field-names/0001/cmd      |  1 +
 test/cases/dsl-utf8-field-names/0001/experr   |  0
 test/cases/dsl-utf8-field-names/0001/expout   |  3 +++
 test/cases/dsl-utf8-field-names/0001/mlr      |  1 +
 test/cases/dsl-utf8-field-names/0002/cmd      |  1 +
 test/cases/dsl-utf8-field-names/0002/experr   |  0
 test/cases/dsl-utf8-field-names/0002/expout   |  3 +++
 test/cases/dsl-utf8-field-names/0002/mlr      |  1 +
 test/cases/dsl-utf8-field-names/0003/cmd      |  1 +
 test/cases/dsl-utf8-field-names/0003/experr   |  0
 test/cases/dsl-utf8-field-names/0003/expout   |  3 +++
 test/cases/dsl-utf8-field-names/0003/mlr      |  1 +
 test/cases/dsl-utf8-field-names/0004/cmd      |  1 +
 test/cases/dsl-utf8-field-names/0004/experr   |  0
 test/cases/dsl-utf8-field-names/0004/expout   |  3 +++
 test/cases/dsl-utf8-field-names/0004/mlr      |  1 +
 test/input/datos-plurilingües.csv             |  4 ++++
 24 files changed, 46 insertions(+), 8 deletions(-)
 create mode 100644 test/cases/dsl-utf8-field-names/0001/cmd
 create mode 100644 test/cases/dsl-utf8-field-names/0001/experr
 create mode 100644 test/cases/dsl-utf8-field-names/0001/expout
 create mode 100644 test/cases/dsl-utf8-field-names/0001/mlr
 create mode 100644 test/cases/dsl-utf8-field-names/0002/cmd
 create mode 100644 test/cases/dsl-utf8-field-names/0002/experr
 create mode 100644 test/cases/dsl-utf8-field-names/0002/expout
 create mode 100644 test/cases/dsl-utf8-field-names/0002/mlr
 create mode 100644 test/cases/dsl-utf8-field-names/0003/cmd
 create mode 100644 test/cases/dsl-utf8-field-names/0003/experr
 create mode 100644 test/cases/dsl-utf8-field-names/0003/expout
 create mode 100644 test/cases/dsl-utf8-field-names/0003/mlr
 create mode 100644 test/cases/dsl-utf8-field-names/0004/cmd
 create mode 100644 test/cases/dsl-utf8-field-names/0004/experr
 create mode 100644 test/cases/dsl-utf8-field-names/0004/expout
 create mode 100644 test/cases/dsl-utf8-field-names/0004/mlr
 create mode 100644 test/input/datos-plurilingües.csv

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 1a9ebea12..d80193433 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -3470,5 +3470,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-19                         MILLER(1)
+                                  2023-08-20                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 7372e3768..0c04fc330 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3449,4 +3449,4 @@ MILLER(1) MILLER(1) - 2023-08-19 MILLER(1) + 2023-08-20 MILLER(1) diff --git a/internal/pkg/parsing/lexer/lexer.go b/internal/pkg/parsing/lexer/lexer.go index fa32f4989..18fd8ea7a 100644 --- a/internal/pkg/parsing/lexer/lexer.go +++ b/internal/pkg/parsing/lexer/lexer.go @@ -12,7 +12,7 @@ import ( const ( NoState = -1 NumStates = 336 - NumSymbols = 652 + NumSymbols = 653 ) type Lexer struct { @@ -779,6 +779,7 @@ Lexer symbols: 647: 'A'-'Z' 648: 'a'-'z' 649: '0'-'9' -650: \u0100-\U0010ffff -651: . +650: \u00a0-\u00ff +651: \u0100-\U0010ffff +652: . */ diff --git a/internal/pkg/parsing/lexer/transitiontable.go b/internal/pkg/parsing/lexer/transitiontable.go index ed8c21d83..447900840 100644 --- a/internal/pkg/parsing/lexer/transitiontable.go +++ b/internal/pkg/parsing/lexer/transitiontable.go @@ -1500,6 +1500,8 @@ var TransTab = TransitionTable{ return 159 case r == 126: // ['~','~'] return 159 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 159 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 159 } @@ -1840,6 +1842,8 @@ var TransTab = TransitionTable{ return 184 case r == 126: // ['~','~'] return 184 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 184 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 184 } @@ -3144,6 +3148,8 @@ var TransTab = TransitionTable{ return 239 case r == 126: // ['~','~'] return 159 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 159 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 159 } @@ -3444,6 +3450,8 @@ var TransTab = TransitionTable{ return 254 case r == 126: // ['~','~'] return 184 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 184 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 184 } @@ -4604,6 +4612,8 @@ var TransTab = TransitionTable{ return 239 case r == 126: // ['~','~'] return 159 
+ case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 159 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 159 } @@ -4792,6 +4802,8 @@ var TransTab = TransitionTable{ return 254 case r == 126: // ['~','~'] return 184 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 184 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 184 } diff --git a/internal/pkg/parsing/mlr.bnf b/internal/pkg/parsing/mlr.bnf index a14ed4475..6f987c827 100644 --- a/internal/pkg/parsing/mlr.bnf +++ b/internal/pkg/parsing/mlr.bnf @@ -271,6 +271,7 @@ _braced_char | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | ']' | '^' | '_' | '`' | '|' | '~' | ( '\\' '{' ) | ( '\\' '}' ) + | '\u00a0'-'\u00ff' | '\u0100'-'\U0010FFFF' ; braced_field_name: '$' '{' _braced_char { _braced_char } '}' ; diff --git a/man/manpage.txt b/man/manpage.txt index 7372e3768..0c04fc330 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3449,4 +3449,4 @@ MILLER(1) MILLER(1) - 2023-08-19 MILLER(1) + 2023-08-20 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 4e1dc9ca3..ab56c69bb 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-19 +.\" Date: 2023-08-20 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-19" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-20" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/test/cases/dsl-utf8-field-names/0001/cmd b/test/cases/dsl-utf8-field-names/0001/cmd new file mode 100644 index 000000000..c05a5b774 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0001/cmd @@ -0,0 +1 @@ +mlr --c2p filter -f ${CASEDIR}/mlr test/input/datos-plurilingües.csv diff --git a/test/cases/dsl-utf8-field-names/0001/experr b/test/cases/dsl-utf8-field-names/0001/experr new file mode 100644 index 
000000000..e69de29bb diff --git a/test/cases/dsl-utf8-field-names/0001/expout b/test/cases/dsl-utf8-field-names/0001/expout new file mode 100644 index 000000000..79c7e5186 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0001/expout @@ -0,0 +1,3 @@ +año ποσότητα +2021 130 +2022 145 diff --git a/test/cases/dsl-utf8-field-names/0001/mlr b/test/cases/dsl-utf8-field-names/0001/mlr new file mode 100644 index 000000000..df6b0abb3 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0001/mlr @@ -0,0 +1 @@ +$año > 2020 diff --git a/test/cases/dsl-utf8-field-names/0002/cmd b/test/cases/dsl-utf8-field-names/0002/cmd new file mode 100644 index 000000000..c05a5b774 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0002/cmd @@ -0,0 +1 @@ +mlr --c2p filter -f ${CASEDIR}/mlr test/input/datos-plurilingües.csv diff --git a/test/cases/dsl-utf8-field-names/0002/experr b/test/cases/dsl-utf8-field-names/0002/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-utf8-field-names/0002/expout b/test/cases/dsl-utf8-field-names/0002/expout new file mode 100644 index 000000000..79c7e5186 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0002/expout @@ -0,0 +1,3 @@ +año ποσότητα +2021 130 +2022 145 diff --git a/test/cases/dsl-utf8-field-names/0002/mlr b/test/cases/dsl-utf8-field-names/0002/mlr new file mode 100644 index 000000000..2d8badb71 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0002/mlr @@ -0,0 +1 @@ +${año} > 2020 diff --git a/test/cases/dsl-utf8-field-names/0003/cmd b/test/cases/dsl-utf8-field-names/0003/cmd new file mode 100644 index 000000000..c05a5b774 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0003/cmd @@ -0,0 +1 @@ +mlr --c2p filter -f ${CASEDIR}/mlr test/input/datos-plurilingües.csv diff --git a/test/cases/dsl-utf8-field-names/0003/experr b/test/cases/dsl-utf8-field-names/0003/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-utf8-field-names/0003/expout 
b/test/cases/dsl-utf8-field-names/0003/expout new file mode 100644 index 000000000..79c7e5186 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0003/expout @@ -0,0 +1,3 @@ +año ποσότητα +2021 130 +2022 145 diff --git a/test/cases/dsl-utf8-field-names/0003/mlr b/test/cases/dsl-utf8-field-names/0003/mlr new file mode 100644 index 000000000..38bb2d731 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0003/mlr @@ -0,0 +1 @@ +$ποσότητα > 100 diff --git a/test/cases/dsl-utf8-field-names/0004/cmd b/test/cases/dsl-utf8-field-names/0004/cmd new file mode 100644 index 000000000..c05a5b774 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0004/cmd @@ -0,0 +1 @@ +mlr --c2p filter -f ${CASEDIR}/mlr test/input/datos-plurilingües.csv diff --git a/test/cases/dsl-utf8-field-names/0004/experr b/test/cases/dsl-utf8-field-names/0004/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-utf8-field-names/0004/expout b/test/cases/dsl-utf8-field-names/0004/expout new file mode 100644 index 000000000..79c7e5186 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0004/expout @@ -0,0 +1,3 @@ +año ποσότητα +2021 130 +2022 145 diff --git a/test/cases/dsl-utf8-field-names/0004/mlr b/test/cases/dsl-utf8-field-names/0004/mlr new file mode 100644 index 000000000..c2d122478 --- /dev/null +++ b/test/cases/dsl-utf8-field-names/0004/mlr @@ -0,0 +1 @@ +${ποσότητα} > 100 diff --git a/test/input/datos-plurilingües.csv b/test/input/datos-plurilingües.csv new file mode 100644 index 000000000..620d3566d --- /dev/null +++ b/test/input/datos-plurilingües.csv @@ -0,0 +1,4 @@ +año,ποσότητα +2020,100 +2021,130 +2022,145 \ No newline at end of file From aed6de2adb93b68a685403bd2367fcfdbc70b897 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 21 Aug 2023 15:33:33 -0400 Subject: [PATCH 031/456] fix some broken links in README-dev.md --- README-dev.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/README-dev.md b/README-dev.md index 715dee06e..6c3b5ca8c 100644 --- a/README-dev.md +++ b/README-dev.md @@ -179,8 +179,8 @@ See also [./README-profiling.md](./README-profiling.md) and [https://miller.read In summary: -* #765, #774, and #787 were low-hanging fruit. -* #424 was a bit more involved, and reveals that memory allocation -- not just GC -- needs to be handled more mindfully in Go than in C. -* #779 was a bit more involved, and reveals that Go's elegant goroutine/channel processing model comes with the caveat that channelized data should not be organized in many, small pieces. -* #809 was also bit more involved, and reveals that library functions are convenient, but profiling and analysis can sometimes reveal an opportunity for an impact, custom solution. -* #786 was a massive refactor involving about 10KLOC -- in hindsight it would have been best to do this work at the start of the Go port, not at the end. +* [#765](https://github.com/johnkerl/miller/pull/765), [#774](https://github.com/johnkerl/miller/pull/774), and [#787](https://github.com/johnkerl/miller/pull/787) were low-hanging fruit. +* [#424](https://github.com/johnkerl/miller/pull/424) was a bit more involved, and reveals that memory allocation -- not just GC -- needs to be handled more mindfully in Go than in C. +* [#779](https://github.com/johnkerl/miller/pull/779) was a bit more involved, and reveals that Go's elegant goroutine/channel processing model comes with the caveat that channelized data should not be organized in many, small pieces. +* [#809](https://github.com/johnkerl/miller/pull/809) was also bit more involved, and reveals that library functions are convenient, but profiling and analysis can sometimes reveal an opportunity for an impact, custom solution. +* [#786](https://github.com/johnkerl/miller/pull/786) was a massive refactor involving about 10KLOC -- in hindsight it would have been best to do this work at the start of the Go port, not at the end. 
From 9ad9e213da354165124276f56ed49fb24a633648 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 23 Aug 2023 09:55:57 -0400 Subject: [PATCH 032/456] fix codespell ci --- .github/workflows/codespell.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index f5d68e119..a90e79299 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -34,3 +34,12 @@ jobs: check_filenames: true ignore_words_file: .codespellignore skip: "*.csv,*.dkvp,*.txt,*.js,*.html,*.map,*.z,./tags,./test/cases,./docs/src/shapes-of-data.md.in,./docs/src/shapes-of-data.md,test/input/latin1.xtab" + # As of August 2023 or so, Codespell started exiting with status 1 just _examining_ the + # latin1.xtab file which is (intentionally) not UTF-8. Before, it said + # + # Warning: WARNING: Cannot decode file using encoding "utf-8": ./test/input/latin1.xtab + # WARNING: Trying next encoding "iso-8859-1" + # + # but would exit 0. After, it started exiting with a 1. This is annoying as it makes + # every PR red in CI. So we have to use warning mode now. + only_warn: 1 From 12f3b14ce6f42ab22e8acfea9f39e2fe86e7137d Mon Sep 17 00:00:00 2001 From: Eng Zer Jun Date: Wed, 23 Aug 2023 22:18:22 +0800 Subject: [PATCH 033/456] Remove redundant nil check (#1367) From the Go docs [1]: "1. For a nil slice, the number of iterations is 0." "3. If the map is nil, the number of iterations is 0." Therefore, an additional nil check for before the loop is unnecessary. 
[1]: https://go.dev/ref/spec#For_range Signed-off-by: Eng Zer Jun --- internal/pkg/cli/flag_types.go | 16 ++--- internal/pkg/dsl/ast_print.go | 6 +- internal/pkg/dsl/cst/for.go | 10 ++-- internal/pkg/dsl/cst/validate.go | 68 ++++++++++----------- internal/pkg/dsl/cst/warn.go | 100 +++++++++++++++---------------- 5 files changed, 92 insertions(+), 108 deletions(-) diff --git a/internal/pkg/cli/flag_types.go b/internal/pkg/cli/flag_types.go index e8ffabe0c..bcb86e290 100644 --- a/internal/pkg/cli/flag_types.go +++ b/internal/pkg/cli/flag_types.go @@ -398,11 +398,9 @@ func (flag *Flag) Owns(input string) bool { if flag.name == input { return true } - if flag.altNames != nil { - for _, name := range flag.altNames { - if name == input { - return true - } + for _, name := range flag.altNames { + if name == input { + return true } } return false @@ -414,11 +412,9 @@ func (flag *Flag) Matches(input string) bool { if strings.Contains(flag.name, input) { return true } - if flag.altNames != nil { - for _, name := range flag.altNames { - if strings.Contains(name, input) { - return true - } + for _, name := range flag.altNames { + if strings.Contains(name, input) { + return true } } return false diff --git a/internal/pkg/dsl/ast_print.go b/internal/pkg/dsl/ast_print.go index 55dfcc78b..4210e5b17 100644 --- a/internal/pkg/dsl/ast_print.go +++ b/internal/pkg/dsl/ast_print.go @@ -75,10 +75,8 @@ func (node *ASTNode) printAux(depth int) { fmt.Println() // Children, indented one level further - if node.Children != nil { - for _, child := range node.Children { - child.printAux(depth + 1) - } + for _, child := range node.Children { + child.printAux(depth + 1) } } diff --git a/internal/pkg/dsl/cst/for.go b/internal/pkg/dsl/cst/for.go index 162c86e91..3e9b7e309 100644 --- a/internal/pkg/dsl/cst/for.go +++ b/internal/pkg/dsl/cst/for.go @@ -887,12 +887,10 @@ func (node *TripleForLoopNode) Execute(state *runtime.State) (*BlockExitPayload, } for { - if node.precontinuationAssignments != nil 
{ - for _, precontinuationAssignment := range node.precontinuationAssignments { - _, err := precontinuationAssignment.Execute(state) - if err != nil { - return nil, err - } + for _, precontinuationAssignment := range node.precontinuationAssignments { + _, err := precontinuationAssignment.Execute(state) + if err != nil { + return nil, err } } if node.continuationExpressionNode != nil { // empty is true diff --git a/internal/pkg/dsl/cst/validate.go b/internal/pkg/dsl/cst/validate.go index 264c7fe94..e5e4746ef 100644 --- a/internal/pkg/dsl/cst/validate.go +++ b/internal/pkg/dsl/cst/validate.go @@ -35,23 +35,21 @@ func ValidateAST( } } - if ast.RootNode.Children != nil { - for _, astChild := range ast.RootNode.Children { - err := validateASTAux( - astChild, - dslInstanceType, - atTopLevel, - inLoop, - inBeginOrEnd, - inUDF, - inUDS, - isMainBlockLastStatement, - isAssignmentLHS, - isUnset, - ) - if err != nil { - return err - } + for _, astChild := range ast.RootNode.Children { + err := validateASTAux( + astChild, + dslInstanceType, + atTopLevel, + inLoop, + inBeginOrEnd, + inUDF, + inUDS, + isMainBlockLastStatement, + isAssignmentLHS, + isUnset, + ) + if err != nil { + return err } } @@ -219,25 +217,23 @@ func validateASTAux( // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Treewalk - if astNode.Children != nil { - for i, astChild := range astNode.Children { - nextLevelIsAssignmentLHS = astNode.Type == dsl.NodeTypeAssignment && i == 0 - nextLevelIsUnset = astNode.Type == dsl.NodeTypeUnset - err := validateASTAux( - astChild, - dslInstanceType, - nextLevelAtTopLevel, - nextLevelInLoop, - nextLevelInBeginOrEnd, - nextLevelInUDF, - nextLevelInUDS, - isMainBlockLastStatement, - nextLevelIsAssignmentLHS, - nextLevelIsUnset, - ) - if err != nil { - return err - } + for i, astChild := range astNode.Children { + nextLevelIsAssignmentLHS = astNode.Type == dsl.NodeTypeAssignment && i == 0 + nextLevelIsUnset = astNode.Type == dsl.NodeTypeUnset + err := 
validateASTAux( + astChild, + dslInstanceType, + nextLevelAtTopLevel, + nextLevelInLoop, + nextLevelInBeginOrEnd, + nextLevelInUDF, + nextLevelInUDS, + isMainBlockLastStatement, + nextLevelIsAssignmentLHS, + nextLevelIsUnset, + ) + if err != nil { + return err } } diff --git a/internal/pkg/dsl/cst/warn.go b/internal/pkg/dsl/cst/warn.go index 65ca8db04..a66874ee9 100644 --- a/internal/pkg/dsl/cst/warn.go +++ b/internal/pkg/dsl/cst/warn.go @@ -24,16 +24,14 @@ func WarnOnAST( inAssignment := false ok := true - if ast.RootNode.Children != nil { - for _, astChild := range ast.RootNode.Children { - ok1 := warnOnASTAux( - astChild, - variableNamesWrittenTo, - inAssignment, - ) - // Don't end early on first warning; tree-walk to list them all. - ok = ok1 && ok - } + for _, astChild := range ast.RootNode.Children { + ok1 := warnOnASTAux( + astChild, + variableNamesWrittenTo, + inAssignment, + ) + // Don't end early on first warning; tree-walk to list them all. + ok = ok1 && ok } return ok @@ -134,52 +132,50 @@ func warnOnASTAux( // Treewalk to check the rest of the AST below this node. 
- if astNode.Children != nil { - for i, astChild := range astNode.Children { - childInAssignment := inAssignment + for i, astChild := range astNode.Children { + childInAssignment := inAssignment - if astNode.Type == dsl.NodeTypeAssignment && i == 0 { - // LHS of assignment statements + if astNode.Type == dsl.NodeTypeAssignment && i == 0 { + // LHS of assignment statements + childInAssignment = true + } else if astNode.Type == dsl.NodeTypeForLoopOneVariable && i == 0 { + // The 'k' in 'for (k in $*)' + childInAssignment = true + } else if astNode.Type == dsl.NodeTypeForLoopTwoVariable && (i == 0 || i == 1) { + // The 'k' and 'v' in 'for (k,v in $*)' + childInAssignment = true + } else if astNode.Type == dsl.NodeTypeForLoopMultivariable && (i == 0 || i == 1) { + // The 'k1', 'k2', and 'v' in 'for ((k1,k2),v in $*)' + childInAssignment = true + } else if astNode.Type == dsl.NodeTypeParameterList { + childInAssignment = true + } else if inAssignment && astNode.Type == dsl.NodeTypeArrayOrMapIndexAccess { + // In 'z[i] = 1', the 'i' is a read and the 'z' is a write. 
+ // + // mlr --from r put -v -W 'z[i] = 1' + // DSL EXPRESSION: + // z[i]=1 + // + // AST: + // * statement block + // * assignment "=" + // * array or map index access "[]" + // * local variable "z" + // * local variable "i" + // * int literal "1" + if i == 0 { childInAssignment = true - } else if astNode.Type == dsl.NodeTypeForLoopOneVariable && i == 0 { - // The 'k' in 'for (k in $*)' - childInAssignment = true - } else if astNode.Type == dsl.NodeTypeForLoopTwoVariable && (i == 0 || i == 1) { - // The 'k' and 'v' in 'for (k,v in $*)' - childInAssignment = true - } else if astNode.Type == dsl.NodeTypeForLoopMultivariable && (i == 0 || i == 1) { - // The 'k1', 'k2', and 'v' in 'for ((k1,k2),v in $*)' - childInAssignment = true - } else if astNode.Type == dsl.NodeTypeParameterList { - childInAssignment = true - } else if inAssignment && astNode.Type == dsl.NodeTypeArrayOrMapIndexAccess { - // In 'z[i] = 1', the 'i' is a read and the 'z' is a write. - // - // mlr --from r put -v -W 'z[i] = 1' - // DSL EXPRESSION: - // z[i]=1 - // - // AST: - // * statement block - // * assignment "=" - // * array or map index access "[]" - // * local variable "z" - // * local variable "i" - // * int literal "1" - if i == 0 { - childInAssignment = true - } else { - childInAssignment = false - } + } else { + childInAssignment = false } - ok1 := warnOnASTAux( - astChild, - variableNamesWrittenTo, - childInAssignment, - ) - // Don't end early on first error; tree-walk to list them all. - ok = ok1 && ok } + ok1 := warnOnASTAux( + astChild, + variableNamesWrittenTo, + childInAssignment, + ) + // Don't end early on first error; tree-walk to list them all. + ok = ok1 && ok } return ok From e2338195bafaf46d58ddf956c6af88e4fd53c32e Mon Sep 17 00:00:00 2001 From: "Mr. Lance E Sloan" <17595351+sloanlance@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:08:48 -0400 Subject: [PATCH 034/456] filename options for `split` (iss. 
#1365) (#1366) * #1365 - filename options for `split` * Don't use joiner string when prefix is empty. * Add option to specify joiner string. * Add option to not URL-escape file names. * #1365 - update documentation * #1365 - don't URL-escape file name prefix I **_thought_** it'd be cool to apply URL-escaping to the file name prefix as well, just in case it included spaces or other characters. I forgot that a common use for the prefix is to specify a directory path that will contain the file. When the slashes ("`/`") of the path are URL-escaped, they become "`%2F`" and the directories will not be created. So, I moved the prefix handling code to come after the URL-escaping. * #1365 - new `split` options for CLI help output * #1365 - fix escape/suffix logic error Trying to make the `return` statement cleaner, I thought it'd be good to add the file name suffix immediately after the file name is URL-escaped. I'd forgotten that the suffix will not be added if the new `-e` option is used to skip URL-escaping. So, I put the suffix back where I had it. * #1365 - add `split` to the "10 minutes" document Not strictly part of this issue, but as I was checking for docs that I should update as a result of my changes, I noticed this document showed how to split data using the `put` and `tee` combination, but not about the `split` verb. * #1365 - updated manpage When I ran `make dev`, generating `data-diving-examples.md` failed. The two `manpage.txt` files ended up empty, but `mlr.1` seems to be correct. --------- Co-authored-by: Mr. 
Lance E Sloan (sloanlance) --- docs/src/10min.md | 37 +++++++++++++++ docs/src/10min.md.in | 18 ++++++++ docs/src/manpage.txt | 2 + internal/pkg/transformers/split.go | 74 ++++++++++++++++++++---------- man/mlr.1 | 6 ++- test/cases/cli-help/0001/expout | 2 + 6 files changed, 113 insertions(+), 26 deletions(-) diff --git a/docs/src/10min.md b/docs/src/10min.md index 33f7252da..d9e4d2416 100644 --- a/docs/src/10min.md +++ b/docs/src/10min.md @@ -909,3 +909,40 @@ yellow,triangle,true,1,11,43.6498,9.8870 purple,triangle,false,5,51,81.2290,8.5910 purple,triangle,false,7,65,80.1405,5.8240
+ +Alternatively, the `split` verb can do the same thing: + +
+mlr --csv --from example.csv split -g shape
+
+ +
+cat split_circle.csv
+
+
+color,shape,flag,k,index,quantity,rate
+red,circle,true,3,16,13.8103,2.9010
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+
+ +
+cat split_square.csv
+
+
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+purple,square,false,10,91,72.3735,8.2430
+
+ +
+cat split_triangle.csv
+
+
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+
diff --git a/docs/src/10min.md.in b/docs/src/10min.md.in index 7a0696c87..0fdc94bf1 100644 --- a/docs/src/10min.md.in +++ b/docs/src/10min.md.in @@ -434,3 +434,21 @@ GENMD-EOF GENMD-RUN-COMMAND cat triangle.csv GENMD-EOF + +Alternatively, the `split` verb can do the same thing: + +GENMD-RUN-COMMAND +mlr --csv --from example.csv split -g shape +GENMD-EOF + +GENMD-RUN-COMMAND +cat split_circle.csv +GENMD-EOF + +GENMD-RUN-COMMAND +cat split_square.csv +GENMD-EOF + +GENMD-RUN-COMMAND +cat split_triangle.csv +GENMD-EOF diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 0c04fc330..48497719b 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1813,6 +1813,8 @@ MILLER(1) MILLER(1) --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. + -e Do NOT URL-escape names of output files. + -j {j} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 diff --git a/internal/pkg/transformers/split.go b/internal/pkg/transformers/split.go index 579aa0970..6e702d4fd 100644 --- a/internal/pkg/transformers/split.go +++ b/internal/pkg/transformers/split.go @@ -1,7 +1,6 @@ package transformers import ( - "bytes" "container/list" "fmt" "net/url" @@ -17,6 +16,7 @@ import ( // ---------------------------------------------------------------- const verbNameSplit = "split" const splitDefaultOutputFileNamePrefix = "split" +const splitDefaultFileNamePartJoiner = "_" var SplitSetup = TransformerSetup{ Verb: verbNameSplit, @@ -39,6 +39,8 @@ Exactly one of -m, -n, or -g must be supplied. --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. 
-v Send records along to downstream verbs as well as splitting to files. +-e Do NOT URL-escape names of output files. +-j {j} Use string J to join filename parts; default "`+splitDefaultFileNamePartJoiner+`". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 @@ -88,6 +90,8 @@ func transformerSplitParseCLI( var doSize bool = false var groupByFieldNames []string = nil var emitDownstream bool = false + var escapeFileNameCharacters bool = true + var fileNamePartJoiner string = splitDefaultFileNamePartJoiner var doAppend bool = false var outputFileNamePrefix string = splitDefaultOutputFileNamePrefix var outputFileNameSuffix string = "uninit" @@ -138,6 +142,12 @@ func transformerSplitParseCLI( } else if opt == "-v" { emitDownstream = true + } else if opt == "-e" { + escapeFileNameCharacters = false + + } else if opt == "-j" { + fileNamePartJoiner = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + } else { // This is inelegant. For error-proofing we advance argi already in our // loop (so individual if-statements don't need to). 
However, @@ -180,6 +190,8 @@ func transformerSplitParseCLI( doSize, groupByFieldNames, emitDownstream, + escapeFileNameCharacters, + fileNamePartJoiner, doAppend, outputFileNamePrefix, outputFileNameSuffix, @@ -195,14 +207,16 @@ func transformerSplitParseCLI( // ---------------------------------------------------------------- type TransformerSplit struct { - n int64 - outputFileNamePrefix string - outputFileNameSuffix string - emitDownstream bool - ungroupedCounter int64 - groupByFieldNames []string - recordWriterOptions *cli.TWriterOptions - doAppend bool + n int64 + outputFileNamePrefix string + outputFileNameSuffix string + emitDownstream bool + escapeFileNameCharacters bool + fileNamePartJoiner string + ungroupedCounter int64 + groupByFieldNames []string + recordWriterOptions *cli.TWriterOptions + doAppend bool // For doSize ungrouped: only one file open at a time outputHandler output.OutputHandler @@ -220,6 +234,8 @@ func NewTransformerSplit( doSize bool, groupByFieldNames []string, emitDownstream bool, + escapeFileNameCharacters bool, + fileNamePartJoiner string, doAppend bool, outputFileNamePrefix string, outputFileNameSuffix string, @@ -227,14 +243,16 @@ func NewTransformerSplit( ) (*TransformerSplit, error) { tr := &TransformerSplit{ - n: n, - outputFileNamePrefix: outputFileNamePrefix, - outputFileNameSuffix: outputFileNameSuffix, - emitDownstream: emitDownstream, - ungroupedCounter: 0, - groupByFieldNames: groupByFieldNames, - recordWriterOptions: recordWriterOptions, - doAppend: doAppend, + n: n, + outputFileNamePrefix: outputFileNamePrefix, + outputFileNameSuffix: outputFileNameSuffix, + emitDownstream: emitDownstream, + escapeFileNameCharacters: escapeFileNameCharacters, + fileNamePartJoiner: fileNamePartJoiner, + ungroupedCounter: 0, + groupByFieldNames: groupByFieldNames, + recordWriterOptions: recordWriterOptions, + doAppend: doAppend, outputHandler: nil, previousQuotient: -1, @@ -402,13 +420,21 @@ func (tr *TransformerSplit) 
makeUngroupedOutputFileName(k int64) string { func (tr *TransformerSplit) makeGroupedOutputFileName( groupByFieldValues []*mlrval.Mlrval, ) string { - var buffer bytes.Buffer - buffer.WriteString(tr.outputFileNamePrefix) + var fileNameParts []string + for _, groupByFieldValue := range groupByFieldValues { - buffer.WriteString("_") - buffer.WriteString(url.QueryEscape(groupByFieldValue.String())) + fileNameParts = append(fileNameParts, groupByFieldValue.String()) } - buffer.WriteString(".") - buffer.WriteString(tr.outputFileNameSuffix) - return buffer.String() + + fileName := strings.Join(fileNameParts, tr.fileNamePartJoiner) + + if tr.escapeFileNameCharacters { + fileName = url.QueryEscape(fileName) + } + + if tr.outputFileNamePrefix != "" { + fileName = tr.outputFileNamePrefix + tr.fileNamePartJoiner + fileName + } + + return fileName + "." + tr.outputFileNameSuffix } diff --git a/man/mlr.1 b/man/mlr.1 index ab56c69bb..c14251a75 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-20 +.\" Date: 2023-08-22 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-20" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-22" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2296,6 +2296,8 @@ Exactly one of -m, -n, or -g must be supplied. --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. +-e Do NOT URL-escape names of output files. +-j {j} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). 
For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 55efea8ac..14772570b 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -997,6 +997,8 @@ Exactly one of -m, -n, or -g must be supplied. --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. +-e Do NOT URL-escape names of output files. +-j {j} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 From deda2a967ea591c2906f970938fb6e3388fadac6 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 23 Aug 2023 16:09:40 -0400 Subject: [PATCH 035/456] 1366 follow-up --- internal/pkg/transformers/split.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/transformers/split.go b/internal/pkg/transformers/split.go index 6e702d4fd..50d415918 100644 --- a/internal/pkg/transformers/split.go +++ b/internal/pkg/transformers/split.go @@ -40,7 +40,7 @@ Exactly one of -m, -n, or -g must be supplied. -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. -e Do NOT URL-escape names of output files. --j {j} Use string J to join filename parts; default "`+splitDefaultFileNamePartJoiner+`". +-j {J} Use string J to join filename parts; default "`+splitDefaultFileNamePartJoiner+`". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). 
For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 From 4405f732a1fd3f1d4d047bc5991f06a85c2d4714 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 23 Aug 2023 16:19:37 -0400 Subject: [PATCH 036/456] make-dev artifacts from previous commit --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/manpage.md | 4 ++- docs/src/manpage.txt | 4 +-- docs/src/reference-verbs.md | 40 ++++++++++++++------------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 4 ++- man/mlr.1 | 6 ++--- test/cases/cli-help/0001/expout | 2 +- 8 files changed, 58 insertions(+), 52 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index d80193433..f0da5aea9 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1834,6 +1834,8 @@ MILLER(1) MILLER(1) --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. + -e Do NOT URL-escape names of output files. + -j {J} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 @@ -3470,5 +3472,5 @@ MILLER(1) MILLER(1) - 2023-08-20 MILLER(1) + 2023-08-23 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 48497719b..b3352b9a6 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1814,7 +1814,7 @@ MILLER(1) MILLER(1) -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. -e Do NOT URL-escape names of output files. - -j {j} Use string J to join filename parts; default "_". + -j {J} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 @@ -3451,4 +3451,4 @@ MILLER(1) MILLER(1) - 2023-08-20 MILLER(1) + 2023-08-23 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 6e9fbb478..2b7e9501f 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3139,6 +3139,8 @@ Exactly one of -m, -n, or -g must be supplied. --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv". -a Append to existing file(s), if any, rather than overwriting. -v Send records along to downstream verbs as well as splitting to files. +-e Do NOT URL-escape names of output files. 
+-j {J} Use string J to join filename parts; default "_". -h|--help Show this message. Any of the output-format command-line flags (see mlr -h). For example, using mlr --icsv --from myfile.csv split --ojson -n 1000 @@ -3404,14 +3406,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3420,12 +3422,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3511,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3792,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index 0c04fc330..b3352b9a6 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1813,6 +1813,8 @@ MILLER(1)                                                            MILLER(1)
        --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
        -a           Append to existing file(s), if any, rather than overwriting.
        -v           Send records along to downstream verbs as well as splitting to files.
+       -e           Do NOT URL-escape names of output files.
+       -j {J}       Use string J to join filename parts; default "_".
        -h|--help    Show this message.
        Any of the output-format command-line flags (see mlr -h). For example, using
          mlr --icsv --from myfile.csv split --ojson -n 1000
@@ -3449,4 +3451,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-20                         MILLER(1)
+                                  2023-08-23                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index c14251a75..b7c343ce1 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-22
+.\"      Date: 2023-08-23
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-22" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-23" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2297,7 +2297,7 @@ Exactly one  of -m, -n, or -g must be supplied.
 -a           Append to existing file(s), if any, rather than overwriting.
 -v           Send records along to downstream verbs as well as splitting to files.
 -e           Do NOT URL-escape names of output files.
--j {j}       Use string J to join filename parts; default "_".
+-j {J}       Use string J to join filename parts; default "_".
 -h|--help    Show this message.
 Any of the output-format command-line flags (see mlr -h). For example, using
   mlr --icsv --from myfile.csv split --ojson -n 1000
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 14772570b..bdb23ad6c 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -998,7 +998,7 @@ Exactly one  of -m, -n, or -g must be supplied.
 -a           Append to existing file(s), if any, rather than overwriting.
 -v           Send records along to downstream verbs as well as splitting to files.
 -e           Do NOT URL-escape names of output files.
--j {j}       Use string J to join filename parts; default "_".
+-j {J}       Use string J to join filename parts; default "_".
 -h|--help    Show this message.
 Any of the output-format command-line flags (see mlr -h). For example, using
   mlr --icsv --from myfile.csv split --ojson -n 1000

From 392b34fd0489e55028eee578860950297a1cb7cc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 25 Aug 2023 09:06:03 -0400
Subject: [PATCH 037/456] Bump actions/checkout from 3.5.3 to 3.6.0 (#1369)

Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.3 to 3.6.0.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/c85c95e3d7251135ab7dc9ce3241c5835cc595a9...f43a0e5ff2bd294095638e18286ca9a3d1956744)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index c26776f08..63dc1f217 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index a90e79299..e7f0285b0 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 1d44a7b35..9b2de2ff9 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+    - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
 
     - name: Set up Go
       uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9a04cf966..31bcaa825 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
         with:
           fetch-depth: 0
 

From d341cc6dd389d37f6bc429b8651ed384d6d7b828 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 26 Aug 2023 16:02:30 -0400
Subject: [PATCH 038/456] DSL functions for summary stats over arrays / maps
 (#1364)

* DSL stats functions [WIP]

* refactor

* move percentile computation to bifs module; iterate

* mode and antimode

* percentile iterate

* percentile sketching

* neaten

* unit-test iterate

* unify old & new min & max functions

* unit-test cases

* code-dedupe between mode and antimode

* make mode/antimode ties deterministic via first-found-wins rule

* online help strings for new stats DSL functions

* artifacts from `make dev`

* help info on how min/max now recurse into collections

* artifacts from `make dev`

* typofix
---
 docs/src/data-diving-examples.md              |  46 +-
 docs/src/dkvp-examples.md                     |   2 +
 docs/src/manpage.md                           | 196 ++++++-
 docs/src/manpage.txt                          | 196 ++++++-
 docs/src/reference-dsl-builtin-functions.md   | 227 +++++++-
 docs/src/reference-dsl-syntax.md              |   1 +
 docs/src/reference-main-flag-list.md          |   1 +
 docs/src/reference-verbs.md                   |  38 +-
 docs/src/two-pass-algorithms.md               |   4 +-
 internal/pkg/bifs/arithmetic.go               | 191 ++++++-
 internal/pkg/bifs/percentiles.go              | 217 ++++++++
 internal/pkg/bifs/stats.go                    | 497 +++++++++++++++++-
 internal/pkg/bifs/stats_test.go               | 192 +++++++
 .../pkg/dsl/cst/builtin_function_manager.go   | 275 +++++++++-
 internal/pkg/mlrval/mlrval_collections.go     |  48 ++
 internal/pkg/mlrval/mlrval_new.go             |   6 +
 .../transformers/utils/percentile_keeper.go   | 209 +-------
 .../transformers/utils/stats1_accumulators.go |  10 +-
 man/manpage.txt                               | 196 ++++++-
 man/mlr.1                                     | 324 +++++++++++-
 test/cases/dsl-stats/count/various/cmd        |   1 +
 test/cases/dsl-stats/count/various/experr     |   0
 test/cases/dsl-stats/count/various/expout     |  20 +
 test/cases/dsl-stats/count/various/mlr        |  26 +
 .../dsl-stats/distinct_count/various/cmd      |   1 +
 .../dsl-stats/distinct_count/various/experr   |   0
 .../dsl-stats/distinct_count/various/expout   |  32 ++
 .../dsl-stats/distinct_count/various/mlr      |  32 ++
 test/cases/dsl-stats/mode/various/cmd         |   1 +
 test/cases/dsl-stats/mode/various/experr      |   0
 test/cases/dsl-stats/mode/various/expout      |  24 +
 test/cases/dsl-stats/mode/various/mlr         |  28 +
 test/cases/dsl-stats/moments/numeric-000/cmd  |   1 +
 .../dsl-stats/moments/numeric-000/experr      |   0
 .../dsl-stats/moments/numeric-000/expout      |  26 +
 test/cases/dsl-stats/moments/numeric-001/cmd  |   1 +
 .../dsl-stats/moments/numeric-001/experr      |   0
 .../dsl-stats/moments/numeric-001/expout      |  26 +
 test/cases/dsl-stats/moments/numeric-002/cmd  |   1 +
 .../dsl-stats/moments/numeric-002/experr      |   0
 .../dsl-stats/moments/numeric-002/expout      |  26 +
 test/cases/dsl-stats/moments/numeric-003/cmd  |   1 +
 .../dsl-stats/moments/numeric-003/experr      |   0
 .../dsl-stats/moments/numeric-003/expout      |  26 +
 test/cases/dsl-stats/moments/numeric-004/cmd  |   1 +
 .../dsl-stats/moments/numeric-004/experr      |   0
 .../dsl-stats/moments/numeric-004/expout      |  26 +
 test/cases/dsl-stats/moments/numeric-all/cmd  |   1 +
 .../dsl-stats/moments/numeric-all/experr      |   0
 .../dsl-stats/moments/numeric-all/expout      |  26 +
 test/cases/dsl-stats/null_count/various/cmd   |   1 +
 .../cases/dsl-stats/null_count/various/experr |   0
 .../cases/dsl-stats/null_count/various/expout |  20 +
 test/cases/dsl-stats/null_count/various/mlr   |  28 +
 .../dsl-stats/percentiles/non-numeric-000/cmd |   1 +
 .../percentiles/non-numeric-000/experr        |   0
 .../percentiles/non-numeric-000/expout        |  62 +++
 .../dsl-stats/percentiles/non-numeric-001/cmd |   1 +
 .../percentiles/non-numeric-001/experr        |   0
 .../percentiles/non-numeric-001/expout        |  62 +++
 .../dsl-stats/percentiles/non-numeric-002/cmd |   1 +
 .../percentiles/non-numeric-002/experr        |   0
 .../percentiles/non-numeric-002/expout        |  62 +++
 .../dsl-stats/percentiles/non-numeric-003/cmd |   1 +
 .../percentiles/non-numeric-003/experr        |   0
 .../percentiles/non-numeric-003/expout        |  62 +++
 .../dsl-stats/percentiles/non-numeric-004/cmd |   1 +
 .../percentiles/non-numeric-004/experr        |   0
 .../percentiles/non-numeric-004/expout        |  62 +++
 .../dsl-stats/percentiles/non-numeric-all/cmd |   1 +
 .../percentiles/non-numeric-all/experr        |   0
 .../percentiles/non-numeric-all/expout        |  62 +++
 .../dsl-stats/percentiles/numeric-000/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-000/experr  |   0
 .../dsl-stats/percentiles/numeric-000/expout  |  62 +++
 .../dsl-stats/percentiles/numeric-001/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-001/experr  |   0
 .../dsl-stats/percentiles/numeric-001/expout  |  62 +++
 .../dsl-stats/percentiles/numeric-002/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-002/experr  |   0
 .../dsl-stats/percentiles/numeric-002/expout  |  62 +++
 .../dsl-stats/percentiles/numeric-003/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-003/experr  |   0
 .../dsl-stats/percentiles/numeric-003/expout  |  62 +++
 .../dsl-stats/percentiles/numeric-004/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-004/experr  |   0
 .../dsl-stats/percentiles/numeric-004/expout  |  62 +++
 .../dsl-stats/percentiles/numeric-all/cmd     |   1 +
 .../dsl-stats/percentiles/numeric-all/experr  |   0
 .../dsl-stats/percentiles/numeric-all/expout  |  62 +++
 test/cases/dsl-stats/sums/README.txt          |   1 +
 test/input/test-moments.mlr                   |  39 ++
 test/input/test-percentiles.mlr               |  44 ++
 93 files changed, 3731 insertions(+), 361 deletions(-)
 create mode 100644 internal/pkg/bifs/percentiles.go
 create mode 100644 internal/pkg/bifs/stats_test.go
 create mode 100644 test/cases/dsl-stats/count/various/cmd
 create mode 100644 test/cases/dsl-stats/count/various/experr
 create mode 100644 test/cases/dsl-stats/count/various/expout
 create mode 100644 test/cases/dsl-stats/count/various/mlr
 create mode 100644 test/cases/dsl-stats/distinct_count/various/cmd
 create mode 100644 test/cases/dsl-stats/distinct_count/various/experr
 create mode 100644 test/cases/dsl-stats/distinct_count/various/expout
 create mode 100644 test/cases/dsl-stats/distinct_count/various/mlr
 create mode 100644 test/cases/dsl-stats/mode/various/cmd
 create mode 100644 test/cases/dsl-stats/mode/various/experr
 create mode 100644 test/cases/dsl-stats/mode/various/expout
 create mode 100644 test/cases/dsl-stats/mode/various/mlr
 create mode 100644 test/cases/dsl-stats/moments/numeric-000/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-000/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-000/expout
 create mode 100644 test/cases/dsl-stats/moments/numeric-001/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-001/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-001/expout
 create mode 100644 test/cases/dsl-stats/moments/numeric-002/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-002/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-002/expout
 create mode 100644 test/cases/dsl-stats/moments/numeric-003/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-003/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-003/expout
 create mode 100644 test/cases/dsl-stats/moments/numeric-004/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-004/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-004/expout
 create mode 100644 test/cases/dsl-stats/moments/numeric-all/cmd
 create mode 100644 test/cases/dsl-stats/moments/numeric-all/experr
 create mode 100644 test/cases/dsl-stats/moments/numeric-all/expout
 create mode 100644 test/cases/dsl-stats/null_count/various/cmd
 create mode 100644 test/cases/dsl-stats/null_count/various/experr
 create mode 100644 test/cases/dsl-stats/null_count/various/expout
 create mode 100644 test/cases/dsl-stats/null_count/various/mlr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-000/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-000/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-000/expout
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-001/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-001/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-001/expout
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-002/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-002/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-002/expout
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-003/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-003/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-003/expout
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-004/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-004/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-004/expout
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-all/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-all/experr
 create mode 100644 test/cases/dsl-stats/percentiles/non-numeric-all/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-000/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-000/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-000/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-001/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-001/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-001/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-002/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-002/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-002/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-003/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-003/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-003/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-004/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-004/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-004/expout
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-all/cmd
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-all/experr
 create mode 100644 test/cases/dsl-stats/percentiles/numeric-all/expout
 create mode 100644 test/cases/dsl-stats/sums/README.txt
 create mode 100644 test/input/test-moments.mlr
 create mode 100644 test/input/test-percentiles.mlr

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 39738f193..100716ec2 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -160,11 +160,11 @@ CITRUS COUNTY       1332.9                 79974.9                483785.1
   stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
 
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/dkvp-examples.md b/docs/src/dkvp-examples.md index 2f3e3b510..da29db4c3 100644 --- a/docs/src/dkvp-examples.md +++ b/docs/src/dkvp-examples.md @@ -251,6 +251,7 @@ a=eks,b=pan,i=2,y=0.522151,ab=ekspan,iy=2.522151,ta=String,tb=String,ti=Integer, a=wye,b=wye,i=3,y=0.338318,ab=wyewye,iy=3.338318,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=eks,b=wye,i=4,y=0.134188,ab=ekswye,iy=4.134188,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=wye,b=pan,i=5,y=0.863624,ab=wyepan,iy=5.863624,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 Run as-is, then pipe to Miller for pretty-printing: @@ -265,4 +266,5 @@ eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/manpage.md b/docs/src/manpage.md index f0da5aea9..5ab08d255 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -203,32 +203,34 @@ MILLER(1) MILLER(1) unsparsify 1mFUNCTION LIST0m - abs acos acosh any append apply arrayify asin asinh asserting_absent + abs acos acosh antimode any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present 
asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor - fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec - hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty - is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 + flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys + get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec + hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean + is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min - mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 + mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate + nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt - ssub strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sysntime system systime 
systimeint tan tanh tolower toupper truncate typeof - unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- - ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime + strftime_local string strip strlen strpntime strpntime_local strptime + strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system + systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat + unformatx upntime uptime urand urand32 urandelement urandint urandrange + utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < + << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2185,6 +2187,12 @@ MILLER(1) MILLER(1) 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. + 1mantimode0m + (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + antimode([3,3,4,4,4]) is 3 + antimode([3,3,4,4]) is 3 + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: @@ -2309,6 +2317,12 @@ MILLER(1) MILLER(1) 1mcosh0m (class=math #args=1) Hyperbolic cosine. + 1mcount0m + (class=stats #args=1) Returns the length of an array or map.
Returns error for non-array/non-map types. + Examples: + count([7,8,9]) is 3 + count({"a":7,"b":8,"c":9}) is 3 + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. @@ -2318,6 +2332,13 @@ MILLER(1) MILLER(1) 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 + 1mdistinct_count0m + (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Examples: + distinct_count([7,8,9,7]) is 3 + distinct_count([1,"1"]) is 1 + distinct_count([1,1.0]) is 2 + 1merf0m (class=math #args=1) Error function. @@ -2542,6 +2563,11 @@ MILLER(1) MILLER(1) 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. + 1mkurtosis0m + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + kurtosis([4,5,9,10,11]) is -1.6703688 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: @@ -2610,20 +2636,53 @@ MILLER(1) MILLER(1) (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. 1mmax0m - (class=math #args=variadic) Max of n numbers; null loses. + (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+ + 1mmaxlen0m + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + maxlen(["ao", "alto"]) is 4 1mmd50m (class=hashing #args=1) MD5 hash. + 1mmean0m + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + mean([4,5,7,10]) is 6.5 + + 1mmeaneb0m + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + meaneb([4,5,7,10]) is 1.3228756 + + 1mmedian0m + (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. + Examples: + median([3,4,5,6,9,10]) is 6 + median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 + median(["abc", "def", "ghi", "ghi"]) is "ghi" + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) 1mmin0m - (class=math #args=variadic) Min of n numbers; null loses. + (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mminlen0m + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + minlen(["ao", "alto"]) is 2 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) + 1mmode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types.
Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + mode([3,3,4,4,4]) is 4 + mode([3,3,4,4]) is 3 + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) @@ -2653,9 +2712,70 @@ MILLER(1) MILLER(1) nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mnull_count0m + (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Example: + null_count(["a", "", "c"]) is 1 + 1mos0m (class=system #args=0) Returns the operating-system name as a string. + 1mpercentile0m + (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. + Examples: + percentile([3,4,5,6,9,10], 90) is 10 + percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 + percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" + + 1mpercentiles0m + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+ Examples: + + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort + the input before computing percentiles: + + percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } + percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } + + Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: + + percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] + + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces + an error on string inputs: + + percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } + + The percentiles function always sorts its inputs before computing percentiles. If you know your input + is already sorted -- see also the sort_collection function -- then computation will be faster on + large input if you pass in "array_is_sorted": + + x = [6,5,9,10,4,3] + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + x = sort_collection(x) + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + + You can also leverage this feature to compute percentiles on a sort of your choosing.
For example: + + Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") + x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] + Percentiles are taken over the original positions of the words in the array -- "dogs" is last + and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) + x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) + x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) + ["over", "loquaciously"] + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. @@ -2752,6 +2872,11 @@ MILLER(1) MILLER(1) 1msinh0m (class=math #args=1) Hyperbolic sine. + 1mskewness0m + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + skewness([4,5,9,10,11]) is -0.2097285 + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. 
If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: @@ -2768,6 +2893,9 @@ MILLER(1) MILLER(1) Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. + 1msort_collection0m + (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details. + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: @@ -2806,6 +2934,11 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstddev0m + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + stddev([4,5,9,10,11]) is 3.1144823 + 1mstrfntime0m (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: @@ -2893,6 +3026,26 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msum0m + (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types. + Example: + sum([1,2,3,4,5]) is 15 + + 1msum20m + (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types. + Example: + sum2([1,2,3,4,5]) is 55 + + 1msum30m + (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types. + Example: + sum3([1,2,3,4,5]) is 225 + + 1msum40m + (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types. + Example: + sum4([1,2,3,4,5]) is 979 + 1msysntime0m (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. @@ -2971,6 +3124,11 @@ MILLER(1) MILLER(1) $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) + 1mvariance0m + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + variance([4,5,9,10,11]) is 9.7 + 1mversion0m (class=system #args=0) Returns the Miller version as a string. 
@@ -3472,5 +3630,5 @@ MILLER(1) MILLER(1) - 2023-08-23 MILLER(1) + 2023-08-26 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index b3352b9a6..666177bee 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -182,32 +182,34 @@ MILLER(1) MILLER(1) unsparsify 1mFUNCTION LIST0m - abs acos acosh any append apply arrayify asin asinh asserting_absent + abs acos acosh antimode any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor - fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values - gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec - hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty - is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map + count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 + flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys + get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec + hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean + is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8 + is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect 
mapsum max md5 mexp min - mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm + log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 + mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate + nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt - ssub strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sysntime system systime systimeint tan tanh tolower toupper truncate typeof - unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .- - ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime + strftime_local string strip strlen strpntime strpntime_local strptime + strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system + systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat + unformatx upntime uptime urand urand32 urandelement urandint urandrange + utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < + << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2164,6 +2166,12 @@ MILLER(1) MILLER(1) 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. + 1mantimode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. 
Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + antimode([3,3,4,4,4]) is 3 + antimode([3,3,4,4]) is 3 + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: @@ -2288,6 +2296,12 @@ MILLER(1) MILLER(1) 1mcosh0m (class=math #args=1) Hyperbolic cosine. + 1mcount0m + (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types. + Examples: + count([7,8,9]) is 3 + count({"a":7,"b":8,"c":9}) is 3 + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. @@ -2297,6 +2311,13 @@ MILLER(1) MILLER(1) 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 + 1mdistinct_count0m + (class=stats #args=1) Returns the number of disinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Examples: + distinct_count([7,8,9,7]) is 3 + distinct_count([1,"1"]) is 1 + distinct_count([1,1.0]) is 2 + 1merf0m (class=math #args=1) Error function. @@ -2521,6 +2542,11 @@ MILLER(1) MILLER(1) 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. + 1mkurtosis0m + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. 
+ Example: + kurtosis([4,5,9,10,11]) is -1.6703688 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: @@ -2589,20 +2615,53 @@ MILLER(1) MILLER(1) (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. 1mmax0m - (class=math #args=variadic) Max of n numbers; null loses. + (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mmaxlen0m + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + maxlen(["ao", "alto"]) is 4 1mmd50m (class=hashing #args=1) MD5 hash. + 1mmean0m + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + mean([4,5,7,10]) is 6.5 + + 1mmeaneb0m + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + Example: + meaneb([4,5,7,10]) is 1.3228756 + + 1mmedian0m + (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. 
+ Examples: + median([3,4,5,6,9,10]) is 6 + median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 + median(["abc", "def", "ghi", "ghi"]) is "ghi" + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) 1mmin0m - (class=math #args=variadic) Min of n numbers; null loses. + (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. + + 1mminlen0m + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + minlen(["ao", "alto"]) is 3 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) + 1mmode0m + (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + Examples: + mode([3,3,4,4,4]) is 4 + mode([3,3,4,4]) is 3 + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) @@ -2632,9 +2691,70 @@ MILLER(1) MILLER(1) nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" + 1mnull_count0m + (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. + Example: + null_count(["a", "", "c"]) is 1 + 1mos0m (class=system #args=0) Returns the operating-system name as a string. + 1mpercentile0m + (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. 
Please see the percentiles for information on optional flags, and on performance for large inputs. + Examples: + percentile([3,4,5,6,9,10], 90) is 10 + percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 + percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" + + 1mpercentiles0m + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. + Examples: + + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort + the input before computing percentiles: + + percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } + percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } + + Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: + + percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] + + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces + ,error on string inputs: + + percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } + + The percentiles function always sorts its inputs before computing percentiles. If you know your input + is already sorted -- see also the sort_collection function -- then computation will be faster on + large input if you pass in "array_is_sorted": + + x = [6,5,9,10,4,3] + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + x = sort_collection(x) + percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + + You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: + + Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") + x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] + Percentiles are taken over the original positions of the words in the array -- "dogs" is last + and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) + x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) + x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] + percentiles(x, [50, 99], {"oa":true, "ais":true}) + ["over", "loquaciously"] + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. @@ -2731,6 +2851,11 @@ MILLER(1) MILLER(1) 1msinh0m (class=math #args=1) Hyperbolic sine. + 1mskewness0m + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + skewness([4,5,9,10,11]) is -0.2097285 + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. 
If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: @@ -2747,6 +2872,9 @@ MILLER(1) MILLER(1) Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. + 1msort_collection0m + (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details. + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: @@ -2785,6 +2913,11 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstddev0m + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + stddev([4,5,9,10,11]) is 3.1144823 + 1mstrfntime0m (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: @@ -2872,6 +3005,26 @@ MILLER(1) MILLER(1) 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. + 1msum0m + (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types. + Example: + sum([1,2,3,4,5]) is 15 + + 1msum20m + (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types. + Example: + sum2([1,2,3,4,5]) is 55 + + 1msum30m + (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types. + Example: + sum3([1,2,3,4,5]) is 225 + + 1msum40m + (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types. + Example: + sum4([1,2,3,4,5]) is 979 + 1msysntime0m (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch. @@ -2950,6 +3103,11 @@ MILLER(1) MILLER(1) $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) + 1mvariance0m + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + Example: + variance([4,5,9,10,11]) is 9.7 + 1mversion0m (class=system #args=0) Returns the Miller version as a string. @@ -3451,4 +3609,4 @@ MILLER(1) MILLER(1) - 2023-08-23 MILLER(1) + 2023-08-26 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 7bdb1d5bf..3d24f0984 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -74,6 +74,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Hashing functions**](#hashing-functions): [md5](#md5), [sha1](#sha1), [sha256](#sha256), [sha512](#sha512). 
* [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). +* [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). * [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). @@ -877,13 +878,13 @@ logifit (class=math #args=3) Given m and b from logistic regression, compute fi ### max
-max  (class=math #args=variadic) Max of n numbers; null loses.
+max  (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
### min
-min  (class=math #args=variadic) Min of n numbers; null loses.
+min  (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -972,6 +973,227 @@ urandint (class=math #args=2) Integer uniformly distributed between inclusive i urandrange (class=math #args=2) Floating-point numbers uniformly distributed on the interval [a, b). +## Stats functions + + +### antimode +
+antimode  (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+antimode([3,3,4,4,4]) is 3
+antimode([3,3,4,4]) is 3
+
+ + +### count +
+count  (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+Examples:
+count([7,8,9]) is 3
+count({"a":7,"b":8,"c":9}) is 3
+
+ + +### distinct_count +
+distinct_count  (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Examples:
+distinct_count([7,8,9,7])  is 3
+distinct_count([1,"1"]) is 1
+distinct_count([1,1.0]) is 2
+
+ + +### kurtosis +
+kurtosis  (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+kurtosis([4,5,9,10,11]) is -1.6703688
+
+ + +### maxlen +
+maxlen  (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+maxlen(["año", "alto"]) is 4
+
+ + +### mean +
+mean  (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+mean([4,5,7,10]) is 6.5
+
+ + +### meaneb +
+meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+meaneb([4,5,7,10]) is 1.3228756
+
+ + +### median +
+median  (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+median([3,4,5,6,9,10]) is 6
+median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+median(["abc", "def", "ghi", "ghi"]) is "ghi"
+
+ + +### minlen +
+minlen  (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+minlen(["año", "alto"]) is 3
+
+ + +### mode +
+mode  (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+mode([3,3,4,4,4]) is 4
+mode([3,3,4,4]) is 3
+
+ + +### null_count +
+null_count  (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Example:
+null_count(["a", "", "c"]) is 1
+
+ + +### percentile +
+percentile  (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+percentile([3,4,5,6,9,10], 90) is 10
+percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+
+ + +### percentiles +
+percentiles  (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+Examples:
+
+Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+the input before computing percentiles:
+
+  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+error on string inputs:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+By default, the percentiles function sorts its inputs before computing percentiles. If you know your input
+is already sorted -- see also the sort_collection function -- then computation will be faster on
+large input if you pass in "array_is_sorted":
+
+  x = [6,5,9,10,4,3]
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+  x = sort_collection(x)
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+  Non-sorted input:
+    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+  Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+  and hence appears as p99:
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  With default sorting done outside percentiles, the same:
+    x = sort(x) # or x = sort_collection(x)
+    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  Now sorting by word length, "loquaciously" is longest and hence is the p99:
+    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true})
+    ["over", "loquaciously"]
+
+ + +### skewness +
+skewness  (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+skewness([4,5,9,10,11]) is -0.2097285
+
+ + +### sort_collection +
+sort_collection  (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+
+ + +### stddev +
+stddev  (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+stddev([4,5,9,10,11]) is 3.1144823
+
+ + +### sum +
+sum  (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum([1,2,3,4,5]) is 15
+
+ + +### sum2 +
+sum2  (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum2([1,2,3,4,5]) is 55
+
+ + +### sum3 +
+sum3  (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum3([1,2,3,4,5]) is 225
+
+ + +### sum4 +
+sum4  (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum4([1,2,3,4,5]) is 979
+
+ + +### variance +
+variance  (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+variance([4,5,9,10,11]) is 9.7
+
+ ## String functions @@ -1765,3 +1987,4 @@ is_string (class=typing #args=1) True if field is present with string (includin typeof (class=typing #args=1) Convert argument to type of argument (e.g. "str"). For debug. +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md index f2a8b45cb..cf1b4bc78 100644 --- a/docs/src/reference-dsl-syntax.md +++ b/docs/src/reference-dsl-syntax.md @@ -35,6 +35,7 @@ i j k 7 8 15 8 9 17 9 10 19 +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 Newlines within the expression are ignored, which can help increase legibility of complex expressions: diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 8e2daf9d0..f688bdd82 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -495,3 +495,4 @@ Notes about all other separators: * `--repifs`: Let IFS be repeated: e.g. for splitting on multiple spaces. * `--rs {string}`: Specify RS for input and output. +/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 2b7e9501f..c94e184c5 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/internal/pkg/bifs/arithmetic.go b/internal/pkg/bifs/arithmetic.go
index 45fc41390..86f6d1e7f 100644
--- a/internal/pkg/bifs/arithmetic.go
+++ b/internal/pkg/bifs/arithmetic.go
@@ -3,6 +3,7 @@ package bifs
 import (
 	"math"
 
+	"github.com/johnkerl/miller/internal/pkg/lib"
 	"github.com/johnkerl/miller/internal/pkg/mlrval"
 )
 
@@ -793,7 +794,7 @@ func min_s_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 var min_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
-	//       .  INT       FLOAT     BOOL      VOID   STRING    ARRAY  MAP    FUNC    ERROR   NULL   ABSENT
+	//       .  INT        FLOAT     BOOL      VOID   STRING    ARRAY  MAP    FUNC   ERROR  NULL   ABSENT
 	/*INT    */ {min_i_ii, min_f_if, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
 	/*FLOAT  */ {min_f_fi, min_f_ff, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
 	/*BOOL   */ {_2___, _2___, min_b_bb, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___},
@@ -807,6 +808,8 @@ var min_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
 	/*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _absn},
 }
 
+// BIF_min_binary is not a direct DSL function. It's a helper here,
+// and is also exposed publicly for use by the stats1 verb.
 func BIF_min_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 	return (min_dispositions[input1.Type()][input2.Type()])(input1, input2)
 }
@@ -814,15 +817,91 @@ func BIF_min_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 func BIF_min_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
 	if len(mlrvals) == 0 {
 		return mlrval.VOID
-	} else {
-		retval := mlrvals[0]
-		for i := range mlrvals {
-			if i > 0 {
-				retval = BIF_min_binary(retval, mlrvals[i])
-			}
-		}
-		return retval
 	}
+	return mlrval.ArrayFold(
+		mlrvals,
+		bif_min_unary(mlrvals[0]),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_min_binary(bif_min_unary(a), bif_min_unary(b))
+		},
+	)
+}
+
+func BIF_min_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	return mlrval.MapFold(
+		m,
+		m.Head.Value,
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_min_binary(a, b)
+		},
+	)
+}
+
+// bif_min_unary allows recursion into arguments, so users can do either
+// min(1,2,3) or min([1,2,3]).
+func bif_min_unary_array(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_min_variadic(input1.AcquireArrayValue())
+}
+func bif_min_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_min_within_map_values(input1.AcquireMapValue())
+}
+
+// We get a Golang "initialization loop" due to recursive depth computation
+// if this is defined statically. So, we use a "package init" function.
+var min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{}
+
+func init() {
+	min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{
+		/*INT    */ _1u___,
+		/*FLOAT  */ _1u___,
+		/*BOOL   */ _1u___,
+		/*VOID   */ _1u___,
+		/*STRING */ _1u___,
+		/*ARRAY  */ bif_min_unary_array,
+		/*MAP    */ bif_min_unary_map,
+		/*FUNC   */ _erro1,
+		/*ERROR  */ _erro1,
+		/*NULL   */ _null1,
+		/*ABSENT */ _absn1,
+	}
+}
+
+func bif_min_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return min_unary_dispositions[input1.Type()](input1)
+}
+
+// ----------------------------------------------------------------
+// BIF_minlen_variadic returns the least lib.UTF8Strlen of the arguments' original strings (void
+// if none), comparing native ints rather than Mlrvals to avoid unnecessary allocation.
+func BIF_minlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
+	if len(mlrvals) == 0 {
+		return mlrval.VOID
+	}
+	retval := lib.UTF8Strlen(mlrvals[0].OriginalString())
+	for _, mv := range mlrvals[1:] {
+		if clen := lib.UTF8Strlen(mv.OriginalString()); clen < retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
+}
+
+func BIF_minlen_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(m.Head.Value.OriginalString())
+	for pe := m.Head.Next; pe != nil; pe = pe.Next {
+		clen := lib.UTF8Strlen(pe.Value.OriginalString())
+		if clen < retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
 }
 
 // ----------------------------------------------------------------
@@ -891,6 +970,8 @@ var max_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{
 	/*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _absn, _absn},
 }
 
+// BIF_max_binary is not a direct DSL function. It's a helper here,
+// and is also exposed publicly for use by the stats1 verb.
 func BIF_max_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 	return (max_dispositions[input1.Type()][input2.Type()])(input1, input2)
 }
@@ -898,13 +979,89 @@ func BIF_max_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 func BIF_max_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
 	if len(mlrvals) == 0 {
 		return mlrval.VOID
-	} else {
-		retval := mlrvals[0]
-		for i := range mlrvals {
-			if i > 0 {
-				retval = BIF_max_binary(retval, mlrvals[i])
-			}
-		}
-		return retval
+	}
+	return mlrval.ArrayFold(
+		mlrvals,
+		bif_max_unary(mlrvals[0]),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_max_binary(bif_max_unary(a), bif_max_unary(b))
+		},
+	)
+}
+
+func BIF_max_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	return mlrval.MapFold(
+		m,
+		m.Head.Value,
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_max_binary(a, b)
+		},
+	)
+}
+
+// bif_max_unary allows recursion into arguments, so users can do either
+// max(1,2,3) or max([1,2,3]).
+func bif_max_unary_array(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_max_variadic(input1.AcquireArrayValue())
+}
+func bif_max_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return BIF_max_within_map_values(input1.AcquireMapValue())
+}
+
+// We get a Golang "initialization loop" due to recursive depth computation
+// if this is defined statically. So, we use a "package init" function.
+var max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{}
+
+func init() {
+	max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{
+		/*INT    */ _1u___,
+		/*FLOAT  */ _1u___,
+		/*BOOL   */ _1u___,
+		/*VOID   */ _1u___,
+		/*STRING */ _1u___,
+		/*ARRAY  */ bif_max_unary_array,
+		/*MAP    */ bif_max_unary_map,
+		/*FUNC   */ _erro1,
+		/*ERROR  */ _erro1,
+		/*NULL   */ _null1,
+		/*ABSENT */ _absn1,
 	}
 }
+
+func bif_max_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	return max_unary_dispositions[input1.Type()](input1)
+}
+
+// ----------------------------------------------------------------
+// BIF_maxlen_variadic returns the greatest lib.UTF8Strlen of the arguments' original strings (void
+// if none), comparing native ints rather than Mlrvals to avoid unnecessary allocation.
+func BIF_maxlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
+	if len(mlrvals) == 0 {
+		return mlrval.VOID
+	}
+	retval := lib.UTF8Strlen(mlrvals[0].OriginalString())
+	for _, mv := range mlrvals[1:] {
+		if clen := lib.UTF8Strlen(mv.OriginalString()); clen > retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
+}
+
+func BIF_maxlen_within_map_values(m *mlrval.Mlrmap) *mlrval.Mlrval {
+	if m.Head == nil {
+		return mlrval.VOID
+	}
+	// Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation.
+	retval := lib.UTF8Strlen(m.Head.Value.OriginalString())
+	for pe := m.Head.Next; pe != nil; pe = pe.Next {
+		clen := lib.UTF8Strlen(pe.Value.OriginalString())
+		if clen > retval {
+			retval = clen
+		}
+	}
+	return mlrval.FromInt(retval)
+}
diff --git a/internal/pkg/bifs/percentiles.go b/internal/pkg/bifs/percentiles.go
new file mode 100644
index 000000000..087e7f200
--- /dev/null
+++ b/internal/pkg/bifs/percentiles.go
@@ -0,0 +1,217 @@
+package bifs
+
+import (
+	"math"
+
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+)
+
+// GetPercentileLinearlyInterpolated returns the p-th percentile (p in percent)
+// over array[0..n-1], linearly interpolating between adjacent entries. It does
+// no sorting itself (presumably the caller passes sorted data) and needs n >= 1.
+// Out-of-range p is clamped: p < 0 yields array[0] and p > 100 yields array[n-1].
+func GetPercentileLinearlyInterpolated(array []*mlrval.Mlrval, n int, p float64) *mlrval.Mlrval {
+	findex := (p / 100.0) * (float64(n) - 1)
+	if findex < 0.0 {
+		findex = 0.0
+	}
+	iindex := int(math.Floor(findex))
+	if iindex >= n-1 {
+		// Clamp rather than index with iindex, which exceeds n-1 when p > 100.
+		return array[n-1].Copy()
+	}
+	// TODO: just do this in float64:
+	// array[iindex] + frac * (array[iindex+1] - array[iindex])
+	frac := mlrval.FromFloat(findex - float64(iindex))
+	diff := BIF_minus_binary(array[iindex+1], array[iindex])
+	prod := BIF_times(frac, diff)
+	return BIF_plus_binary(array[iindex], prod)
+}
+
+// ================================================================
+// Non-interpolated percentiles (see also https://en.wikipedia.org/wiki/Percentile)
+
+// ----------------------------------------------------------------
+// OPTION 1: int index = p*n/100.0;
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
+// x_p01 0 x_p11  0 x_p21 20 x_p31 20 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81  80 x_p91 100
+// x_p02 0 x_p12  0 x_p22 20 x_p32 20 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82  80 x_p92 100
+// x_p03 0 x_p13  0 x_p23 20 x_p33 20 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83  80 x_p93 100
+// x_p04 0 x_p14  0 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 100 x_p94 100
+// x_p05 0 x_p15  0 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 100 x_p95 100
+// x_p06 0 x_p16  0 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 100 x_p96 100
+// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
+// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
+// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10 0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 75 x_p70 75 x_p80 100 x_p90 100 x_p100 100
+// x_p01 0 x_p11 0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 75 x_p71 75 x_p81 100 x_p91 100
+// x_p02 0 x_p12 0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 75 x_p72 75 x_p82 100 x_p92 100
+// x_p03 0 x_p13 0 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83 100 x_p93 100
+// x_p04 0 x_p14 0 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84 100 x_p94 100
+// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85 100 x_p95 100
+// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86 100 x_p96 100
+// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87 100 x_p97 100
+// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
+// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// OPTION 2: int index = p*(n-1)/100.0;
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10 0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90 80 x_p100 100
+// x_p01 0 x_p11 0 x_p21 20 x_p31 20 x_p41 40 x_p51 40 x_p61 60 x_p71 60 x_p81 80 x_p91 80
+// x_p02 0 x_p12 0 x_p22 20 x_p32 20 x_p42 40 x_p52 40 x_p62 60 x_p72 60 x_p82 80 x_p92 80
+// x_p03 0 x_p13 0 x_p23 20 x_p33 20 x_p43 40 x_p53 40 x_p63 60 x_p73 60 x_p83 80 x_p93 80
+// x_p04 0 x_p14 0 x_p24 20 x_p34 20 x_p44 40 x_p54 40 x_p64 60 x_p74 60 x_p84 80 x_p94 80
+// x_p05 0 x_p15 0 x_p25 20 x_p35 20 x_p45 40 x_p55 40 x_p65 60 x_p75 60 x_p85 80 x_p95 80
+// x_p06 0 x_p16 0 x_p26 20 x_p36 20 x_p46 40 x_p56 40 x_p66 60 x_p76 60 x_p86 80 x_p96 80
+// x_p07 0 x_p17 0 x_p27 20 x_p37 20 x_p47 40 x_p57 40 x_p67 60 x_p77 60 x_p87 80 x_p97 80
+// x_p08 0 x_p18 0 x_p28 20 x_p38 20 x_p48 40 x_p58 40 x_p68 60 x_p78 60 x_p88 80 x_p98 80
+// x_p09 0 x_p19 0 x_p29 20 x_p39 20 x_p49 40 x_p59 40 x_p69 60 x_p79 60 x_p89 80 x_p99 80
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10 0 x_p20  0 x_p30 25 x_p40 25 x_p50 50 x_p60 50 x_p70 50 x_p80 75 x_p90 75 x_p100 100
+// x_p01 0 x_p11 0 x_p21  0 x_p31 25 x_p41 25 x_p51 50 x_p61 50 x_p71 50 x_p81 75 x_p91 75
+// x_p02 0 x_p12 0 x_p22  0 x_p32 25 x_p42 25 x_p52 50 x_p62 50 x_p72 50 x_p82 75 x_p92 75
+// x_p03 0 x_p13 0 x_p23  0 x_p33 25 x_p43 25 x_p53 50 x_p63 50 x_p73 50 x_p83 75 x_p93 75
+// x_p04 0 x_p14 0 x_p24  0 x_p34 25 x_p44 25 x_p54 50 x_p64 50 x_p74 50 x_p84 75 x_p94 75
+// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 25 x_p55 50 x_p65 50 x_p75 75 x_p85 75 x_p95 75
+// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 25 x_p56 50 x_p66 50 x_p76 75 x_p86 75 x_p96 75
+// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 25 x_p57 50 x_p67 50 x_p77 75 x_p87 75 x_p97 75
+// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 25 x_p58 50 x_p68 50 x_p78 75 x_p88 75 x_p98 75
+// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 25 x_p59 50 x_p69 50 x_p79 75 x_p89 75 x_p99 75
+//
+// ----------------------------------------------------------------
+// OPTION 3: int index = (int)ceil(p*(n-1)/100.0);
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00  0 x_p10 20 x_p20 20 x_p30 40 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
+// x_p01 20 x_p11 20 x_p21 40 x_p31 40 x_p41 60 x_p51 60 x_p61 80 x_p71 80 x_p81 100 x_p91 100
+// x_p02 20 x_p12 20 x_p22 40 x_p32 40 x_p42 60 x_p52 60 x_p62 80 x_p72 80 x_p82 100 x_p92 100
+// x_p03 20 x_p13 20 x_p23 40 x_p33 40 x_p43 60 x_p53 60 x_p63 80 x_p73 80 x_p83 100 x_p93 100
+// x_p04 20 x_p14 20 x_p24 40 x_p34 40 x_p44 60 x_p54 60 x_p64 80 x_p74 80 x_p84 100 x_p94 100
+// x_p05 20 x_p15 20 x_p25 40 x_p35 40 x_p45 60 x_p55 60 x_p65 80 x_p75 80 x_p85 100 x_p95 100
+// x_p06 20 x_p16 20 x_p26 40 x_p36 40 x_p46 60 x_p56 60 x_p66 80 x_p76 80 x_p86 100 x_p96 100
+// x_p07 20 x_p17 20 x_p27 40 x_p37 40 x_p47 60 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
+// x_p08 20 x_p18 20 x_p28 40 x_p38 40 x_p48 60 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
+// x_p09 20 x_p19 20 x_p29 40 x_p39 40 x_p49 60 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00  0 x_p10 25 x_p20 25 x_p30 50 x_p40 50 x_p50 50 x_p60 75 x_p70  75 x_p80 100 x_p90 100 x_p100 100
+// x_p01 25 x_p11 25 x_p21 25 x_p31 50 x_p41 50 x_p51 75 x_p61 75 x_p71  75 x_p81 100 x_p91 100
+// x_p02 25 x_p12 25 x_p22 25 x_p32 50 x_p42 50 x_p52 75 x_p62 75 x_p72  75 x_p82 100 x_p92 100
+// x_p03 25 x_p13 25 x_p23 25 x_p33 50 x_p43 50 x_p53 75 x_p63 75 x_p73  75 x_p83 100 x_p93 100
+// x_p04 25 x_p14 25 x_p24 25 x_p34 50 x_p44 50 x_p54 75 x_p64 75 x_p74  75 x_p84 100 x_p94 100
+// x_p05 25 x_p15 25 x_p25 25 x_p35 50 x_p45 50 x_p55 75 x_p65 75 x_p75  75 x_p85 100 x_p95 100
+// x_p06 25 x_p16 25 x_p26 50 x_p36 50 x_p46 50 x_p56 75 x_p66 75 x_p76 100 x_p86 100 x_p96 100
+// x_p07 25 x_p17 25 x_p27 50 x_p37 50 x_p47 50 x_p57 75 x_p67 75 x_p77 100 x_p87 100 x_p97 100
+// x_p08 25 x_p18 25 x_p28 50 x_p38 50 x_p48 50 x_p58 75 x_p68 75 x_p78 100 x_p88 100 x_p98 100
+// x_p09 25 x_p19 25 x_p29 50 x_p39 50 x_p49 50 x_p59 75 x_p69 75 x_p79 100 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// OPTION 4: int index = (int)ceil(-0.5 + p*(n-1)/100.0);
+//
+// x
+// 0
+// 20
+// 40
+// 60
+// 80
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90  80 x_p100 100
+// x_p01 0 x_p11 20 x_p21 20 x_p31 40 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81 80 x_p91 100
+// x_p02 0 x_p12 20 x_p22 20 x_p32 40 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82 80 x_p92 100
+// x_p03 0 x_p13 20 x_p23 20 x_p33 40 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83 80 x_p93 100
+// x_p04 0 x_p14 20 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 80 x_p94 100
+// x_p05 0 x_p15 20 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 80 x_p95 100
+// x_p06 0 x_p16 20 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 80 x_p96 100
+// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 60 x_p77 80 x_p87 80 x_p97 100
+// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 60 x_p78 80 x_p88 80 x_p98 100
+// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 60 x_p79 80 x_p89 80 x_p99 100
+//
+// x
+// 0
+// 25
+// 50
+// 75
+// 100
+//
+// x_p00 0 x_p10  0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 50 x_p70 75 x_p80  75 x_p90 100 x_p100 100
+// x_p01 0 x_p11  0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 50 x_p71 75 x_p81  75 x_p91 100
+// x_p02 0 x_p12  0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 50 x_p72 75 x_p82  75 x_p92 100
+// x_p03 0 x_p13 25 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83  75 x_p93 100
+// x_p04 0 x_p14 25 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84  75 x_p94 100
+// x_p05 0 x_p15 25 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85  75 x_p95 100
+// x_p06 0 x_p16 25 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86  75 x_p96 100
+// x_p07 0 x_p17 25 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87  75 x_p97 100
+// x_p08 0 x_p18 25 x_p28 25 x_p38 50 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
+// x_p09 0 x_p19 25 x_p29 25 x_p39 50 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
+//
+// ----------------------------------------------------------------
+// CONCLUSION:
+// * I like option 2 for its simplicity ...
+// * ... but option 1 matches R's quantile with type=1.
+// * (Note that Miller's interpolated percentiles match R's quantile with type=7)
+// ----------------------------------------------------------------
+
+func GetPercentileNonInterpolated(
+	array []*mlrval.Mlrval,
+	n int,
+	p float64,
+) *mlrval.Mlrval {
+	index := int(p * float64(n) / 100.0)
+	//index := p * (float64(float64(n)) - 1) / 100.0
+	//index := int(ceil(p * (float64(n) - 1) / 100.0))
+	//index := int(ceil(-0.5 + p*(float64(n)-1)/100.0))
+	if index >= n {
+		index = n - 1
+	}
+	if index < 0 {
+		index = 0
+	}
+	return array[index].Copy()
+}
diff --git a/internal/pkg/bifs/stats.go b/internal/pkg/bifs/stats.go
index efcabec76..99e1e0ccd 100644
--- a/internal/pkg/bifs/stats.go
+++ b/internal/pkg/bifs/stats.go
@@ -2,6 +2,7 @@ package bifs
 
 import (
 	"math"
+	"sort"
 
 	"github.com/johnkerl/miller/internal/pkg/lib"
 	"github.com/johnkerl/miller/internal/pkg/mlrval"
@@ -24,7 +25,7 @@ import (
 //	output = [m, b, math.sqrt(var_m), math.sqrt(var_b)]
 
 // ----------------------------------------------------------------
-func BIF_get_var(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_variance(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	sum, isNumber := msum.GetNumericToFloatValue()
@@ -46,8 +47,8 @@ func BIF_get_var(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 // ----------------------------------------------------------------
-func BIF_get_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
-	mvar := BIF_get_var(mn, msum, msum2)
+func BIF_finalize_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+	mvar := BIF_finalize_variance(mn, msum, msum2)
 	if mvar.IsVoid() {
 		return mvar
 	}
@@ -55,8 +56,8 @@ func BIF_get_stddev(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 }
 
 // ----------------------------------------------------------------
-func BIF_get_mean_EB(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
-	mvar := BIF_get_var(mn, msum, msum2)
+func BIF_finalize_mean_eb(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
+	mvar := BIF_finalize_variance(mn, msum, msum2)
 	if mvar.IsVoid() {
 		return mvar
 	}
@@ -87,7 +88,7 @@ func BIF_get_mean_EB(mn, msum, msum2 *mlrval.Mlrval) *mlrval.Mlrval {
 //   = sumx2 - n mean^2
 
 // ----------------------------------------------------------------
-func BIF_get_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	if n < 2 {
@@ -124,7 +125,7 @@ func BIF_get_skewness(mn, msum, msum2, msum3 *mlrval.Mlrval) *mlrval.Mlrval {
 //   = sumx4 - mean*(4 sumx3 - mean*(6 sumx2 - 3 n mean^2))
 
 // ----------------------------------------------------------------
-func BIF_get_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrval {
+func BIF_finalize_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrval {
 	n, isInt := mn.GetIntValue()
 	lib.InternalCodingErrorIf(!isInt)
 	if n < 2 {
@@ -149,3 +150,485 @@ func BIF_get_kurtosis(mn, msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval.Mlrv
 	return mlrval.FromFloat(numerator/denominator - 3.0)
 
 }
+
+// ================================================================
+// STATS ROUTINES -- other than min/max which are placed separately.
+
+// This is a helper function for BIFs which operate only on array or map.
+// It shorthands what values to return for non-collection inputs.
+func check_collection(c *mlrval.Mlrval) (bool, *mlrval.Mlrval) {
+	vtype := c.Type()
+	switch vtype {
+	case mlrval.MT_ARRAY:
+		return true, c
+	case mlrval.MT_MAP:
+		return true, c
+	case mlrval.MT_ABSENT:
+		return false, mlrval.ABSENT
+	default:
+		return false, mlrval.ERROR
+	}
+}
+
+// collection_sum_of_function sums f(value) for value in the array or map:
+// e.g. sum of values, sum of squares of values, etc.
+func collection_sum_of_function(
+	collection *mlrval.Mlrval,
+	f func(element *mlrval.Mlrval) *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return mlrval.CollectionFold(
+		collection,
+		mlrval.FromInt(0),
+		func(a, b *mlrval.Mlrval) *mlrval.Mlrval {
+			return BIF_plus_binary(a, f(b))
+		},
+	)
+}
+
+func BIF_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	if collection.IsArray() {
+		arrayval := collection.AcquireArrayValue()
+		return mlrval.FromInt(int64(len(arrayval)))
+	} else {
+		mapval := collection.AcquireMapValue()
+		return mlrval.FromInt(mapval.FieldCount)
+	}
+}
+
+// BIF_null_count counts the elements of an array, or the values of a map, for
+// which IsVoid() or IsNull() is true. Absent input yields absent, and any other
+// non-collection input yields error, as decided by check_collection.
+func BIF_null_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+
+	// Score each element as 1 or 0, then let the shared summing helper add up
+	// the scores -- the same fold BIF_sum and friends use.
+	isNullIndicator := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		if element.IsVoid() || element.IsNull() {
+			return mlrval.FromInt(1)
+		}
+		return mlrval.FromInt(0)
+	}
+
+	return collection_sum_of_function(collection, isNullIndicator)
+}
+
+func BIF_distinct_count(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	counts := make(map[string]int)
+	if collection.IsArray() {
+		a := collection.AcquireArrayValue()
+		for _, e := range a {
+			valueString := e.OriginalString()
+			counts[valueString] += 1
+		}
+	} else {
+		m := collection.AcquireMapValue()
+		for pe := m.Head; pe != nil; pe = pe.Next {
+			valueString := pe.Value.OriginalString()
+			counts[valueString] += 1
+		}
+	}
+	return mlrval.FromInt(int64(len(counts)))
+}
+
+func BIF_mode(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	return bif_mode_or_antimode(collection, func(a, b int) bool { return a > b })
+}
+
+func BIF_antimode(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	return bif_mode_or_antimode(collection, func(a, b int) bool { return a < b })
+}
+
+func bif_mode_or_antimode(
+	collection *mlrval.Mlrval,
+	cmp func(int, int) bool,
+) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+
+	// Do not use a Go map[string]int as that makes the output in the case of ties
+	// (e.g. input = [3,3,4,4]) non-deterministic. That's bad for unit tests and also
+	// simply bad UX.
+	counts := lib.NewOrderedMap()
+
+	// We use stringification to detect uniqueness. Yet we want the output to be typed,
+	// e.g. mode of an array of ints should be an int, not a string. Here we store
+	// a reference to one representative for each equivalence class.
+	reps := lib.NewOrderedMap()
+
+	if collection.IsArray() {
+		a := collection.AcquireArrayValue()
+		if len(a) == 0 {
+			return mlrval.VOID
+		}
+		for _, e := range a {
+			valueString := e.OriginalString()
+			if counts.Has(valueString) {
+				counts.Put(valueString, counts.Get(valueString).(int)+1)
+			} else {
+				counts.Put(valueString, 1)
+				reps.Put(valueString, e)
+			}
+		}
+	} else {
+		m := collection.AcquireMapValue()
+		if m.Head == nil {
+			return mlrval.VOID
+		}
+		for pe := m.Head; pe != nil; pe = pe.Next {
+			valueString := pe.Value.OriginalString()
+			if counts.Has(valueString) {
+				counts.Put(valueString, counts.Get(valueString).(int)+1)
+			} else {
+				counts.Put(valueString, 1)
+				reps.Put(valueString, pe.Value)
+			}
+		}
+	}
+	first := true
+	maxk := ""
+	maxv := -1
+	for pf := counts.Head; pf != nil; pf = pf.Next {
+		k := pf.Key
+		v := pf.Value.(int)
+		if first || cmp(v, maxv) {
+			maxk = k
+			maxv = v
+			first = false
+		}
+	}
+	// OrderedMap has interface{} values, so dereference as Mlrval. Then, copy the Mlrval
+	// so we're not returning a pointer to input data.
+	return reps.Get(maxk).(*mlrval.Mlrval).Copy()
+}
+
+func BIF_sum(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	return collection_sum_of_function(
+		collection,
+		func(e *mlrval.Mlrval) *mlrval.Mlrval {
+			return e
+		},
+	)
+}
+
+func BIF_sum2(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		return BIF_times(element, element)
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_sum3(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		return BIF_times(element, BIF_times(element, element))
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_sum4(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	f := func(element *mlrval.Mlrval) *mlrval.Mlrval {
+		sq := BIF_times(element, element)
+		return BIF_times(sq, sq)
+	}
+	return collection_sum_of_function(collection, f)
+}
+
+func BIF_mean(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	if n.AcquireIntValue() == 0 {
+		return mlrval.VOID
+	}
+	sum := BIF_sum(collection)
+	return BIF_divide(sum, n)
+}
+
+func BIF_meaneb(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_mean_eb(n, sum, sum2)
+}
+
+func BIF_variance(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_variance(n, sum, sum2)
+}
+
+func BIF_stddev(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	return BIF_finalize_stddev(n, sum, sum2)
+}
+
+func BIF_skewness(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	ok, value_if_not := check_collection(collection)
+	if !ok {
+		return value_if_not
+	}
+	n := BIF_count(collection)
+	sum := BIF_sum(collection)
+	sum2 := BIF_sum2(collection)
+	sum3 := BIF_sum3(collection)
+	return BIF_finalize_skewness(n, sum, sum2, sum3)
+}
+
+// BIF_kurtosis computes the kurtosis of the values in an array or map. It
+// gathers the count and the sums of first through fourth powers, and hands them
+// to BIF_finalize_kurtosis. When check_collection rejects the argument, the
+// value it supplies is returned instead.
+func BIF_kurtosis(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if ok, fallback := check_collection(collection); !ok {
+		return fallback
+	}
+	return BIF_finalize_kurtosis(
+		BIF_count(collection),
+		BIF_sum(collection),
+		BIF_sum2(collection),
+		BIF_sum3(collection),
+		BIF_sum4(collection),
+	)
+}
+
+// BIF_minlen dispatches to the array or map flavor of the minimum-length
+// computation, depending on the collection's type. When check_collection
+// rejects the argument, the value it supplies is returned instead.
+func BIF_minlen(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if ok, fallback := check_collection(collection); !ok {
+		return fallback
+	}
+	if !collection.IsArray() {
+		return BIF_minlen_within_map_values(collection.AcquireMapValue())
+	}
+	return BIF_minlen_variadic(collection.AcquireArrayValue())
+}
+
+// BIF_maxlen dispatches to the array or map flavor of the maximum-length
+// computation, depending on the collection's type. When check_collection
+// rejects the argument, the value it supplies is returned instead.
+func BIF_maxlen(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if ok, fallback := check_collection(collection); !ok {
+		return fallback
+	}
+	if !collection.IsArray() {
+		return BIF_maxlen_within_map_values(collection.AcquireMapValue())
+	}
+	return BIF_maxlen_variadic(collection.AcquireArrayValue())
+}
+
+// BIF_sort_collection returns a new array holding copies of the values of the
+// given array or map, ordered by mlrval.LessThan. The input collection is not
+// modified; map keys are discarded. When check_collection rejects the argument,
+// the value it supplies is returned instead.
+func BIF_sort_collection(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	if ok, fallback := check_collection(collection); !ok {
+		return fallback
+	}
+
+	// Copy the elements first so that sorting never touches the caller's data.
+	var copies []*mlrval.Mlrval
+	if collection.IsArray() {
+		elements := collection.AcquireArrayValue()
+		copies = make([]*mlrval.Mlrval, len(elements))
+		for i, element := range elements {
+			copies[i] = element.Copy()
+		}
+	} else {
+		fields := collection.AcquireMapValue()
+		copies = make([]*mlrval.Mlrval, fields.FieldCount)
+		idx := 0
+		for pe := fields.Head; pe != nil; pe = pe.Next {
+			copies[idx] = pe.Value.Copy()
+			idx++
+		}
+	}
+
+	sort.Slice(copies, func(i, j int) bool {
+		return mlrval.LessThan(copies[i], copies[j])
+	})
+
+	return mlrval.FromArray(copies)
+}
+
+// BIF_median returns the median of a collection, computed as its 50th
+// percentile with default options.
+func BIF_median(collection *mlrval.Mlrval) *mlrval.Mlrval {
+	fiftieth := mlrval.FromFloat(50.0)
+	return BIF_percentile(collection, fiftieth)
+}
+
+// BIF_median_with_options returns the median of a collection, computed as its
+// 50th percentile, forwarding the caller's options map unchanged.
+func BIF_median_with_options(collection *mlrval.Mlrval, options *mlrval.Mlrval) *mlrval.Mlrval {
+	fiftieth := mlrval.FromFloat(50.0)
+	return BIF_percentile_with_options(collection, fiftieth, options)
+}
+
+func BIF_percentile(
+	collection *mlrval.Mlrval,
+	percentile *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return BIF_percentile_with_options(collection, percentile, nil)
+}
+
+// BIF_percentile_with_options computes one percentile of a collection by
+// wrapping the requested percentile in a singleton array, delegating to
+// BIF_percentiles_with_options, and unwrapping the single result.
+//
+// The delegate does not always return a map: it returns an array when the
+// "output_array_not_map" option is set, and a non-collection value (such as an
+// error) when the collection or options are rejected. Each of those shapes is
+// handled here rather than being blindly dereferenced as a map.
+func BIF_percentile_with_options(
+	collection *mlrval.Mlrval,
+	percentile *mlrval.Mlrval,
+	options *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	percentiles := mlrval.FromSingletonArray(percentile)
+	outputs := BIF_percentiles_with_options(collection, percentiles, options)
+
+	// Default output shape: a one-entry map keyed by the percentile.
+	if m := outputs.GetMap(); m != nil {
+		if m.Head == nil {
+			return mlrval.ERROR
+		}
+		return m.Head.Value
+	}
+
+	// With "output_array_not_map" the result is a one-element array instead.
+	if a := outputs.GetArray(); len(a) > 0 {
+		return a[0]
+	}
+
+	// Neither map nor non-empty array: pass the delegate's value (for example
+	// an error) straight through to the caller.
+	return outputs
+}
+
+func BIF_percentiles(
+	collection *mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	return BIF_percentiles_with_options(collection, percentiles, nil)
+}
+
+// BIF_percentiles_with_options returns the requested percentiles of an array
+// or map, honoring an optional options map. Recognized option keys, each
+// requiring a boolean value, are:
+//
+//   - "array_is_sorted" / "ais": skip sorting; the collection must be an array.
+//   - "interpolate_linearly" / "il": interpolate between neighboring values.
+//   - "output_array_not_map" / "oa": return an array rather than a map.
+//
+// Unrecognized keys are ignored. A non-map options value, or a non-boolean
+// value for a recognized key, yields an error. When check_collection rejects
+// the collection, the value it supplies is returned instead.
+func BIF_percentiles_with_options(
+	collection *mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+	options *mlrval.Mlrval,
+) *mlrval.Mlrval {
+	if ok, fallback := check_collection(collection); !ok {
+		return fallback
+	}
+
+	var (
+		array_is_sorted      bool
+		interpolate_linearly bool
+		output_array_not_map bool
+	)
+
+	// parseFlag accepts only values equal to TRUE or FALSE; valid is false for
+	// anything else.
+	parseFlag := func(v *mlrval.Mlrval) (flag bool, valid bool) {
+		switch {
+		case mlrval.Equals(v, mlrval.TRUE):
+			return true, true
+		case mlrval.Equals(v, mlrval.FALSE):
+			return false, true
+		default:
+			return false, false
+		}
+	}
+
+	if options != nil {
+		om := options.GetMap()
+		if om == nil { // not a map
+			return mlrval.ERROR
+		}
+		for pe := om.Head; pe != nil; pe = pe.Next {
+			// Select which flag this key controls, if any.
+			var target *bool
+			switch pe.Key {
+			case "array_is_sorted", "ais":
+				target = &array_is_sorted
+			case "interpolate_linearly", "il":
+				target = &interpolate_linearly
+			case "output_array_not_map", "oa":
+				target = &output_array_not_map
+			default:
+				continue
+			}
+			flag, valid := parseFlag(pe.Value)
+			if !valid {
+				return mlrval.ERROR
+			}
+			*target = flag
+		}
+	}
+
+	sorted_array := collection
+	if array_is_sorted {
+		// The caller vouches for the ordering, which only makes sense for arrays.
+		if !collection.IsArray() {
+			return mlrval.ERROR
+		}
+	} else {
+		sorted_array = BIF_sort_collection(collection)
+	}
+
+	return bif_percentiles(
+		sorted_array.AcquireArrayValue(),
+		percentiles,
+		interpolate_linearly,
+		output_array_not_map,
+	)
+}
+
+// bif_percentiles evaluates each requested percentile against an array the
+// caller presents as already sorted. The percentiles argument must be an array,
+// else an error is returned. Per percentile: a non-numeric request produces an
+// error entry, an empty input produces void, and otherwise the interpolated or
+// non-interpolated lookup is used as selected. Results are returned as an array
+// in request order, or as a map keyed by each percentile's string form.
+func bif_percentiles(
+	sorted_array []*mlrval.Mlrval,
+	percentiles *mlrval.Mlrval,
+	interpolate_linearly bool,
+	output_array_not_map bool,
+) *mlrval.Mlrval {
+
+	ps := percentiles.GetArray()
+	if ps == nil { // not an array
+		return mlrval.ERROR
+	}
+
+	n := len(sorted_array)
+	outputs := make([]*mlrval.Mlrval, len(ps))
+
+	for i, requested := range ps {
+		p, ok := requested.GetNumericToFloatValue()
+		switch {
+		case !ok:
+			outputs[i] = mlrval.ERROR.Copy()
+		case n == 0:
+			outputs[i] = mlrval.VOID
+		case interpolate_linearly:
+			outputs[i] = GetPercentileLinearlyInterpolated(sorted_array, n, p)
+		default:
+			outputs[i] = GetPercentileNonInterpolated(sorted_array, n, p)
+		}
+	}
+
+	if output_array_not_map {
+		return mlrval.FromArray(outputs)
+	}
+
+	keyed := mlrval.NewMlrmap()
+	for i, requested := range ps {
+		keyed.PutCopy(requested.String(), outputs[i])
+	}
+	return mlrval.FromMap(keyed)
+}
diff --git a/internal/pkg/bifs/stats_test.go b/internal/pkg/bifs/stats_test.go
new file mode 100644
index 000000000..0d1276ba1
--- /dev/null
+++ b/internal/pkg/bifs/stats_test.go
@@ -0,0 +1,192 @@
+package bifs
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+)
+
+// stats_test_array builds an array-valued mlrval holding the integers
+// 0, 1, ..., n-1 in order.
+func stats_test_array(n int) *mlrval.Mlrval {
+	values := make([]*mlrval.Mlrval, 0, n)
+	for i := int64(0); i < int64(n); i++ {
+		values = append(values, mlrval.FromInt(i))
+	}
+	return mlrval.FromArray(values)
+}
+
+// array_to_map_for_test converts an array-valued mlrval into a map-valued one
+// whose keys are the decimal array positions ("0", "1", ...) and whose values
+// are copies of the corresponding elements.
+func array_to_map_for_test(a *mlrval.Mlrval) *mlrval.Mlrval {
+	m := mlrval.NewMlrmap()
+	for i, val := range a.AcquireArrayValue() {
+		m.PutCopy(fmt.Sprint(i), val)
+	}
+	return mlrval.FromMap(m)
+}
+
+// TestBIF_count checks that BIF_count rejects a scalar and reports the right
+// size for arrays and maps of lengths 0 through 4.
+func TestBIF_count(t *testing.T) {
+	// Needs array or map
+	scalar := mlrval.FromInt(3)
+	assert.True(t, BIF_count(scalar).IsError())
+
+	for n := 0; n < 5; n++ {
+		asArray := stats_test_array(n)
+		assert.True(t, mlrval.Equals(BIF_count(asArray), mlrval.FromInt(int64(n))))
+
+		asMap := array_to_map_for_test(asArray)
+		assert.True(t, mlrval.Equals(BIF_count(asMap), mlrval.FromInt(int64(n))))
+	}
+}
+
+// TestBIF_distinct_count checks that BIF_distinct_count rejects a scalar and
+// counts distinct values in both array and map inputs.
+func TestBIF_distinct_count(t *testing.T) {
+	// Needs array or map. This exercises the function under test itself rather
+	// than BIF_count.
+	input := mlrval.FromInt(3)
+	output := BIF_distinct_count(input)
+	assert.True(t, output.IsError())
+
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(3),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_distinct_count(input), mlrval.FromInt(3)))
+
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_distinct_count(input), mlrval.FromInt(3)))
+}
+
+// TestBIF_null_count checks that BIF_null_count rejects a scalar and counts
+// only empty-string and null values in both array and map inputs.
+func TestBIF_null_count(t *testing.T) {
+	// Needs array or map. This exercises the function under test itself rather
+	// than BIF_count.
+	input := mlrval.FromInt(3)
+	output := BIF_null_count(input)
+	assert.True(t, output.IsError())
+
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromString("two"),
+		mlrval.FromString(""), // this counts
+		mlrval.ERROR,
+		mlrval.ABSENT,
+		mlrval.NULL, // this counts
+	})
+	assert.True(t, mlrval.Equals(BIF_null_count(input), mlrval.FromInt(2)))
+
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_null_count(input), mlrval.FromInt(2)))
+
+}
+
+// TestBIF_mode_and_antimode checks BIF_mode and BIF_antimode on a scalar
+// (error), on empty collections (void), on inputs with a clear winner, and on
+// ties, for both array and map inputs.
+func TestBIF_mode_and_antimode(t *testing.T) {
+	// Needs array or map. This exercises the functions under test themselves
+	// rather than BIF_count.
+	input := mlrval.FromInt(3)
+	assert.True(t, BIF_mode(input).IsError())
+	assert.True(t, BIF_antimode(input).IsError())
+
+	// Empty array
+	input = mlrval.FromArray([]*mlrval.Mlrval{})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.VOID))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.VOID))
+
+	// Empty map
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.VOID))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.VOID))
+
+	// Clear winner as array
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(3),
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(3)))
+
+	// Clear winner as map
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(3)))
+
+	// Ties as array -- first-found breaks the tie
+	input = mlrval.FromArray([]*mlrval.Mlrval{
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(1),
+		mlrval.FromInt(2),
+		mlrval.FromInt(2),
+		mlrval.FromInt(2),
+	})
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(1)))
+
+	// Ties as map -- first-found breaks the tie
+	input = array_to_map_for_test(input)
+	assert.True(t, mlrval.Equals(BIF_mode(input), mlrval.FromInt(1)))
+	assert.True(t, mlrval.Equals(BIF_antimode(input), mlrval.FromInt(1)))
+}
+
+// TestBIF_sum checks BIF_sum, BIF_sum2, BIF_sum3, and BIF_sum4 on a scalar
+// (error) and against independently computed integer power sums, for both
+// array and map inputs of lengths 1 through 4.
+func TestBIF_sum(t *testing.T) {
+	// Needs array or map. This exercises the functions under test themselves
+	// rather than BIF_count.
+	input := mlrval.FromInt(3)
+	assert.True(t, BIF_sum(input).IsError())
+	assert.True(t, BIF_sum2(input).IsError())
+	assert.True(t, BIF_sum3(input).IsError())
+	assert.True(t, BIF_sum4(input).IsError())
+
+	// TODO: test empty array/map
+	for n := 1; n < 5; n++ {
+		input = stats_test_array(n)
+		var isum1 int64
+		var isum2 int64
+		var isum3 int64
+		var isum4 int64
+		for _, e := range input.AcquireArrayValue() {
+			v := e.AcquireIntValue()
+			isum1 += v
+			isum2 += v * v
+			isum3 += v * v * v
+			isum4 += v * v * v * v
+		}
+		assert.True(t, mlrval.Equals(BIF_sum(input), mlrval.FromInt(isum1)))
+		assert.True(t, mlrval.Equals(BIF_sum2(input), mlrval.FromInt(isum2)))
+		assert.True(t, mlrval.Equals(BIF_sum3(input), mlrval.FromInt(isum3)))
+		assert.True(t, mlrval.Equals(BIF_sum4(input), mlrval.FromInt(isum4)))
+
+		input = array_to_map_for_test(input)
+		assert.True(t, mlrval.Equals(BIF_sum(input), mlrval.FromInt(isum1)))
+		assert.True(t, mlrval.Equals(BIF_sum2(input), mlrval.FromInt(isum2)))
+		assert.True(t, mlrval.Equals(BIF_sum3(input), mlrval.FromInt(isum3)))
+		assert.True(t, mlrval.Equals(BIF_sum4(input), mlrval.FromInt(isum4)))
+	}
+}
+
+// More easily tested (much lower keystroking) within the regression-test framework:
+
+// BIF_mean
+// BIF_meaneb
+// BIF_variance
+// BIF_stddev
+// BIF_skewness
+// BIF_kurtosis
+
+// BIF_min
+// BIF_max
+
+// BIF_minlen
+// BIF_maxlen
+
+// BIF_median
+// BIF_median_with_options
+// BIF_percentile
+// BIF_percentile_with_options
+// BIF_percentiles
+// BIF_percentiles_with_options
+
+// BIF_sort_collection
diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go
index 876fcdb29..b06695536 100644
--- a/internal/pkg/dsl/cst/builtin_function_manager.go
+++ b/internal/pkg/dsl/cst/builtin_function_manager.go
@@ -29,6 +29,7 @@ type TFunctionClass string
 const (
 	FUNC_CLASS_ARITHMETIC  TFunctionClass = "arithmetic"
 	FUNC_CLASS_MATH        TFunctionClass = "math"
+	FUNC_CLASS_STATS       TFunctionClass = "stats"
 	FUNC_CLASS_BOOLEAN     TFunctionClass = "boolean"
 	FUNC_CLASS_STRING      TFunctionClass = "string"
 	FUNC_CLASS_HASHING     TFunctionClass = "hashing"
@@ -846,14 +847,14 @@ is normally distributed.`,
 		{
 			name:         "max",
 			class:        FUNC_CLASS_MATH,
-			help:         `Max of n numbers; null loses.`,
+			help:         `Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.`,
 			variadicFunc: bifs.BIF_max_variadic,
 		},
 
 		{
 			name:         "min",
 			class:        FUNC_CLASS_MATH,
-			help:         `Min of n numbers; null loses.`,
+			help:         `Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.`,
 			variadicFunc: bifs.BIF_min_variadic,
 		},
 
@@ -958,6 +959,276 @@ is normally distributed.`,
 			unaryFunc: bifs.BIF_urandelement,
 		},
 
+		// ----------------------------------------------------------------
+		// FUNC_CLASS_STATS
+
+		{
+			name:      "count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the length of an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_count,
+			examples: []string{
+				"count([7,8,9]) is 3",
+				`count({"a":7,"b":8,"c":9}) is 3`,
+			},
+		},
+
+		// Function-table entry for the distinct_count stats function.
+		{
+			name:      "distinct_count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.`,
+			unaryFunc: bifs.BIF_distinct_count,
+			examples: []string{
+				`distinct_count([7,8,9,7])  is 3`,
+				`distinct_count([1,"1"]) is 1`,
+				`distinct_count([1,1.0]) is 2`,
+			},
+		},
+
+		{
+			name:      "null_count",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.`,
+			unaryFunc: bifs.BIF_null_count,
+			examples: []string{
+				`null_count(["a", "", "c"]) is 1`,
+			},
+		},
+
+		{
+			name:      "mode",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`,
+			unaryFunc: bifs.BIF_mode,
+			examples: []string{
+				`mode([3,3,4,4,4]) is 4`,
+				`mode([3,3,4,4]) is 3`,
+			},
+		},
+
+		// Function-table entry for the antimode stats function: the counterpart
+		// of mode, selecting the least rather than the most frequent value.
+		{
+			name:      "antimode",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`,
+			unaryFunc: bifs.BIF_antimode,
+			examples: []string{
+				`antimode([3,3,4,4,4]) is 3`,
+				`antimode([3,3,4,4]) is 3`,
+			},
+		},
+
+		{
+			name:      "sum",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum,
+			examples: []string{
+				`sum([1,2,3,4,5]) is 15`,
+			},
+		},
+
+		{
+			name:      "sum2",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum2,
+			examples: []string{
+				`sum2([1,2,3,4,5]) is 55`,
+			},
+		},
+
+		{
+			name:      "sum3",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum3,
+			examples: []string{
+				`sum3([1,2,3,4,5]) is 225`,
+			},
+		},
+
+		{
+			name:      "sum4",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_sum4,
+			examples: []string{
+				`sum4([1,2,3,4,5]) is 979`,
+			},
+		},
+
+		{
+			name:      "mean",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_mean,
+			examples: []string{
+				`mean([4,5,7,10]) is 6.5`,
+			},
+		},
+
+		{
+			name:      "meaneb",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_meaneb,
+			examples: []string{
+				`meaneb([4,5,7,10]) is 1.3228756`,
+			},
+		},
+
+		{
+			name:      "variance",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_variance,
+			examples: []string{
+				`variance([4,5,9,10,11]) is 9.7`,
+			},
+		},
+
+		{
+			name:      "stddev",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_stddev,
+			examples: []string{
+				`stddev([4,5,9,10,11]) is 3.1144823`,
+			},
+		},
+
+		{
+			name:      "skewness",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_skewness,
+			examples: []string{
+				`skewness([4,5,9,10,11]) is -0.2097285`,
+			},
+		},
+
+		{
+			name:      "kurtosis",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_kurtosis,
+			examples: []string{
+				`kurtosis([4,5,9,10,11]) is -1.6703688`,
+			},
+		},
+
+		// Function-table entry for the minlen stats function. The example uses
+		// a non-ASCII word to show a three-character string reported as length 3.
+		{
+			name:      "minlen",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_minlen,
+			examples: []string{
+				`minlen(["año", "alto"]) is 3`,
+			},
+		},
+
+		// Function-table entry for the maxlen stats function.
+		{
+			name:      "maxlen",
+			class:     FUNC_CLASS_STATS,
+			help:      `Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			unaryFunc: bifs.BIF_maxlen,
+			examples: []string{
+				`maxlen(["año", "alto"]) is 4`,
+			},
+		},
+
+		{
+			name:               "median",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			unaryFunc:          bifs.BIF_median,
+			binaryFunc:         bifs.BIF_median_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				`median([3,4,5,6,9,10]) is 6`,
+				`median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5`,
+				`median(["abc", "def", "ghi", "ghi"]) is "ghi"`,
+			},
+		},
+
+		{
+			name:               "percentile",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			binaryFunc:         bifs.BIF_percentile,
+			ternaryFunc:        bifs.BIF_percentile_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				`percentile([3,4,5,6,9,10], 90) is 10`,
+				`percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5`,
+				`percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"`,
+			},
+		},
+
+		// Function-table entry for the percentiles stats function. The examples
+		// double as the long-form documentation of the three option flags.
+		{
+			name:               "percentiles",
+			class:              FUNC_CLASS_STATS,
+			help:               `Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.`,
+			binaryFunc:         bifs.BIF_percentiles,
+			ternaryFunc:        bifs.BIF_percentiles_with_options,
+			hasMultipleArities: true,
+			examples: []string{
+				``,
+				`Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort`,
+				`the input before computing percentiles:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }`,
+				`  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }`,
+				``,
+				`Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]`,
+				``,
+				`Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces`,
+				`error on string inputs:`,
+				``,
+				`  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`,
+				``,
+				`The percentiles function always sorts its inputs before computing percentiles. If you know your input`,
+				`is already sorted -- see also the sort_collection function -- then computation will be faster on`,
+				`large input if you pass in "array_is_sorted":`,
+				``,
+				`  x = [6,5,9,10,4,3]`,
+				`  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect`,
+				`  x = sort_collection(x)`,
+				`  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct`,
+				``,
+				`You can also leverage this feature to compute percentiles on a sort of your choosing. For example:`,
+				``,
+				`  Non-sorted input:`,
+				`    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")`,
+				`    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]`,
+				`  Percentiles are taken over the original positions of the words in the array -- "dogs" is last`,
+				`  and hence appears as p99:`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]`,
+				`  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:`,
+				`    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`,
+				`  With default sorting done outside percentiles, the same:`,
+				`    x = sort(x) # or x = sort_collection(x)`,
+				`    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]`,
+				`    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`,
+				`  Now sorting by word length, "loquaciously" is longest and hence is the p99:`,
+				`    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )`,
+				`    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]`,
+				`    percentiles(x, [50, 99], {"oa":true, "ais":true})`,
+				`    ["over", "loquaciously"]`,
+			},
+		},
+
+		{
+			name:      "sort_collection",
+			class:     FUNC_CLASS_STATS,
+			help:      `This is a helper function for the percentiles function; please see its online help for details.`,
+			unaryFunc: bifs.BIF_sort_collection,
+			examples:  []string{},
+		},
+
 		// ----------------------------------------------------------------
 		// FUNC_CLASS_TIME
 
diff --git a/internal/pkg/mlrval/mlrval_collections.go b/internal/pkg/mlrval/mlrval_collections.go
index 6674a044a..5e009aff2 100644
--- a/internal/pkg/mlrval/mlrval_collections.go
+++ b/internal/pkg/mlrval/mlrval_collections.go
@@ -739,3 +739,51 @@ func LengthenMlrvalArray(array *[]*Mlrval, newLength64 int) {
 		*array = newArray
 	}
 }
+
+// ArrayFold reduces an array to a single value, with a user-supplied starting value and pairwise
+// element-reducer function. Canonical example: start value is 0 and reducer f(a,b) is a+b: this
+// will sum up the values in the array.
+func ArrayFold(
+	a []*Mlrval,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	// Thread the running result through the reducer, left to right.
+	result := initval
+	for i := 0; i < len(a); i++ {
+		result = f(result, a[i])
+	}
+	return result
+}
+
+// MapFold reduces a map's values to a single value, with a user-supplied starting value and
+// pairwise element-reducer function. Canonical example: start value is 0 and reducer f(a,b) is a+b:
+// this will sum up the values in the map. Nothing here accesses map keys.
+func MapFold(
+	m *Mlrmap,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	// Walk the entry list from the head, threading the running result through
+	// the reducer.
+	result := initval
+	entry := m.Head
+	for entry != nil {
+		result = f(result, entry.Value)
+		entry = entry.Next
+	}
+	return result
+}
+
+// CollectionFold multiplexes ArrayFold or MapFold. The panic here is not robust, but is done to
+// avoid adding an error-return that would frictionalize the API.  The idea is that the caller
+// (internal/library functions, not directly user-facing) must have pre-validated that the argument
+// is an array or map. The panic here is merely a fallback, not the primary check.
+func CollectionFold(
+	c *Mlrval,
+	initval *Mlrval,
+	f func(a, b *Mlrval) *Mlrval,
+) *Mlrval {
+	switch {
+	case c.IsArray():
+		return ArrayFold(c.AcquireArrayValue(), initval, f)
+	case c.IsMap():
+		return MapFold(c.AcquireMapValue(), initval, f)
+	default:
+		// Callers are expected to have validated the argument beforehand.
+		panic("CollectionFold argument is neither array nor map")
+	}
+}
diff --git a/internal/pkg/mlrval/mlrval_new.go b/internal/pkg/mlrval/mlrval_new.go
index 0ac8d2613..a46bc73a0 100644
--- a/internal/pkg/mlrval/mlrval_new.go
+++ b/internal/pkg/mlrval/mlrval_new.go
@@ -222,6 +222,12 @@ func FromArray(arrayval []*Mlrval) *Mlrval {
 	}
 }
 
+// FromSingletonArray wraps a single element in a one-element array-valued
+// Mlrval. The element pointer is stored as given, not copied.
+func FromSingletonArray(element *Mlrval) *Mlrval {
+	return FromArray([]*Mlrval{element})
+}
+
 func FromEmptyArray() *Mlrval {
 	return FromArray(make([]*Mlrval, 0))
 }
diff --git a/internal/pkg/transformers/utils/percentile_keeper.go b/internal/pkg/transformers/utils/percentile_keeper.go
index 41be04652..c9f2453bd 100644
--- a/internal/pkg/transformers/utils/percentile_keeper.go
+++ b/internal/pkg/transformers/utils/percentile_keeper.go
@@ -6,7 +6,6 @@ package utils
 
 import (
 	"fmt"
-	"math"
 	"sort"
 
 	"github.com/johnkerl/miller/internal/pkg/bifs"
@@ -55,209 +54,6 @@ func (keeper *PercentileKeeper) Ingest(value *mlrval.Mlrval) {
 	keeper.sorted = false
 }
 
-// ================================================================
-// Non-interpolated percentiles (see also https://en.wikipedia.org/wiki/Percentile)
-
-// ----------------------------------------------------------------
-// OPTION 1: int index = p*n/100.0;
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
-// x_p01 0 x_p11  0 x_p21 20 x_p31 20 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81  80 x_p91 100
-// x_p02 0 x_p12  0 x_p22 20 x_p32 20 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82  80 x_p92 100
-// x_p03 0 x_p13  0 x_p23 20 x_p33 20 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83  80 x_p93 100
-// x_p04 0 x_p14  0 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 100 x_p94 100
-// x_p05 0 x_p15  0 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 100 x_p95 100
-// x_p06 0 x_p16  0 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 100 x_p96 100
-// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
-// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
-// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10 0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 75 x_p70 75 x_p80 100 x_p90 100 x_p100 100
-// x_p01 0 x_p11 0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 75 x_p71 75 x_p81 100 x_p91 100
-// x_p02 0 x_p12 0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 75 x_p72 75 x_p82 100 x_p92 100
-// x_p03 0 x_p13 0 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83 100 x_p93 100
-// x_p04 0 x_p14 0 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84 100 x_p94 100
-// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85 100 x_p95 100
-// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86 100 x_p96 100
-// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87 100 x_p97 100
-// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
-// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// OPTION 2: int index = p*(n-1)/100.0;
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10 0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90 80 x_p100 100
-// x_p01 0 x_p11 0 x_p21 20 x_p31 20 x_p41 40 x_p51 40 x_p61 60 x_p71 60 x_p81 80 x_p91 80
-// x_p02 0 x_p12 0 x_p22 20 x_p32 20 x_p42 40 x_p52 40 x_p62 60 x_p72 60 x_p82 80 x_p92 80
-// x_p03 0 x_p13 0 x_p23 20 x_p33 20 x_p43 40 x_p53 40 x_p63 60 x_p73 60 x_p83 80 x_p93 80
-// x_p04 0 x_p14 0 x_p24 20 x_p34 20 x_p44 40 x_p54 40 x_p64 60 x_p74 60 x_p84 80 x_p94 80
-// x_p05 0 x_p15 0 x_p25 20 x_p35 20 x_p45 40 x_p55 40 x_p65 60 x_p75 60 x_p85 80 x_p95 80
-// x_p06 0 x_p16 0 x_p26 20 x_p36 20 x_p46 40 x_p56 40 x_p66 60 x_p76 60 x_p86 80 x_p96 80
-// x_p07 0 x_p17 0 x_p27 20 x_p37 20 x_p47 40 x_p57 40 x_p67 60 x_p77 60 x_p87 80 x_p97 80
-// x_p08 0 x_p18 0 x_p28 20 x_p38 20 x_p48 40 x_p58 40 x_p68 60 x_p78 60 x_p88 80 x_p98 80
-// x_p09 0 x_p19 0 x_p29 20 x_p39 20 x_p49 40 x_p59 40 x_p69 60 x_p79 60 x_p89 80 x_p99 80
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10 0 x_p20  0 x_p30 25 x_p40 25 x_p50 50 x_p60 50 x_p70 50 x_p80 75 x_p90 75 x_p100 100
-// x_p01 0 x_p11 0 x_p21  0 x_p31 25 x_p41 25 x_p51 50 x_p61 50 x_p71 50 x_p81 75 x_p91 75
-// x_p02 0 x_p12 0 x_p22  0 x_p32 25 x_p42 25 x_p52 50 x_p62 50 x_p72 50 x_p82 75 x_p92 75
-// x_p03 0 x_p13 0 x_p23  0 x_p33 25 x_p43 25 x_p53 50 x_p63 50 x_p73 50 x_p83 75 x_p93 75
-// x_p04 0 x_p14 0 x_p24  0 x_p34 25 x_p44 25 x_p54 50 x_p64 50 x_p74 50 x_p84 75 x_p94 75
-// x_p05 0 x_p15 0 x_p25 25 x_p35 25 x_p45 25 x_p55 50 x_p65 50 x_p75 75 x_p85 75 x_p95 75
-// x_p06 0 x_p16 0 x_p26 25 x_p36 25 x_p46 25 x_p56 50 x_p66 50 x_p76 75 x_p86 75 x_p96 75
-// x_p07 0 x_p17 0 x_p27 25 x_p37 25 x_p47 25 x_p57 50 x_p67 50 x_p77 75 x_p87 75 x_p97 75
-// x_p08 0 x_p18 0 x_p28 25 x_p38 25 x_p48 25 x_p58 50 x_p68 50 x_p78 75 x_p88 75 x_p98 75
-// x_p09 0 x_p19 0 x_p29 25 x_p39 25 x_p49 25 x_p59 50 x_p69 50 x_p79 75 x_p89 75 x_p99 75
-//
-// ----------------------------------------------------------------
-// OPTION 3: int index = (int)ceil(p*(n-1)/100.0);
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00  0 x_p10 20 x_p20 20 x_p30 40 x_p40 40 x_p50 60 x_p60 60 x_p70 80 x_p80  80 x_p90 100 x_p100 100
-// x_p01 20 x_p11 20 x_p21 40 x_p31 40 x_p41 60 x_p51 60 x_p61 80 x_p71 80 x_p81 100 x_p91 100
-// x_p02 20 x_p12 20 x_p22 40 x_p32 40 x_p42 60 x_p52 60 x_p62 80 x_p72 80 x_p82 100 x_p92 100
-// x_p03 20 x_p13 20 x_p23 40 x_p33 40 x_p43 60 x_p53 60 x_p63 80 x_p73 80 x_p83 100 x_p93 100
-// x_p04 20 x_p14 20 x_p24 40 x_p34 40 x_p44 60 x_p54 60 x_p64 80 x_p74 80 x_p84 100 x_p94 100
-// x_p05 20 x_p15 20 x_p25 40 x_p35 40 x_p45 60 x_p55 60 x_p65 80 x_p75 80 x_p85 100 x_p95 100
-// x_p06 20 x_p16 20 x_p26 40 x_p36 40 x_p46 60 x_p56 60 x_p66 80 x_p76 80 x_p86 100 x_p96 100
-// x_p07 20 x_p17 20 x_p27 40 x_p37 40 x_p47 60 x_p57 60 x_p67 80 x_p77 80 x_p87 100 x_p97 100
-// x_p08 20 x_p18 20 x_p28 40 x_p38 40 x_p48 60 x_p58 60 x_p68 80 x_p78 80 x_p88 100 x_p98 100
-// x_p09 20 x_p19 20 x_p29 40 x_p39 40 x_p49 60 x_p59 60 x_p69 80 x_p79 80 x_p89 100 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00  0 x_p10 25 x_p20 25 x_p30 50 x_p40 50 x_p50 50 x_p60 75 x_p70  75 x_p80 100 x_p90 100 x_p100 100
-// x_p01 25 x_p11 25 x_p21 25 x_p31 50 x_p41 50 x_p51 75 x_p61 75 x_p71  75 x_p81 100 x_p91 100
-// x_p02 25 x_p12 25 x_p22 25 x_p32 50 x_p42 50 x_p52 75 x_p62 75 x_p72  75 x_p82 100 x_p92 100
-// x_p03 25 x_p13 25 x_p23 25 x_p33 50 x_p43 50 x_p53 75 x_p63 75 x_p73  75 x_p83 100 x_p93 100
-// x_p04 25 x_p14 25 x_p24 25 x_p34 50 x_p44 50 x_p54 75 x_p64 75 x_p74  75 x_p84 100 x_p94 100
-// x_p05 25 x_p15 25 x_p25 25 x_p35 50 x_p45 50 x_p55 75 x_p65 75 x_p75  75 x_p85 100 x_p95 100
-// x_p06 25 x_p16 25 x_p26 50 x_p36 50 x_p46 50 x_p56 75 x_p66 75 x_p76 100 x_p86 100 x_p96 100
-// x_p07 25 x_p17 25 x_p27 50 x_p37 50 x_p47 50 x_p57 75 x_p67 75 x_p77 100 x_p87 100 x_p97 100
-// x_p08 25 x_p18 25 x_p28 50 x_p38 50 x_p48 50 x_p58 75 x_p68 75 x_p78 100 x_p88 100 x_p98 100
-// x_p09 25 x_p19 25 x_p29 50 x_p39 50 x_p49 50 x_p59 75 x_p69 75 x_p79 100 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// OPTION 4: int index = (int)ceil(-0.5 + p*(n-1)/100.0);
-//
-// x
-// 0
-// 20
-// 40
-// 60
-// 80
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 20 x_p30 20 x_p40 40 x_p50 40 x_p60 60 x_p70 60 x_p80 80 x_p90  80 x_p100 100
-// x_p01 0 x_p11 20 x_p21 20 x_p31 40 x_p41 40 x_p51 60 x_p61 60 x_p71 80 x_p81 80 x_p91 100
-// x_p02 0 x_p12 20 x_p22 20 x_p32 40 x_p42 40 x_p52 60 x_p62 60 x_p72 80 x_p82 80 x_p92 100
-// x_p03 0 x_p13 20 x_p23 20 x_p33 40 x_p43 40 x_p53 60 x_p63 60 x_p73 80 x_p83 80 x_p93 100
-// x_p04 0 x_p14 20 x_p24 20 x_p34 40 x_p44 40 x_p54 60 x_p64 60 x_p74 80 x_p84 80 x_p94 100
-// x_p05 0 x_p15 20 x_p25 20 x_p35 40 x_p45 40 x_p55 60 x_p65 60 x_p75 80 x_p85 80 x_p95 100
-// x_p06 0 x_p16 20 x_p26 20 x_p36 40 x_p46 40 x_p56 60 x_p66 60 x_p76 80 x_p86 80 x_p96 100
-// x_p07 0 x_p17 20 x_p27 20 x_p37 40 x_p47 40 x_p57 60 x_p67 60 x_p77 80 x_p87 80 x_p97 100
-// x_p08 0 x_p18 20 x_p28 20 x_p38 40 x_p48 40 x_p58 60 x_p68 60 x_p78 80 x_p88 80 x_p98 100
-// x_p09 0 x_p19 20 x_p29 20 x_p39 40 x_p49 40 x_p59 60 x_p69 60 x_p79 80 x_p89 80 x_p99 100
-//
-// x
-// 0
-// 25
-// 50
-// 75
-// 100
-//
-// x_p00 0 x_p10  0 x_p20 25 x_p30 25 x_p40 50 x_p50 50 x_p60 50 x_p70 75 x_p80  75 x_p90 100 x_p100 100
-// x_p01 0 x_p11  0 x_p21 25 x_p31 25 x_p41 50 x_p51 50 x_p61 50 x_p71 75 x_p81  75 x_p91 100
-// x_p02 0 x_p12  0 x_p22 25 x_p32 25 x_p42 50 x_p52 50 x_p62 50 x_p72 75 x_p82  75 x_p92 100
-// x_p03 0 x_p13 25 x_p23 25 x_p33 25 x_p43 50 x_p53 50 x_p63 75 x_p73 75 x_p83  75 x_p93 100
-// x_p04 0 x_p14 25 x_p24 25 x_p34 25 x_p44 50 x_p54 50 x_p64 75 x_p74 75 x_p84  75 x_p94 100
-// x_p05 0 x_p15 25 x_p25 25 x_p35 25 x_p45 50 x_p55 50 x_p65 75 x_p75 75 x_p85  75 x_p95 100
-// x_p06 0 x_p16 25 x_p26 25 x_p36 25 x_p46 50 x_p56 50 x_p66 75 x_p76 75 x_p86  75 x_p96 100
-// x_p07 0 x_p17 25 x_p27 25 x_p37 25 x_p47 50 x_p57 50 x_p67 75 x_p77 75 x_p87  75 x_p97 100
-// x_p08 0 x_p18 25 x_p28 25 x_p38 50 x_p48 50 x_p58 50 x_p68 75 x_p78 75 x_p88 100 x_p98 100
-// x_p09 0 x_p19 25 x_p29 25 x_p39 50 x_p49 50 x_p59 50 x_p69 75 x_p79 75 x_p89 100 x_p99 100
-//
-// ----------------------------------------------------------------
-// CONCLUSION:
-// * I like option 2 for its simplicity ...
-// * ... but option 1 matches R's quantile with type=1.
-// * (Note that Miller's interpolated percentiles match match R's quantile with type=7)
-// ----------------------------------------------------------------
-
-func computeIndexNoninterpolated(n int, p float64) int {
-	index := int(p * float64(n) / 100.0)
-	//index := p * (float64(float64(n)) - 1) / 100.0
-	//index := int(ceil(p * (float64(n) - 1) / 100.0))
-	//index := int(ceil(-0.5 + p*(float64(n)-1)/100.0))
-	if index >= n {
-		index = n - 1
-	}
-	if index < 0 {
-		index = 0
-	}
-	return index
-}
-
-// xxx pending pointer-output refactor
-func getPercentileLinearlyInterpolated(array []*mlrval.Mlrval, n int, p float64) mlrval.Mlrval {
-	findex := (p / 100.0) * (float64(n) - 1)
-	if findex < 0.0 {
-		findex = 0.0
-	}
-	iindex := int(math.Floor(findex))
-	if iindex >= n-1 {
-		return *array[iindex].Copy()
-	} else {
-		// array[iindex] + frac * (array[iindex+1] - array[iindex])
-		// TODO: just do this in float64.
-		frac := mlrval.FromFloat(findex - float64(iindex))
-		diff := bifs.BIF_minus_binary(array[iindex+1], array[iindex])
-		prod := bifs.BIF_times(frac, diff)
-		return *bifs.BIF_plus_binary(array[iindex], prod)
-	}
-}
-
 // ----------------------------------------------------------------
 func (keeper *PercentileKeeper) sortIfNecessary() {
 	if !keeper.sorted {
@@ -282,7 +78,7 @@ func (keeper *PercentileKeeper) EmitNonInterpolated(percentile float64) *mlrval.
 		return mlrval.VOID
 	}
 	keeper.sortIfNecessary()
-	return keeper.data[computeIndexNoninterpolated(int(len(keeper.data)), percentile)].Copy()
+	return bifs.GetPercentileNonInterpolated(keeper.data, int(len(keeper.data)), percentile)
 }
 
 func (keeper *PercentileKeeper) EmitLinearlyInterpolated(percentile float64) *mlrval.Mlrval {
@@ -290,8 +86,7 @@ func (keeper *PercentileKeeper) EmitLinearlyInterpolated(percentile float64) *ml
 		return mlrval.VOID
 	}
 	keeper.sortIfNecessary()
-	output := getPercentileLinearlyInterpolated(keeper.data, int(len(keeper.data)), percentile)
-	return output.Copy()
+	return bifs.GetPercentileLinearlyInterpolated(keeper.data, int(len(keeper.data)), percentile)
 }
 
 // ----------------------------------------------------------------
diff --git a/internal/pkg/transformers/utils/stats1_accumulators.go b/internal/pkg/transformers/utils/stats1_accumulators.go
index d85cadf66..c984ed922 100644
--- a/internal/pkg/transformers/utils/stats1_accumulators.go
+++ b/internal/pkg/transformers/utils/stats1_accumulators.go
@@ -615,7 +615,7 @@ func (acc *Stats1VarAccumulator) Ingest(value *mlrval.Mlrval) {
 	}
 }
 func (acc *Stats1VarAccumulator) Emit() *mlrval.Mlrval {
-	return bifs.BIF_get_var(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
+	return bifs.BIF_finalize_variance(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
 }
 func (acc *Stats1VarAccumulator) Reset() {
 	acc.count = 0
@@ -646,7 +646,7 @@ func (acc *Stats1StddevAccumulator) Ingest(value *mlrval.Mlrval) {
 	}
 }
 func (acc *Stats1StddevAccumulator) Emit() *mlrval.Mlrval {
-	return bifs.BIF_get_stddev(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
+	return bifs.BIF_finalize_stddev(mlrval.FromInt(acc.count), acc.sum, acc.sum2)
 }
 func (acc *Stats1StddevAccumulator) Reset() {
 	acc.count = 0
@@ -678,7 +678,7 @@ func (acc *Stats1MeanEBAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1MeanEBAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_mean_EB(mcount, acc.sum, acc.sum2)
+	return bifs.BIF_finalize_mean_eb(mcount, acc.sum, acc.sum2)
 }
 func (acc *Stats1MeanEBAccumulator) Reset() {
 	acc.count = 0
@@ -714,7 +714,7 @@ func (acc *Stats1SkewnessAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1SkewnessAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_skewness(mcount, acc.sum, acc.sum2, acc.sum3)
+	return bifs.BIF_finalize_skewness(mcount, acc.sum, acc.sum2, acc.sum3)
 }
 func (acc *Stats1SkewnessAccumulator) Reset() {
 	acc.count = 0
@@ -755,7 +755,7 @@ func (acc *Stats1KurtosisAccumulator) Ingest(value *mlrval.Mlrval) {
 }
 func (acc *Stats1KurtosisAccumulator) Emit() *mlrval.Mlrval {
 	mcount := mlrval.FromInt(acc.count)
-	return bifs.BIF_get_kurtosis(mcount, acc.sum, acc.sum2, acc.sum3, acc.sum4)
+	return bifs.BIF_finalize_kurtosis(mcount, acc.sum, acc.sum2, acc.sum3, acc.sum4)
 }
 func (acc *Stats1KurtosisAccumulator) Reset() {
 	acc.count = 0
diff --git a/man/manpage.txt b/man/manpage.txt
index b3352b9a6..666177bee 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -182,32 +182,34 @@ MILLER(1)                                                            MILLER(1)
        unsparsify
 
 1mFUNCTION LIST0m
-       abs acos acosh any append apply arrayify asin asinh asserting_absent
+       abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
        asserting_array asserting_bool asserting_boolean asserting_empty
        asserting_empty_map asserting_error asserting_float asserting_int
        asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty
        asserting_not_map asserting_not_null asserting_null asserting_numeric
        asserting_present asserting_string atan atan2 atanh bitcount boolean
        capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh
-       depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor
-       fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values
-       gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec
-       hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty
-       is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
+       count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1
+       flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys
+       get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec
+       hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean
+       is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
        is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present
-       is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8
+       is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8
        leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10
-       log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min
-       mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm
+       log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5
+       mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate
+       nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm
        reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms
        sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256
-       sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt
-       ssub strfntime strfntime_local strftime strftime_local string strip strlen
-       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
-       sysntime system systime systimeint tan tanh tolower toupper truncate typeof
-       unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .-
-       ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx
+       splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime
+       strftime_local string strip strlen strpntime strpntime_local strptime
+       strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system
+       systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat
+       unformatx upntime uptime urand urand32 urandelement urandint urandrange
+       utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // <
+       << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2164,6 +2166,12 @@ MILLER(1)                                                            MILLER(1)
    1macosh0m
         (class=math #args=1) Inverse hyperbolic cosine.
 
+   1mantimode0m
+        (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+       Examples:
+       antimode([3,3,4,4,4]) is 3
+       antimode([3,3,4,4]) is 3
+
    1many0m
         (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean.
        Examples:
@@ -2288,6 +2296,12 @@ MILLER(1)                                                            MILLER(1)
    1mcosh0m
         (class=math #args=1) Hyperbolic cosine.
 
+   1mcount0m
+        (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+       Examples:
+       count([7,8,9]) is 3
+       count({"a":7,"b":8,"c":9}) is 3
+
    1mdepth0m
         (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0.
 
@@ -2297,6 +2311,13 @@ MILLER(1)                                                            MILLER(1)
    1mdhms2sec0m
         (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000
 
+   1mdistinct_count0m
+        (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+       Examples:
+       distinct_count([7,8,9,7])  is 3
+       distinct_count([1,"1"]) is 1
+       distinct_count([1,1.0]) is 2
+
    1merf0m
         (class=math #args=1) Error function.
 
@@ -2521,6 +2542,11 @@ MILLER(1)                                                            MILLER(1)
    1mjson_stringify0m
         (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output.
 
+   1mkurtosis0m
+        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       kurtosis([4,5,9,10,11]) is -1.6703688
+
    1mlatin1_to_utf80m
         (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it.
        Examples:
@@ -2589,20 +2615,53 @@ MILLER(1)                                                            MILLER(1)
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
    1mmax0m
-        (class=math #args=variadic) Max of n numbers; null loses.
+        (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+
+   1mmaxlen0m
+        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       maxlen(["año", "alto"]) is 4
 
    1mmd50m
         (class=hashing #args=1) MD5 hash.
 
+   1mmean0m
+        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+       Example:
+       mean([4,5,7,10]) is 6.5
+
+   1mmeaneb0m
+        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+       Example:
+       meaneb([4,5,7,10]) is 1.3228756
+
+   1mmedian0m
+        (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+       Examples:
+       median([3,4,5,6,9,10]) is 6
+       median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+       median(["abc", "def", "ghi", "ghi"]) is "ghi"
+
    1mmexp0m
         (class=arithmetic #args=3) a ** b mod m (integers)
 
    1mmin0m
-        (class=math #args=variadic) Min of n numbers; null loses.
+        (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+
+   1mminlen0m
+        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       minlen(["año", "alto"]) is 3
 
    1mmmul0m
         (class=arithmetic #args=3) a * b mod m (integers)
 
+   1mmode0m
+        (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+       Examples:
+       mode([3,3,4,4,4]) is 4
+       mode([3,3,4,4]) is 3
+
    1mmsub0m
         (class=arithmetic #args=3) a - b mod m (integers)
 
@@ -2632,9 +2691,70 @@ MILLER(1)                                                            MILLER(1)
        nsec2localtime(1234567890123456789, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul"
        nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456"
 
+   1mnull_count0m
+        (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+       Example:
+       null_count(["a", "", "c"]) is 1
+
    1mos0m
         (class=system #args=0) Returns the operating-system name as a string.
 
+   1mpercentile0m
+        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+       Examples:
+       percentile([3,4,5,6,9,10], 90) is 10
+       percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+       percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+
+   1mpercentiles0m
+        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+       Examples:
+
+       Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+       the input before computing percentiles:
+
+         percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+         percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+       Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+         percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+       Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+       an error on string inputs:
+
+         percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+       The percentiles function sorts its inputs by default before computing percentiles. If you know your input
+       is already sorted -- see also the sort_collection function -- then computation will be faster on
+       large input if you pass in "array_is_sorted":
+
+         x = [6,5,9,10,4,3]
+         percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+         x = sort_collection(x)
+         percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+       You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+         Non-sorted input:
+           x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+           x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+         Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+         and hence appears as p99:
+           percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+         With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+           percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+         With default sorting done outside percentiles, the same:
+           x = sort(x) # or x = sort_collection(x)
+           x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+           percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+           percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+         Now sorting by word length, "loquaciously" is longest and hence is the p99:
+           x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+           x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+           percentiles(x, [50, 99], {"oa":true, "ais":true})
+           ["over", "loquaciously"]
+
    1mpow0m
         (class=arithmetic #args=2) Exponentiation. Same as **, but as a function.
 
@@ -2731,6 +2851,11 @@ MILLER(1)                                                            MILLER(1)
    1msinh0m
         (class=math #args=1) Hyperbolic sine.
 
+   1mskewness0m
+        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       skewness([4,5,9,10,11]) is -0.2097285
+
    1msort0m
         (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values.
        Examples:
@@ -2747,6 +2872,9 @@ MILLER(1)                                                            MILLER(1)
        Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}.
        Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}.
 
+   1msort_collection0m
+        (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+
    1msplita0m
         (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on.
        Example:
@@ -2785,6 +2913,11 @@ MILLER(1)                                                            MILLER(1)
        Example:
        ssub("abc.def", ".", "X") gives "abcXdef"
 
+   1mstddev0m
+        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       stddev([4,5,9,10,11]) is 3.1144823
+
    1mstrfntime0m
         (class=time #args=2) Formats integer nanoseconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local.
        Examples:
@@ -2872,6 +3005,26 @@ MILLER(1)                                                            MILLER(1)
    1msubstr10m
         (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0.
 
+   1msum0m
+        (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum([1,2,3,4,5]) is 15
+
+   1msum20m
+        (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum2([1,2,3,4,5]) is 55
+
+   1msum30m
+        (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum3([1,2,3,4,5]) is 225
+
+   1msum40m
+        (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+       Example:
+       sum4([1,2,3,4,5]) is 979
+
    1msysntime0m
         (class=time #args=0) Returns the system time in 64-bit nanoseconds since the epoch.
 
@@ -2950,6 +3103,11 @@ MILLER(1)                                                            MILLER(1)
        $y = utf8_to_latin1($x)
        $* = utf8_to_latin1($*)
 
+   1mvariance0m
+        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+       Example:
+       variance([4,5,9,10,11]) is 9.7
+
    1mversion0m
         (class=system #args=0) Returns the Miller version as a string.
 
@@ -3451,4 +3609,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-23                         MILLER(1)
+                                  2023-08-26                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index b7c343ce1..91d501b6b 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-23
+.\"      Date: 2023-08-26
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-23" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-26" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -229,32 +229,34 @@ unsparsify
 .RS 0
 .\}
 .nf
-abs acos acosh any append apply arrayify asin asinh asserting_absent
+abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
 asserting_array asserting_bool asserting_boolean asserting_empty
 asserting_empty_map asserting_error asserting_float asserting_int
 asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty
 asserting_not_map asserting_not_null asserting_null asserting_numeric
 asserting_present asserting_string atan atan2 atanh bitcount boolean
 capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh
-depth dhms2fsec dhms2sec erf erfc every exec exp expm1 flatten float floor
-fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values
-gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec
-hostname index int invqnorm is_absent is_array is_bool is_boolean is_empty
-is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
+count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1
+flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys
+get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec
+hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean
+is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map
 is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present
-is_string joink joinkv joinv json_parse json_stringify latin1_to_utf8
+is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8
 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10
-log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min
-mmul msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime os pow qnorm
+log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5
+mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate
+nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm
 reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms
 sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256
-sha512 sin sinh sort splita splitax splitkv splitkvx splitnv splitnvx sqrt
-ssub strfntime strfntime_local strftime strftime_local string strip strlen
-strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
-sysntime system systime systimeint tan tanh tolower toupper truncate typeof
-unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 version ! != !=~ % & && * ** + - . .* .+ .-
-\&./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx
+splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime
+strftime_local string strip strlen strpntime strpntime_local strptime
+strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system
+systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat
+unformatx upntime uptime urand urand32 urandelement urandint urandrange
+utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // <
+<< <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
 .fi
 .if n \{\
 .RE
@@ -2765,6 +2767,18 @@ being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
 .fi
 .if n \{\
 .RE
+.SS "antimode"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+antimode([3,3,4,4,4]) is 3
+antimode([3,3,4,4]) is 3
+.fi
+.if n \{\
+.RE
 .SS "any"
 .if n \{\
 .RS 0
@@ -3117,6 +3131,18 @@ concat([1,2],[3]) is [1,2,3]
 .fi
 .if n \{\
 .RE
+.SS "count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the length of an array or map. Returns error for non-array/non-map types.
+Examples:
+count([7,8,9]) is 3
+count({"a":7,"b":8,"c":9}) is 3
+.fi
+.if n \{\
+.RE
 .SS "depth"
 .if n \{\
 .RS 0
@@ -3144,6 +3170,19 @@ concat([1,2],[3]) is [1,2,3]
 .fi
 .if n \{\
 .RE
+.SS "distinct_count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the number of distinct values in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Examples:
+distinct_count([7,8,9,7]) is 3
+distinct_count([1,"1"]) is 1
+distinct_count([1,1.0]) is 2
+.fi
+.if n \{\
+.RE
 .SS "erf"
 .if n \{\
 .RS 0
@@ -3698,6 +3737,17 @@ joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5"
 .fi
 .if n \{\
 .RE
+.SS "kurtosis"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+kurtosis([4,5,9,10,11]) is -1.6703688
+.fi
+.if n \{\
+.RE
 .SS "latin1_to_utf8"
 .if n \{\
 .RS 0
@@ -3872,7 +3922,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .RS 0
 .\}
 .nf
- (class=math #args=variadic) Max of n numbers; null loses.
+ (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+.fi
+.if n \{\
+.RE
+.SS "maxlen"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+maxlen(["año", "alto"]) is 4
 .fi
 .if n \{\
 .RE
@@ -3885,6 +3946,41 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "mean"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+mean([4,5,7,10]) is 6.5
+.fi
+.if n \{\
+.RE
+.SS "meaneb"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+meaneb([4,5,7,10]) is 1.3228756
+.fi
+.if n \{\
+.RE
+.SS "median"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+median([3,4,5,6,9,10]) is 6
+median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
+median(["abc", "def", "ghi", "ghi"]) is "ghi"
+.fi
+.if n \{\
+.RE
 .SS "mexp"
 .if n \{\
 .RS 0
@@ -3899,7 +3995,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .RS 0
 .\}
 .nf
- (class=math #args=variadic) Min of n numbers; null loses.
+ (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
+.fi
+.if n \{\
+.RE
+.SS "minlen"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+Example:
+minlen(["año", "alto"]) is 3
 .fi
 .if n \{\
 .RE
@@ -3912,6 +4019,18 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "mode"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+Examples:
+mode([3,3,4,4,4]) is 4
+mode([3,3,4,4]) is 3
+.fi
+.if n \{\
+.RE
 .SS "msub"
 .if n \{\
 .RS 0
@@ -3971,6 +4090,17 @@ nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.1
 .fi
 .if n \{\
 .RE
+.SS "null_count"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the number of values in an array or map which are empty-string (AKA void) or JSON null. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct.
+Example:
+null_count(["a", "", "c"]) is 1
+.fi
+.if n \{\
+.RE
 .SS "os"
 .if n \{\
 .RS 0
@@ -3980,6 +4110,74 @@ nsec2localtime(1234567890123456789, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.1
 .fi
 .if n \{\
 .RE
+.SS "percentile"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
+Examples:
+percentile([3,4,5,6,9,10], 90) is 10
+percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
+percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
+.fi
+.if n \{\
+.RE
+.SS "percentiles"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+Examples:
+
+Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
+the input before computing percentiles:
+
+  percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }
+  percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }
+
+Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]
+
+Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces
+an error on string inputs:
+
+  percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }
+
+The percentiles function always sorts its inputs before computing percentiles. If you know your input
+is already sorted -- see also the sort_collection function -- then computation will be faster on
+large input if you pass in "array_is_sorted":
+
+  x = [6,5,9,10,4,3]
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect
+  x = sort_collection(x)
+  percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct
+
+You can also leverage this feature to compute percentiles on a sort of your choosing. For example:
+
+  Non-sorted input:
+    x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")
+    x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]
+  Percentiles are taken over the original positions of the words in the array -- "dogs" is last
+  and hence appears as p99:
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]
+  With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  With default sorting done outside percentiles, the same:
+    x = sort(x) # or x = sort_collection(x)
+    x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]
+    percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]
+  Now sorting by word length, "loquaciously" is longest and hence is the p99:
+    x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )
+    x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]
+    percentiles(x, [50, 99], {"oa":true, "ais":true})
+    ["over", "loquaciously"]
+.fi
+.if n \{\
+.RE
 .SS "pow"
 .if n \{\
 .RS 0
@@ -4208,6 +4406,17 @@ Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {"
 .fi
 .if n \{\
 .RE
+.SS "skewness"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+skewness([4,5,9,10,11]) is -0.2097285
+.fi
+.if n \{\
+.RE
 .SS "sort"
 .if n \{\
 .RS 0
@@ -4230,6 +4439,15 @@ Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":
 .fi
 .if n \{\
 .RE
+.SS "sort_collection"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) This is a helper function for the percentiles function; please see its online help for details.
+.fi
+.if n \{\
+.RE
 .SS "splita"
 .if n \{\
 .RS 0
@@ -4316,6 +4534,17 @@ ssub("abc.def", ".", "X") gives "abcXdef"
 .fi
 .if n \{\
 .RE
+.SS "stddev"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+stddev([4,5,9,10,11]) is 3.1144823
+.fi
+.if n \{\
+.RE
 .SS "strfntime"
 .if n \{\
 .RS 0
@@ -4493,6 +4722,50 @@ sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\e1") gives "prefix4529:name
 .fi
 .if n \{\
 .RE
+.SS "sum"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum([1,2,3,4,5]) is 15
+.fi
+.if n \{\
+.RE
+.SS "sum2"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of squares of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum2([1,2,3,4,5]) is 55
+.fi
+.if n \{\
+.RE
+.SS "sum3"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of cubes of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum3([1,2,3,4,5]) is 225
+.fi
+.if n \{\
+.RE
+.SS "sum4"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sum of fourth powers of values in an array or map. Returns error for non-array/non-map types.
+Example:
+sum4([1,2,3,4,5]) is 979
+.fi
+.if n \{\
+.RE
 .SS "sysntime"
 .if n \{\
 .RS 0
@@ -4697,6 +4970,17 @@ $* = utf8_to_latin1($*)
 .fi
 .if n \{\
 .RE
+.SS "variance"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+Example:
+variance([4,5,9,10,11]) is 9.7
+.fi
+.if n \{\
+.RE
 .SS "version"
 .if n \{\
 .RS 0
diff --git a/test/cases/dsl-stats/count/various/cmd b/test/cases/dsl-stats/count/various/cmd
new file mode 100644
index 000000000..8e64fdff2
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/count/various/experr b/test/cases/dsl-stats/count/various/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/count/various/expout b/test/cases/dsl-stats/count/various/expout
new file mode 100644
index 000000000..9e4f467e0
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/expout
@@ -0,0 +1,20 @@
+count_0                 (error)
+count_0_type            error
+count_null              (error)
+count_null_type         error
+count_empty_array       0
+count_empty_array_type  int
+count_array_1           1
+count_array_1_type      int
+count_array_3           3
+count_array_3_type      int
+count_array_nested      3
+count_array_nested_type int
+count_empty_map         0
+count_empty_map_type    int
+count_map_1             1
+count_map_1_type        int
+count_map_3             3
+count_map_3_type        int
+count_map_nested        3
+count_map_nested_type   int
diff --git a/test/cases/dsl-stats/count/various/mlr b/test/cases/dsl-stats/count/various/mlr
new file mode 100644
index 000000000..39e9abd8e
--- /dev/null
+++ b/test/cases/dsl-stats/count/various/mlr
@@ -0,0 +1,26 @@
+end {
+    outputs = {};
+
+    outputs["count_0"] = count(0);
+    outputs["count_null"] = count(null);
+    outputs["count_nonesuch"] = count(nonesuch);
+
+    outputs["count_empty_array"] = count([]);
+    outputs["count_array_1"] = count([7]);
+    outputs["count_array_3"] = count([7,8,9]);
+    outputs["count_array_nested"] = count([7,[80,90],9]);
+
+    outputs["count_empty_map"] = count({});
+    outputs["count_map_1"] = count({ "a" : 7} );
+    outputs["count_map_3"] = count({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["count_map_nested"] = count({ "a" : 7, "b" : [80,90], "c" : 9 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/distinct_count/various/cmd b/test/cases/dsl-stats/distinct_count/various/cmd
new file mode 100644
index 000000000..8e64fdff2
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/distinct_count/various/experr b/test/cases/dsl-stats/distinct_count/various/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/distinct_count/various/expout b/test/cases/dsl-stats/distinct_count/various/expout
new file mode 100644
index 000000000..8d2416554
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/expout
@@ -0,0 +1,32 @@
+distinct_count_0                 (error)
+distinct_count_0_type            error
+distinct_count_null              (error)
+distinct_count_null_type         error
+distinct_count_empty_array       0
+distinct_count_empty_array_type  int
+distinct_count_array_1           1
+distinct_count_array_1_type      int
+distinct_count_array_3a          3
+distinct_count_array_3a_type     int
+distinct_count_array_3b          2
+distinct_count_array_3b_type     int
+distinct_count_array_3c          1
+distinct_count_array_3c_type     int
+distinct_count_array_3d          1
+distinct_count_array_3d_type     int
+distinct_count_array_nested      2
+distinct_count_array_nested_type int
+distinct_count_empty_map         0
+distinct_count_empty_map_type    int
+distinct_count_map_1             1
+distinct_count_map_1_type        int
+distinct_count_map_3a            3
+distinct_count_map_3a_type       int
+distinct_count_map_3b            2
+distinct_count_map_3b_type       int
+distinct_count_map_3c            1
+distinct_count_map_3c_type       int
+distinct_count_map_3d            1
+distinct_count_map_3d_type       int
+distinct_count_map_nested        2
+distinct_count_map_nested_type   int
diff --git a/test/cases/dsl-stats/distinct_count/various/mlr b/test/cases/dsl-stats/distinct_count/various/mlr
new file mode 100644
index 000000000..f98ceb66e
--- /dev/null
+++ b/test/cases/dsl-stats/distinct_count/various/mlr
@@ -0,0 +1,32 @@
+end {
+    outputs = {};
+
+    outputs["distinct_count_0"] = distinct_count(0);
+    outputs["distinct_count_null"] = distinct_count(null);
+    outputs["distinct_count_nonesuch"] = distinct_count(nonesuch);
+
+    outputs["distinct_count_empty_array"] = distinct_count([]);
+    outputs["distinct_count_array_1"] = distinct_count([7]);
+    outputs["distinct_count_array_3a"] = distinct_count([7,8,9]);
+    outputs["distinct_count_array_3b"] = distinct_count([7,7,9]);
+    outputs["distinct_count_array_3c"] = distinct_count([7,7,7]);
+    outputs["distinct_count_array_3d"] = distinct_count([null,null,null]);
+    outputs["distinct_count_array_nested"] = distinct_count([7,[7],7]);
+
+    outputs["distinct_count_empty_map"] = distinct_count({});
+    outputs["distinct_count_map_1"] = distinct_count({ "a" : 7} );
+    outputs["distinct_count_map_3a"] = distinct_count({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["distinct_count_map_3b"] = distinct_count({ "a" : 7, "b" : 7, "c" : 9 } );
+    outputs["distinct_count_map_3c"] = distinct_count({ "a" : 7, "b" : 7, "c" : 7 } );
+    outputs["distinct_count_map_3d"] = distinct_count({ "a" : null, "b" : null, "c" : null } );
+    outputs["distinct_count_map_nested"] = distinct_count({ "a" : 7, "b" : [7], "c" : 7 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/mode/various/cmd b/test/cases/dsl-stats/mode/various/cmd
new file mode 100644
index 000000000..8e64fdff2
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/mode/various/experr b/test/cases/dsl-stats/mode/various/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/mode/various/expout b/test/cases/dsl-stats/mode/various/expout
new file mode 100644
index 000000000..3b792ea2c
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/expout
@@ -0,0 +1,24 @@
+mode_0                 (error)
+mode_0_type            error
+mode_null              (error)
+mode_null_type         error
+mode_empty_array       
+mode_empty_array_type  empty
+mode_array_1           7
+mode_array_1_type      int
+mode_array_3a          7
+mode_array_3a_type     int
+mode_array_3b          7
+mode_array_3b_type     int
+mode_array_nested      9
+mode_array_nested_type int
+mode_empty_map         
+mode_empty_map_type    empty
+mode_map_1             7
+mode_map_1_type        int
+mode_map_3a            7
+mode_map_3a_type       int
+mode_map_3b            7
+mode_map_3b_type       int
+mode_map_nested        9
+mode_map_nested_type   int
diff --git a/test/cases/dsl-stats/mode/various/mlr b/test/cases/dsl-stats/mode/various/mlr
new file mode 100644
index 000000000..d59e8b070
--- /dev/null
+++ b/test/cases/dsl-stats/mode/various/mlr
@@ -0,0 +1,28 @@
+end {
+    outputs = {};
+
+    outputs["mode_0"] = mode(0);
+    outputs["mode_null"] = mode(null);
+    outputs["mode_nonesuch"] = mode(nonesuch);
+
+    outputs["mode_empty_array"] = mode([]);
+    outputs["mode_array_1"] = mode([7]);
+    outputs["mode_array_3a"] = mode([7,8,9]);
+    outputs["mode_array_3b"] = mode([7,8,7]);
+    outputs["mode_array_nested"] = mode([7,[8,8,8,8,8,8],9,9,9]);
+
+    outputs["mode_empty_map"] = mode({});
+    outputs["mode_map_1"] = mode({ "a" : 7} );
+    outputs["mode_map_3a"] = mode({ "a" : 7, "b" : 8, "c" : 9 } );
+    outputs["mode_map_3b"] = mode({ "a" : 7, "b" : 8, "c" : 7 } );
+    outputs["mode_map_nested"] = mode({ "a" : 7, "b" : [8,8,8,8,8,8], "c" : 9, "d": 9, "e": 9 });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/moments/numeric-000/cmd b/test/cases/dsl-stats/moments/numeric-000/cmd
new file mode 100644
index 000000000..7ebdd60bc
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 0 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-000/experr b/test/cases/dsl-stats/moments/numeric-000/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-000/expout b/test/cases/dsl-stats/moments/numeric-000/expout
new file mode 100644
index 000000000..7a8c5d98f
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-000/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 0,
+  "a_sum": 0,
+  "a_sum2": 0,
+  "a_sum3": 0,
+  "a_sum4": 0,
+  "a_mean": "",
+  "a_var": "",
+  "a_stddev": "",
+  "a_meaneb": "",
+  "a_skewness": "",
+  "a_kurtosis": "",
+  "m_count": 0,
+  "m_sum": 0,
+  "m_sum2": 0,
+  "m_sum3": 0,
+  "m_sum4": 0,
+  "m_mean": "",
+  "m_var": "",
+  "m_stddev": "",
+  "m_meaneb": "",
+  "m_skewness": "",
+  "m_kurtosis": ""
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-001/cmd b/test/cases/dsl-stats/moments/numeric-001/cmd
new file mode 100644
index 000000000..fe2e61aa7
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 1 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-001/experr b/test/cases/dsl-stats/moments/numeric-001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-001/expout b/test/cases/dsl-stats/moments/numeric-001/expout
new file mode 100644
index 000000000..d278c2a6d
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-001/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 1,
+  "a_sum": 1,
+  "a_sum2": 1,
+  "a_sum3": 1,
+  "a_sum4": 1,
+  "a_mean": 1,
+  "a_var": "",
+  "a_stddev": "",
+  "a_meaneb": "",
+  "a_skewness": "",
+  "a_kurtosis": "",
+  "m_count": 1,
+  "m_sum": 1,
+  "m_sum2": 1,
+  "m_sum3": 1,
+  "m_sum4": 1,
+  "m_mean": 1,
+  "m_var": "",
+  "m_stddev": "",
+  "m_meaneb": "",
+  "m_skewness": "",
+  "m_kurtosis": ""
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-002/cmd b/test/cases/dsl-stats/moments/numeric-002/cmd
new file mode 100644
index 000000000..2d383e83c
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 2 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-002/experr b/test/cases/dsl-stats/moments/numeric-002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-002/expout b/test/cases/dsl-stats/moments/numeric-002/expout
new file mode 100644
index 000000000..7b268c3e5
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-002/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 2,
+  "a_sum": 3,
+  "a_sum2": 5,
+  "a_sum3": 9,
+  "a_sum4": 17,
+  "a_mean": 1.500000,
+  "a_var": 0.500000,
+  "a_stddev": 0.707107,
+  "a_meaneb": 0.500000,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -2.000000,
+  "m_count": 2,
+  "m_sum": 3,
+  "m_sum2": 5,
+  "m_sum3": 9,
+  "m_sum4": 17,
+  "m_mean": 1.500000,
+  "m_var": 0.500000,
+  "m_stddev": 0.707107,
+  "m_meaneb": 0.500000,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -2.000000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-003/cmd b/test/cases/dsl-stats/moments/numeric-003/cmd
new file mode 100644
index 000000000..fe70bddae
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 3 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-003/experr b/test/cases/dsl-stats/moments/numeric-003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-003/expout b/test/cases/dsl-stats/moments/numeric-003/expout
new file mode 100644
index 000000000..a7b80ccf0
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-003/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 3,
+  "a_sum": 6,
+  "a_sum2": 14,
+  "a_sum3": 36,
+  "a_sum4": 98,
+  "a_mean": 2,
+  "a_var": 1.000000,
+  "a_stddev": 1.000000,
+  "a_meaneb": 0.577350,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.500000,
+  "m_count": 3,
+  "m_sum": 6,
+  "m_sum2": 14,
+  "m_sum3": 36,
+  "m_sum4": 98,
+  "m_mean": 2,
+  "m_var": 1.000000,
+  "m_stddev": 1.000000,
+  "m_meaneb": 0.577350,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.500000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-004/cmd b/test/cases/dsl-stats/moments/numeric-004/cmd
new file mode 100644
index 000000000..9f91c06f9
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy head -n 4 then put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-004/experr b/test/cases/dsl-stats/moments/numeric-004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-004/expout b/test/cases/dsl-stats/moments/numeric-004/expout
new file mode 100644
index 000000000..344a8a12e
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-004/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 4,
+  "a_sum": 10,
+  "a_sum2": 30,
+  "a_sum3": 100,
+  "a_sum4": 354,
+  "a_mean": 2.500000,
+  "a_var": 1.666667,
+  "a_stddev": 1.290994,
+  "a_meaneb": 0.645497,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.360000,
+  "m_count": 4,
+  "m_sum": 10,
+  "m_sum2": 30,
+  "m_sum3": 100,
+  "m_sum4": 354,
+  "m_mean": 2.500000,
+  "m_var": 1.666667,
+  "m_stddev": 1.290994,
+  "m_meaneb": 0.645497,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.360000
+}
+]
diff --git a/test/cases/dsl-stats/moments/numeric-all/cmd b/test/cases/dsl-stats/moments/numeric-all/cmd
new file mode 100644
index 000000000..de6266f30
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --from test/input/abixy put -q -f test/input/test-moments.mlr
diff --git a/test/cases/dsl-stats/moments/numeric-all/experr b/test/cases/dsl-stats/moments/numeric-all/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/moments/numeric-all/expout b/test/cases/dsl-stats/moments/numeric-all/expout
new file mode 100644
index 000000000..9e62f653a
--- /dev/null
+++ b/test/cases/dsl-stats/moments/numeric-all/expout
@@ -0,0 +1,26 @@
+[
+{
+  "a_count": 10,
+  "a_sum": 55,
+  "a_sum2": 385,
+  "a_sum3": 3025,
+  "a_sum4": 25333,
+  "a_mean": 5.500000,
+  "a_var": 9.166667,
+  "a_stddev": 3.027650,
+  "a_meaneb": 0.957427,
+  "a_skewness": 0.000000,
+  "a_kurtosis": -1.224242,
+  "m_count": 10,
+  "m_sum": 55,
+  "m_sum2": 385,
+  "m_sum3": 3025,
+  "m_sum4": 25333,
+  "m_mean": 5.500000,
+  "m_var": 9.166667,
+  "m_stddev": 3.027650,
+  "m_meaneb": 0.957427,
+  "m_skewness": 0.000000,
+  "m_kurtosis": -1.224242
+}
+]
diff --git a/test/cases/dsl-stats/null_count/various/cmd b/test/cases/dsl-stats/null_count/various/cmd
new file mode 100644
index 000000000..8e64fdff2
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/cmd
@@ -0,0 +1 @@
+mlr -n --ofmtf 6 --xtab put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-stats/null_count/various/experr b/test/cases/dsl-stats/null_count/various/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/null_count/various/expout b/test/cases/dsl-stats/null_count/various/expout
new file mode 100644
index 000000000..1bf369f1f
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/expout
@@ -0,0 +1,20 @@
+null_count_0                (error)
+null_count_0_type           error
+null_count_null             (error)
+null_count_null_type        error
+null_count_empty_array      0
+null_count_empty_array_type int
+null_count_array_1          0
+null_count_array_1_type     int
+null_count_array_2          0
+null_count_array_2_type     int
+null_count_array_3          2
+null_count_array_3_type     int
+null_count_empty_map        0
+null_count_empty_map_type   int
+null_count_map_1            0
+null_count_map_1_type       int
+null_count_map_2            0
+null_count_map_2_type       int
+null_count_map_3            2
+null_count_map_3_type       int
diff --git a/test/cases/dsl-stats/null_count/various/mlr b/test/cases/dsl-stats/null_count/various/mlr
new file mode 100644
index 000000000..088277711
--- /dev/null
+++ b/test/cases/dsl-stats/null_count/various/mlr
@@ -0,0 +1,28 @@
+end {
+    outputs = {};
+
+    # Only empty string and JSON-null count as nulls
+
+    outputs["null_count_0"] = null_count(0);
+    outputs["null_count_null"] = null_count(null);
+    outputs["null_count_nonesuch"] = null_count(nonesuch);
+
+    outputs["null_count_empty_array"] = null_count([]);
+    outputs["null_count_array_1"] = null_count([7]);
+    outputs["null_count_array_2"] = null_count([7,8]);
+    outputs["null_count_array_3"] = null_count(["",null,nonesuch]);
+
+    outputs["null_count_empty_map"] = null_count({});
+    outputs["null_count_map_1"] = null_count({ "a" : 7});
+    outputs["null_count_map_2"] = null_count({ "a" : 7, "b" : 8 });
+    outputs["null_count_map_3"] = null_count({ "a" : "", "b" : null, "c" : nonesuch });
+
+    typed_outputs = {};
+
+    for (k, v in outputs) {
+        typed_outputs[k] = v;
+        typed_outputs[k."_type"] = typeof(v);
+    }
+
+    emit typed_outputs;
+}
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/cmd b/test/cases/dsl-stats/percentiles/non-numeric-000/cmd
new file mode 100644
index 000000000..a862c1303
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/experr b/test/cases/dsl-stats/percentiles/non-numeric-000/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-000/expout b/test/cases/dsl-stats/percentiles/non-numeric-000/expout
new file mode 100644
index 000000000..2e711ec22
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-000/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "",
+  "a_max": "",
+  "a_minlen": "",
+  "a_maxlen": "",
+  "a_median": "",
+  "a_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psa": ["", "", "", "", "", "", "", "", ""],
+  "a_psia": ["", "", "", "", "", "", "", "", ""],
+  "m_min": "",
+  "m_max": "",
+  "m_minlen": "",
+  "m_maxlen": "",
+  "m_median": "",
+  "m_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psa": ["", "", "", "", "", "", "", "", ""],
+  "m_psia": ["", "", "", "", "", "", "", "", ""]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/cmd b/test/cases/dsl-stats/percentiles/non-numeric-001/cmd
new file mode 100644
index 000000000..291777b39
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/experr b/test/cases/dsl-stats/percentiles/non-numeric-001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-001/expout b/test/cases/dsl-stats/percentiles/non-numeric-001/expout
new file mode 100644
index 000000000..a4c419c7f
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-001/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "pan",
+  "a_max": "pan",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psi": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "a_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "m_min": "pan",
+  "m_max": "pan",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psi": {
+    "0": "pan",
+    "1": "pan",
+    "10": "pan",
+    "25": "pan",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psa": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"],
+  "m_psia": ["pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan", "pan"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/cmd b/test/cases/dsl-stats/percentiles/non-numeric-002/cmd
new file mode 100644
index 000000000..71815b457
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/experr b/test/cases/dsl-stats/percentiles/non-numeric-002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-002/expout b/test/cases/dsl-stats/percentiles/non-numeric-002/expout
new file mode 100644
index 000000000..c814d0c5a
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-002/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "pan",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "pan"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"],
+  "m_min": "eks",
+  "m_max": "pan",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "pan",
+    "90": "pan",
+    "99": "pan",
+    "100": "pan"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "pan"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "pan", "pan", "pan", "pan"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "pan"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/cmd b/test/cases/dsl-stats/percentiles/non-numeric-003/cmd
new file mode 100644
index 000000000..8e32f39f3
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/experr b/test/cases/dsl-stats/percentiles/non-numeric-003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-003/expout b/test/cases/dsl-stats/percentiles/non-numeric-003/expout
new file mode 100644
index 000000000..995605fd0
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-003/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "wye",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
+  "m_min": "eks",
+  "m_max": "wye",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/cmd b/test/cases/dsl-stats/percentiles/non-numeric-004/cmd
new file mode 100644
index 000000000..5703b1230
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/experr b/test/cases/dsl-stats/percentiles/non-numeric-004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-004/expout b/test/cases/dsl-stats/percentiles/non-numeric-004/expout
new file mode 100644
index 000000000..995605fd0
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-004/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "wye",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "a_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"],
+  "m_min": "eks",
+  "m_max": "wye",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "eks",
+    "50": "pan",
+    "75": "wye",
+    "90": "wye",
+    "99": "wye",
+    "100": "wye"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "wye"
+  },
+  "m_psa": ["eks", "eks", "eks", "eks", "pan", "wye", "wye", "wye", "wye"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "wye"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/cmd b/test/cases/dsl-stats/percentiles/non-numeric-all/cmd
new file mode 100644
index 000000000..b20e151b4
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=a
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/experr b/test/cases/dsl-stats/percentiles/non-numeric-all/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/non-numeric-all/expout b/test/cases/dsl-stats/percentiles/non-numeric-all/expout
new file mode 100644
index 000000000..326ec1168
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/non-numeric-all/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "eks",
+  "a_max": "zee",
+  "a_minlen": 3,
+  "a_maxlen": 3,
+  "a_median": "pan",
+  "a_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "hat",
+    "50": "pan",
+    "75": "wye",
+    "90": "zee",
+    "99": "zee",
+    "100": "zee"
+  },
+  "a_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "zee"
+  },
+  "a_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
+  "a_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"],
+  "m_min": "eks",
+  "m_max": "zee",
+  "m_minlen": 3,
+  "m_maxlen": 3,
+  "m_median": "pan",
+  "m_ps": {
+    "0": "eks",
+    "1": "eks",
+    "10": "eks",
+    "25": "hat",
+    "50": "pan",
+    "75": "wye",
+    "90": "zee",
+    "99": "zee",
+    "100": "zee"
+  },
+  "m_psi": {
+    "0": (error),
+    "1": (error),
+    "10": (error),
+    "25": (error),
+    "50": (error),
+    "75": (error),
+    "90": (error),
+    "99": (error),
+    "100": "zee"
+  },
+  "m_psa": ["eks", "eks", "eks", "hat", "pan", "wye", "zee", "zee", "zee"],
+  "m_psia": [(error), (error), (error), (error), (error), (error), (error), (error), "zee"]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/cmd b/test/cases/dsl-stats/percentiles/numeric-000/cmd
new file mode 100644
index 000000000..432afc190
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-000/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 0 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/experr b/test/cases/dsl-stats/percentiles/numeric-000/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-000/expout b/test/cases/dsl-stats/percentiles/numeric-000/expout
new file mode 100644
index 000000000..2e711ec22
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-000/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": "",
+  "a_max": "",
+  "a_minlen": "",
+  "a_maxlen": "",
+  "a_median": "",
+  "a_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "a_psa": ["", "", "", "", "", "", "", "", ""],
+  "a_psia": ["", "", "", "", "", "", "", "", ""],
+  "m_min": "",
+  "m_max": "",
+  "m_minlen": "",
+  "m_maxlen": "",
+  "m_median": "",
+  "m_ps": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psi": {
+    "0": "",
+    "1": "",
+    "10": "",
+    "25": "",
+    "50": "",
+    "75": "",
+    "90": "",
+    "99": "",
+    "100": ""
+  },
+  "m_psa": ["", "", "", "", "", "", "", "", ""],
+  "m_psia": ["", "", "", "", "", "", "", "", ""]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/cmd b/test/cases/dsl-stats/percentiles/numeric-001/cmd
new file mode 100644
index 000000000..c9408b30e
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-001/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 1 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/experr b/test/cases/dsl-stats/percentiles/numeric-001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-001/expout b/test/cases/dsl-stats/percentiles/numeric-001/expout
new file mode 100644
index 000000000..01539222e
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-001/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 1,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 1,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "a_psi": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "a_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "a_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "m_min": 1,
+  "m_max": 1,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 1,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "m_psi": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 1,
+    "75": 1,
+    "90": 1,
+    "99": 1,
+    "100": 1
+  },
+  "m_psa": [1, 1, 1, 1, 1, 1, 1, 1, 1],
+  "m_psia": [1, 1, 1, 1, 1, 1, 1, 1, 1]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/cmd b/test/cases/dsl-stats/percentiles/numeric-002/cmd
new file mode 100644
index 000000000..c749a00ff
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-002/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 2 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/experr b/test/cases/dsl-stats/percentiles/numeric-002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-002/expout b/test/cases/dsl-stats/percentiles/numeric-002/expout
new file mode 100644
index 000000000..fde0fe23b
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-002/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 2,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 2,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 2,
+    "90": 2,
+    "99": 2,
+    "100": 2
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.010000,
+    "10": 1.100000,
+    "25": 1.250000,
+    "50": 1.500000,
+    "75": 1.750000,
+    "90": 1.900000,
+    "99": 1.990000,
+    "100": 2
+  },
+  "a_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
+  "a_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2],
+  "m_min": 1,
+  "m_max": 2,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 2,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 2,
+    "90": 2,
+    "99": 2,
+    "100": 2
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.010000,
+    "10": 1.100000,
+    "25": 1.250000,
+    "50": 1.500000,
+    "75": 1.750000,
+    "90": 1.900000,
+    "99": 1.990000,
+    "100": 2
+  },
+  "m_psa": [1, 1, 1, 1, 2, 2, 2, 2, 2],
+  "m_psia": [1.000000, 1.010000, 1.100000, 1.250000, 1.500000, 1.750000, 1.900000, 1.990000, 2]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/cmd b/test/cases/dsl-stats/percentiles/numeric-003/cmd
new file mode 100644
index 000000000..819881139
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-003/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 3 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/experr b/test/cases/dsl-stats/percentiles/numeric-003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-003/expout b/test/cases/dsl-stats/percentiles/numeric-003/expout
new file mode 100644
index 000000000..e1fdea0d7
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-003/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 3,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 2,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 3,
+    "90": 3,
+    "99": 3,
+    "100": 3
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.020000,
+    "10": 1.200000,
+    "25": 1.500000,
+    "50": 2.000000,
+    "75": 2.500000,
+    "90": 2.800000,
+    "99": 2.980000,
+    "100": 3
+  },
+  "a_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
+  "a_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3],
+  "m_min": 1,
+  "m_max": 3,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 2,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 1,
+    "50": 2,
+    "75": 3,
+    "90": 3,
+    "99": 3,
+    "100": 3
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.020000,
+    "10": 1.200000,
+    "25": 1.500000,
+    "50": 2.000000,
+    "75": 2.500000,
+    "90": 2.800000,
+    "99": 2.980000,
+    "100": 3
+  },
+  "m_psa": [1, 1, 1, 1, 2, 3, 3, 3, 3],
+  "m_psia": [1.000000, 1.020000, 1.200000, 1.500000, 2.000000, 2.500000, 2.800000, 2.980000, 3]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/cmd b/test/cases/dsl-stats/percentiles/numeric-004/cmd
new file mode 100644
index 000000000..519131232
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-004/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z head -n 4 then put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/experr b/test/cases/dsl-stats/percentiles/numeric-004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-004/expout b/test/cases/dsl-stats/percentiles/numeric-004/expout
new file mode 100644
index 000000000..677a6f591
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-004/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 4,
+  "a_minlen": 1,
+  "a_maxlen": 1,
+  "a_median": 3,
+  "a_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 2,
+    "50": 3,
+    "75": 4,
+    "90": 4,
+    "99": 4,
+    "100": 4
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 1.030000,
+    "10": 1.300000,
+    "25": 1.750000,
+    "50": 2.500000,
+    "75": 3.250000,
+    "90": 3.700000,
+    "99": 3.970000,
+    "100": 4
+  },
+  "a_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
+  "a_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4],
+  "m_min": 1,
+  "m_max": 4,
+  "m_minlen": 1,
+  "m_maxlen": 1,
+  "m_median": 3,
+  "m_ps": {
+    "0": 1,
+    "1": 1,
+    "10": 1,
+    "25": 2,
+    "50": 3,
+    "75": 4,
+    "90": 4,
+    "99": 4,
+    "100": 4
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 1.030000,
+    "10": 1.300000,
+    "25": 1.750000,
+    "50": 2.500000,
+    "75": 3.250000,
+    "90": 3.700000,
+    "99": 3.970000,
+    "100": 4
+  },
+  "m_psa": [1, 1, 1, 2, 3, 4, 4, 4, 4],
+  "m_psia": [1.000000, 1.030000, 1.300000, 1.750000, 2.500000, 3.250000, 3.700000, 3.970000, 4]
+}
+]
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/cmd b/test/cases/dsl-stats/percentiles/numeric-all/cmd
new file mode 100644
index 000000000..2f7f93eb1
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-all/cmd
@@ -0,0 +1 @@
+mlr --ofmtf 6 --ojson --zin --from test/input/medium.z put -q -f test/input/test-percentiles.mlr -s field=i
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/experr b/test/cases/dsl-stats/percentiles/numeric-all/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stats/percentiles/numeric-all/expout b/test/cases/dsl-stats/percentiles/numeric-all/expout
new file mode 100644
index 000000000..703200518
--- /dev/null
+++ b/test/cases/dsl-stats/percentiles/numeric-all/expout
@@ -0,0 +1,62 @@
+[
+{
+  "a_min": 1,
+  "a_max": 10000,
+  "a_minlen": 1,
+  "a_maxlen": 5,
+  "a_median": 5001,
+  "a_ps": {
+    "0": 1,
+    "1": 101,
+    "10": 1001,
+    "25": 2501,
+    "50": 5001,
+    "75": 7501,
+    "90": 9001,
+    "99": 9901,
+    "100": 10000
+  },
+  "a_psi": {
+    "0": 1.000000,
+    "1": 100.990000,
+    "10": 1000.900000,
+    "25": 2500.750000,
+    "50": 5000.500000,
+    "75": 7500.250000,
+    "90": 9000.100000,
+    "99": 9900.010000,
+    "100": 10000
+  },
+  "a_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
+  "a_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000],
+  "m_min": 1,
+  "m_max": 10000,
+  "m_minlen": 1,
+  "m_maxlen": 5,
+  "m_median": 5001,
+  "m_ps": {
+    "0": 1,
+    "1": 101,
+    "10": 1001,
+    "25": 2501,
+    "50": 5001,
+    "75": 7501,
+    "90": 9001,
+    "99": 9901,
+    "100": 10000
+  },
+  "m_psi": {
+    "0": 1.000000,
+    "1": 100.990000,
+    "10": 1000.900000,
+    "25": 2500.750000,
+    "50": 5000.500000,
+    "75": 7500.250000,
+    "90": 9000.100000,
+    "99": 9900.010000,
+    "100": 10000
+  },
+  "m_psa": [1, 101, 1001, 2501, 5001, 7501, 9001, 9901, 10000],
+  "m_psia": [1.000000, 100.990000, 1000.900000, 2500.750000, 5000.500000, 7500.250000, 9000.100000, 9900.010000, 10000]
+}
+]
diff --git a/test/cases/dsl-stats/sums/README.txt b/test/cases/dsl-stats/sums/README.txt
new file mode 100644
index 000000000..c257842b1
--- /dev/null
+++ b/test/cases/dsl-stats/sums/README.txt
@@ -0,0 +1 @@
+Coverage via unit-test framework, not regression-test framework
diff --git a/test/input/test-moments.mlr b/test/input/test-moments.mlr
new file mode 100644
index 000000000..0f81bce0b
--- /dev/null
+++ b/test/input/test-moments.mlr
@@ -0,0 +1,39 @@
+begin {
+    @a = [];
+    @m = {};
+    @field = "i";
+}
+
+@a[NR] = $[@field];
+@m[NR] = $[@field];
+
+end {
+    outputs = {
+
+        "a_count": count(@a),
+        "a_sum": sum(@a),
+        "a_sum2": sum2(@a),
+        "a_sum3": sum3(@a),
+        "a_sum4": sum4(@a),
+        "a_mean": mean(@a),
+        "a_var": variance(@a),
+        "a_stddev": stddev(@a),
+        "a_meaneb": meaneb(@a),
+        "a_skewness": skewness(@a),
+        "a_kurtosis": kurtosis(@a),
+
+        "m_count": count(@m),
+        "m_sum": sum(@m),
+        "m_sum2": sum2(@m),
+        "m_sum3": sum3(@m),
+        "m_sum4": sum4(@m),
+        "m_mean": mean(@m),
+        "m_var": variance(@m),
+        "m_stddev": stddev(@m),
+        "m_meaneb": meaneb(@m),
+        "m_skewness": skewness(@m),
+        "m_kurtosis": kurtosis(@m),
+
+    };
+    emit outputs;
+}
diff --git a/test/input/test-percentiles.mlr b/test/input/test-percentiles.mlr
new file mode 100644
index 000000000..1c5d807fe
--- /dev/null
+++ b/test/input/test-percentiles.mlr
@@ -0,0 +1,44 @@
+begin {
+    @a = [];
+    @m = {};
+    # @field must be given by put -s field=namegoeshere in the script invocation.
+    # This lets us test percentiles over various field names/types while re-using
+    # this same script.
+}
+
+@a[NR] = $[@field];
+@m[NR] = $[@field];
+
+end {
+    outputs = {
+
+        "a_min": min(@a),
+        "a_max": max(@a),
+        "a_minlen": minlen(@a),
+        "a_maxlen": maxlen(@a),
+        "a_median": median(@a),
+        "a_ps": percentiles(@a, [0,1,10,25,50,75,90,99,100]),
+        "a_psi": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),
+        "a_psa": percentiles(@a, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),
+        "a_psia": percentiles(@a, [0,1,10,25,50,75,90,99,100], {
+            "interpolate_linearly": true,
+            "output_array_not_map":true,
+        }),
+
+        "m_min": min(@m),
+        "m_max": max(@m),
+        "m_minlen": minlen(@m),
+        "m_maxlen": maxlen(@m),
+        "m_median": median(@m),
+        "m_ps": percentiles(@m, [0,1,10,25,50,75,90,99,100]),
+        "m_psi": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"interpolate_linearly":true}),
+        "m_psa": percentiles(@m, [0,1,10,25,50,75,90,99,100], {"output_array_not_map":true}),
+        "m_psia": percentiles(@m, [0,1,10,25,50,75,90,99,100], {
+            "interpolate_linearly": true,
+            "output_array_not_map":true,
+        }),
+
+    };
+    emit outputs;
+}
+

From deb5d692a88a7515949e5ef44348fa1b242f80e6 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 26 Aug 2023 16:23:48 -0400
Subject: [PATCH 039/456] typofixes

---
 .../pkg/dsl/cst/builtin_function_manager.go   | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go
index b06695536..9b2b45a22 100644
--- a/internal/pkg/dsl/cst/builtin_function_manager.go
+++ b/internal/pkg/dsl/cst/builtin_function_manager.go
@@ -1060,7 +1060,7 @@ is normally distributed.`,
 		{
 			name:      "mean",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			help:      `Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_mean,
 			examples: []string{
 				`mean([4,5,7,10]) is 6.5`,
@@ -1070,7 +1070,7 @@ is normally distributed.`,
 		{
 			name:      "meaneb",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.`,
+			help:      `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_meaneb,
 			examples: []string{
 				`meaneb([4,5,7,10]) is 1.3228756`,
@@ -1080,7 +1080,7 @@ is normally distributed.`,
 		{
 			name:      "variance",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_variance,
 			examples: []string{
 				`variance([4,5,9,10,11]) is 9.7`,
@@ -1090,7 +1090,7 @@ is normally distributed.`,
 		{
 			name:      "stddev",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_stddev,
 			examples: []string{
 				`stddev([4,5,9,10,11]) is 3.1144823`,
@@ -1100,7 +1100,7 @@ is normally distributed.`,
 		{
 			name:      "skewness",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_skewness,
 			examples: []string{
 				`skewness([4,5,9,10,11]) is -0.2097285`,
@@ -1110,7 +1110,7 @@ is normally distributed.`,
 		{
 			name:      "kurtosis",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_kurtosis,
 			examples: []string{
 				`kurtosis([4,5,9,10,11]) is -1.6703688`,
@@ -1120,7 +1120,7 @@ is normally distributed.`,
 		{
 			name:      "minlen",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_minlen,
 			examples: []string{
 				`minlen(["año", "alto"]) is 3`,
@@ -1130,7 +1130,7 @@ is normally distributed.`,
 		{
 			name:      "maxlen",
 			class:     FUNC_CLASS_STATS,
-			help:      `Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
+			help:      `Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`,
 			unaryFunc: bifs.BIF_maxlen,
 			examples: []string{
 				`maxlen(["año", "alto"]) is 4`,
@@ -1140,7 +1140,7 @@ is normally distributed.`,
 		{
 			name:               "median",
 			class:              FUNC_CLASS_STATS,
-			help:               `Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			help:               `Returns the median of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.`,
 			unaryFunc:          bifs.BIF_median,
 			binaryFunc:         bifs.BIF_median_with_options,
 			hasMultipleArities: true,
@@ -1154,7 +1154,7 @@ is normally distributed.`,
 		{
 			name:               "percentile",
 			class:              FUNC_CLASS_STATS,
-			help:               `Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.`,
+			help:               `Returns the given percentile of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.`,
 			binaryFunc:         bifs.BIF_percentile,
 			ternaryFunc:        bifs.BIF_percentile_with_options,
 			hasMultipleArities: true,
@@ -1168,7 +1168,7 @@ is normally distributed.`,
 		{
 			name:               "percentiles",
 			class:              FUNC_CLASS_STATS,
-			help:               `Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.`,
+			help:               `Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.`,
 			binaryFunc:         bifs.BIF_percentiles,
 			ternaryFunc:        bifs.BIF_percentiles_with_options,
 			hasMultipleArities: true,

From 4cfb0ba1122b306fa90b24025c2e4646098610eb Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 26 Aug 2023 16:30:21 -0400
Subject: [PATCH 040/456] neaten online help for the percentiles function

---
 docs/src/manpage.md                           | 22 +++++++++----------
 docs/src/manpage.txt                          | 22 +++++++++----------
 docs/src/reference-dsl-builtin-functions.md   | 22 +++++++++----------
 .../pkg/dsl/cst/builtin_function_manager.go   | 13 ++++-------
 man/manpage.txt                               | 22 +++++++++----------
 man/mlr.1                                     | 22 +++++++++----------
 6 files changed, 59 insertions(+), 64 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 5ab08d255..d7ed08295 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -2564,7 +2564,7 @@ MILLER(1)                                                            MILLER(1)
         (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output.
 
    1mkurtosis0m
-        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        kurtosis([4,5,9,10,11]) is -1.6703688
 
@@ -2639,7 +2639,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
    1mmaxlen0m
-        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        maxlen(["año", "alto"]) is 4
 
@@ -2647,17 +2647,17 @@ MILLER(1)                                                            MILLER(1)
         (class=hashing #args=1) MD5 hash.
 
    1mmean0m
-        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
        Example:
        mean([4,5,7,10]) is 6.5
 
    1mmeaneb0m
-        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
        Example:
        meaneb([4,5,7,10]) is 1.3228756
 
    1mmedian0m
-        (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+        (class=stats #args=1,2) Returns the median of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
        Examples:
        median([3,4,5,6,9,10]) is 6
        median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
@@ -2670,7 +2670,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
    1mminlen0m
-        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        minlen(["año", "alto"]) is 3
 
@@ -2721,14 +2721,14 @@ MILLER(1)                                                            MILLER(1)
         (class=system #args=0) Returns the operating-system name as a string.
 
    1mpercentile0m
-        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
        Examples:
        percentile([3,4,5,6,9,10], 90) is 10
        percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
        percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
 
    1mpercentiles0m
-        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
        Examples:
 
        Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
@@ -2873,7 +2873,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=1) Hyperbolic sine.
 
    1mskewness0m
-        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        skewness([4,5,9,10,11]) is -0.2097285
 
@@ -2935,7 +2935,7 @@ MILLER(1)                                                            MILLER(1)
        ssub("abc.def", ".", "X") gives "abcXdef"
 
    1mstddev0m
-        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        stddev([4,5,9,10,11]) is 3.1144823
 
@@ -3125,7 +3125,7 @@ MILLER(1)                                                            MILLER(1)
        $* = utf8_to_latin1($*)
 
    1mvariance0m
-        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        variance([4,5,9,10,11]) is 9.7
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 666177bee..290fa7c5d 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -2543,7 +2543,7 @@ MILLER(1)                                                            MILLER(1)
         (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output.
 
    1mkurtosis0m
-        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        kurtosis([4,5,9,10,11]) is -1.6703688
 
@@ -2618,7 +2618,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
    1mmaxlen0m
-        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        maxlen(["año", "alto"]) is 4
 
@@ -2626,17 +2626,17 @@ MILLER(1)                                                            MILLER(1)
         (class=hashing #args=1) MD5 hash.
 
    1mmean0m
-        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
        Example:
        mean([4,5,7,10]) is 6.5
 
    1mmeaneb0m
-        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
        Example:
        meaneb([4,5,7,10]) is 1.3228756
 
    1mmedian0m
-        (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+        (class=stats #args=1,2) Returns the median of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
        Examples:
        median([3,4,5,6,9,10]) is 6
        median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
@@ -2649,7 +2649,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
    1mminlen0m
-        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        minlen(["año", "alto"]) is 3
 
@@ -2700,14 +2700,14 @@ MILLER(1)                                                            MILLER(1)
         (class=system #args=0) Returns the operating-system name as a string.
 
    1mpercentile0m
-        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+        (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
        Examples:
        percentile([3,4,5,6,9,10], 90) is 10
        percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
        percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
 
    1mpercentiles0m
-        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+        (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
        Examples:
 
        Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
@@ -2852,7 +2852,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=1) Hyperbolic sine.
 
    1mskewness0m
-        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        skewness([4,5,9,10,11]) is -0.2097285
 
@@ -2914,7 +2914,7 @@ MILLER(1)                                                            MILLER(1)
        ssub("abc.def", ".", "X") gives "abcXdef"
 
    1mstddev0m
-        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        stddev([4,5,9,10,11]) is 3.1144823
 
@@ -3104,7 +3104,7 @@ MILLER(1)                                                            MILLER(1)
        $* = utf8_to_latin1($*)
 
    1mvariance0m
-        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+        (class=stats #args=1) Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
        variance([4,5,9,10,11]) is 9.7
 
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md
index 3d24f0984..34a420f0d 100644
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@@ -1006,7 +1006,7 @@ distinct_count([1,1.0]) is 2
 
 ### kurtosis
 
-kurtosis  (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+kurtosis  (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 kurtosis([4,5,9,10,11]) is -1.6703688
 
@@ -1014,7 +1014,7 @@ kurtosis([4,5,9,10,11]) is -1.6703688 ### maxlen
-maxlen  (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+maxlen  (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 maxlen(["año", "alto"]) is 4
 
@@ -1022,7 +1022,7 @@ maxlen(["año", "alto"]) is 4 ### mean
-mean  (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+mean  (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
 Example:
 mean([4,5,7,10]) is 6.5
 
@@ -1030,7 +1030,7 @@ mean([4,5,7,10]) is 6.5 ### meaneb
-meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types.
+meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
 Example:
 meaneb([4,5,7,10]) is 1.3228756
 
@@ -1038,7 +1038,7 @@ meaneb([4,5,7,10]) is 1.3228756 ### median
-median  (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+median  (class=stats #args=1,2) Returns the median of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
 Examples:
 median([3,4,5,6,9,10]) is 6
 median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5
@@ -1048,7 +1048,7 @@ median(["abc", "def", "ghi", "ghi"]) is "ghi"
 
 ### minlen
 
-minlen  (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+minlen  (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 minlen(["año", "alto"]) is 3
 
@@ -1073,7 +1073,7 @@ null_count(["a", "", "c"]) is 1 ### percentile
-percentile  (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs.
+percentile  (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs.
 Examples:
 percentile([3,4,5,6,9,10], 90) is 10
 percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5
@@ -1083,7 +1083,7 @@ percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi"
 
 ### percentiles
 
-percentiles  (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
+percentiles  (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags.
 Examples:
 
 Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort
@@ -1135,7 +1135,7 @@ You can also leverage this feature to compute percentiles on a sort of your choo
 
 ### skewness
 
-skewness  (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+skewness  (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 skewness([4,5,9,10,11]) is -0.2097285
 
@@ -1149,7 +1149,7 @@ sort_collection (class=stats #args=1) This is a helper function for the percent ### stddev
-stddev  (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+stddev  (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 stddev([4,5,9,10,11]) is 3.1144823
 
@@ -1189,7 +1189,7 @@ sum4([1,2,3,4,5]) is 979 ### variance
-variance  (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types.
+variance  (class=stats #args=1) Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 variance([4,5,9,10,11]) is 9.7
 
diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 9b2b45a22..ef474345a 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1174,8 +1174,7 @@ is normally distributed.`, hasMultipleArities: true, examples: []string{ ``, - `Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort`, - `the input before computing percentiles:`, + `Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles:`, ``, ` percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 }`, ` percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" }`, @@ -1184,14 +1183,11 @@ is normally distributed.`, ``, ` percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9]`, ``, - `Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces`, - `,error on string inputs:`, + `Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs:`, ``, ` percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`, ``, - `The percentiles function always sorts its inputs before computing percentiles. If you know your input`, - `is already sorted -- see also the sort_collection function -- then computation will be faster on`, - `large input if you pass in "array_is_sorted":`, + `The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted":`, ``, ` x = [6,5,9,10,4,3]`, ` percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect`, @@ -1203,8 +1199,7 @@ is normally distributed.`, ` Non-sorted input:`, ` x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")`, ` x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]`, - ` Percentiles are taken over the original positions of the words in the array -- "dogs" is last`, - ` and hence appears as p99:`, + ` Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99:`, ` percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]`, ` With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:`, ` percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`, diff --git a/man/manpage.txt b/man/manpage.txt index 666177bee..290fa7c5d 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2543,7 +2543,7 @@ MILLER(1) MILLER(1) (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. 1mkurtosis0m - (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: kurtosis([4,5,9,10,11]) is -1.6703688 @@ -2618,7 +2618,7 @@ MILLER(1) MILLER(1) (class=math #args=variadic) Max of n numbers; null loses. 
The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 1mmaxlen0m - (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: maxlen(["ao", "alto"]) is 4 @@ -2626,17 +2626,17 @@ MILLER(1) MILLER(1) (class=hashing #args=1) MD5 hash. 1mmean0m - (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Example: mean([4,5,7,10]) is 6.5 1mmeaneb0m - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 1mmedian0m - (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. + (class=stats #args=1,2) Returns the median of values in an array or map. 
Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. Examples: median([3,4,5,6,9,10]) is 6 median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 @@ -2649,7 +2649,7 @@ MILLER(1) MILLER(1) (class=math #args=variadic) Min of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 1mminlen0m - (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: minlen(["ao", "alto"]) is 3 @@ -2700,14 +2700,14 @@ MILLER(1) MILLER(1) (class=system #args=0) Returns the operating-system name as a string. 1mpercentile0m - (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. + (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. Examples: percentile([3,4,5,6,9,10], 90) is 10 percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" 1mpercentiles0m - (class=stats #args=2,3) Returns the given percentiles of values in an array or map. 
Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. Examples: Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort @@ -2852,7 +2852,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Hyperbolic sine. 1mskewness0m - (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: skewness([4,5,9,10,11]) is -0.2097285 @@ -2914,7 +2914,7 @@ MILLER(1) MILLER(1) ssub("abc.def", ".", "X") gives "abcXdef" 1mstddev0m - (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: stddev([4,5,9,10,11]) is 3.1144823 @@ -3104,7 +3104,7 @@ MILLER(1) MILLER(1) $* = utf8_to_latin1($*) 1mvariance0m - (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. 
Example: variance([4,5,9,10,11]) is 9.7 diff --git a/man/mlr.1 b/man/mlr.1 index 91d501b6b..c7313352b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -3742,7 +3742,7 @@ joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5" .RS 0 .\} .nf - (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample kurtosis of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: kurtosis([4,5,9,10,11]) is -1.6703688 .fi @@ -3931,7 +3931,7 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906" .RS 0 .\} .nf - (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the maximum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: maxlen(["aรฑo", "alto"]) is 4 .fi @@ -3951,7 +3951,7 @@ maxlen(["aรฑo", "alto"]) is 4 .RS 0 .\} .nf - (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the arithmetic mean of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Example: mean([4,5,7,10]) is 6.5 .fi @@ -3962,7 +3962,7 @@ mean([4,5,7,10]) is 6.5 .RS 0 .\} .nf - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. 
+ (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 .fi @@ -3973,7 +3973,7 @@ meaneb([4,5,7,10]) is 1.3228756 .RS 0 .\} .nf - (class=stats #args=1,2) Returns the median of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. + (class=stats #args=1,2) Returns the median of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. Examples: median([3,4,5,6,9,10]) is 6 median([3,4,5,6,9,10],{"interpolate_linearly":true}) is 5.5 @@ -4004,7 +4004,7 @@ median(["abc", "def", "ghi", "ghi"]) is "ghi" .RS 0 .\} .nf - (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the minimum string length of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: minlen(["aรฑo", "alto"]) is 3 .fi @@ -4115,7 +4115,7 @@ null_count(["a", "", "c"]) is 1 .RS 0 .\} .nf - (class=stats #args=2,3) Returns the given percentile of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles for information on optional flags, and on performance for large inputs. + (class=stats #args=2,3) Returns the given percentile of values in an array or map. 
Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. Please see the percentiles function for information on optional flags, and on performance for large inputs. Examples: percentile([3,4,5,6,9,10], 90) is 10 percentile([3,4,5,6,9,10], 90, {"interpolate_linearly":true}) is 9.5 @@ -4128,7 +4128,7 @@ percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" .RS 0 .\} .nf - (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns "" AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. + (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. Examples: Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort @@ -4411,7 +4411,7 @@ Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {" .RS 0 .\} .nf - (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample skewness of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: skewness([4,5,9,10,11]) is -0.2097285 .fi @@ -4539,7 +4539,7 @@ ssub("abc.def", ".", "X") gives "abcXdef" .RS 0 .\} .nf - (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. 
Example: stddev([4,5,9,10,11]) is 3.1144823 .fi @@ -4975,7 +4975,7 @@ $* = utf8_to_latin1($*) .RS 0 .\} .nf - (class=stats #args=1) Returns the sample variance of values in an array or map. Returns "" AKA void for array/map of length less than two; returns error for non-array/non-map types. + (class=stats #args=1) Returns the sample variance of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: variance([4,5,9,10,11]) is 9.7 .fi From 077fc3702dd617c33bf03afa9704f64e068ee37f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 16:41:37 -0400 Subject: [PATCH 041/456] more doc-neatens for percentiles on-line help --- Makefile | 3 ++- docs/src/reference-dsl-builtin-functions.md | 26 +++++++++++-------- .../pkg/dsl/cst/builtin_function_manager.go | 15 ++++++++--- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index b5d7a6760..60c7ce578 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,8 @@ dev: make -C docs @echo DONE -docs: +docs: build + make -C docs/src forcebuild make -C docs # ---------------------------------------------------------------- diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 34a420f0d..718e78e49 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1086,8 +1086,7 @@ percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" percentiles (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. 
Examples: -Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort -the input before computing percentiles: +Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -1096,36 +1095,41 @@ Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] -Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces -,error on string inputs: +Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } -The percentiles function always sorts its inputs before computing percentiles. If you know your input -is already sorted -- see also the sort_collection function -- then computation will be faster on -large input if you pass in "array_is_sorted": +The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, "ais":true}) diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index ef474345a..2ea781c98 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1187,28 +1187,37 @@ is normally distributed.`, ``, ` percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`, ``, - `The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted":`, + `The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais":`, ``, ` x = [6,5,9,10,4,3]`, - ` percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect`, + ` percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect`, ` x = sort_collection(x)`, - ` percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct`, + ` percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct`, ``, `You can also leverage this feature to compute percentiles on a sort of your choosing. For example:`, ``, ` Non-sorted input:`, + ``, ` x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ")`, ` x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"]`, + ``, ` Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99:`, + ``, ` percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"]`, + ``, ` With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99:`, + ``, ` percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`, + ``, ` With default sorting done outside percentiles, the same:`, + ``, ` x = sort(x) # or x = sort_collection(x)`, ` x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"]`, ` percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"]`, ` percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"]`, + ``, ` Now sorting by word 
length, "loquaciously" is longest and hence is the p99:`, + ``, ` x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } )`, ` x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"]`, ` percentiles(x, [50, 99], {"oa":true, "ais":true})`, From 44e3a6237345f2a942b50158b427090ca7fc3b28 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 16:44:39 -0400 Subject: [PATCH 042/456] typofix --- internal/pkg/dsl/cst/builtin_function_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 2ea781c98..5499ec8fd 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1070,7 +1070,7 @@ is normally distributed.`, { name: "meaneb", class: FUNC_CLASS_STATS, - help: `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.`, + help: `Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. 
Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.`, unaryFunc: bifs.BIF_meaneb, examples: []string{ `meaneb([4,5,7,10]) is 1.3228756`, From fb3e3d15cd17ccd40adf4f99171a26a38c36d229 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 16:47:19 -0400 Subject: [PATCH 043/456] make dev --- docs/src/manpage.md | 28 ++++++++++++--------- docs/src/manpage.txt | 28 ++++++++++++--------- docs/src/reference-dsl-builtin-functions.md | 2 +- man/manpage.txt | 28 ++++++++++++--------- man/mlr.1 | 28 ++++++++++++--------- 5 files changed, 65 insertions(+), 49 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index d7ed08295..aef0df1f3 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2652,7 +2652,7 @@ MILLER(1) MILLER(1) mean([4,5,7,10]) is 6.5 1mmeaneb0m - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 @@ -2731,8 +2731,7 @@ MILLER(1) MILLER(1) (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. 
Examples: - Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort - the input before computing percentiles: + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -2741,36 +2740,41 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] - Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces - ,error on string inputs: + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. If you know your input - is already sorted -- see also the sort_collection function -- then computation will be faster on - large input if you pass in "array_is_sorted": + The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, "ais":true}) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 290fa7c5d..a4bb52049 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2631,7 +2631,7 @@ MILLER(1) MILLER(1) mean([4,5,7,10]) is 6.5 1mmeaneb0m - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. 
Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 @@ -2710,8 +2710,7 @@ MILLER(1) MILLER(1) (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. Examples: - Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort - the input before computing percentiles: + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -2720,36 +2719,41 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] - Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces - ,error on string inputs: + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. If you know your input - is already sorted -- see also the sort_collection function -- then computation will be faster on - large input if you pass in "array_is_sorted": + The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, 
"ais":true}) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 718e78e49..8f0ba84c1 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1030,7 +1030,7 @@ mean([4,5,7,10]) is 6.5 ### meaneb
-meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types.
+meaneb  (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
 Example:
 meaneb([4,5,7,10]) is 1.3228756
 
diff --git a/man/manpage.txt b/man/manpage.txt index 290fa7c5d..a4bb52049 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2631,7 +2631,7 @@ MILLER(1) MILLER(1) mean([4,5,7,10]) is 6.5 1mmeaneb0m - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 @@ -2710,8 +2710,7 @@ MILLER(1) MILLER(1) (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. 
Examples: - Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort - the input before computing percentiles: + Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -2720,36 +2719,41 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] - Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces - ,error on string inputs: + Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. If you know your input - is already sorted -- see also the sort_collection function -- then computation will be faster on - large input if you pass in "array_is_sorted": + The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. 
For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, "ais":true}) diff --git a/man/mlr.1 b/man/mlr.1 index c7313352b..702d51667 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -3962,7 +3962,7 @@ mean([4,5,7,10]) is 6.5 .RS 0 .\} .nf - (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. + (class=stats #args=1) Returns the error bar for arithmetic mean of values in an array or map, assuming the values are independent and identically distributed. 
Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: meaneb([4,5,7,10]) is 1.3228756 .fi @@ -4131,8 +4131,7 @@ percentile(["abc", "def", "ghi", "ghi"], 90) is "ghi" (class=stats #args=2,3) Returns the given percentiles of values in an array or map. Returns empty string AKA void for empty array/map; returns error for non-array/non-map types. See examples for information on the three option flags. Examples: -Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort -the input before computing percentiles: +Defaults are to not interpolate linearly, to produce a map keyed by percentile name, and to sort the input before computing percentiles: percentiles([3,4,5,6,9,10], [25,75]) is { "25": 4, "75": 9 } percentiles(["abc", "def", "ghi", "ghi"], [25,75]) is { "25": "def", "75": "ghi" } @@ -4141,36 +4140,41 @@ Use "output_array_not_map" (or shorthand "oa") to get the outputs as an array: percentiles([3,4,5,6,9,10], [25,75], {"output_array_not_map":true}) is [4, 9] -Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces -,error on string inputs: +Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- note this produces error values on string inputs: percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } -The percentiles function always sorts its inputs before computing percentiles. If you know your input -is already sorted -- see also the sort_collection function -- then computation will be faster on -large input if you pass in "array_is_sorted": +The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 5, "75": 4 } which is incorrect + percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect x = sort_collection(x) - percentiles(x, [25,75], {"array_is_sorted":true}) gives { "25": 4, "75": 9 } which is correct + percentiles(x, [25,75], {"ais":true}) gives { "25": 4, "75": 9 } which is correct You can also leverage this feature to compute percentiles on a sort of your choosing. For example: Non-sorted input: + x = splitax("the quick brown fox jumped loquaciously over the lazy dogs", " ") x is: ["the", "quick", "brown", "fox", "jumped", "loquaciously", "over", "the", "lazy", "dogs"] - Percentiles are taken over the original positions of the words in the array -- "dogs" is last - and hence appears as p99: + + Percentiles are taken over the original positions of the words in the array -- "dogs" is last and hence appears as p99: + percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "dogs"] + With sorting done inside percentiles, "the" is alphabetically last and is therefore the p99: + percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + With default sorting done outside percentiles, the same: + x = sort(x) # or x = sort_collection(x) x is: ["brown", "dogs", "fox", "jumped", "lazy", "loquaciously", "over", "quick", "the", "the"] percentiles(x, [50, 99], {"oa":true, "ais":true}) gives ["loquaciously", "the"] percentiles(x, [50, 99], {"oa":true}) gives ["loquaciously", "the"] + Now sorting by word length, "loquaciously" is longest and hence is the p99: + x = sort(x, func(a,b) { return strlen(a) <=> strlen(b) } ) x is: ["fox", "the", "the", "dogs", "lazy", "over", "brown", "quick", "jumped", "loquaciously"] percentiles(x, [50, 99], {"oa":true, 
"ais":true}) From fccb7c63bbf66d20749a1f60e879d833fa044f00 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 16:51:44 -0400 Subject: [PATCH 044/456] doc-neaten --- docs/src/dkvp-examples.md | 2 -- docs/src/reference-dsl-builtin-functions.md | 1 - docs/src/reference-dsl-syntax.md | 1 - docs/src/reference-main-flag-list.md | 1 - 4 files changed, 5 deletions(-) diff --git a/docs/src/dkvp-examples.md b/docs/src/dkvp-examples.md index da29db4c3..2f3e3b510 100644 --- a/docs/src/dkvp-examples.md +++ b/docs/src/dkvp-examples.md @@ -251,7 +251,6 @@ a=eks,b=pan,i=2,y=0.522151,ab=ekspan,iy=2.522151,ta=String,tb=String,ti=Integer, a=wye,b=wye,i=3,y=0.338318,ab=wyewye,iy=3.338318,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=eks,b=wye,i=4,y=0.134188,ab=ekswye,iy=4.134188,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float a=wye,b=pan,i=5,y=0.863624,ab=wyepan,iy=5.863624,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float -/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777
Run as-is, then pipe to Miller for pretty-printing: @@ -266,5 +265,4 @@ eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float -/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 8f0ba84c1..22d3a68d1 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1991,4 +1991,3 @@ is_string (class=typing #args=1) True if field is present with string (includin typeof (class=typing #args=1) Convert argument to type of argument (e.g. "str"). For debug.
-/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 diff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md index cf1b4bc78..f2a8b45cb 100644 --- a/docs/src/reference-dsl-syntax.md +++ b/docs/src/reference-dsl-syntax.md @@ -35,7 +35,6 @@ i j k 7 8 15 8 9 17 9 10 19 -/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777
Newlines within the expression are ignored, which can help increase legibility of complex expressions: diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index f688bdd82..8e2daf9d0 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -495,4 +495,3 @@ Notes about all other separators: * `--repifs`: Let IFS be repeated: e.g. for splitting on multiple spaces. * `--rs {string}`: Specify RS for input and output. -/System/Library/Frameworks/Ruby.framework/Versions/2.6/usr/lib/ruby/2.6.0/universal-darwin22/rbconfig.rb:21: warning: Insecure world writable dir /usr/local/bin in PATH, mode 040777 From 069c068298143146b1c6d97eaed35a6da6474eba Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 21:24:34 -0400 Subject: [PATCH 045/456] Summing up empty data (#1370) * empty plus value is value * unit-test cases * make-docs output * docs files * on-line table for null-handling arithmetic rules * doc mods --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/missings.csv | 4 ++ docs/src/missings.json | 5 ++ docs/src/reference-main-null-data.md | 68 ++++++++++++++++--- docs/src/reference-main-null-data.md.in | 31 ++++++++- docs/src/split_circle.csv | 4 ++ docs/src/split_square.csv | 5 ++ docs/src/split_triangle.csv | 4 ++ internal/pkg/bifs/arithmetic.go | 52 +++++++------- internal/pkg/bifs/base.go | 5 ++ internal/pkg/terminals/help/entry.go | 15 +++- man/manpage.txt | 2 +- man/mlr.1 | 4 +- test/cases/dsl-absent-empty/0009/expout | 4 +- test/cases/dsl-absent-empty/0011/expout | 22 +++--- test/cases/dsl-absent-empty/0013/expout | 4 +- test/cases/dsl-absent-empty/0015/expout | 22 +++--- test/cases/dsl-absent-empty/0018/expout | 6 +- test/cases/dsl-absent-empty/0019/expout | 6 +- test/cases/dsl-absent-empty/0024/expout | 2 +- test/cases/dsl-absent-empty/0025/expout | 2 +- .../cases/dsl-null-empty-handling/0005/expout | 2 +- .../cases/dsl-null-empty-handling/0018/expout | 4 +- 
.../cases/dsl-null-empty-handling/0019/expout | 10 +-- .../cases/dsl-null-empty-handling/0020/expout | 10 +-- .../cases/dsl-null-empty-handling/0021/expout | 14 ++-- .../cases/dsl-null-empty-handling/0022/expout | 4 +- .../cases/dsl-null-empty-handling/0023/expout | 10 +-- .../cases/dsl-null-empty-handling/0024/expout | 10 +-- .../cases/dsl-null-empty-handling/0025/expout | 14 ++-- .../cases/dsl-null-empty-handling/0026/expout | 4 +- .../cases/dsl-null-empty-handling/0027/expout | 10 +-- .../cases/dsl-null-empty-handling/0028/expout | 10 +-- .../cases/dsl-null-empty-handling/0029/expout | 14 ++-- 35 files changed, 249 insertions(+), 138 deletions(-) create mode 100644 docs/src/missings.csv create mode 100644 docs/src/missings.json create mode 100644 docs/src/split_circle.csv create mode 100644 docs/src/split_square.csv create mode 100644 docs/src/split_triangle.csv diff --git a/docs/src/manpage.md b/docs/src/manpage.md index aef0df1f3..831530aec 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3634,5 +3634,5 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a4bb52049..43f2ed386 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3613,4 +3613,4 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/docs/src/missings.csv b/docs/src/missings.csv new file mode 100644 index 000000000..22abbc042 --- /dev/null +++ b/docs/src/missings.csv @@ -0,0 +1,4 @@ +a,x,z,w +red,7,, +green,,242,zdatsyg +blue,9,, diff --git a/docs/src/missings.json b/docs/src/missings.json new file mode 100644 index 000000000..d1dfb13ad --- /dev/null +++ b/docs/src/missings.json @@ -0,0 +1,5 @@ +[ + { "a": "red", "x": 7 }, + { "a": "green", "z": 242, "w": "zdatsyg" }, + { "a": "blue", "x": 9 } +] diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md index 6ddd2518f..0de7c67c9 100644 --- a/docs/src/reference-main-null-data.md +++ 
b/docs/src/reference-main-null-data.md @@ -93,7 +93,7 @@ a=1,b=8 x=9,b=10 -* Functions/operators which have one or more *empty* arguments produce empty output: e.g. +* Most functions/operators which have one or more *empty* arguments produce empty output: e.g.
 echo 'x=2,y=3' | mlr put '$a=$x+$y'
@@ -106,7 +106,7 @@ x=2,y=3,a=5
 echo 'x=,y=3' | mlr put '$a=$x+$y'
 
-x=,y=3,a=
+x=,y=3,a=3
 
@@ -125,6 +125,55 @@ with the exception that the `min` and `max` functions are special: if one argume
 x=,y=3,a=3,b=
 
+Likewise, empty works like 0 for addition and subtraction, and like 1 for multiplication: + +
+echo 'x=,y=3' | mlr put '$a = $x + $y; $b = $x - $y; $c = $x * $y'
+
+
+x=,y=3,a=3,b=-3,c=3
+
+ +This is intended to follow the arithmetic rule for absent data (explained next). In particular: + +* For file formats allowing for heterogeneity in keys, e.g. JSON, you should be able to keep a running sum of some field, say `$x`. If a given record doesn't have `$x`, then `$x` will be absent for that record, and the sum should simply continue. +* For CSV and TSV, which don't allow for heterogeneity in keys, the _only_ way a value can be missing is to be empty. Here, if a given record doesn't have `$x`, then `$x` will be empty for that record, and the sum should simply continue. + +
+cat missings.json
+
+
+[
+  { "a": "red",   "x": 7 },
+  { "a": "green", "z": 242, "w": "zdatsyg" },
+  { "a": "blue",  "x": 9 }
+]
+
+ +
+mlr --ijson --from missings.json put -q 'begin { @sum = 0 } @sum += $x; end { print @sum }'
+
+
+16
+
+ +
+cat missings.csv
+
+
+a,x,z,w
+red,7,,
+green,,242,zdatsyg
+blue,9,,
+
+ +
+mlr --icsv --from missings.csv put -q 'begin { @sum = 0 } @sum += $x; end { print @sum }'
+
+
+16
+
+ * Functions of *absent* variables (e.g. `mlr put '$y = log10($nonesuch)'`) evaluate to absent, and arithmetic/bitwise/boolean operators with both operands being absent evaluate to absent. Arithmetic operators with one absent operand return the other operand. More specifically, absent values act like zero for addition/subtraction, and one for multiplication: Furthermore, **any expression which evaluates to absent is not stored in the left-hand side of an assignment statement**:
@@ -145,8 +194,6 @@ x=2,y=3,a=2,b=3
 
 The reasoning is as follows:
 
-* Empty values are explicit in the data so they should explicitly affect accumulations: `mlr put '@sum += $x'` should accumulate numeric `x` values into the sum but an empty `x`, when encountered in the input data stream, should make the sum non-numeric. To work around this you can use the `is_not_null` function as follows: `mlr put 'is_not_null($x) { @sum += $x }'`
-
 * Absent stream-record values should not break accumulations, since Miller by design handles heterogeneous data: the running `@sum` in `mlr put '@sum += $x'` should not be invalidated for records which have no `x`.
 
 * Absent out-of-stream-variable values are precisely what allow you to write `mlr put '@sum += $x'`. Otherwise you would have to write `mlr put 'begin{@sum = 0}; @sum += $x'` -- which is tolerable -- but for `mlr put 'begin{...}; @sum[$a][$b] += $x'` you'd have to pre-initialize `@sum` for all values of `$a` and `$b` in your input data stream, which is intolerable.
@@ -198,10 +245,11 @@ If you're interested in a formal description of how empty and absent fields part
 mlr help type-arithmetic-info
 
-(+)        | 1          2.5        (absent)   (error)   
-------     + ------     ------     ------     ------    
-1          | 2          3.5        1          (error)   
-2.5        | 3.5        5          2.5        (error)   
-(absent)   | 1          2.5        (absent)   (error)   
-(error)    | (error)    (error)    (error)    (error)   
+(+)        | 1          2.5       (empty)    (absent)   (error)   
+------     + ------     ------     ------     ------     ------    
+1          | 2          3.5        1          1          (error)   
+2.5        | 3.5        5          2.5        2.5        (error)   
+(empty)    | 1          2.5        (empty)    (absent)   (error)   
+(absent)   | 1          2.5        (absent)   (absent)   (error)   
+(error)    | (error)    (error)    (error)    (error)    (error)   
 
diff --git a/docs/src/reference-main-null-data.md.in b/docs/src/reference-main-null-data.md.in index 4a9b44616..381a46522 100644 --- a/docs/src/reference-main-null-data.md.in +++ b/docs/src/reference-main-null-data.md.in @@ -34,7 +34,7 @@ GENMD-RUN-COMMAND mlr sort -nr a data/sort-null.dat GENMD-EOF -* Functions/operators which have one or more *empty* arguments produce empty output: e.g. +* Most functions/operators which have one or more *empty* arguments produce empty output: e.g. GENMD-RUN-COMMAND echo 'x=2,y=3' | mlr put '$a=$x+$y' @@ -54,6 +54,33 @@ GENMD-RUN-COMMAND echo 'x=,y=3' | mlr put '$a=min($x,$y);$b=max($x,$y)' GENMD-EOF +Likewise, empty works like 0 for addition and subtraction, and like 1 for multiplication: + +GENMD-RUN-COMMAND +echo 'x=,y=3' | mlr put '$a = $x + $y; $b = $x - $y; $c = $x * $y' +GENMD-EOF + +This is intended to follow the arithmetic rule for absent data (explained next). In particular: + +* For file formats allowing for heterogeneity in keys, e.g. JSON, you should be able to keep a running sum of some field, say `$x`. If a given record doesn't have `$x`, then `$x` will be absent for that record, and the sum should simply continue. +* For CSV and TSV, which don't allow for heterogeneity in keys, the _only_ way a value can be missing is to be empty. Here, if a given record doesn't have `$x`, then `$x` will be empty for that record, and the sum should simply continue. + +GENMD-RUN-COMMAND +cat missings.json +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --ijson --from missings.json put -q 'begin { @sum = 0 } @sum += $x; end { print @sum }' +GENMD-EOF + +GENMD-RUN-COMMAND +cat missings.csv +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --from missings.csv put -q 'begin { @sum = 0 } @sum += $x; end { print @sum }' +GENMD-EOF + * Functions of *absent* variables (e.g. `mlr put '$y = log10($nonesuch)'`) evaluate to absent, and arithmetic/bitwise/boolean operators with both operands being absent evaluate to absent. 
Arithmetic operators with one absent operand return the other operand. More specifically, absent values act like zero for addition/subtraction, and one for multiplication: Furthermore, **any expression which evaluates to absent is not stored in the left-hand side of an assignment statement**: GENMD-RUN-COMMAND @@ -68,8 +95,6 @@ GENMD-EOF The reasoning is as follows: -* Empty values are explicit in the data so they should explicitly affect accumulations: `mlr put '@sum += $x'` should accumulate numeric `x` values into the sum but an empty `x`, when encountered in the input data stream, should make the sum non-numeric. To work around this you can use the `is_not_null` function as follows: `mlr put 'is_not_null($x) { @sum += $x }'` - * Absent stream-record values should not break accumulations, since Miller by design handles heterogeneous data: the running `@sum` in `mlr put '@sum += $x'` should not be invalidated for records which have no `x`. * Absent out-of-stream-variable values are precisely what allow you to write `mlr put '@sum += $x'`. Otherwise you would have to write `mlr put 'begin{@sum = 0}; @sum += $x'` -- which is tolerable -- but for `mlr put 'begin{...}; @sum[$a][$b] += $x'` you'd have to pre-initialize `@sum` for all values of `$a` and `$b` in your input data stream, which is intolerable. 
diff --git a/docs/src/split_circle.csv b/docs/src/split_circle.csv new file mode 100644 index 000000000..6ea6a0a93 --- /dev/null +++ b/docs/src/split_circle.csv @@ -0,0 +1,4 @@ +color,shape,flag,k,index,quantity,rate +red,circle,true,3,16,13.8103,2.9010 +yellow,circle,true,8,73,63.9785,4.2370 +yellow,circle,true,9,87,63.5058,8.3350 diff --git a/docs/src/split_square.csv b/docs/src/split_square.csv new file mode 100644 index 000000000..122663bfe --- /dev/null +++ b/docs/src/split_square.csv @@ -0,0 +1,5 @@ +color,shape,flag,k,index,quantity,rate +red,square,true,2,15,79.2778,0.0130 +red,square,false,4,48,77.5542,7.4670 +red,square,false,6,64,77.1991,9.5310 +purple,square,false,10,91,72.3735,8.2430 diff --git a/docs/src/split_triangle.csv b/docs/src/split_triangle.csv new file mode 100644 index 000000000..70bce77e6 --- /dev/null +++ b/docs/src/split_triangle.csv @@ -0,0 +1,4 @@ +color,shape,flag,k,index,quantity,rate +yellow,triangle,true,1,11,43.6498,9.8870 +purple,triangle,false,5,51,81.2290,8.5910 +purple,triangle,false,7,65,80.1405,5.8240 diff --git a/internal/pkg/bifs/arithmetic.go b/internal/pkg/bifs/arithmetic.go index 86f6d1e7f..871874e7e 100644 --- a/internal/pkg/bifs/arithmetic.go +++ b/internal/pkg/bifs/arithmetic.go @@ -14,7 +14,7 @@ var upos_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ _1u___, /*FLOAT */ _1u___, /*BOOL */ _erro1, - /*VOID */ _void1, + /*VOID */ _zero1, /*STRING */ _erro1, /*ARRAY */ _absn1, /*MAP */ _absn1, @@ -43,7 +43,7 @@ var uneg_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ uneg_i_i, /*FLOAT */ uneg_f_f, /*BOOL */ _erro1, - /*VOID */ _void1, + /*VOID */ _zero1, /*STRING */ _erro1, /*ARRAY */ _absn1, /*MAP */ _absn1, @@ -97,11 +97,11 @@ func plus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var plus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {plus_n_ii, plus_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {plus_f_fi, plus_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {plus_n_ii, plus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {plus_f_fi, plus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, @@ -155,11 +155,11 @@ func minus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var minus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {minus_n_ii, minus_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {minus_f_fi, minus_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {minus_n_ii, minus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {minus_f_fi, minus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, @@ -229,11 +229,11 @@ func times_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var times_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {times_n_ii, times_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {times_f_fi, times_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {times_n_ii, times_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {times_f_fi, times_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, @@ -389,11 +389,11 @@ func dotplus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var dot_plus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotplus_i_ii, dotplus_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dotplus_f_fi, dotplus_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {dotplus_i_ii, dotplus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {dotplus_f_fi, dotplus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, @@ -425,11 +425,11 @@ func dotminus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var dotminus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotminus_i_ii, dotminus_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dotminus_f_fi, dotminus_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {dotminus_i_ii, dotminus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {dotminus_f_fi, dotminus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, @@ -461,11 +461,11 @@ func dottimes_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } var dottimes_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dottimes_i_ii, dottimes_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dottimes_f_fi, dottimes_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {dottimes_i_ii, dottimes_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, + /*FLOAT */ {dottimes_f_fi, dottimes_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, diff --git a/internal/pkg/bifs/base.go b/internal/pkg/bifs/base.go index 700cfab26..36aeb63d2 100644 --- a/internal/pkg/bifs/base.go +++ b/internal/pkg/bifs/base.go @@ -106,6 +106,11 @@ func _zero1(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(0) } +// Return one (unary) +func __one1(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromInt(1) +} + // Return null (unary) func _null1(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.NULL diff --git a/internal/pkg/terminals/help/entry.go b/internal/pkg/terminals/help/entry.go index 5fe7c9a9f..17130fe26 100644 --- a/internal/pkg/terminals/help/entry.go +++ b/internal/pkg/terminals/help/entry.go @@ -486,6 +486,7 @@ func helpTypeArithmeticInfo() { mlrvals := []*mlrval.Mlrval{ mlrval.FromInt(1), mlrval.FromFloat(2.5), + mlrval.VOID, mlrval.ABSENT, mlrval.ERROR, } @@ -497,17 +498,27 @@ func helpTypeArithmeticInfo() { fmt.Printf("%-10s |", "(+)") } else if i == -1 { fmt.Printf("%-10s +", "------") + } else if mlrvals[i].IsVoid() { + fmt.Printf("%-10s |", "(empty)") } else { fmt.Printf("%-10s |", mlrvals[i].String()) } for j := 0; j < n; j++ { if i == -2 { - fmt.Printf(" %-10s", mlrvals[j].String()) 
+ if mlrvals[j].IsVoid() { + fmt.Printf("%-10s", "(empty)") + } else { + fmt.Printf(" %-10s", mlrvals[j].String()) + } } else if i == -1 { fmt.Printf(" %-10s", "------") } else { sum := bifs.BIF_plus_binary(mlrvals[i], mlrvals[j]) - fmt.Printf(" %-10s", sum.String()) + if sum.IsVoid() { + fmt.Printf(" %-10s", "(empty)") + } else { + fmt.Printf(" %-10s", sum.String()) + } } } fmt.Println() diff --git a/man/manpage.txt b/man/manpage.txt index a4bb52049..43f2ed386 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3613,4 +3613,4 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 702d51667..51320a9a8 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-26 +.\" Date: 2023-08-27 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-26" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-27" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/test/cases/dsl-absent-empty/0009/expout b/test/cases/dsl-absent-empty/0009/expout index 0a9b64d06..eaae44488 100644 --- a/test/cases/dsl-absent-empty/0009/expout +++ b/test/cases/dsl-absent-empty/0009/expout @@ -1,6 +1,6 @@ x=1,y=2,z=3 -x=1,y=,z= -x=,y=2,z= +x=1,y=,z=1 +x=,y=2,z=2 x=,y=,z= a=1,y=2,z=2 a=1,y= diff --git a/test/cases/dsl-absent-empty/0011/expout b/test/cases/dsl-absent-empty/0011/expout index 46b9c18aa..e13ed42e1 100644 --- a/test/cases/dsl-absent-empty/0011/expout +++ b/test/cases/dsl-absent-empty/0011/expout @@ -1,24 +1,24 @@ s=2 x=1,y=2 -s= +s=2 x=1,y= -s= +s=4 x=,y=2 -s= +s=4 x=,y= -s= +s=6 a=1,y=2 -s= +s=6 a=1,y= -s= +s=8 a=,y=2 -s= +s=8 a=,y= -s= +s=8 x=1,b=2 -s= +s=8 x=1,b= -s= +s=8 x=,b=2 -s= +s=8 x=,b= diff --git a/test/cases/dsl-absent-empty/0013/expout 
b/test/cases/dsl-absent-empty/0013/expout index 35bd56786..68ab9a155 100644 --- a/test/cases/dsl-absent-empty/0013/expout +++ b/test/cases/dsl-absent-empty/0013/expout @@ -1,6 +1,6 @@ x=int,y=int,z=int -x=int,y=empty,z=empty -x=empty,y=int,z=empty +x=int,y=empty,z=int +x=empty,y=int,z=int x=empty,y=empty,z=empty a=1,y=int,z=int,x=absent a=1,y=empty,x=absent,z=absent diff --git a/test/cases/dsl-absent-empty/0015/expout b/test/cases/dsl-absent-empty/0015/expout index cfcebeab9..bef580d78 100644 --- a/test/cases/dsl-absent-empty/0015/expout +++ b/test/cases/dsl-absent-empty/0015/expout @@ -1,12 +1,12 @@ x=int,y=int,z=absent,s=int -x=int,y=empty,z=absent,s=empty -x=empty,y=int,z=absent,s=empty -x=empty,y=empty,z=absent,s=empty -a=1,y=int,x=absent,z=absent,s=empty -a=1,y=empty,x=absent,z=absent,s=empty -a=,y=int,x=absent,z=absent,s=empty -a=,y=empty,x=absent,z=absent,s=empty -x=int,b=2,y=absent,z=absent,s=empty -x=int,b=,y=absent,z=absent,s=empty -x=empty,b=2,y=absent,z=absent,s=empty -x=empty,b=,y=absent,z=absent,s=empty +x=int,y=empty,z=absent,s=int +x=empty,y=int,z=absent,s=int +x=empty,y=empty,z=absent,s=int +a=1,y=int,x=absent,z=absent,s=int +a=1,y=empty,x=absent,z=absent,s=int +a=,y=int,x=absent,z=absent,s=int +a=,y=empty,x=absent,z=absent,s=int +x=int,b=2,y=absent,z=absent,s=int +x=int,b=,y=absent,z=absent,s=int +x=empty,b=2,y=absent,z=absent,s=int +x=empty,b=,y=absent,z=absent,s=int diff --git a/test/cases/dsl-absent-empty/0018/expout b/test/cases/dsl-absent-empty/0018/expout index 5f0fd1b7c..245f83dfe 100644 --- a/test/cases/dsl-absent-empty/0018/expout +++ b/test/cases/dsl-absent-empty/0018/expout @@ -1,4 +1,4 @@ x=1 ostype=absent xtype=int nstype=int nsum=1 -x= osum=1 ostype=int xtype=empty nstype=empty nsum= -x=7 osum= ostype=empty xtype=int nstype=empty nsum= -sum= +x= osum=1 ostype=int xtype=empty nstype=int nsum=1 +x=7 osum=1 ostype=int xtype=int nstype=int nsum=8 +sum=8 diff --git a/test/cases/dsl-absent-empty/0019/expout 
b/test/cases/dsl-absent-empty/0019/expout index 5f0fd1b7c..245f83dfe 100644 --- a/test/cases/dsl-absent-empty/0019/expout +++ b/test/cases/dsl-absent-empty/0019/expout @@ -1,4 +1,4 @@ x=1 ostype=absent xtype=int nstype=int nsum=1 -x= osum=1 ostype=int xtype=empty nstype=empty nsum= -x=7 osum= ostype=empty xtype=int nstype=empty nsum= -sum= +x= osum=1 ostype=int xtype=empty nstype=int nsum=1 +x=7 osum=1 ostype=int xtype=int nstype=int nsum=8 +sum=8 diff --git a/test/cases/dsl-absent-empty/0024/expout b/test/cases/dsl-absent-empty/0024/expout index e65c673b3..d9c770625 100644 --- a/test/cases/dsl-absent-empty/0024/expout +++ b/test/cases/dsl-absent-empty/0024/expout @@ -1,4 +1,4 @@ x=1 xtype=int sum=11 stype=int -x= xtype=empty sum= stype=empty +x= xtype=empty sum=10 stype=int y= xtype=absent sum=10 stype=int x=7 xtype=int sum=17 stype=int diff --git a/test/cases/dsl-absent-empty/0025/expout b/test/cases/dsl-absent-empty/0025/expout index 1f25de3ba..1c914b16e 100644 --- a/test/cases/dsl-absent-empty/0025/expout +++ b/test/cases/dsl-absent-empty/0025/expout @@ -1,4 +1,4 @@ x=1 xtype=int sum=11 stype=int -x= xtype=empty sum= stype=empty +x= xtype=empty sum=10 stype=int y= xtype=absent sum=999 stype=int x=7 xtype=int sum=17 stype=int diff --git a/test/cases/dsl-null-empty-handling/0005/expout b/test/cases/dsl-null-empty-handling/0005/expout index 578780bf4..ffaf4cbd0 100644 --- a/test/cases/dsl-null-empty-handling/0005/expout +++ b/test/cases/dsl-null-empty-handling/0005/expout @@ -1,3 +1,3 @@ x=1,y=2,s=hello,z=3 -x=1,y=,s=,z= +x=1,y=,s=,z=1 x=,y=,s=hurrah,z= diff --git a/test/cases/dsl-null-empty-handling/0018/expout b/test/cases/dsl-null-empty-handling/0018/expout index 533c22c7c..16af929a2 100644 --- a/test/cases/dsl-null-empty-handling/0018/expout +++ b/test/cases/dsl-null-empty-handling/0018/expout @@ -1,6 +1,6 @@ x=1 y=2 xy=3 sy=2 xt=1 -x=1 y= xy= xt=1 -x= y=2 xy= sy=2 +x=1 y= xy=1 xt=1 +x= y=2 xy=2 sy=2 x= y= xy= a=1 y=2 xy=2 sy=2 a=1 y= diff --git 
a/test/cases/dsl-null-empty-handling/0019/expout b/test/cases/dsl-null-empty-handling/0019/expout index 12833dc93..8f1ee8880 100644 --- a/test/cases/dsl-null-empty-handling/0019/expout +++ b/test/cases/dsl-null-empty-handling/0019/expout @@ -1,11 +1,11 @@ x=1 y=2 xy=3 sy=5 xt=1 st=3 -x=1 y= xy= sy= xt=1 st=3 -x= y=2 xy= sy=5 st=3 -x= y= xy= sy= st=3 +x=1 y= xy=1 sy=3 xt=1 st=3 +x= y=2 xy=2 sy=5 st=3 +x= y= xy= sy=3 st=3 a=1 y=2 xy=2 sy=5 st=3 -a=1 y= sy= st=3 +a=1 y= sy=3 st=3 a= y=2 xy=2 sy=5 st=3 -a= y= sy= st=3 +a= y= sy=3 st=3 x=1 b=2 xy=1 sy=3 xt=1 st=3 x=1 b= xy=1 sy=3 xt=1 st=3 x= b=2 sy=3 st=3 diff --git a/test/cases/dsl-null-empty-handling/0020/expout b/test/cases/dsl-null-empty-handling/0020/expout index bac8fc502..40c4af232 100644 --- a/test/cases/dsl-null-empty-handling/0020/expout +++ b/test/cases/dsl-null-empty-handling/0020/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=3 sy=2 xt=5 st=4 -x=1 y= xy= xt=5 st=4 -x= y=2 xy= sy=2 xt= st=4 -x= y= xy= xt= st=4 +x=1 y= xy=1 xt=5 st=4 +x= y=2 xy=2 sy=2 xt=4 st=4 +x= y= xy= xt=4 st=4 a=1 y=2 xy=2 sy=2 xt=4 st=4 a=1 y= xt=4 st=4 a= y=2 xy=2 sy=2 xt=4 st=4 a= y= xt=4 st=4 x=1 b=2 xy=1 xt=5 st=4 x=1 b= xy=1 xt=5 st=4 -x= b=2 xt= st=4 -x= b= xt= st=4 +x= b=2 xt=4 st=4 +x= b= xt=4 st=4 diff --git a/test/cases/dsl-null-empty-handling/0021/expout b/test/cases/dsl-null-empty-handling/0021/expout index a57679797..76d169e42 100644 --- a/test/cases/dsl-null-empty-handling/0021/expout +++ b/test/cases/dsl-null-empty-handling/0021/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=3 sy=5 xt=5 st=7 -x=1 y= xy= sy= xt=5 st=7 -x= y=2 xy= sy=5 xt= st=7 -x= y= xy= sy= xt= st=7 +x=1 y= xy=1 sy=3 xt=5 st=7 +x= y=2 xy=2 sy=5 xt=4 st=7 +x= y= xy= sy=3 xt=4 st=7 a=1 y=2 xy=2 sy=5 xt=4 st=7 -a=1 y= sy= xt=4 st=7 +a=1 y= sy=3 xt=4 st=7 a= y=2 xy=2 sy=5 xt=4 st=7 -a= y= sy= xt=4 st=7 +a= y= sy=3 xt=4 st=7 x=1 b=2 xy=1 sy=3 xt=5 st=7 x=1 b= xy=1 sy=3 xt=5 st=7 -x= b=2 sy=3 xt= st=7 -x= b= sy=3 xt= st=7 +x= b=2 sy=3 xt=4 st=7 +x= b= sy=3 xt=4 st=7 diff --git 
a/test/cases/dsl-null-empty-handling/0022/expout b/test/cases/dsl-null-empty-handling/0022/expout index b8d0e6ef3..2ec0bac9c 100644 --- a/test/cases/dsl-null-empty-handling/0022/expout +++ b/test/cases/dsl-null-empty-handling/0022/expout @@ -1,6 +1,6 @@ x=1 y=2 xy=-1 sy=2 xt=1 -x=1 y= xy= xt=1 -x= y=2 xy= sy=2 +x=1 y= xy=1 xt=1 +x= y=2 xy=-2 sy=2 x= y= xy= a=1 y=2 xy=2 sy=2 a=1 y= diff --git a/test/cases/dsl-null-empty-handling/0023/expout b/test/cases/dsl-null-empty-handling/0023/expout index 60236a2bf..5490ccccd 100644 --- a/test/cases/dsl-null-empty-handling/0023/expout +++ b/test/cases/dsl-null-empty-handling/0023/expout @@ -1,11 +1,11 @@ x=1 y=2 xy=-1 sy=1 xt=1 st=3 -x=1 y= xy= sy= xt=1 st=3 -x= y=2 xy= sy=1 st=3 -x= y= xy= sy= st=3 +x=1 y= xy=1 sy=3 xt=1 st=3 +x= y=2 xy=-2 sy=1 st=3 +x= y= xy= sy=3 st=3 a=1 y=2 xy=2 sy=1 st=3 -a=1 y= sy= st=3 +a=1 y= sy=3 st=3 a= y=2 xy=2 sy=1 st=3 -a= y= sy= st=3 +a= y= sy=3 st=3 x=1 b=2 xy=1 sy=3 xt=1 st=3 x=1 b= xy=1 sy=3 xt=1 st=3 x= b=2 sy=3 st=3 diff --git a/test/cases/dsl-null-empty-handling/0024/expout b/test/cases/dsl-null-empty-handling/0024/expout index decc36a94..df8e60489 100644 --- a/test/cases/dsl-null-empty-handling/0024/expout +++ b/test/cases/dsl-null-empty-handling/0024/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=-1 sy=2 xt=-3 st=4 -x=1 y= xy= xt=-3 st=4 -x= y=2 xy= sy=2 xt= st=4 -x= y= xy= xt= st=4 +x=1 y= xy=1 xt=-3 st=4 +x= y=2 xy=-2 sy=2 xt=-4 st=4 +x= y= xy= xt=-4 st=4 a=1 y=2 xy=2 sy=2 xt=4 st=4 a=1 y= xt=4 st=4 a= y=2 xy=2 sy=2 xt=4 st=4 a= y= xt=4 st=4 x=1 b=2 xy=1 xt=-3 st=4 x=1 b= xy=1 xt=-3 st=4 -x= b=2 xt= st=4 -x= b= xt= st=4 +x= b=2 xt=-4 st=4 +x= b= xt=-4 st=4 diff --git a/test/cases/dsl-null-empty-handling/0025/expout b/test/cases/dsl-null-empty-handling/0025/expout index c19e412d0..ab04b574e 100644 --- a/test/cases/dsl-null-empty-handling/0025/expout +++ b/test/cases/dsl-null-empty-handling/0025/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=-1 sy=1 xt=-3 st=-1 -x=1 y= xy= sy= xt=-3 st=-1 -x= y=2 xy= sy=1 xt= 
st=-1 -x= y= xy= sy= xt= st=-1 +x=1 y= xy=1 sy=3 xt=-3 st=-1 +x= y=2 xy=-2 sy=1 xt=-4 st=-1 +x= y= xy= sy=3 xt=-4 st=-1 a=1 y=2 xy=2 sy=1 xt=4 st=-1 -a=1 y= sy= xt=4 st=-1 +a=1 y= sy=3 xt=4 st=-1 a= y=2 xy=2 sy=1 xt=4 st=-1 -a= y= sy= xt=4 st=-1 +a= y= sy=3 xt=4 st=-1 x=1 b=2 xy=1 sy=3 xt=-3 st=-1 x=1 b= xy=1 sy=3 xt=-3 st=-1 -x= b=2 sy=3 xt= st=-1 -x= b= sy=3 xt= st=-1 +x= b=2 sy=3 xt=-4 st=-1 +x= b= sy=3 xt=-4 st=-1 diff --git a/test/cases/dsl-null-empty-handling/0026/expout b/test/cases/dsl-null-empty-handling/0026/expout index 5ed9ed807..b96c6e661 100644 --- a/test/cases/dsl-null-empty-handling/0026/expout +++ b/test/cases/dsl-null-empty-handling/0026/expout @@ -1,6 +1,6 @@ x=1 y=2 xy=2 sy=2 xt=1 -x=1 y= xy= xt=1 -x= y=2 xy= sy=2 +x=1 y= xy=1 xt=1 +x= y=2 xy=2 sy=2 x= y= xy= a=1 y=2 xy=2 sy=2 a=1 y= diff --git a/test/cases/dsl-null-empty-handling/0027/expout b/test/cases/dsl-null-empty-handling/0027/expout index 70c01ab4b..8b51ffa51 100644 --- a/test/cases/dsl-null-empty-handling/0027/expout +++ b/test/cases/dsl-null-empty-handling/0027/expout @@ -1,11 +1,11 @@ x=1 y=2 xy=2 sy=6 xt=1 st=3 -x=1 y= xy= sy= xt=1 st=3 -x= y=2 xy= sy=6 st=3 -x= y= xy= sy= st=3 +x=1 y= xy=1 sy=3 xt=1 st=3 +x= y=2 xy=2 sy=6 st=3 +x= y= xy= sy=3 st=3 a=1 y=2 xy=2 sy=6 st=3 -a=1 y= sy= st=3 +a=1 y= sy=3 st=3 a= y=2 xy=2 sy=6 st=3 -a= y= sy= st=3 +a= y= sy=3 st=3 x=1 b=2 xy=1 sy=3 xt=1 st=3 x=1 b= xy=1 sy=3 xt=1 st=3 x= b=2 sy=3 st=3 diff --git a/test/cases/dsl-null-empty-handling/0028/expout b/test/cases/dsl-null-empty-handling/0028/expout index 862ac3fc4..ccb1e4fa7 100644 --- a/test/cases/dsl-null-empty-handling/0028/expout +++ b/test/cases/dsl-null-empty-handling/0028/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=2 sy=2 xt=4 st=4 -x=1 y= xy= xt=4 st=4 -x= y=2 xy= sy=2 xt= st=4 -x= y= xy= xt= st=4 +x=1 y= xy=1 xt=4 st=4 +x= y=2 xy=2 sy=2 xt=4 st=4 +x= y= xy= xt=4 st=4 a=1 y=2 xy=2 sy=2 xt=4 st=4 a=1 y= xt=4 st=4 a= y=2 xy=2 sy=2 xt=4 st=4 a= y= xt=4 st=4 x=1 b=2 xy=1 xt=4 st=4 x=1 b= xy=1 xt=4 
st=4 -x= b=2 xt= st=4 -x= b= xt= st=4 +x= b=2 xt=4 st=4 +x= b= xt=4 st=4 diff --git a/test/cases/dsl-null-empty-handling/0029/expout b/test/cases/dsl-null-empty-handling/0029/expout index c6d102942..e5309caf0 100644 --- a/test/cases/dsl-null-empty-handling/0029/expout +++ b/test/cases/dsl-null-empty-handling/0029/expout @@ -1,12 +1,12 @@ x=1 y=2 xy=2 sy=6 xt=4 st=12 -x=1 y= xy= sy= xt=4 st=12 -x= y=2 xy= sy=6 xt= st=12 -x= y= xy= sy= xt= st=12 +x=1 y= xy=1 sy=3 xt=4 st=12 +x= y=2 xy=2 sy=6 xt=4 st=12 +x= y= xy= sy=3 xt=4 st=12 a=1 y=2 xy=2 sy=6 xt=4 st=12 -a=1 y= sy= xt=4 st=12 +a=1 y= sy=3 xt=4 st=12 a= y=2 xy=2 sy=6 xt=4 st=12 -a= y= sy= xt=4 st=12 +a= y= sy=3 xt=4 st=12 x=1 b=2 xy=1 sy=3 xt=4 st=12 x=1 b= xy=1 sy=3 xt=4 st=12 -x= b=2 sy=3 xt= st=12 -x= b= sy=3 xt= st=12 +x= b=2 sy=3 xt=4 st=12 +x= b= sy=3 xt=4 st=12 From 67d16c89c1f5be81ff202bbcae00653ca9bdeefe Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 22:49:28 -0400 Subject: [PATCH 046/456] typofix --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/reference-dsl-builtin-functions.md | 2 +- internal/pkg/dsl/cst/builtin_function_manager.go | 2 +- man/manpage.txt | 4 ++-- man/mlr.1 | 6 +++--- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index aef0df1f3..acd782c82 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2744,7 +2744,7 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": + The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect @@ -3634,5 +3634,5 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a4bb52049..e2ed9df20 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2723,7 +2723,7 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": + The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect @@ -3613,4 +3613,4 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 22d3a68d1..135d27f14 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1099,7 +1099,7 @@ Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- not percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } -The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": +The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 5499ec8fd..8887c786e 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1187,7 +1187,7 @@ is normally distributed.`, ``, ` percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 }`, ``, - `The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais":`, + `The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"):`, ``, ` x = [6,5,9,10,4,3]`, ` percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect`, diff --git a/man/manpage.txt b/man/manpage.txt index a4bb52049..e2ed9df20 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2723,7 +2723,7 @@ MILLER(1) MILLER(1) percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } - The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": + The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect @@ -3613,4 +3613,4 @@ MILLER(1) MILLER(1) - 2023-08-26 MILLER(1) + 2023-08-27 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 702d51667..352c6f0c6 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-26 +.\" Date: 2023-08-27 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-26" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-27" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -4144,7 +4144,7 @@ Use "interpolate_linearly" (or shorthand "il") to do linear interpolation -- not percentiles([3,4,5,6,9,10], [25,75], {"interpolate_linearly":true}) is { "25": 4.25, "75": 8.25 } -The percentiles function always sorts its inputs before computing percentiles. If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais": +The percentiles function always sorts its inputs before computing percentiles. 
If you know your input is already sorted -- see also the sort_collection function -- then computation will be faster on large input if you pass in "array_is_sorted" (shorthand: "ais"): x = [6,5,9,10,4,3] percentiles(x, [25,75], {"ais":true}) gives { "25": 5, "75": 4 } which is incorrect From 71171bc04cdecbb22696b83be079a0b163f4ac54 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Aug 2023 23:41:50 -0400 Subject: [PATCH 047/456] Treat empty like absent in `+` `-` `*` (#1371) * empty plus value is value * unit-test cases * make-docs output * docs files * on-line table for null-handling arithmetic rules * doc mods From 5b29169b08838f91c52b29eb153348c4a7900df1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Aug 2023 10:08:34 -0400 Subject: [PATCH 048/456] Update 2015-era Python sketch to Python 3 (#1372) --- python/sketch.py | 868 +++++++++++++++++++++++++++-------------------- 1 file changed, 504 insertions(+), 364 deletions(-) diff --git a/python/sketch.py b/python/sketch.py index 81a5621ef..33f05bdee 100755 --- a/python/sketch.py +++ b/python/sketch.py @@ -1,8 +1,9 @@ #!/usr/bin/python -import os, sys +import os +import sys import getopt -import string, re +import re import collections # ================================================================ @@ -25,478 +26,617 @@ import collections # o summarizations: min, max, mean, count, sum, first, last # o tabular pretty-print + # ================================================================ def usage(): - print >> sys.stderr, "Usage: %s [options] {modulator-spec} {zero or more filenames}" % os.path.basename(sys.argv[0]) - print >> sys.stderr, "Options:" - print >> sys.stderr, " -R {rs} Input/output record separator" - print >> sys.stderr, " -F {fs} Input/output field separator" - print >> sys.stderr, " -P {ps} Input/output key-value-pair separator" - print >> sys.stderr, " -v {name=value} xxx needs more doc" - print >> sys.stderr, "" - print >> sys.stderr, " --idkvp Input format is delimited by 
IRS,IFS,IPS" - print >> sys.stderr, " --odkvp Output format is delimited by IRS,IFS,IPS" - print >> sys.stderr, " --icsv Input format is delimited by IRS,IFS,IPS, with header line followed by data lines (e.g. CSV)" - print >> sys.stderr, " --ocsv Output format is delimited by IRS,IFS,IPS, with header line followed by data lines (e.g. CSV)" - print >> sys.stderr, " --inidx Input format is implicitly integer-indexed (awk-style)" - print >> sys.stderr, " --onidx Output format is implicitly integer-indexed (awk-style)" - print >> sys.stderr, " --ixtab Input format is transposed-tabular-pretty-print" - print >> sys.stderr, " --oxtab Output format is transposed-tabular-pretty-print" - print >> sys.stderr, "Modulator specs:" - print >> sys.stderr, '--cat' - print >> sys.stderr, '--tac' - print >> sys.stderr, '--cut' - print >> sys.stderr, '--cutx' - print >> sys.stderr, '--sortfields' - print >> sys.stderr, '--sortfieldsup' - print >> sys.stderr, '--sortfieldsdown' + print( + "Usage: %s [options] {modulator-spec} {zero or more filenames}" + % os.path.basename(sys.argv[0]), + file=sys.stderr, + ) + msg = """ +Options: + -R {rs} Input/output record separator + -F {fs} Input/output field separator + -P {ps} Input/output key-value-pair separator + -v {name=value} xxx needs more doc + + --idkvp Input format is delimited by IRS,IFS,IPS + --odkvp Output format is delimited by IRS,IFS,IPS + --icsv Input format is delimited by IRS,IFS,IPS, with header line followed by data lines (e.g. CSV) + --ocsv Output format is delimited by IRS,IFS,IPS, with header line followed by data lines (e.g. 
CSV) + --inidx Input format is implicitly integer-indexed (awk-style) + --onidx Output format is implicitly integer-indexed (awk-style) + --ixtab Input format is transposed-tabular-pretty-print + --oxtab Output format is transposed-tabular-pretty-print +Modulator specs: +--cat +--tac +--cut +--cutx +--sortfields +--sortfieldsup +--sortfieldsdown +""" + print(msg, file=sys.stderr) + sys.exit(1) - sys.exit(1) # ---------------------------------------------------------------- def parse_command_line(): - namespace = set_up_namespace() - rreader = None - rwriter = None - rmodulator = None + namespace = set_up_namespace() + rreader = None + rwriter = None + rmodulator = None - try: - optargs, non_option_args = getopt.getopt(sys.argv[1:], "R:F:P:v:h", [ - 'help', 'idkvp', 'odkvp', 'icsv', 'ocsv', 'inidx', 'onidx', 'ixtab', 'oxtab', - 'cat', 'tac', 'cut=', 'cutx=', 'sortfields', 'sortfieldsup', 'sortfieldsdown']) + try: + optargs, non_option_args = getopt.getopt( + sys.argv[1:], + "R:F:P:v:h", + [ + "help", + "idkvp", + "odkvp", + "icsv", + "ocsv", + "inidx", + "onidx", + "ixtab", + "oxtab", + "cat", + "tac", + "cut=", + "cutx=", + "sortfields", + "sortfieldsup", + "sortfieldsdown", + ], + ) - except getopt.GetoptError, err: - print str(err) - usage() - sys.exit(1) + except getopt.GetoptError as e: + print(str(e)) + usage() + sys.exit(1) - for opt, arg in optargs: - if opt == '-R': - rs = arg - namespace.put("ORS", namespace.put("IRS", rs)) - elif opt == '-F': - fs = arg - namespace.put("OFS", namespace.put("IFS", fs)) - elif opt == '-P': - ps = arg - namespace.put("OPS", namespace.put("IPS", ps)) - elif opt == '-v': - kv = string.split(arg, "=", 1) - namespace.put(kv[0], kv[1]) + for opt, arg in optargs: + if opt == "-R": + rs = arg + namespace.put("ORS", namespace.put("IRS", rs)) + elif opt == "-F": + fs = arg + namespace.put("OFS", namespace.put("IFS", fs)) + elif opt == "-P": + ps = arg + namespace.put("OPS", namespace.put("IPS", ps)) + elif opt == "-v": + kv = 
arg.split("=", 1) + namespace.put(kv[0], kv[1]) - elif opt == '--idkvp': - rreader = RecordReaderDefault(istream=sys.stdin, namespace=namespace, irs=namespace.get("IRS"), ifs=namespace.get("IFS"), ips=namespace.get("IPS")) - elif opt == '--odkvp': - rwriter = RecordWriterDefault(ostream=sys.stdout, ors=namespace.get("ORS"), ofs=namespace.get("OFS"), ops=namespace.get("OPS")) + elif opt == "--idkvp": + rreader = RecordReaderDefault( + istream=sys.stdin, + namespace=namespace, + irs=namespace.get("IRS"), + ifs=namespace.get("IFS"), + ips=namespace.get("IPS"), + ) + elif opt == "--odkvp": + rwriter = RecordWriterDefault( + ostream=sys.stdout, + ors=namespace.get("ORS"), + ofs=namespace.get("OFS"), + ops=namespace.get("OPS"), + ) - elif opt == '--icsv': - rreader = RecordReaderHeaderFirst(istream=sys.stdin, namespace=namespace, irs=namespace.get("IRS"), ifs=namespace.get("IFS")) - elif opt == '--ocsv': - rwriter = RecordWriterHeaderFirst(ostream=sys.stdout, ors=namespace.get("ORS"), ofs=namespace.get("OFS")) + elif opt == "--icsv": + rreader = RecordReaderHeaderFirst( + istream=sys.stdin, + namespace=namespace, + irs=namespace.get("IRS"), + ifs=namespace.get("IFS"), + ) + elif opt == "--ocsv": + rwriter = RecordWriterHeaderFirst( + ostream=sys.stdout, + ors=namespace.get("ORS"), + ofs=namespace.get("OFS"), + ) - elif opt == '--inidx': - rreader = RecordReaderIntegerIndexed(istream=sys.stdin, namespace=namespace, irs=namespace.get("IRS"), ifs=namespace.get("IFS")) - elif opt == '--onidx': - rwriter = RecordWriterIntegerIndexed(ostream=sys.stdout, ors=namespace.get("ORS"), ofs=namespace.get("OFS")) + elif opt == "--inidx": + rreader = RecordReaderIntegerIndexed( + istream=sys.stdin, + namespace=namespace, + irs=namespace.get("IRS"), + ifs=namespace.get("IFS"), + ) + elif opt == "--onidx": + rwriter = RecordWriterIntegerIndexed( + ostream=sys.stdout, + ors=namespace.get("ORS"), + ofs=namespace.get("OFS"), + ) - #elif opt == '--ixtab': - # pass - elif opt == '--oxtab': - 
rwriter = RecordWriterVerticallyTabulated(ostream=sys.stdout) # xxx args w/r/t/ RS/FS/PS?!? + # elif opt == '--ixtab': + # pass + elif opt == "--oxtab": + rwriter = RecordWriterVerticallyTabulated( + ostream=sys.stdout + ) # xxx args w/r/t/ RS/FS/PS?!? - elif opt == '--cat': - rmodulator = CatModulator() - elif opt == '--tac': - rmodulator = TacModulator() - elif opt == '--cut': - rmodulator = SelectFieldsModulator(string.split(arg, namespace.get("IFS"))) - elif opt == '--cutx': - rmodulator = DeselectFieldsModulator(string.split(arg, namespace.get("IFS"))) - elif opt == '--cutx': - rmodulator = DeselectFieldsModulator(string.split(arg, namespace.get("IFS"))) - elif opt == '--sortfields': - rmodulator = SortFieldsInRecordModulator(True) - elif opt == '--sortfieldsup': - rmodulator = SortFieldsInRecordModulator(True) - elif opt == '--sortfieldsdown': - rmodulator = SortFieldsInRecordModulator(False) + elif opt == "--cat": + rmodulator = CatModulator() + elif opt == "--tac": + rmodulator = TacModulator() + elif opt == "--cut": + rmodulator = SelectFieldsModulator(arg.split(namespace.get("IFS"))) + elif opt == "--cutx": + rmodulator = DeselectFieldsModulator(arg.split(namespace.get("IFS"))) + elif opt == "--cutx": + rmodulator = DeselectFieldsModulator(arg.split(namespace.get("IFS"))) + elif opt == "--sortfields": + rmodulator = SortFieldsInRecordModulator(True) + elif opt == "--sortfieldsup": + rmodulator = SortFieldsInRecordModulator(True) + elif opt == "--sortfieldsdown": + rmodulator = SortFieldsInRecordModulator(False) - elif opt == '--help': - usage() - else: - print >> sys.stderr, "Unhandled option \"%s\"." % opt - sys.exit(1) + elif opt == "--help": + usage() + else: + print('Unhandled option "%s".' 
% opt, file=sys.stderr) + sys.exit(1) - #xxx non_option_arg_count = len(non_option_args) + # xxx non_option_arg_count = len(non_option_args) - if rreader == None: - rreader = RecordReaderDefault(istream=sys.stdin, namespace=namespace, irs=namespace.get("IRS"), ifs=namespace.get("IFS"), ips=namespace.get("IPS")) - if rwriter == None: - rwriter = RecordWriterDefault(ostream=sys.stdout, ors=namespace.get("ORS"), ofs=namespace.get("OFS"), ops=namespace.get("OPS")) - if rmodulator == None: - rmodulator = CatModulator() + if rreader is None: + rreader = RecordReaderDefault( + istream=sys.stdin, + namespace=namespace, + irs=namespace.get("IRS"), + ifs=namespace.get("IFS"), + ips=namespace.get("IPS"), + ) + if rwriter is None: + rwriter = RecordWriterDefault( + ostream=sys.stdout, + ors=namespace.get("ORS"), + ofs=namespace.get("OFS"), + ops=namespace.get("OPS"), + ) + if rmodulator is None: + rmodulator = CatModulator() + + return { + "namespace": namespace, + "rreader": rreader, + "rwriter": rwriter, + "rmodulator": rmodulator, + } - return {'namespace':namespace, 'rreader':rreader, 'rwriter':rwriter, 'rmodulator':rmodulator} def main(): - options = parse_command_line() + options = parse_command_line() - # parse ARGV: - # * --ifmt: dkvp,hdr1st,iidxed,align,xposealign - # * --ofmt: dkvp,hdr1st,iidxed,align,xposealign - # * which-control-language spec?!? - # * modulators/script ... this is the key decision area for language(s) design. - # * filenames + # parse ARGV: + # * --ifmt: dkvp,hdr1st,iidxed,align,xposealign + # * --ofmt: dkvp,hdr1st,iidxed,align,xposealign + # * which-control-language spec?!? + # * modulators/script ... this is the key decision area for language(s) design. 
+ # * filenames - namespace = options['namespace'] - rreader = options['rreader'] - rmodulator = options['rmodulator'] - rwriter = options['rwriter'] + rreader = options["rreader"] + rmodulator = options["rmodulator"] + rwriter = options["rwriter"] + + smodulator = StreamModulator() + smodulator.modulate(rreader, rmodulator, rwriter) - smodulator = StreamModulator() - smodulator.modulate(rreader, rmodulator, rwriter) # ================================================================ class MillerNamespace: - def __init__(self): - self.mapping = {} - self.imapping = {} - def get(self, name): - return self.mapping[name] - def iget(self, name): - return self.imapping[name] - def put(self, name, value): - self.mapping[name] = value - return value - def iput(self, name, ivalue): - self.imapping[name] = ivalue - return ivalue + def __init__(self): + self.mapping = {} + self.imapping = {} + + def get(self, name): + return self.mapping[name] + + def iget(self, name): + return self.imapping[name] + + def put(self, name, value): + self.mapping[name] = value + return value + + def iput(self, name, ivalue): + self.imapping[name] = ivalue + return ivalue + # ================================================================ class Record: - # kvs is list of pair-lists. (xxx: do tuples work too?) - def __init__(self, kvs=[]): - self.fields = collections.OrderedDict() - self.mput(kvs) - def put(self, k, v): - self.fields[k] = v - def mput(self, kvs): - for [k,v] in kvs: - self.fields[k] = v - def get(self, k): - return self.fields[k] - def has_key(self, k): - return self.fields.has_key(k) - def get_field_names(self): - return self.fields.keys() - def get_pairs(self): - return self.fields.items() - def num_pairs(self): - return len(self.fields.items()) - # xxx xref to record-formatter classes - def __str__(self): - return self.fields.__repr__ - def __repr__(self): - return self.fields.__repr__ + # kvs is list of pair-lists. (xxx: do tuples work too?) 
+ def __init__(self, kvs=[]): + self.fields = collections.OrderedDict() + self.mput(kvs) + + def put(self, k, v): + self.fields[k] = v + + def mput(self, kvs): + for [k, v] in kvs: + self.fields[k] = v + + def get(self, k): + return self.fields[k] + + def has_key(self, k): + return k in self.fields.keys() + + def get_field_names(self): + return self.fields.keys() + + def get_pairs(self): + return self.fields.items() + + def num_pairs(self): + return len(self.fields.items()) + + # xxx xref to record-formatter classes + def __str__(self): + return self.fields.__repr__ + + def __repr__(self): + return self.fields.__repr__ + # ================================================================ # Each record is a sequence of fields delimited by FS, each of which is a # key-value pair separated by PS. + class RecordReader: - def __init__(self, istream, namespace, irs, ifs, ips): - self.istream = istream - self.namespace = namespace - self.irs = irs - self.ifs = ifs - self.ips = ips + def __init__(self, istream, namespace, irs, ifs, ips): + self.istream = istream + self.namespace = namespace + self.irs = irs + self.ifs = ifs + self.ips = ips + class RecordReaderDefault(RecordReader): - def __init__(self, istream, namespace, irs, ifs, ips): - RecordReader.__init__(self, istream, namespace, irs, ifs, ips) + def __init__(self, istream, namespace, irs, ifs, ips): + RecordReader.__init__(self, istream, namespace, irs, ifs, ips) - def read(self): - line = self.istream.readline() # xxx use self.irs - if line == '': - return None + def read(self): + line = self.istream.readline() # xxx use self.irs + if line == "": + return None - line = line.strip() # Remove leading/trailing whitespace including carriage return from readline(). - fields = string.split(line, self.ifs) - kvs = [string.split(field, self.ips, 1) for field in fields] - record = Record(kvs) + line = ( + line.strip() + ) # Remove leading/trailing whitespace including carriage return from readline(). 
+ fields = line.split(self.ifs) + kvs = [field.split(self.ips, 1) for field in fields] + record = Record(kvs) - self.namespace.iput("NF", record.num_pairs) - self.namespace.iput("NR", self.namespace.iget("NR") + 1) + self.namespace.iput("NF", record.num_pairs) + self.namespace.iput("NR", self.namespace.iget("NR") + 1) - # xxx stub - self.namespace.put("FILENAME", None) - self.namespace.iput("FNR", self.namespace.iget("FNR") + 1) + # xxx stub + self.namespace.put("FILENAME", None) + self.namespace.iput("FNR", self.namespace.iget("FNR") + 1) + + return record - return record # ---------------------------------------------------------------- # awk-style class RecordReaderIntegerIndexed(RecordReader): - # xxx ctor with istream context?!? or independent of that?!? for cskv, no matter. - # csv reader of course needs context. - def __init__(self, istream, namespace, irs, ifs): - RecordReader.__init__(self, istream, namespace, irs, ifs, None) + # xxx ctor with istream context?!? or independent of that?!? for cskv, no matter. + # csv reader of course needs context. + def __init__(self, istream, namespace, irs, ifs): + RecordReader.__init__(self, istream, namespace, irs, ifs, None) + + def read(self): + # xxx use self.irs + line = self.istream.readline() + if line == "": + return None + line = ( + line.strip() + ) # Remove leading/trailing whitespace including carriage return from readline(). + fields = re.split(self.ifs, line) + kvs = [] + i = 1 + for field in fields: + kvs.append([i, field]) + i += 1 + return Record(kvs) - def read(self): - # xxx use self.irs - line = self.istream.readline() - if line == '': - return None - line = line.strip() # Remove leading/trailing whitespace including carriage return from readline(). 
- fields = re.split(self.ifs, line) - kvs = [] - i = 1 - for field in fields: - kvs.append([i, field]) - i += 1 - return Record(kvs) # ---------------------------------------------------------------- # csv-style class RecordReaderHeaderFirst(RecordReader): - def __init__(self, istream, namespace, irs, ifs): - RecordReader.__init__(self, istream, namespace, irs, ifs, None) - self.field_names = None - self.header_line = None + def __init__(self, istream, namespace, irs, ifs): + RecordReader.__init__(self, istream, namespace, irs, ifs, None) + self.field_names = None + self.header_line = None - def read(self): - if self.field_names == None: - header_line = self.istream.readline() - if header_line == '': + def read(self): + if not self.field_names: + header_line = self.istream.readline() + if header_line == "": + return None + # Remove leading/trailing whitespace including carriage return from readline(). + header_line = header_line.strip() + self.field_names = header_line.split(self.ifs, -1) + self.header_line = header_line + + data_line = self.istream.readline() + if data_line == "": return None - # Remove leading/trailing whitespace including carriage return from readline(). - header_line = header_line.strip() - self.field_names = string.split(header_line, self.ifs, -1) - self.header_line = header_line + # Remove leading/trailing whitespace including carriage return from readline(). + data_line = data_line.strip() + field_values = data_line.split(self.ifs, -1) + if len(self.field_names) != len(field_values): + raise Exception( + 'Header/data length mismatch: %d != %d in "%s" and "%s"' + % ( + len(self.field_names), + len(field_values), + self.header_line, + data_line, + ) + ) - data_line = self.istream.readline() - if data_line == '': - return None - # Remove leading/trailing whitespace including carriage return from readline(). 
- data_line = data_line.strip() - field_values = string.split(data_line, self.ifs, -1) - if len(self.field_names) != len(field_values): - raise Exception("Header/data length mismatch: %d != %d in \"%s\" and \"%s\"" % \ - (len(field_names), len(field_values), self.header_line, data_line)) + return Record(zip(self.field_names, field_values)) - return Record(zip(self.field_names, field_values)) # ================================================================ # xxx ostream at ctor?? needs drain-at-end logic for prettyprint. + class RecordWriter: - def __init__(self, ostream, ors, ofs, ops): - self.ostream = ostream - self.ors = ors - self.ofs = ofs - self.ops = ops + def __init__(self, ostream, ors, ofs, ops): + self.ostream = ostream + self.ors = ors + self.ofs = ofs + self.ops = ops + class RecordWriterDefault(RecordWriter): - def __init__(self, ostream, ors, ofs, ops): - RecordWriter.__init__(self, ostream, ors, ofs, ops) + def __init__(self, ostream, ors, ofs, ops): + RecordWriter.__init__(self, ostream, ors, ofs, ops) + + def write(self, record): + self.ostream.write( + self.ofs.join([str(k) + self.ops + str(v) for [k, v] in record.get_pairs()]) + ) + self.ostream.write("\n") - def write(self, record): - self.ostream.write(self.ofs.join([str(k)+self.ops+str(v) for [k,v] in record.get_pairs()])) - self.ostream.write("\n") # ---------------------------------------------------------------- class RecordWriterHeaderFirst(RecordWriter): - def __init__(self, ostream, ors, ofs): - RecordWriter.__init__(self, ostream, ors, ofs, None) - self.field_names = None + def __init__(self, ostream, ors, ofs): + RecordWriter.__init__(self, ostream, ors, ofs, None) + self.field_names = None - def write(self, record): - data_string = self.ofs.join([str(v) for [k,v] in record.get_pairs()]) - if self.field_names == None: - self.field_names = record.get_field_names() - header_string = self.ofs.join([str(k) for [k,v] in record.get_pairs()]) - self.ostream.write(header_string) + def 
write(self, record): + data_string = self.ofs.join([str(v) for [k, v] in record.get_pairs()]) + if self.field_names is None: + self.field_names = record.get_field_names() + header_string = self.ofs.join([str(k) for [k, v] in record.get_pairs()]) + self.ostream.write(header_string) + self.ostream.write("\n") + self.ostream.write(data_string) self.ostream.write("\n") - self.ostream.write(data_string) - self.ostream.write("\n") + # ---------------------------------------------------------------- # xxx rename -class RecordWriterVerticallyTabulated(RecordWriter): - def __init__(self, ostream): - RecordWriter.__init__(self, ostream, None, None, None) - def write(self, record): - max_field_name_width = 1 - field_names = record.get_field_names() - for field_name in field_names: - field_name_width = len(field_name) - if field_name_width > max_field_name_width: - max_field_name_width = field_name_width - lines = [] - for field_name in field_names: - lines.append("%-*s %s" % (max_field_name_width, field_name, record.get(field_name))) - self.ostream.write("\n".join(lines)) - self.ostream.write("\n\n") +class RecordWriterVerticallyTabulated(RecordWriter): + def __init__(self, ostream): + RecordWriter.__init__(self, ostream, None, None, None) + + def write(self, record): + max_field_name_width = 1 + field_names = record.get_field_names() + for field_name in field_names: + field_name_width = len(field_name) + if field_name_width > max_field_name_width: + max_field_name_width = field_name_width + lines = [] + for field_name in field_names: + lines.append( + "%-*s %s" % (max_field_name_width, field_name, record.get(field_name)) + ) + self.ostream.write("\n".join(lines)) + self.ostream.write("\n\n") + # ---------------------------------------------------------------- class RecordWriterIntegerIndexed: - def __init__(self, ostream, ors, ofs): - self.ostream = ostream - self.ors = ors - self.ofs = ofs - def write(self, record): - self.ostream.write(self.ofs.join([str(v) for [k,v] in 
record.get_pairs()])) - self.ostream.write("\n") + def __init__(self, ostream, ors, ofs): + self.ostream = ostream + self.ors = ors + self.ofs = ofs + + def write(self, record): + self.ostream.write(self.ofs.join([str(v) for [k, v] in record.get_pairs()])) + self.ostream.write("\n") + # ================================================================ class CatModulator: - def __init__(self): - pass - def modulate(self, record): - if record == None: # drain at end - return [] - return [record] + def __init__(self): + pass + + def modulate(self, record): + if record is None: # drain at end + return [] + return [record] + class TacModulator: - def __init__(self): - self.records = [] - def modulate(self, record): - if record == None: # drain at end - self.records.reverse() - rv = self.records - self.records = [] - return rv - else: - self.records.append(record) - return [] + def __init__(self): + self.records = [] + + def modulate(self, record): + if record is None: # drain at end + self.records.reverse() + rv = self.records + self.records = [] + return rv + else: + self.records.append(record) + return [] + class SelectFieldsModulator: - def __init__(self, field_names): - self.field_names = field_names - def modulate(self, record): - if record == None: # drain at end - return [] - kvs = [] - for field_name in self.field_names: - if record.has_key(field_name): - kvs.append((field_name, record.get(field_name))) - new_record = Record() - new_record.mput(kvs) - return [new_record] + def __init__(self, field_names): + self.field_names = field_names + + def modulate(self, record): + if record is None: # drain at end + return [] + kvs = [] + for field_name in self.field_names: + if record.has_key(field_name): + kvs.append((field_name, record.get(field_name))) + new_record = Record() + new_record.mput(kvs) + return [new_record] + # The field_names argument may be a list or hash-set -- as long as it supports # the "in" operator as in "name in field_names". 
# xxx to do: use a hash-set internally. class DeselectFieldsModulator: - def __init__(self, field_names): - self.field_names = field_names - def modulate(self, record): - if record == None: # drain at end - return [] - kvs = [] - for field_name in record.get_field_names(): - if not field_name in self.field_names: - kvs.append((field_name, record.get(field_name))) - new_record = Record() - new_record.mput(kvs) - return [new_record] + def __init__(self, field_names): + self.field_names = field_names + + def modulate(self, record): + if record is None: # drain at end + return [] + kvs = [] + for field_name in record.get_field_names(): + if field_name not in self.field_names: + kvs.append((field_name, record.get(field_name))) + new_record = Record() + new_record.mput(kvs) + return [new_record] + class SortFieldsInRecordModulator: - def __init__(self, do_ascending_sort=True): - self.do_ascending_sort = do_ascending_sort - def modulate(self, record): - if record == None: # drain at end - return [] - kvs = [] - sorted_field_names = sorted(record.get_field_names()) - if not self.do_ascending_sort: - sorted_field_names.reverse() # xxx optimize - for field_name in sorted_field_names: - kvs.append((field_name, record.get(field_name))) - new_record = Record() - new_record.mput(kvs) - return [new_record] + def __init__(self, do_ascending_sort=True): + self.do_ascending_sort = do_ascending_sort + + def modulate(self, record): + if record is None: # drain at end + return [] + kvs = [] + sorted_field_names = sorted(record.get_field_names()) + if not self.do_ascending_sort: + sorted_field_names.reverse() # xxx optimize + for field_name in sorted_field_names: + kvs.append((field_name, record.get(field_name))) + new_record = Record() + new_record.mput(kvs) + return [new_record] + class MeanKeeper: - def __init__(self): - self.sum = 0.0 - self.count = 0 - def put(x): - self.sum += x - self.count += 1 - def get_sum(): - return self.sum - def get_count(): - return self.count - def 
get_mean(): - # In IEEE-standard floating-point this would give NaN in the empty case. - # But Python throws an exception on divide by zero instead. - if self.count == 0: - return None - else: - return self.sum / self.count + def __init__(self): + self.sum = 0.0 + self.count = 0 + + def put(self, x): + self.sum += x + self.count += 1 + + def get_sum(self): + return self.sum + + def get_count(self): + return self.count + + def get_mean(self): + # In IEEE-standard floating-point this would give NaN in the empty case. + # But Python throws an exception on divide by zero instead. + if self.count == 0: + return None + else: + return self.sum / self.count + class MeanModulator: - def __init__(self, collate_field_names, key_field_names=[]): - self.collate_field_names = collate_field_names - self.key_field_names = key_field_names - # map from key-field values to (map from collate-field names to MSCKeeper objects). - self.collate_outputs = {} + def __init__(self, collate_field_names, key_field_names=[]): + self.collate_field_names = collate_field_names + self.key_field_names = key_field_names + # map from key-field values to (map from collate-field names to MSCKeeper objects). 
+ self.collate_outputs = {} - def modulate(self, record): - if record != None: # drain at end + def modulate(self, record): + if record is not None: # drain at end + # xxx optimize + for value_field_name in self.collate_field_names: + if not record.has_key(value_field_name): + return [] + for key_field_name in self.key_field_names: + if not record.has_key(key_field_name): + return [] - # xxx optimize - for value_field_name in self.collate_field_names: - if not record.has_key(value_field_name): - return [] - for key_field_name in self.key_field_names: - if not record.has_key(key_field_name): - return [] + collate_field_values = [ + float(record.get(k)) for k in self.collate_field_names + ] + key_string = ",".join([record.get(k) for k in self.key_field_names]) - collate_field_values = [float(record.get(k)) for k in self.collate_field_names] - key_string = ",".join([record.get(k) for k in self.key_field_names]) + # xxx wip + return [] + else: + # xxx stub + output_record = Record() + output_record.put("foo", "bar") + return [output_record] - return [] - else: - # xxx stub - output_record = Record() - output_record.put("foo", "bar") - return [output_record] # ================================================================ class StreamModulator: - def __init__(self): - pass - def modulate(self, rreader, rmodulator, rwriter): - while True: - in_record = rreader.read() + def __init__(self): + pass - out_records = rmodulator.modulate(in_record) + def modulate(self, rreader, rmodulator, rwriter): + while True: + in_record = rreader.read() - for out_record in out_records: - rwriter.write(out_record) + out_records = rmodulator.modulate(in_record) + + for out_record in out_records: + rwriter.write(out_record) + + if in_record is None: + break - if in_record == None: - break # ================================================================ def set_up_namespace(): - namespace = MillerNamespace() - namespace.put("ORS", namespace.put("IRS", "\n")) - namespace.put("OFS", 
namespace.put("IFS", ",")) - namespace.put("OPS", namespace.put("IPS", "=")) + namespace = MillerNamespace() + namespace.put("ORS", namespace.put("IRS", "\n")) + namespace.put("OFS", namespace.put("IFS", ",")) + namespace.put("OPS", namespace.put("IPS", "=")) - # xxx CONVFMT + # xxx CONVFMT - namespace.put("FILENAME", None) - namespace.iput("NF", None) - namespace.iput("NR", 0) - namespace.iput("FNR", 0) + namespace.put("FILENAME", None) + namespace.iput("NF", None) + namespace.iput("NR", 0) + namespace.iput("FNR", 0) + + return namespace - return namespace # ================================================================ main() From 5146dd7f90af98fdd53d386bb8ce74115c6253e2 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Aug 2023 21:46:24 -0400 Subject: [PATCH 049/456] New `contains` DSL function (#1374) * New `contains` DSL function * unit-test files, and docs --- docs/src/manpage.md | 56 +++++++++------- docs/src/manpage.txt | 56 +++++++++------- docs/src/reference-dsl-builtin-functions.md | 15 ++++- internal/pkg/bifs/strings.go | 14 ++++ .../pkg/dsl/cst/builtin_function_manager.go | 14 +++- man/manpage.txt | 56 +++++++++------- man/mlr.1 | 64 +++++++++++-------- test/cases/dsl-contains/0001/cmd | 1 + test/cases/dsl-contains/0001/experr | 0 test/cases/dsl-contains/0001/expout | 8 +++ test/cases/dsl-contains/0001/mlr | 14 ++++ 11 files changed, 198 insertions(+), 100 deletions(-) create mode 100644 test/cases/dsl-contains/0001/cmd create mode 100644 test/cases/dsl-contains/0001/experr create mode 100644 test/cases/dsl-contains/0001/expout create mode 100644 test/cases/dsl-contains/0001/mlr diff --git a/docs/src/manpage.md b/docs/src/manpage.md index acd782c82..53496d9c9 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -209,28 +209,28 @@ MILLER(1) MILLER(1) asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present asserting_string atan 
atan2 atanh bitcount boolean - capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 - flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys - get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec - hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean - is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map - is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 - leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 - mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate - nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm - reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms - sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx - splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime - strftime_local string strip strlen strpntime strpntime_local strptime - strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system - systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat - unformatx upntime uptime urand urand32 urandelement urandint urandrange - utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < - << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + capitalize cbrt ceil clean_whitespace collapse_whitespace concat contains cos + cosh count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp + expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms + get_keys get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt + hms2fsec hms2sec hostname index int invqnorm is_absent is_array is_bool + is_boolean is_empty is_empty_map is_error is_float is_int is_map is_nan + is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null + is_numeric is_present is_string joink joinkv joinv json_parse json_stringify + kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec + localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect + mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub + nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile + percentiles pow qnorm reduce regextract regextract_or_else rightpad round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + strfntime_local strftime strftime_local string strip strlen strpntime + strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 + sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate + typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . + .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2311,6 +2311,14 @@ MILLER(1) MILLER(1) concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] + 1mcontains0m + (class=string #args=2) Returns true if the first argument contains the second as a substring. 
This is like saying `index(arg1, arg2) >= 0`but with less keystroking. + Examples: + contains("abcde", "e") gives true + contains("abcde", "x") gives false + contains(12345, 34) gives true + contains("fort", "") gives true + 1mcos0m (class=math #args=1) Trigonometric cosine. @@ -2461,7 +2469,7 @@ MILLER(1) MILLER(1) (class=string #args=2) Returns the index (1-based) of the second argument within the first. Returns -1 if the second argument isn't a substring of the first. Stringifies non-string inputs. Uses UTF-8 encoding to count characters, not bytes. Examples: index("abcde", "e") gives 5 - index("abcde", "x") gives 01 + index("abcde", "x") gives -1 index(12345, 34) gives 3 index("fort", "t") gives 5 @@ -3634,5 +3642,5 @@ MILLER(1) MILLER(1) - 2023-08-27 MILLER(1) + 2023-08-28 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index e2ed9df20..0141cac60 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -188,28 +188,28 @@ MILLER(1) MILLER(1) asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present asserting_string atan atan2 atanh bitcount boolean - capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 - flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys - get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec - hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean - is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map - is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 - leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 - mean 
meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate - nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm - reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms - sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx - splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime - strftime_local string strip strlen strpntime strpntime_local strptime - strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system - systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat - unformatx upntime uptime urand urand32 urandelement urandint urandrange - utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < - << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + capitalize cbrt ceil clean_whitespace collapse_whitespace concat contains cos + cosh count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp + expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms + get_keys get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt + hms2fsec hms2sec hostname index int invqnorm is_absent is_array is_bool + is_boolean is_empty is_empty_map is_error is_float is_int is_map is_nan + is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null + is_numeric is_present is_string joink joinkv joinv json_parse json_stringify + kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec + localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect + mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub + nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile + percentiles pow qnorm reduce regextract regextract_or_else rightpad round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 
sha512 sin sinh skewness sort sort_collection splita + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + strfntime_local strftime strftime_local string strip strlen strpntime + strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 + sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate + typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . + .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2290,6 +2290,14 @@ MILLER(1) MILLER(1) concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] + 1mcontains0m + (class=string #args=2) Returns true if the first argument contains the second as a substring. This is like saying `index(arg1, arg2) >= 0`but with less keystroking. + Examples: + contains("abcde", "e") gives true + contains("abcde", "x") gives false + contains(12345, 34) gives true + contains("fort", "") gives true + 1mcos0m (class=math #args=1) Trigonometric cosine. @@ -2440,7 +2448,7 @@ MILLER(1) MILLER(1) (class=string #args=2) Returns the index (1-based) of the second argument within the first. Returns -1 if the second argument isn't a substring of the first. Stringifies non-string inputs. Uses UTF-8 encoding to count characters, not bytes. Examples: index("abcde", "e") gives 5 - index("abcde", "x") gives 01 + index("abcde", "x") gives -1 index(12345, 34) gives 3 index("fort", "t") gives 5 @@ -3613,4 +3621,4 @@ MILLER(1) MILLER(1) - 2023-08-27 MILLER(1) + 2023-08-28 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 135d27f14..3655493c2 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1219,6 +1219,17 @@ collapse_whitespace (class=string #args=1) Strip repeated whitespace from strin +### contains +
+contains  (class=string #args=2) Returns true if the first argument contains the second as a substring. This is like saying `index(arg1, arg2) >= 0` but with less keystroking.
+Examples:
+contains("abcde", "e") gives true
+contains("abcde", "x") gives false
+contains(12345, 34) gives true
+contains("forêt", "ê") gives true
+
+ + ### format
 format  (class=string #args=variadic) Using first argument as format string, interpolate remaining arguments in place of each "{}" in the format string. Too-few arguments are treated as the empty string; too-many arguments are discarded.
@@ -1254,7 +1265,7 @@ gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\1 : \2]") gives "[prefix :
 index  (class=string #args=2) Returns the index (1-based) of the second argument within the first. Returns -1 if the second argument isn't a substring of the first. Stringifies non-string inputs. Uses UTF-8 encoding to count characters, not bytes.
 Examples:
 index("abcde", "e") gives 5
-index("abcde", "x") gives 01
+index("abcde", "x") gives -1
 index(12345, 34) gives 3
 index("forêt", "t") gives 5
 
diff --git a/internal/pkg/bifs/strings.go b/internal/pkg/bifs/strings.go index 4cdcdce93..6e1c76511 100644 --- a/internal/pkg/bifs/strings.go +++ b/internal/pkg/bifs/strings.go @@ -149,6 +149,20 @@ func BIF_index(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(lib.UTF8Strlen(sinput1[:iindex]) + 1) } +// ================================================================ +// contains(string, substring) returns true if string contains substring, else false. + +func BIF_contains(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + if input1.IsAbsent() { + return mlrval.ABSENT + } + if input1.IsError() { + return mlrval.ERROR + } + + return mlrval.FromBool(strings.Contains(input1.String(), input2.String())) +} + // ================================================================ func BIF_truncate(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsErrorOrAbsent() { diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 8887c786e..6d4dc6233 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -546,11 +546,23 @@ Arrays are new in Miller 6; the substr function is older.`, binaryFunc: bifs.BIF_index, examples: []string{ `index("abcde", "e") gives 5`, - `index("abcde", "x") gives 01`, + `index("abcde", "x") gives -1`, `index(12345, 34) gives 3`, `index("forรชt", "t") gives 5`, }, }, + { + name: "contains", + class: FUNC_CLASS_STRING, + help: `Returns true if the first argument contains the second as a substring. 
This is like saying ` + "`index(arg1, arg2) >= 0`" + `but with less keystroking.`, + binaryFunc: bifs.BIF_contains, + examples: []string{ + `contains("abcde", "e") gives true`, + `contains("abcde", "x") gives false`, + `contains(12345, 34) gives true`, + `contains("forรชt", "รช") gives true`, + }, + }, { name: "tolower", diff --git a/man/manpage.txt b/man/manpage.txt index e2ed9df20..0141cac60 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -188,28 +188,28 @@ MILLER(1) MILLER(1) asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present asserting_string atan atan2 atanh bitcount boolean - capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh - count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 - flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys - get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec - hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean - is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map - is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present - is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 - leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 - log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 - mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate - nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm - reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms - sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 - sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx - splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime - strftime_local string strip strlen strpntime 
strpntime_local strptime - strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system - systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat - unformatx upntime uptime urand urand32 urandelement urandint urandrange - utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < - << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + capitalize cbrt ceil clean_whitespace collapse_whitespace concat contains cos + cosh count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp + expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms + get_keys get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt + hms2fsec hms2sec hostname index int invqnorm is_absent is_array is_bool + is_boolean is_empty is_empty_map is_error is_float is_int is_map is_nan + is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null + is_numeric is_present is_string joink joinkv joinv json_parse json_stringify + kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec + localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect + mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub + nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile + percentiles pow qnorm reduce regextract regextract_or_else rightpad round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + strfntime_local strftime strftime_local string strip strlen strpntime + strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 + sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate + typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 variance version ! 
!= !=~ % & && * ** + - . + .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2290,6 +2290,14 @@ MILLER(1) MILLER(1) concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] + 1mcontains0m + (class=string #args=2) Returns true if the first argument contains the second as a substring. This is like saying `index(arg1, arg2) >= 0`but with less keystroking. + Examples: + contains("abcde", "e") gives true + contains("abcde", "x") gives false + contains(12345, 34) gives true + contains("fort", "") gives true + 1mcos0m (class=math #args=1) Trigonometric cosine. @@ -2440,7 +2448,7 @@ MILLER(1) MILLER(1) (class=string #args=2) Returns the index (1-based) of the second argument within the first. Returns -1 if the second argument isn't a substring of the first. Stringifies non-string inputs. Uses UTF-8 encoding to count characters, not bytes. Examples: index("abcde", "e") gives 5 - index("abcde", "x") gives 01 + index("abcde", "x") gives -1 index(12345, 34) gives 3 index("fort", "t") gives 5 @@ -3613,4 +3621,4 @@ MILLER(1) MILLER(1) - 2023-08-27 MILLER(1) + 2023-08-28 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 352c6f0c6..e0f0965b6 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-27 +.\" Date: 2023-08-28 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-27" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-08-28" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -235,28 +235,28 @@ asserting_empty_map asserting_error asserting_float asserting_int asserting_map asserting_nonempty_map asserting_not_array asserting_not_empty asserting_not_map asserting_not_null asserting_null asserting_numeric asserting_present 
asserting_string atan atan2 atanh bitcount boolean -capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh -count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp expm1 -flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys -get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt hms2fsec -hms2sec hostname index int invqnorm is_absent is_array is_bool is_boolean -is_empty is_empty_map is_error is_float is_int is_map is_nan is_nonempty_map -is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present -is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 -leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 -log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max maxlen md5 -mean meaneb median mexp min minlen mmul mode msub nsec2gmt nsec2gmtdate -nsec2localdate nsec2localtime null_count os percentile percentiles pow qnorm -reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms -sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 -sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx -splitnv splitnvx sqrt ssub stddev strfntime strfntime_local strftime -strftime_local string strip strlen strpntime strpntime_local strptime -strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system -systime systimeint tan tanh tolower toupper truncate typeof unflatten unformat -unformatx upntime uptime urand urand32 urandelement urandint urandrange -utf8_to_latin1 variance version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < -<< <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ +capitalize cbrt ceil clean_whitespace collapse_whitespace concat contains cos +cosh count depth dhms2fsec dhms2sec distinct_count erf erfc every exec exp +expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms +get_keys get_values gmt2localtime gmt2nsec gmt2sec gssub gsub haskey hexfmt +hms2fsec hms2sec hostname index int invqnorm is_absent is_array is_bool +is_boolean is_empty is_empty_map is_error is_float is_int is_map is_nan +is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null +is_numeric is_present is_string joink joinkv joinv json_parse json_stringify +kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec +localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect +mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub +nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile +percentiles pow qnorm reduce regextract regextract_or_else rightpad round +roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime +select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita +splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime +strfntime_local strftime strftime_local string strip strlen strpntime +strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 +sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate +typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement +urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . +\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ .fi .if n \{\ .RE @@ -3113,6 +3113,20 @@ concat([1,2],[3]) is [1,2,3] .fi .if n \{\ .RE +.SS "contains" +.if n \{\ +.RS 0 +.\} +.nf + (class=string #args=2) Returns true if the first argument contains the second as a substring. This is like saying `index(arg1, arg2) >= 0`but with less keystroking. 
+Examples: +contains("abcde", "e") gives true +contains("abcde", "x") gives false +contains(12345, 34) gives true +contains("forรชt", "รช") gives true +.fi +.if n \{\ +.RE .SS "cos" .if n \{\ .RS 0 @@ -3471,7 +3485,7 @@ gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\e1 : \e2]") gives "[prefix (class=string #args=2) Returns the index (1-based) of the second argument within the first. Returns -1 if the second argument isn't a substring of the first. Stringifies non-string inputs. Uses UTF-8 encoding to count characters, not bytes. Examples: index("abcde", "e") gives 5 -index("abcde", "x") gives 01 +index("abcde", "x") gives -1 index(12345, 34) gives 3 index("forรชt", "t") gives 5 .fi diff --git a/test/cases/dsl-contains/0001/cmd b/test/cases/dsl-contains/0001/cmd new file mode 100644 index 000000000..df47a8b50 --- /dev/null +++ b/test/cases/dsl-contains/0001/cmd @@ -0,0 +1 @@ +mlr -n put -q -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-contains/0001/experr b/test/cases/dsl-contains/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-contains/0001/expout b/test/cases/dsl-contains/0001/expout new file mode 100644 index 000000000..8ded02ad7 --- /dev/null +++ b/test/cases/dsl-contains/0001/expout @@ -0,0 +1,8 @@ +abc abc true +abc true + true + abc false +abcde abc true +123 3 true +123 34 false +123 23 true diff --git a/test/cases/dsl-contains/0001/mlr b/test/cases/dsl-contains/0001/mlr new file mode 100644 index 000000000..ea83c5320 --- /dev/null +++ b/test/cases/dsl-contains/0001/mlr @@ -0,0 +1,14 @@ +end { + for (e in [ + ["abc", "abc"], + ["abc", ""], + ["", ""], + ["", "abc"], + ["abcde", "abc"], + ["123", 3], + [123, 34], + [123, 23], + ]) { + print e[1], e[2], contains(e[1], e[2]) + } +} From 4c26b479f057ecc3c79ae08b4eaf6941894a219e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 30 Aug 2023 07:32:04 -0400 Subject: [PATCH 050/456] typofix --- internal/pkg/dsl/cst/builtin_function_manager.go | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 6d4dc6233..e9154dc60 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -1021,7 +1021,7 @@ is normally distributed.`, { name: "antimode", class: FUNC_CLASS_STATS, - help: `Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`, + help: `Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.`, unaryFunc: bifs.BIF_antimode, examples: []string{ `antimode([3,3,4,4,4]) is 3`, From 2fd353c6be893c2ffea7146ca7ca61d81a9f9e33 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 30 Aug 2023 09:00:24 -0400 Subject: [PATCH 051/456] docmods for typofix --- docs/src/data-diving-examples.md | 46 ++++++++++----------- docs/src/manpage.md | 4 +- docs/src/manpage.txt | 4 +- docs/src/reference-dsl-builtin-functions.md | 2 +- docs/src/reference-verbs.md | 38 ++++++++--------- docs/src/two-pass-algorithms.md | 4 +- man/manpage.txt | 4 +- man/mlr.1 | 6 +-- 8 files changed, 54 insertions(+), 54 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 53496d9c9..a19f46f14 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2188,7 +2188,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Inverse hyperbolic cosine. 1mantimode0m - (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. Examples: antimode([3,3,4,4,4]) is 3 antimode([3,3,4,4]) is 3 @@ -3642,5 +3642,5 @@ MILLER(1) MILLER(1) - 2023-08-28 MILLER(1) + 2023-08-30 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 0141cac60..a91bfe4a2 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2167,7 +2167,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Inverse hyperbolic cosine. 1mantimode0m - (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. + (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins. 
Examples: antimode([3,3,4,4,4]) is 3 antimode([3,3,4,4]) is 3 @@ -3621,4 +3621,4 @@ MILLER(1) MILLER(1) - 2023-08-28 MILLER(1) + 2023-08-30 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 3655493c2..8c3b49640 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -978,7 +978,7 @@ urandrange (class=math #args=2) Floating-point numbers uniformly distributed on ### antimode
-antimode  (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+antimode  (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
 Examples:
 antimode([3,3,4,4,4]) is 3
 antimode([3,3,4,4]) is 3
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index c94e184c5..2b7e9501f 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index 0141cac60..a91bfe4a2 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -2167,7 +2167,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=1) Inverse hyperbolic cosine.
 
    1mantimode0m
-        (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+        (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
        Examples:
        antimode([3,3,4,4,4]) is 3
        antimode([3,3,4,4]) is 3
@@ -3621,4 +3621,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-28                         MILLER(1)
+                                  2023-08-30                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index e0f0965b6..95cbcaf7c 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-28
+.\"      Date: 2023-08-30
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-28" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-30" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2772,7 +2772,7 @@ being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
 .RS 0
 .\}
 .nf
- (class=stats #args=1) Returns the most frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
+ (class=stats #args=1) Returns the least frequently occurring value in an array or map. Returns error for non-array/non-map types. Values are stringified for comparison, so for example string "1" and integer 1 are not distinct. In cases of ties, first-found wins.
 Examples:
 antimode([3,3,4,4,4]) is 3
 antimode([3,3,4,4]) is 3

From 879f272f7940fa996c1a788b6fccce3e79434476 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 30 Aug 2023 11:13:35 -0400
Subject: [PATCH 052/456] Typofix in `uif`/`uof` percentiles (#1375)

* typofix in uif/uof percentiles

* fix regression-test data
---
 internal/pkg/transformers/utils/percentile_keeper.go | 4 ++--
 test/cases/verb-summary/all/expout                   | 6 +++---
 test/cases/verb-summary/exclude/expout               | 6 +++---
 test/cases/verb-summary/transpose-all/expout         | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/internal/pkg/transformers/utils/percentile_keeper.go b/internal/pkg/transformers/utils/percentile_keeper.go
index c9f2453bd..d496abdd7 100644
--- a/internal/pkg/transformers/utils/percentile_keeper.go
+++ b/internal/pkg/transformers/utils/percentile_keeper.go
@@ -131,7 +131,7 @@ func (keeper *PercentileKeeper) EmitNamed(name string) *mlrval.Mlrval {
 		}
 
 	} else if name == "uif" {
-		p75 := keeper.EmitNonInterpolated(25.0)
+		p75 := keeper.EmitNonInterpolated(75.0)
 		iqr := keeper.EmitNamed("iqr")
 		if p75.IsNumeric() && iqr.IsNumeric() {
 			return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceInnerK, iqr))
@@ -140,7 +140,7 @@ func (keeper *PercentileKeeper) EmitNamed(name string) *mlrval.Mlrval {
 		}
 
 	} else if name == "uof" {
-		p75 := keeper.EmitNonInterpolated(25.0)
+		p75 := keeper.EmitNonInterpolated(75.0)
 		iqr := keeper.EmitNamed("iqr")
 		if p75.IsNumeric() && iqr.IsNumeric() {
 			return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceOuterK, iqr))
diff --git a/test/cases/verb-summary/all/expout b/test/cases/verb-summary/all/expout
index 06dc491d6..393eea855 100644
--- a/test/cases/verb-summary/all/expout
+++ b/test/cases/verb-summary/all/expout
@@ -1,6 +1,6 @@
 field_name field_type count null_count distinct_count mode   sum    mean   stddev var    skewness minlen maxlen min    p25    median p75    max    iqr    lof      lif     uif     uof
 a          string     10    0          5              eks    0      -      -      -      -        3      3      eks    eks    pan    wye    zee    -      -        -       -       -
 b          string     10    0          3              wye    0      -      -      -      -        3      3      pan    pan    wye    wye    zee    -      -        -       -       -
-i          int        10    0          10             1      55     5.5000 3.0277 9.1667 0.0000   1      2      1      3      6      8      10     5      -12.0000 -4.5000 10.5000 18.0000
-x          float      10    0          10             0.3468 4.5363 0.4536 0.2155 0.0465 -0.5461  18     19     0.0314 0.3468 0.5271 0.5986 0.7587 0.2518 -0.4085  -0.0309 0.7244  1.1021
-y          float      10    0          10             0.7268 5.9445 0.5945 0.3066 0.0940 -0.1936  17     19     0.1342 0.3383 0.7268 0.8636 0.9762 0.5253 -1.2376  -0.4496 1.1263  1.9142
+i          int        10    0          10             1      55     5.5000 3.0277 9.1667 0.0000   1      2      1      3      6      8      10     5      -12.0000 -4.5000 15.5000 23.0000
+x          float      10    0          10             0.3468 4.5363 0.4536 0.2155 0.0465 -0.5461  18     19     0.0314 0.3468 0.5271 0.5986 0.7587 0.2518 -0.4085  -0.0309 0.9762  1.3538
+y          float      10    0          10             0.7268 5.9445 0.5945 0.3066 0.0940 -0.1936  17     19     0.1342 0.3383 0.7268 0.8636 0.9762 0.5253 -1.2376  -0.4496 1.6516  2.4395
diff --git a/test/cases/verb-summary/exclude/expout b/test/cases/verb-summary/exclude/expout
index c36804f62..574eeea00 100644
--- a/test/cases/verb-summary/exclude/expout
+++ b/test/cases/verb-summary/exclude/expout
@@ -1,6 +1,6 @@
 field_name field_type count null_count distinct_count sum    stddev var    skewness minlen maxlen min    p25    p75    max    iqr    lof      lif     uif     uof
 a          string     10    0          5              0      -      -      -        3      3      eks    eks    wye    zee    -      -        -       -       -
 b          string     10    0          3              0      -      -      -        3      3      pan    pan    wye    zee    -      -        -       -       -
-i          int        10    0          10             55     3.0277 9.1667 0.0000   1      2      1      3      8      10     5      -12.0000 -4.5000 10.5000 18.0000
-x          float      10    0          10             4.5363 0.2155 0.0465 -0.5461  18     19     0.0314 0.3468 0.5986 0.7587 0.2518 -0.4085  -0.0309 0.7244  1.1021
-y          float      10    0          10             5.9445 0.3066 0.0940 -0.1936  17     19     0.1342 0.3383 0.8636 0.9762 0.5253 -1.2376  -0.4496 1.1263  1.9142
+i          int        10    0          10             55     3.0277 9.1667 0.0000   1      2      1      3      8      10     5      -12.0000 -4.5000 15.5000 23.0000
+x          float      10    0          10             4.5363 0.2155 0.0465 -0.5461  18     19     0.0314 0.3468 0.5986 0.7587 0.2518 -0.4085  -0.0309 0.9762  1.3538
+y          float      10    0          10             5.9445 0.3066 0.0940 -0.1936  17     19     0.1342 0.3383 0.8636 0.9762 0.5253 -1.2376  -0.4496 1.6516  2.4395
diff --git a/test/cases/verb-summary/transpose-all/expout b/test/cases/verb-summary/transpose-all/expout
index 8b948a500..7b6d0699a 100644
--- a/test/cases/verb-summary/transpose-all/expout
+++ b/test/cases/verb-summary/transpose-all/expout
@@ -19,5 +19,5 @@ max            zee    zee    10       0.7587  0.9762
 iqr            -      -      5        0.2518  0.5253
 lof            -      -      -12.0000 -0.4085 -1.2376
 lif            -      -      -4.5000  -0.0309 -0.4496
-uif            -      -      10.5000  0.7244  1.1263
-uof            -      -      18.0000  1.1021  1.9142
+uif            -      -      15.5000  0.9762  1.6516
+uof            -      -      23.0000  1.3538  2.4395

From 0493a0debd7a54fe949a88b7a9c9178bb58deb61 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 30 Aug 2023 19:39:22 -0400
Subject: [PATCH 053/456] Fatal-on-data-error `mlr -x` option (#1373)

* Fatal-on-data-error `mlr -x` option [WIP]

* arithmetic.go error-reason propagation

* more

* more

* more

* renames

* doc page

* namefix

* fix broken test

* make dev
---
 docs/src/data-error.csv                       |   6 +
 docs/src/manpage.md                           |   3 +
 docs/src/manpage.txt                          |   3 +
 docs/src/record-heterogeneity.md              |   2 +
 docs/src/reference-dsl-errors.md              |  51 ++-
 docs/src/reference-dsl-errors.md.in           |  26 +-
 docs/src/reference-main-flag-list.md          |   1 +
 internal/pkg/bifs/arithmetic.go               | 418 +++++++++++-------
 internal/pkg/bifs/base.go                     |  33 +-
 internal/pkg/bifs/bits.go                     | 184 ++++----
 internal/pkg/bifs/booleans.go                 |   8 +-
 internal/pkg/bifs/cmp.go                      | 182 ++++----
 internal/pkg/bifs/collections.go              | 116 ++---
 internal/pkg/bifs/datetime.go                 | 156 ++++---
 internal/pkg/bifs/hashing.go                  |   8 +-
 internal/pkg/bifs/mathlib.go                  | 202 +++++----
 internal/pkg/bifs/random.go                   |  12 +-
 internal/pkg/bifs/regex.go                    |  34 +-
 internal/pkg/bifs/relative_time.go            |  54 ++-
 internal/pkg/bifs/stats.go                    | 106 +++--
 internal/pkg/bifs/stats_test.go               |   2 +-
 internal/pkg/bifs/strings.go                  | 115 +++--
 internal/pkg/bifs/system.go                   |  10 +-
 internal/pkg/bifs/types.go                    |  86 +++-
 internal/pkg/cli/option_parse.go              |   9 +
 internal/pkg/cli/option_types.go              |   3 +
 internal/pkg/climain/mlrcli_parse.go          |   4 +
 internal/pkg/dsl/cst/builtin_functions.go     |  14 +-
 internal/pkg/dsl/cst/collections.go           |  60 ++-
 internal/pkg/dsl/cst/env.go                   |   2 +-
 internal/pkg/dsl/cst/hofs.go                  | 108 +++--
 internal/pkg/dsl/cst/udf.go                   |   8 +-
 internal/pkg/lib/logger.go                    |  12 +-
 internal/pkg/mlrval/mlrmap_json.go            |   4 +-
 internal/pkg/mlrval/mlrval_cmp_test.go        |   2 +-
 internal/pkg/mlrval/mlrval_collections.go     |  10 +-
 internal/pkg/mlrval/mlrval_constants.go       |   6 -
 internal/pkg/mlrval/mlrval_get.go             |  42 ++
 internal/pkg/mlrval/mlrval_is.go              |   8 +
 internal/pkg/mlrval/mlrval_is_test.go         |  18 +-
 internal/pkg/mlrval/mlrval_new.go             | 112 ++++-
 internal/pkg/mlrval/mlrval_output.go          |  10 +
 internal/pkg/mlrval/mlrval_sort_test.go       |  20 +-
 internal/pkg/mlrval/mlrval_type.go            |   1 +
 internal/pkg/output/channel_writer.go         |  45 +-
 internal/pkg/output/file_output_handlers.go   |  35 +-
 internal/pkg/stream/stream.go                 |  19 +-
 internal/pkg/terminals/help/entry.go          |   2 +-
 internal/pkg/terminals/repl/dsl.go            |   7 +
 internal/pkg/transformers/fraction.go         |   7 +-
 internal/pkg/transformers/latin1_to_utf8.go   |   2 +-
 internal/pkg/transformers/utf8_to_latin1.go   |   2 +-
 .../transformers/utils/percentile_keeper.go   |   4 +-
 man/manpage.txt                               |   3 +
 man/mlr.1                                     |   3 +
 .../dsl-gmt-date-time-functions/0002/expout   |   6 +-
 .../dsl-gmt-date-time-functions/0003/expout   |   6 +-
 .../dsl-gmt-date-time-functions/0004/expout   |   6 +-
 test/cases/io-spec-tsv/0004/cmd               |   2 +-
 test/cases/io-spec-tsv/0004/experr            |   2 +-
 test/cases/io-spec-tsv/0004/expout            |   8 +
 ...lank.json => single-column-with-blank.tsv} |   0
 xtodo.txt                                     |  70 +++
 63 files changed, 1648 insertions(+), 852 deletions(-)
 create mode 100644 docs/src/data-error.csv
 rename test/cases/io-spec-tsv/0004/{single-column-with-blank.json => single-column-with-blank.tsv} (100%)
 create mode 100644 xtodo.txt

diff --git a/docs/src/data-error.csv b/docs/src/data-error.csv
new file mode 100644
index 000000000..cc9b52390
--- /dev/null
+++ b/docs/src/data-error.csv
@@ -0,0 +1,6 @@
+x
+1
+2
+3
+text
+4
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index a19f46f14..c4a7b3856 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -603,6 +603,9 @@ MILLER(1)                                                            MILLER(1)
        -s {file name}           Take command-line flags from file name. For more
                                 information please see
                                 https://miller.readthedocs.io/en/latest/scripting/.
+       -x                       If any record has an error value in it, report it and
+                                stop the process. The default is to print the field
+                                value as `(error)` and continue.
 
 1mOUTPUT-COLORIZATION FLAGS0m
        Miller uses colors to highlight outputs. You can specify color preferences.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index a91bfe4a2..de6ca2f57 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -582,6 +582,9 @@ MILLER(1)                                                            MILLER(1)
        -s {file name}           Take command-line flags from file name. For more
                                 information please see
                                 https://miller.readthedocs.io/en/latest/scripting/.
+       -x                       If any record has an error value in it, report it and
+                                stop the process. The default is to print the field
+                                value as `(error)` and continue.
 
 1mOUTPUT-COLORIZATION FLAGS0m
        Miller uses colors to highlight outputs. You can specify color preferences.
diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md
index ba80fc3c0..5794fc49a 100644
--- a/docs/src/record-heterogeneity.md
+++ b/docs/src/record-heterogeneity.md
@@ -127,6 +127,8 @@ If you `mlr --csv cat` this, you'll get an error message:
 mlr --csv cat data/het/ragged.csv
 
+a,b,c
+1,2,3
 mlr: mlr: CSV header/data length mismatch 3 != 2 at filename data/het/ragged.csv row 3.
 .
 
diff --git a/docs/src/reference-dsl-errors.md b/docs/src/reference-dsl-errors.md index 872e7fcd9..fa9a74636 100644 --- a/docs/src/reference-dsl-errors.md +++ b/docs/src/reference-dsl-errors.md @@ -16,6 +16,55 @@ Quick links: # DSL errors and transparency +# Handling for data errors + +By default, Miller doesn't stop data processing for a single cell error. For example: + +
+mlr --csv --from data-error.csv cat
+
+
+x
+1
+2
+3
+text
+4
+
+ +
+mlr --csv --from data-error.csv put '$y = log10($x)'
+
+
+x,y
+1,0
+2,0.3010299956639812
+3,0.4771212547196624
+text,(error)
+4,0.6020599913279624
+
+ +If you do want to stop processing, though, you have three options. The first is the `mlr -x` flag: + +
+mlr -x --csv --from data-error.csv put '$y = log10($x)'
+
+
+x,y
+1,0
+2,0.3010299956639812
+3,0.4771212547196624
+mlr: data error at NR=4 FNR=4 FILENAME=data-error.csv
+mlr: field y: log10: unacceptable type string with value "text"
+mlr: exiting due to data error.
+
+ +The second is to put `-x` into your [`~/.mlrrc` file](customization.md). + +The third is to set the `MLR_FAIL_ON_DATA_ERROR` environment variable, which makes `-x` implicit. + +# Common causes of syntax errors + As soon as you have a [programming language](miller-programming-language.md), you start having the problem *What is my code doing, and why?* This includes getting syntax errors -- which are always annoying -- as well as the even more annoying problem of a program which parses without syntax error but doesn't do what you expect. The syntax-error message gives you line/column position for the syntax that couldn't be parsed. The cause may be clear from that information, or perhaps not. Here are some common causes of syntax errors: @@ -26,7 +75,7 @@ The syntax-error message gives you line/column position for the syntax that coul * Curly braces are required for the bodies of `if`/`while`/`for` blocks, even when the body is a single statement. -As for transparency: +# Transparency * As in any language, you can do `print`, or `eprint` to print to stderr. See [Print statements](reference-dsl-output-statements.md#print-statements); see also [Dump statements](reference-dsl-output-statements.md#dump-statements) and [Emit statements](reference-dsl-output-statements.md#emit-statements). diff --git a/docs/src/reference-dsl-errors.md.in b/docs/src/reference-dsl-errors.md.in index d50b4ed1a..5731d956e 100644 --- a/docs/src/reference-dsl-errors.md.in +++ b/docs/src/reference-dsl-errors.md.in @@ -1,5 +1,29 @@ # DSL errors and transparency +# Handling for data errors + +By default, Miller doesn't stop data processing for a single cell error. For example: + +GENMD-RUN-COMMAND +mlr --csv --from data-error.csv cat +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --csv --from data-error.csv put '$y = log10($x)' +GENMD-EOF + +If you do want to stop processing, though, you have three options. 
The first is the `mlr -x` flag: + +GENMD-RUN-COMMAND-TOLERATING-ERROR +mlr -x --csv --from data-error.csv put '$y = log10($x)' +GENMD-EOF + +The second is to put `-x` into your [`~/.mlrrc` file](customization.md). + +The third is to set the `MLR_FAIL_ON_DATA_ERROR` environment variable, which makes `-x` implicit. + +# Common causes of syntax errors + As soon as you have a [programming language](miller-programming-language.md), you start having the problem *What is my code doing, and why?* This includes getting syntax errors -- which are always annoying -- as well as the even more annoying problem of a program which parses without syntax error but doesn't do what you expect. The syntax-error message gives you line/column position for the syntax that couldn't be parsed. The cause may be clear from that information, or perhaps not. Here are some common causes of syntax errors: @@ -10,7 +34,7 @@ The syntax-error message gives you line/column position for the syntax that coul * Curly braces are required for the bodies of `if`/`while`/`for` blocks, even when the body is a single statement. -As for transparency: +# Transparency * As in any language, you can do `print`, or `eprint` to print to stderr. See [Print statements](reference-dsl-output-statements.md#print-statements); see also [Dump statements](reference-dsl-output-statements.md#dump-statements) and [Emit statements](reference-dsl-output-statements.md#emit-statements). diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 8e2daf9d0..0a93e12e2 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -289,6 +289,7 @@ These are flags which don't fit into any other category. * `-I`: Process files in-place. For each file name on the command line, output is written to a temp file in the same directory, which is then renamed over the original. 
Each file is processed in isolation: if the output format is CSV, CSV headers will be present in each output file, statistics are only over each file's own records; and so on. * `-n`: Process no input files, nor standard input either. Useful for `mlr put` with `begin`/`end` statements only. (Same as `--from /dev/null`.) Also useful in `mlr -n put -v '...'` for analyzing abstract syntax trees (if that's your thing). * `-s {file name}`: Take command-line flags from file name. For more information please see https://miller.readthedocs.io/en/latest/scripting/. +* `-x`: If any record has an error value in it, report it and stop the process. The default is to print the field value as `(error)` and continue. ## Output-colorization flags diff --git a/internal/pkg/bifs/arithmetic.go b/internal/pkg/bifs/arithmetic.go index 871874e7e..7728270ec 100644 --- a/internal/pkg/bifs/arithmetic.go +++ b/internal/pkg/bifs/arithmetic.go @@ -1,6 +1,7 @@ package bifs import ( + "fmt" "math" "github.com/johnkerl/miller/internal/pkg/lib" @@ -10,16 +11,20 @@ import ( // ================================================================ // Unary plus operator +func upos_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("+", input1) +} + var upos_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ _1u___, /*FLOAT */ _1u___, - /*BOOL */ _erro1, + /*BOOL */ upos_te, /*VOID */ _zero1, - /*STRING */ _erro1, + /*STRING */ upos_te, /*ARRAY */ _absn1, /*MAP */ _absn1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ upos_te, + /*ERROR */ upos_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -31,6 +36,10 @@ func BIF_plus_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { // ================================================================ // Unary minus operator +func uneg_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("-", input1) +} + func uneg_i_i(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(-input1.AcquireIntValue()) } @@ -42,13 
+51,13 @@ func uneg_f_f(input1 *mlrval.Mlrval) *mlrval.Mlrval { var uneg_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ uneg_i_i, /*FLOAT */ uneg_f_f, - /*BOOL */ _erro1, + /*BOOL */ uneg_te, /*VOID */ _zero1, - /*STRING */ _erro1, + /*STRING */ uneg_te, /*ARRAY */ _absn1, /*MAP */ _absn1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ uneg_te, + /*ERROR */ uneg_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -96,19 +105,23 @@ func plus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() + input2.AcquireFloatValue()) } +func plste(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("+", input1, input2) +} + var plus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {plus_n_ii, plus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {plus_f_fi, plus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {plus_n_ii, plus_f_if, plste, _1___, plste, _absn, _absn, plste, plste, _1___, _1___}, + /*FLOAT */ {plus_f_fi, plus_f_ff, plste, _1___, plste, _absn, _absn, plste, plste, _1___, _1___}, + /*BOOL */ {plste, plste, plste, plste, plste, _absn, _absn, plste, plste, plste, plste}, + /*VOID */ {_2___, _2___, plste, _void, plste, _absn, _absn, plste, plste, plste, _absn}, + /*STRING */ {plste, plste, plste, plste, plste, _absn, _absn, plste, plste, plste, plste}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, plste, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, plste, _absn, _absn, _absn}, + /*FUNC */ {plste, plste, plste, plste, plste, plste, plste, plste, plste, plste, plste}, + /*ERROR */ {plste, plste, plste, plste, plste, _absn, _absn, plste, plste, plste, plste}, + /*NULL */ {_2___, _2___, plste, plste, plste, _absn, _absn, plste, plste, _null, _absn}, + /*ABSENT */ {_2___, _2___, plste, _absn, plste, _absn, _absn, plste, plste, _absn, _absn}, } func BIF_plus_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -154,19 +167,23 @@ func minus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() - input2.AcquireFloatValue()) } +func mnste(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("-", input1, input2) +} + var minus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {minus_n_ii, minus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {minus_f_fi, minus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {minus_n_ii, minus_f_if, mnste, _1___, mnste, _absn, _absn, mnste, mnste, _1___, _1___}, + /*FLOAT */ {minus_f_fi, minus_f_ff, mnste, _1___, mnste, _absn, _absn, mnste, mnste, _1___, _1___}, + /*BOOL */ {mnste, mnste, mnste, mnste, mnste, _absn, _absn, mnste, mnste, mnste, mnste}, + /*VOID */ {_n2__, _n2__, mnste, _void, mnste, _absn, _absn, mnste, mnste, mnste, _absn}, + /*STRING */ {mnste, mnste, mnste, mnste, mnste, _absn, _absn, mnste, mnste, mnste, mnste}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, mnste, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, mnste, _absn, _absn, _absn}, + /*FUNC */ {mnste, mnste, mnste, mnste, mnste, mnste, mnste, mnste, mnste, mnste, mnste}, + /*ERROR */ {mnste, mnste, mnste, mnste, mnste, _absn, _absn, mnste, mnste, mnste, mnste}, + /*NULL */ {_2___, _2___, mnste, mnste, mnste, _absn, _absn, mnste, 
mnste, _null, _absn}, + /*ABSENT */ {_2___, _2___, mnste, _absn, mnste, _absn, _absn, mnste, mnste, _absn, _absn}, } func BIF_minus_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -228,19 +245,23 @@ func times_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() * input2.AcquireFloatValue()) } +func tmste(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("*", input1, input2) +} + var times_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {times_n_ii, times_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {times_f_fi, times_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {times_n_ii, times_f_if, tmste, _1___, tmste, _absn, _absn, tmste, tmste, _1___, _1___}, + /*FLOAT */ {times_f_fi, times_f_ff, tmste, _1___, tmste, _absn, _absn, tmste, tmste, _1___, _1___}, + /*BOOL */ {tmste, tmste, tmste, tmste, tmste, _absn, _absn, tmste, tmste, tmste, tmste}, + /*VOID */ {_2___, _2___, tmste, _void, tmste, _absn, _absn, tmste, 
tmste, tmste, _absn}, + /*STRING */ {tmste, tmste, tmste, tmste, tmste, _absn, _absn, tmste, tmste, tmste, tmste}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, tmste, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, tmste, _absn, _absn, _absn}, + /*FUNC */ {tmste, tmste, tmste, tmste, tmste, tmste, tmste, tmste, tmste, tmste, tmste}, + /*ERROR */ {tmste, tmste, tmste, tmste, tmste, _absn, _absn, tmste, tmste, tmste, tmste}, + /*NULL */ {_2___, _2___, tmste, tmste, tmste, _absn, _absn, tmste, tmste, _null, _absn}, + /*ABSENT */ {_2___, _2___, tmste, _absn, tmste, _absn, _absn, tmste, tmste, _absn, _absn}, } func BIF_times(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -291,19 +312,23 @@ func divide_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() / input2.AcquireFloatValue()) } +func dvdte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("/", input1, input2) +} + var divide_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {divide_n_ii, divide_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {divide_f_fi, divide_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_i0__, _f0__, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {divide_n_ii, divide_f_if, dvdte, _void, dvdte, _absn, _absn, dvdte, dvdte, _1___, _1___}, + /*FLOAT */ {divide_f_fi, divide_f_ff, dvdte, _void, dvdte, _absn, _absn, dvdte, dvdte, _1___, _1___}, + /*BOOL */ {dvdte, dvdte, dvdte, dvdte, dvdte, _absn, _absn, dvdte, dvdte, dvdte, dvdte}, + /*VOID */ {_void, _void, dvdte, _void, dvdte, _absn, _absn, dvdte, dvdte, dvdte, _absn}, + /*STRING */ {dvdte, dvdte, dvdte, dvdte, dvdte, _absn, _absn, dvdte, dvdte, dvdte, dvdte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dvdte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dvdte, _absn, _absn, _absn}, + /*FUNC */ {dvdte, dvdte, dvdte, dvdte, dvdte, dvdte, dvdte, dvdte, dvdte, dvdte, dvdte}, + /*ERROR */ {dvdte, dvdte, dvdte, dvdte, dvdte, _absn, _absn, dvdte, dvdte, dvdte, dvdte}, + /*NULL */ {_i0__, _f0__, dvdte, dvdte, dvdte, _absn, _absn, 
dvdte, dvdte, dvdte, _absn}, + /*ABSENT */ {_i0__, _f0__, dvdte, _absn, dvdte, _absn, _absn, dvdte, dvdte, _absn, _absn}, } func BIF_divide(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -352,19 +377,23 @@ func int_divide_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(math.Floor(input1.AcquireFloatValue() / input2.AcquireFloatValue())) } +func idvte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("//", input1, input2) +} + var int_divide_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {int_divide_n_ii, int_divide_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {int_divide_f_fi, int_divide_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {int_divide_n_ii, int_divide_f_if, idvte, _void, idvte, _absn, _absn, idvte, idvte, idvte, _1___}, + /*FLOAT */ {int_divide_f_fi, int_divide_f_ff, idvte, _void, idvte, _absn, _absn, idvte, idvte, idvte, _1___}, + /*BOOL */ {idvte, idvte, idvte, idvte, idvte, _absn, _absn, idvte, idvte, idvte, idvte}, + 
/*VOID */ {_void, _void, idvte, _void, idvte, _absn, _absn, idvte, idvte, idvte, _absn}, + /*STRING */ {idvte, idvte, idvte, idvte, idvte, _absn, _absn, idvte, idvte, idvte, idvte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, idvte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, idvte, _absn, _absn, _absn}, + /*FUNC */ {idvte, idvte, idvte, idvte, idvte, idvte, idvte, idvte, idvte, idvte, idvte}, + /*ERROR */ {idvte, idvte, idvte, idvte, idvte, _absn, _absn, idvte, idvte, idvte, idvte}, + /*NULL */ {idvte, idvte, idvte, idvte, idvte, _absn, _absn, idvte, idvte, idvte, _absn}, + /*ABSENT */ {_i0__, _f0__, idvte, _absn, idvte, _absn, _absn, idvte, idvte, _absn, _absn}, } func BIF_int_divide(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -388,19 +417,23 @@ func dotplus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() + input2.AcquireFloatValue()) } +func dplte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(".+", input1, input2) +} + var dot_plus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotplus_i_ii, dotplus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dotplus_f_fi, dotplus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_2___, _2___, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {dotplus_i_ii, dotplus_f_if, dplte, _1___, dplte, _absn, _absn, dplte, dplte, _1___, _1___}, + /*FLOAT */ {dotplus_f_fi, dotplus_f_ff, dplte, _1___, dplte, _absn, _absn, dplte, dplte, _1___, _1___}, + /*BOOL */ {dplte, dplte, dplte, dplte, dplte, _absn, _absn, dplte, dplte, dplte, dplte}, + /*VOID */ {_2___, _2___, dplte, _void, dplte, _absn, _absn, dplte, dplte, dplte, _absn}, + /*STRING */ {dplte, dplte, dplte, dplte, dplte, _absn, _absn, dplte, dplte, dplte, dplte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dplte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dplte, _absn, _absn, _absn}, + /*FUNC */ {dplte, dplte, dplte, dplte, dplte, dplte, dplte, dplte, dplte, dplte, dplte}, + /*ERROR */ {dplte, dplte, dplte, dplte, dplte, _absn, _absn, dplte, dplte, dplte, dplte}, + /*NULL */ {_2___, _2___, dplte, dplte, dplte, _absn, 
_absn, dplte, dplte, _null, _absn}, + /*ABSENT */ {_2___, _2___, dplte, _absn, dplte, _absn, _absn, dplte, dplte, _absn, _absn}, } func BIF_dot_plus(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -424,19 +457,23 @@ func dotminus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() - input2.AcquireFloatValue()) } +func dmnte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(".-", input1, input2) +} + var dotminus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotminus_i_ii, dotminus_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dotminus_f_fi, dotminus_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_n2__, _n2__, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _absn}, - /*ABSENT */ {_n2__, _n2__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {dotminus_i_ii, dotminus_f_if, dmnte, _1___, dmnte, _absn, _absn, dmnte, dmnte, _1___, _1___}, + /*FLOAT */ {dotminus_f_fi, dotminus_f_ff, dmnte, _1___, dmnte, _absn, _absn, dmnte, dmnte, _1___, _1___}, + /*BOOL */ {dmnte, dmnte, dmnte, dmnte, dmnte, _absn, _absn, dmnte, dmnte, dmnte, dmnte}, + /*VOID */ {_n2__, _n2__, 
dmnte, _void, dmnte, _absn, _absn, dmnte, dmnte, dmnte, _absn}, + /*STRING */ {dmnte, dmnte, dmnte, dmnte, dmnte, _absn, _absn, dmnte, dmnte, dmnte, dmnte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dmnte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dmnte, _absn, _absn, _absn}, + /*FUNC */ {dmnte, dmnte, dmnte, dmnte, dmnte, dmnte, dmnte, dmnte, dmnte, dmnte, dmnte}, + /*ERROR */ {dmnte, dmnte, dmnte, dmnte, dmnte, _absn, _absn, dmnte, dmnte, dmnte, dmnte}, + /*NULL */ {_n2__, _n2__, dmnte, dmnte, dmnte, _absn, _absn, dmnte, dmnte, _null, _absn}, + /*ABSENT */ {_n2__, _n2__, dmnte, _absn, dmnte, _absn, _absn, dmnte, dmnte, _absn, _absn}, } func BIF_dot_minus(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -460,19 +497,23 @@ func dottimes_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() * input2.AcquireFloatValue()) } +func dttte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(".*", input1, input2) +} + var dottimes_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dottimes_i_ii, dottimes_f_if, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {dottimes_f_fi, dottimes_f_ff, _erro, _1___, _erro, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_n2__, _n2__, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {dottimes_i_ii, dottimes_f_if, dttte, _1___, dttte, _absn, _absn, dttte, dttte, _1___, _1___}, + /*FLOAT */ {dottimes_f_fi, dottimes_f_ff, dttte, _1___, dttte, _absn, _absn, dttte, dttte, _1___, _1___}, + /*BOOL */ {dttte, dttte, dttte, dttte, dttte, _absn, _absn, dttte, dttte, dttte, dttte}, + /*VOID */ {_n2__, _n2__, dttte, _void, dttte, _absn, _absn, dttte, dttte, dttte, _absn}, + /*STRING */ {dttte, dttte, dttte, dttte, dttte, _absn, _absn, dttte, dttte, dttte, dttte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dttte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, dttte, _absn, _absn, _absn}, + /*FUNC */ {dttte, dttte, dttte, dttte, dttte, dttte, dttte, dttte, dttte, dttte, dttte}, + /*ERROR */ {dttte, dttte, dttte, dttte, dttte, _absn, _absn, dttte, dttte, dttte, dttte}, + /*NULL */ {_2___, _2___, dttte, dttte, dttte, 
_absn, _absn, dttte, dttte, dttte, _absn}, + /*ABSENT */ {_2___, _2___, dttte, _absn, dttte, _absn, _absn, dttte, dttte, _absn, _absn}, } func BIF_dot_times(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -496,19 +537,23 @@ func dotdivide_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(input1.AcquireFloatValue() / input2.AcquireFloatValue()) } +func ddvte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("./", input1, input2) +} + var dotdivide_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotdivide_i_ii, dotdivide_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {dotdivide_f_fi, dotdivide_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {dotdivide_i_ii, dotdivide_f_if, ddvte, _void, ddvte, _absn, _absn, ddvte, ddvte, ddvte, _1___}, + /*FLOAT */ {dotdivide_f_fi, dotdivide_f_ff, ddvte, _void, ddvte, _absn, _absn, ddvte, ddvte, ddvte, _1___}, + /*BOOL */ {ddvte, ddvte, ddvte, ddvte, ddvte, _absn, _absn, ddvte, ddvte, ddvte, ddvte}, + /*VOID */ 
{_void, _void, ddvte, _void, ddvte, _absn, _absn, ddvte, ddvte, ddvte, _absn}, + /*STRING */ {ddvte, ddvte, ddvte, ddvte, ddvte, _absn, _absn, ddvte, ddvte, ddvte, ddvte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, ddvte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, ddvte, _absn, _absn, _absn}, + /*FUNC */ {ddvte, ddvte, ddvte, ddvte, ddvte, ddvte, ddvte, ddvte, ddvte, ddvte, ddvte}, + /*ERROR */ {ddvte, ddvte, ddvte, ddvte, ddvte, _absn, _absn, ddvte, ddvte, ddvte, ddvte}, + /*NULL */ {ddvte, ddvte, ddvte, ddvte, ddvte, _absn, _absn, ddvte, ddvte, ddvte, _absn}, + /*ABSENT */ {_2___, _2___, ddvte, _absn, ddvte, _absn, _absn, ddvte, ddvte, _absn, _absn}, } func BIF_dot_divide(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -557,19 +602,23 @@ func dotidivide_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(math.Floor(input1.AcquireFloatValue() / input2.AcquireFloatValue())) } +func didte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(".//", input1, input2) +} + var dotidivide_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dotidivide_i_ii, dotidivide_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {dotidivide_f_fi, dotidivide_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _2___, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, + /*INT */ {dotidivide_i_ii, dotidivide_f_if, didte, _void, didte, _absn, _absn, didte, didte, didte, _1___}, + /*FLOAT */ {dotidivide_f_fi, dotidivide_f_ff, didte, _void, didte, _absn, _absn, didte, didte, didte, _1___}, + /*BOOL */ {didte, didte, didte, didte, didte, _absn, _absn, didte, didte, didte, didte}, + /*VOID */ {_void, _void, didte, _void, didte, _absn, _absn, didte, didte, didte, _absn}, + /*STRING */ {didte, didte, didte, didte, didte, _absn, _absn, didte, didte, didte, didte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, didte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, didte, _absn, _absn, _absn}, + /*FUNC */ {didte, didte, didte, didte, didte, didte, didte, didte, didte, didte, didte}, + /*ERROR */ {didte, didte, didte, didte, didte, _absn, _absn, didte, didte, didte, didte}, + /*NULL */ {didte, didte, 
didte, didte, didte, _absn, _absn, didte, didte, didte, _absn}, + /*ABSENT */ {_2___, _2___, didte, _absn, didte, _absn, _absn, didte, didte, didte, _absn}, } func BIF_dot_int_divide(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -621,19 +670,23 @@ func modulus_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(a - b*math.Floor(a/b)) } +func modte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("%", input1, input2) +} + var modulus_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {modulus_i_ii, modulus_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {modulus_f_fi, modulus_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {modulus_i_ii, modulus_f_if, modte, _void, modte, _absn, _absn, modte, modte, modte, _1___}, + /*FLOAT */ {modulus_f_fi, modulus_f_ff, modte, _void, modte, _absn, _absn, modte, modte, modte, _1___}, + /*BOOL */ {modte, modte, modte, modte, modte, _absn, _absn, modte, modte, modte, modte}, + /*VOID */ {_void, _void, modte, _void, 
modte, _absn, _absn, modte, modte, modte, _absn}, + /*STRING */ {modte, modte, modte, modte, modte, _absn, _absn, modte, modte, modte, modte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, modte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, modte, _absn, _absn, _absn}, + /*FUNC */ {modte, modte, modte, modte, modte, modte, modte, modte, modte, modte, modte}, + /*ERROR */ {modte, modte, modte, modte, modte, _absn, _absn, modte, modte, modte, modte}, + /*NULL */ {modte, modte, modte, modte, modte, _absn, _absn, modte, modte, modte, _absn}, + /*ABSENT */ {_i0__, _f0__, modte, _absn, modte, _absn, _absn, modte, modte, _absn, _absn}, } func BIF_modulus(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -685,7 +738,7 @@ func imodexp(a, e, m int64) int64 { return c } -func imodop(input1, input2, input3 *mlrval.Mlrval, iop i_iii_func) *mlrval.Mlrval { +func imodop(input1, input2, input3 *mlrval.Mlrval, iop i_iii_func, funcname string) *mlrval.Mlrval { if !input1.IsLegit() { return input1 } @@ -695,37 +748,42 @@ func imodop(input1, input2, input3 *mlrval.Mlrval, iop i_iii_func) *mlrval.Mlrva if !input3.IsLegit() { return input3 } - if !input1.IsInt() { - return mlrval.ERROR - } - if !input2.IsInt() { - return mlrval.ERROR - } - if !input3.IsInt() { - return mlrval.ERROR + if !input1.IsInt() || !input2.IsInt() || !input3.IsInt() { + return mlrval.FromTypeErrorTernary(funcname, input1, input2, input3) } - return mlrval.FromInt(iop(input1.AcquireIntValue(), input2.AcquireIntValue(), input3.AcquireIntValue())) + return mlrval.FromInt( + iop( + input1.AcquireIntValue(), + input2.AcquireIntValue(), + input3.AcquireIntValue(), + ), + ) } func BIF_mod_add(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - return imodop(input1, input2, input3, imodadd) + return imodop(input1, input2, input3, imodadd, "madd") } func BIF_mod_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - return imodop(input1, input2, input3, 
imodsub) + return imodop(input1, input2, input3, imodsub, "msub") } func BIF_mod_mul(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - return imodop(input1, input2, input3, imodmul) + return imodop(input1, input2, input3, imodmul, "mmul") } func BIF_mod_exp(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { // Pre-check for negative exponent - if input2.IsInt() && input2.AcquireIntValue() < 0 { - return mlrval.ERROR + i2, ok := input2.GetIntValue() + if ok && i2 < 0 { + return mlrval.FromError( + fmt.Errorf( + "mexp: negative exponent disallowed; got %d", i2, + ), + ) } - return imodop(input1, input2, input3, imodexp) + return imodop(input1, input2, input3, imodexp, "mexp") } // ================================================================ @@ -793,19 +851,23 @@ func min_s_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } } +func min_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("min", input1, input2) +} + var min_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {min_i_ii, min_f_if, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___}, - /*FLOAT */ {min_f_fi, min_f_ff, _1___, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___}, - /*BOOL */ {_2___, _2___, min_b_bb, _1___, _1___, _absn, _absn, _erro, _erro, _1___, _1___}, - /*VOID */ {_2___, _2___, _2___, _void, _void, _absn, _absn, _erro, _erro, _1___, _1___}, - /*STRING */ {_2___, _2___, _2___, _void, min_s_ss, _absn, _absn, _erro, _erro, _1___, _1___}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _null}, - /*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _absn}, + /*INT */ {min_i_ii, min_f_if, _1___, _1___, _1___, _absn, _absn, min_te, min_te, _1___, _1___}, + /*FLOAT */ {min_f_fi, min_f_ff, _1___, _1___, _1___, _absn, _absn, min_te, min_te, _1___, _1___}, + /*BOOL */ {_2___, _2___, min_b_bb, _1___, _1___, _absn, _absn, min_te, min_te, _1___, _1___}, + /*VOID */ {_2___, _2___, _2___, _void, _void, _absn, _absn, min_te, min_te, _1___, _1___}, + /*STRING */ {_2___, _2___, _2___, _void, min_s_ss, _absn, _absn, min_te, min_te, _1___, _1___}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, min_te, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, min_te, _absn, _absn, _absn}, + /*FUNC */ {min_te, min_te, min_te, min_te, min_te, min_te, min_te, min_te, min_te, min_te, min_te}, + /*ERROR */ {min_te, min_te, min_te, min_te, min_te, _absn, _absn, min_te, min_te, min_te, min_te}, + /*NULL */ {_2___, _2___, _2___, _2___, 
_2___, _absn, _absn, min_te, min_te, _null, _null}, + /*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, min_te, min_te, _null, _absn}, } // BIF_min_binary is not a direct DSL function. It's a helper here, @@ -853,6 +915,10 @@ func bif_min_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval { // if this is defined statically. So, we use a "package init" function. var min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{} +func min_unary_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("min", input1) +} + func init() { min_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ _1u___, @@ -862,8 +928,8 @@ func init() { /*STRING */ _1u___, /*ARRAY */ bif_min_unary_array, /*MAP */ bif_min_unary_map, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ min_unary_te, + /*ERROR */ min_unary_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -955,19 +1021,23 @@ func max_s_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } } +func max_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("max", input1, input2) +} + var max_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {max_i_ii, max_f_if, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _1___}, - /*FLOAT */ {max_f_fi, max_f_ff, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _null, _1___}, - /*BOOL */ {_1___, _1___, max_b_bb, _2___, _2___, _absn, _absn, _erro, _erro, _null, _1___}, - /*VOID */ {_1___, _1___, _1___, _void, _2___, _absn, _absn, _erro, _erro, _null, _1___}, - /*STRING */ {_1___, _1___, _1___, _1___, max_s_ss, _absn, _absn, _erro, _erro, _null, _1___}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _null, _erro}, - /*NULL */ {_null, _null, _null, _null, _null, _absn, _absn, _erro, _null, _null, _absn}, - /*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {max_i_ii, max_f_if, _2___, _2___, _2___, _absn, _absn, max_te, max_te, _null, _1___}, + /*FLOAT */ {max_f_fi, max_f_ff, _2___, _2___, _2___, _absn, _absn, max_te, max_te, _null, _1___}, + /*BOOL */ {_1___, _1___, max_b_bb, _2___, _2___, _absn, _absn, max_te, max_te, _null, _1___}, + /*VOID */ {_1___, _1___, _1___, _void, _2___, _absn, _absn, max_te, max_te, _null, _1___}, + /*STRING */ {_1___, _1___, _1___, _1___, max_s_ss, _absn, _absn, max_te, max_te, _null, _1___}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, max_te, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, max_te, _absn, _absn, _absn}, + /*FUNC */ {max_te, max_te, max_te, max_te, max_te, max_te, max_te, max_te, max_te, max_te, max_te}, + /*ERROR */ {max_te, max_te, max_te, max_te, max_te, _absn, _absn, max_te, max_te, _null, max_te}, + /*NULL */ {_null, _null, _null, _null, 
_null, _absn, _absn, max_te, _null, _null, _absn}, + /*ABSENT */ {_2___, _2___, _2___, _2___, _2___, _absn, _absn, max_te, max_te, _absn, _absn}, } // BIF_max_binary is not a direct DSL function. It's a helper here, @@ -1015,6 +1085,10 @@ func bif_max_unary_map(input1 *mlrval.Mlrval) *mlrval.Mlrval { // if this is defined statically. So, we use a "package init" function. var max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{} +func max_unary_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("max", input1) +} + func init() { max_unary_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ _1u___, @@ -1024,8 +1098,8 @@ func init() { /*STRING */ _1u___, /*ARRAY */ bif_max_unary_array, /*MAP */ bif_max_unary_map, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ max_unary_te, + /*ERROR */ max_unary_te, /*NULL */ _null1, /*ABSENT */ _absn1, } diff --git a/internal/pkg/bifs/base.go b/internal/pkg/bifs/base.go index 36aeb63d2..c0bf0e810 100644 --- a/internal/pkg/bifs/base.go +++ b/internal/pkg/bifs/base.go @@ -48,6 +48,8 @@ package bifs import ( + "fmt" + "github.com/johnkerl/miller/internal/pkg/lib" "github.com/johnkerl/miller/internal/pkg/mlrval" "github.com/johnkerl/miller/internal/pkg/types" @@ -70,7 +72,7 @@ type RegexCaptureBinaryFunc func(input *mlrval.Mlrval, sregex *mlrval.Mlrval) (* // Helps keystroke-saving for wrapping Go math-library functions // Examples: cos, sin, etc. type mathLibUnaryFunc func(float64) float64 -type mathLibUnaryFuncWrapper func(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval +type mathLibUnaryFuncWrapper func(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval // Function-pointer type for binary-operator disposition matrices. type BinaryFunc func(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval @@ -91,11 +93,6 @@ type ComparatorFunc func(*mlrval.Mlrval, *mlrval.Mlrval) int // reasonable rectangular even after gofmt has been run. 
// ---------------------------------------------------------------- -// Return error (unary) -func _erro1(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mlrval.ERROR -} - // Return absent (unary) func _absn1(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT @@ -126,12 +123,6 @@ func _1u___(input1 *mlrval.Mlrval) *mlrval.Mlrval { return input1 } -// ---------------------------------------------------------------- -// Return error (binary) -func _erro(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return mlrval.ERROR -} - // Return absent (binary) func _absn(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT @@ -254,3 +245,21 @@ func recurseBinaryFuncOnInput1(binaryFunc BinaryFunc, input1, input2 *mlrval.Mlr return binaryFunc(input1, input2) } } + +func type_error_named_argument( + funcname string, + expected_type_name string, + varname string, + varval *mlrval.Mlrval, +) *mlrval.Mlrval { + return mlrval.FromError( + fmt.Errorf( + "%s: %s should be a %s; got type %s with value %s", + funcname, + varname, + expected_type_name, + varval.GetTypeName(), + varval.StringMaybeQuoted(), + ), + ) +} diff --git a/internal/pkg/bifs/bits.go b/internal/pkg/bifs/bits.go index e309f7b4e..7fb786307 100644 --- a/internal/pkg/bifs/bits.go +++ b/internal/pkg/bifs/bits.go @@ -11,16 +11,20 @@ func bitwise_not_i_i(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(^input1.AcquireIntValue()) } +func bitwise_not_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("~", input1) +} + var bitwise_not_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ bitwise_not_i_i, - /*FLOAT */ _erro1, - /*BOOL */ _erro1, + /*FLOAT */ bitwise_not_te, + /*BOOL */ bitwise_not_te, /*VOID */ _void1, - /*STRING */ _erro1, + /*STRING */ bitwise_not_te, /*ARRAY */ _absn1, /*MAP */ _absn1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ bitwise_not_te, + /*ERROR */ bitwise_not_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -51,16 +55,20 @@ 
func bitcount_i_i(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(int64(a)) } +func bitcount_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("bitcount", input1) +} + var bitcount_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ bitcount_i_i, - /*FLOAT */ _erro1, - /*BOOL */ _erro1, + /*FLOAT */ bitcount_te, + /*BOOL */ bitcount_te, /*VOID */ _void1, - /*STRING */ _erro1, + /*STRING */ bitcount_te, /*ARRAY */ _absn1, /*MAP */ _absn1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ bitcount_te, + /*ERROR */ bitcount_te, /*NULL */ _zero1, /*ABSENT */ _absn1, } @@ -76,19 +84,23 @@ func bitwise_and_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(input1.AcquireIntValue() & input2.AcquireIntValue()) } +func bwandte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("&", input1, input2) +} + var bitwise_and_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {bitwise_and_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, 
_erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {bitwise_and_i_ii, bwandte, bwandte, _void, bwandte, _absn, _absn, bwandte, bwandte, bwandte, _1___}, + /*FLOAT */ {bwandte, bwandte, bwandte, _void, bwandte, _absn, _absn, bwandte, bwandte, bwandte, bwandte}, + /*BOOL */ {bwandte, bwandte, bwandte, bwandte, bwandte, _absn, _absn, bwandte, bwandte, bwandte, bwandte}, + /*VOID */ {_void, _void, bwandte, _void, bwandte, _absn, _absn, bwandte, bwandte, bwandte, _absn}, + /*STRING */ {bwandte, bwandte, bwandte, bwandte, bwandte, _absn, _absn, bwandte, bwandte, bwandte, bwandte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bwandte, _absn, bwandte, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bwandte, _absn, bwandte, _absn}, + /*FUNC */ {bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte}, + /*ERROR */ {bwandte, bwandte, bwandte, bwandte, bwandte, _absn, _absn, bwandte, bwandte, bwandte, bwandte}, + /*NULL */ {bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, bwandte, _absn}, + /*ABSENT */ {_2___, bwandte, bwandte, _absn, bwandte, _absn, _absn, bwandte, bwandte, _absn, _absn}, } func BIF_bitwise_and(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -102,19 +114,23 @@ func bitwise_or_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(input1.AcquireIntValue() | input2.AcquireIntValue()) } +func bworte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("|", input1, input2) +} + var bitwise_or_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {bitwise_or_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {bitwise_or_i_ii, bworte, bworte, _void, bworte, _absn, _absn, bworte, bworte, bworte, _1___}, + /*FLOAT */ {bworte, bworte, bworte, _void, bworte, _absn, _absn, bworte, bworte, bworte, bworte}, + /*BOOL */ {bworte, bworte, bworte, bworte, bworte, _absn, _absn, bworte, bworte, bworte, bworte}, + /*VOID */ {_void, _void, bworte, _void, bworte, _absn, _absn, bworte, bworte, bworte, _absn}, + /*STRING */ {bworte, bworte, bworte, bworte, bworte, _absn, _absn, bworte, bworte, bworte, bworte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bworte, _absn, bworte, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bworte, _absn, bworte, _absn}, + /*FUNC */ {bworte, bworte, bworte, bworte, bworte, bworte, bworte, bworte, bworte, bworte, bworte}, + /*ERROR */ {bworte, bworte, bworte, bworte, bworte, _absn, _absn, bworte, bworte, bworte, bworte}, + /*NULL */ {bworte, bworte, 
bworte, bworte, bworte, bworte, bworte, bworte, bworte, bworte, _absn}, + /*ABSENT */ {_2___, bworte, bworte, _absn, bworte, _absn, _absn, bworte, bworte, _absn, _absn}, } func BIF_bitwise_or(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -128,19 +144,23 @@ func bitwise_xor_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(input1.AcquireIntValue() ^ input2.AcquireIntValue()) } +func bwxorte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("^", input1, input2) +} + var bitwise_xor_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {bitwise_xor_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {bitwise_xor_i_ii, bwxorte, bwxorte, _void, bwxorte, _absn, _absn, bwxorte, bwxorte, bwxorte, _1___}, + /*FLOAT */ {bwxorte, bwxorte, bwxorte, _void, bwxorte, _absn, _absn, bwxorte, bwxorte, bwxorte, bwxorte}, + /*BOOL */ {bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, _absn, _absn, bwxorte, bwxorte, 
bwxorte, bwxorte}, + /*VOID */ {_void, _void, bwxorte, _void, bwxorte, _absn, _absn, bwxorte, bwxorte, bwxorte, _absn}, + /*STRING */ {bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, _absn, _absn, bwxorte, bwxorte, bwxorte, bwxorte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bwxorte, _absn, _absn, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, bwxorte, _absn, _absn, _absn}, + /*FUNC */ {bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte}, + /*ERROR */ {bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, _absn, _absn, bwxorte, bwxorte, bwxorte, bwxorte}, + /*NULL */ {bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, bwxorte, _absn}, + /*ABSENT */ {_2___, bwxorte, bwxorte, _absn, bwxorte, _absn, _absn, bwxorte, bwxorte, _absn, _absn}, } func BIF_bitwise_xor(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -154,19 +174,23 @@ func lsh_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(input1.AcquireIntValue() << uint64(input2.AcquireIntValue())) } +func lshfte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("<<", input1, input2) +} + var left_shift_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {lsh_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {lsh_i_ii, lshfte, lshfte, _void, lshfte, _absn, _absn, lshfte, lshfte, lshfte, _1___}, + /*FLOAT */ {lshfte, lshfte, lshfte, _void, lshfte, _absn, _absn, lshfte, lshfte, lshfte, lshfte}, + /*BOOL */ {lshfte, lshfte, lshfte, lshfte, lshfte, _absn, _absn, lshfte, lshfte, lshfte, lshfte}, + /*VOID */ {_void, _void, lshfte, _void, lshfte, _absn, _absn, lshfte, lshfte, lshfte, _absn}, + /*STRING */ {lshfte, lshfte, lshfte, lshfte, lshfte, _absn, _absn, lshfte, lshfte, lshfte, lshfte}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, lshfte, _absn, lshfte, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, lshfte, _absn, lshfte, _absn}, + /*FUNC */ {lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, lshfte}, + /*ERROR */ {lshfte, lshfte, lshfte, lshfte, lshfte, _absn, _absn, lshfte, lshfte, lshfte, lshfte}, + /*NULL */ {lshfte, lshfte, lshfte, lshfte, 
lshfte, lshfte, lshfte, lshfte, lshfte, lshfte, _absn}, + /*ABSENT */ {_2___, lshfte, lshfte, _absn, lshfte, _absn, _absn, lshfte, lshfte, _absn, _absn}, } func BIF_left_shift(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -180,19 +204,23 @@ func srsh_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(input1.AcquireIntValue() >> uint64(input2.AcquireIntValue())) } +func srste(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(">>", input1, input2) +} + var signed_right_shift_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {srsh_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {srsh_i_ii, srste, srste, _void, srste, _absn, _absn, srste, srste, srste, _1___}, + /*FLOAT */ {srste, srste, srste, _void, srste, _absn, _absn, srste, srste, srste, srste}, + /*BOOL */ {srste, srste, srste, srste, srste, _absn, _absn, srste, srste, srste, srste}, + /*VOID */ {_void, _void, srste, _void, srste, 
_absn, _absn, srste, srste, srste, _absn}, + /*STRING */ {srste, srste, srste, srste, srste, _absn, _absn, srste, srste, srste, srste}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, srste, _absn, srste, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, srste, _absn, srste, _absn}, + /*FUNC */ {srste, srste, srste, srste, srste, srste, srste, srste, srste, srste, srste}, + /*ERROR */ {srste, srste, srste, srste, srste, _absn, _absn, srste, srste, srste, srste}, + /*NULL */ {srste, srste, srste, srste, srste, srste, srste, srste, srste, srste, _absn}, + /*ABSENT */ {_2___, srste, srste, _absn, srste, _absn, _absn, srste, srste, _absn, _absn}, } func BIF_signed_right_shift(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -209,19 +237,23 @@ func ursh_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(int64(uc)) } +func rste(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(">>>", input1, input2) +} + var unsigned_right_shift_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {ursh_i_ii, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {_erro, _erro, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_2___, _erro, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {ursh_i_ii, rste, rste, _void, rste, _absn, _absn, rste, rste, rste, _1___}, + /*FLOAT */ {rste, rste, rste, _void, rste, _absn, _absn, rste, rste, rste, rste}, + /*BOOL */ {rste, rste, rste, rste, rste, _absn, _absn, rste, rste, rste, rste}, + /*VOID */ {_void, _void, rste, _void, rste, _absn, _absn, rste, rste, rste, _absn}, + /*STRING */ {rste, rste, rste, rste, rste, _absn, _absn, rste, rste, rste, rste}, + /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, rste, _absn, rste, _absn}, + /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, rste, _absn, rste, _absn}, + /*FUNC */ {rste, rste, rste, rste, rste, rste, rste, rste, rste, rste, rste}, + /*ERROR */ {rste, rste, rste, rste, rste, _absn, _absn, rste, rste, rste, rste}, + /*NULL */ {rste, rste, rste, rste, rste, rste, rste, rste, rste, rste, _absn}, + /*ABSENT */ {_2___, rste, rste, _absn, rste, _absn, _absn, rste, rste, _absn, 
_absn}, } func BIF_unsigned_right_shift(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/booleans.go b/internal/pkg/bifs/booleans.go index cef3387d5..da77c6199 100644 --- a/internal/pkg/bifs/booleans.go +++ b/internal/pkg/bifs/booleans.go @@ -12,7 +12,7 @@ func BIF_logical_NOT(input1 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsBool() { return mlrval.FromBool(!input1.AcquireBoolValue()) } else { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("!", input1) } } @@ -20,7 +20,7 @@ func BIF_logical_AND(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsBool() && input2.IsBool() { return mlrval.FromBool(input1.AcquireBoolValue() && input2.AcquireBoolValue()) } else { - return mlrval.ERROR + return mlrval.FromTypeErrorBinary("&&", input1, input2) } } @@ -28,7 +28,7 @@ func BIF_logical_OR(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsBool() && input2.IsBool() { return mlrval.FromBool(input1.AcquireBoolValue() || input2.AcquireBoolValue()) } else { - return mlrval.ERROR + return mlrval.FromTypeErrorBinary("||", input1, input2) } } @@ -36,6 +36,6 @@ func BIF_logical_XOR(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsBool() && input2.IsBool() { return mlrval.FromBool(input1.AcquireBoolValue() != input2.AcquireBoolValue()) } else { - return mlrval.ERROR + return mlrval.FromTypeErrorBinary("^^", input1, input2) } } diff --git a/internal/pkg/bifs/cmp.go b/internal/pkg/bifs/cmp.go index ede417055..7d31cb7ee 100644 --- a/internal/pkg/bifs/cmp.go +++ b/internal/pkg/bifs/cmp.go @@ -270,111 +270,139 @@ func ne_b_mm(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // if this is defined statically. So, we use a "package init" function. var eq_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{} +func eqte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("==", input1, input2) +} + func init() { eq_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {eq_b_ii, eq_b_if, _fals, eq_b_xs, eq_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*FLOAT */ {eq_b_fi, eq_b_ff, _fals, eq_b_xs, eq_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*BOOL */ {_fals, _fals, eq_b_bb, _fals, _fals, _fals, _fals, _erro, _erro, _fals, _absn}, - /*VOID */ {eq_b_sx, eq_b_sx, _fals, eq_b_ss, eq_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*STRING */ {eq_b_sx, eq_b_sx, _fals, eq_b_ss, eq_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, eq_b_aa, _fals, _erro, _erro, _fals, _absn}, - /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, eq_b_mm, _erro, _erro, _fals, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*NULL */ {_fals, _fals, _fals, _fals, _fals, _fals, _fals, _erro, _erro, _true, _absn}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {eq_b_ii, eq_b_if, _fals, eq_b_xs, eq_b_xs, _fals, _fals, eqte, eqte, _fals, _absn}, + /*FLOAT */ {eq_b_fi, eq_b_ff, _fals, eq_b_xs, eq_b_xs, _fals, _fals, eqte, eqte, _fals, _absn}, + /*BOOL */ {_fals, _fals, eq_b_bb, _fals, _fals, _fals, _fals, eqte, eqte, _fals, _absn}, + /*VOID */ {eq_b_sx, eq_b_sx, _fals, eq_b_ss, eq_b_ss, _fals, _fals, eqte, eqte, _fals, _absn}, + /*STRING */ {eq_b_sx, eq_b_sx, _fals, eq_b_ss, eq_b_ss, _fals, _fals, eqte, eqte, _fals, _absn}, + /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, eq_b_aa, _fals, eqte, eqte, _fals, _absn}, + /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, eq_b_mm, eqte, eqte, _fals, _absn}, + /*FUNC */ {eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte}, + /*ERROR */ {eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte, eqte}, + /*NULL */ {_fals, _fals, _fals, _fals, _fals, _fals, _fals, eqte, 
eqte, _true, _absn}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, eqte, eqte, _absn, _absn}, } } +func nete(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("!=", input1, input2) +} + var ne_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {ne_b_ii, ne_b_if, _true, ne_b_xs, ne_b_xs, _true, _true, _erro, _erro, _true, _absn}, - /*FLOAT */ {ne_b_fi, ne_b_ff, _true, ne_b_xs, ne_b_xs, _true, _true, _erro, _erro, _true, _absn}, - /*BOOL */ {_true, _true, ne_b_bb, _true, _true, _true, _true, _erro, _erro, _true, _absn}, - /*VOID */ {ne_b_sx, ne_b_sx, _true, ne_b_ss, ne_b_ss, _true, _true, _erro, _erro, _true, _absn}, - /*STRING */ {ne_b_sx, ne_b_sx, _true, ne_b_ss, ne_b_ss, _true, _true, _erro, _erro, _true, _absn}, - /*ARRAY */ {_true, _true, _true, _true, _true, ne_b_aa, _true, _erro, _erro, _true, _absn}, - /*MAP */ {_true, _true, _true, _true, _true, _true, ne_b_mm, _erro, _erro, _true, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*NULL */ {_true, _true, _true, _true, _true, _true, _true, _erro, _erro, _fals, _absn}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _absn, _absn}, + /*INT */ {ne_b_ii, ne_b_if, _true, ne_b_xs, ne_b_xs, _true, _true, nete, nete, _true, _absn}, + /*FLOAT */ {ne_b_fi, ne_b_ff, _true, ne_b_xs, ne_b_xs, _true, _true, nete, nete, _true, _absn}, + /*BOOL */ {_true, _true, ne_b_bb, _true, _true, _true, _true, nete, nete, _true, _absn}, + /*VOID */ {ne_b_sx, ne_b_sx, _true, ne_b_ss, ne_b_ss, _true, _true, nete, nete, _true, _absn}, + /*STRING */ {ne_b_sx, ne_b_sx, _true, ne_b_ss, ne_b_ss, _true, _true, nete, nete, _true, _absn}, + /*ARRAY */ {_true, _true, _true, _true, _true, ne_b_aa, _true, nete, nete, _true, _absn}, + /*MAP */ 
{_true, _true, _true, _true, _true, _true, ne_b_mm, nete, nete, _true, _absn}, + /*FUNC */ {nete, nete, nete, nete, nete, nete, nete, nete, nete, nete, nete}, + /*ERROR */ {nete, nete, nete, nete, nete, nete, nete, nete, nete, nete, nete}, + /*NULL */ {_true, _true, _true, _true, _true, _true, _true, nete, nete, _fals, _absn}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, nete, nete, _absn, _absn}, +} + +func gtte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(">", input1, input2) } var gt_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {gt_b_ii, gt_b_if, _fals, gt_b_xs, gt_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*FLOAT */ {gt_b_fi, gt_b_ff, _fals, gt_b_xs, gt_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*BOOL */ {_fals, _fals, gt_b_bb, _fals, _fals, _fals, _fals, _erro, _erro, _fals, _absn}, - /*VOID */ {gt_b_sx, gt_b_sx, _fals, gt_b_ss, gt_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*STRING */ {gt_b_sx, gt_b_sx, _fals, gt_b_ss, gt_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, _erro, _fals, _erro, _erro, _fals, _absn}, - /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, _erro, _erro, _erro, _fals, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _fals, _erro}, - /*NULL */ {_true, _true, _true, _true, _true, _absn, _absn, _erro, _true, _fals, _fals}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _true, _absn}, + /*INT */ {gt_b_ii, gt_b_if, _fals, gt_b_xs, gt_b_xs, _fals, _fals, gtte, gtte, _fals, _absn}, + /*FLOAT */ {gt_b_fi, gt_b_ff, _fals, gt_b_xs, gt_b_xs, _fals, _fals, gtte, gtte, _fals, _absn}, + /*BOOL */ {_fals, _fals, gt_b_bb, _fals, _fals, _fals, _fals, gtte, gtte, _fals, _absn}, + 
/*VOID */ {gt_b_sx, gt_b_sx, _fals, gt_b_ss, gt_b_ss, _fals, _fals, gtte, gtte, _fals, _absn}, + /*STRING */ {gt_b_sx, gt_b_sx, _fals, gt_b_ss, gt_b_ss, _fals, _fals, gtte, gtte, _fals, _absn}, + /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, gtte, _fals, gtte, gtte, _fals, _absn}, + /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, gtte, gtte, gtte, _fals, _absn}, + /*FUNC */ {gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte}, + /*ERROR */ {gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte, gtte, _fals, gtte}, + /*NULL */ {_true, _true, _true, _true, _true, _absn, _absn, gtte, _true, _fals, _fals}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, gtte, gtte, _true, _absn}, +} + +func gete(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(">=", input1, input2) } var ge_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {ge_b_ii, ge_b_if, _fals, ge_b_xs, ge_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*FLOAT */ {ge_b_fi, ge_b_ff, _fals, ge_b_xs, ge_b_xs, _fals, _fals, _erro, _erro, _fals, _absn}, - /*BOOL */ {_fals, _fals, ge_b_bb, _fals, _fals, _fals, _fals, _erro, _erro, _fals, _absn}, - /*VOID */ {ge_b_sx, ge_b_sx, _fals, ge_b_ss, ge_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*STRING */ {ge_b_sx, ge_b_sx, _fals, ge_b_ss, ge_b_ss, _fals, _fals, _erro, _erro, _fals, _absn}, - /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, _erro, _fals, _erro, _erro, _fals, _absn}, - /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, _erro, _erro, _erro, _fals, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _fals, _erro}, - /*NULL */ {_true, _true, _true, _true, _true, _absn, _absn, _erro, _true, _true, _fals}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _true, 
_absn}, + /*INT */ {ge_b_ii, ge_b_if, _fals, ge_b_xs, ge_b_xs, _fals, _fals, gete, gete, _fals, _absn}, + /*FLOAT */ {ge_b_fi, ge_b_ff, _fals, ge_b_xs, ge_b_xs, _fals, _fals, gete, gete, _fals, _absn}, + /*BOOL */ {_fals, _fals, ge_b_bb, _fals, _fals, _fals, _fals, gete, gete, _fals, _absn}, + /*VOID */ {ge_b_sx, ge_b_sx, _fals, ge_b_ss, ge_b_ss, _fals, _fals, gete, gete, _fals, _absn}, + /*STRING */ {ge_b_sx, ge_b_sx, _fals, ge_b_ss, ge_b_ss, _fals, _fals, gete, gete, _fals, _absn}, + /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, gete, _fals, gete, gete, _fals, _absn}, + /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, gete, gete, gete, _fals, _absn}, + /*FUNC */ {gete, gete, gete, gete, gete, gete, gete, gete, gete, gete, gete}, + /*ERROR */ {gete, gete, gete, gete, gete, gete, gete, gete, gete, _fals, gete}, + /*NULL */ {_true, _true, _true, _true, _true, _absn, _absn, gete, _true, _true, _fals}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, gete, gete, _true, _absn}, +} + +func ltte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("<", input1, input2) } var lt_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {lt_b_ii, lt_b_if, _fals, lt_b_xs, lt_b_xs, _fals, _fals, _erro, _erro, _true, _absn}, - /*FLOAT */ {lt_b_fi, lt_b_ff, _fals, lt_b_xs, lt_b_xs, _fals, _fals, _erro, _erro, _true, _absn}, - /*BOOL */ {_fals, _fals, lt_b_bb, _fals, _fals, _fals, _fals, _erro, _erro, _true, _absn}, - /*VOID */ {lt_b_sx, lt_b_sx, _fals, lt_b_ss, lt_b_ss, _fals, _fals, _erro, _erro, _true, _absn}, - /*STRING */ {lt_b_sx, lt_b_sx, _fals, lt_b_ss, lt_b_ss, _fals, _fals, _erro, _erro, _true, _absn}, - /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, _erro, _fals, _erro, _erro, _absn, _absn}, - /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, _erro, _erro, _erro, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _true, _erro}, - /*NULL */ {_fals, _fals, _fals, _fals, _fals, _absn, _absn, _erro, _fals, _fals, _true}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _fals, _absn}, + /*INT */ {lt_b_ii, lt_b_if, _fals, lt_b_xs, lt_b_xs, _fals, _fals, ltte, ltte, _true, _absn}, + /*FLOAT */ {lt_b_fi, lt_b_ff, _fals, lt_b_xs, lt_b_xs, _fals, _fals, ltte, ltte, _true, _absn}, + /*BOOL */ {_fals, _fals, lt_b_bb, _fals, _fals, _fals, _fals, ltte, ltte, _true, _absn}, + /*VOID */ {lt_b_sx, lt_b_sx, _fals, lt_b_ss, lt_b_ss, _fals, _fals, ltte, ltte, _true, _absn}, + /*STRING */ {lt_b_sx, lt_b_sx, _fals, lt_b_ss, lt_b_ss, _fals, _fals, ltte, ltte, _true, _absn}, + /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, ltte, _fals, ltte, ltte, _absn, _absn}, + /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, ltte, ltte, ltte, _absn, _absn}, + /*FUNC */ {ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte}, + /*ERROR */ {ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte, ltte, _true, ltte}, + /*NULL */ {_fals, _fals, _fals, _fals, _fals, _absn, _absn, ltte, _fals, 
_fals, _true}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, ltte, ltte, _fals, _absn}, +} + +func lete(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("<=", input1, input2) } var le_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {le_b_ii, le_b_if, _fals, le_b_xs, le_b_xs, _fals, _fals, _erro, _erro, _true, _absn}, - /*FLOAT */ {le_b_fi, le_b_ff, _fals, le_b_xs, le_b_xs, _fals, _fals, _erro, _erro, _true, _absn}, - /*BOOL */ {_fals, _fals, le_b_bb, _fals, _fals, _fals, _fals, _erro, _erro, _true, _absn}, - /*VOID */ {le_b_sx, le_b_sx, _fals, le_b_ss, le_b_ss, _fals, _fals, _erro, _erro, _true, _absn}, - /*STRING */ {le_b_sx, le_b_sx, _fals, le_b_ss, le_b_ss, _fals, _fals, _erro, _erro, _true, _absn}, - /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, _erro, _fals, _erro, _erro, _absn, _absn}, - /*MAP */ {_fals, _fals, _fals, _fals, _fals, _fals, _erro, _erro, _erro, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _true, _erro}, - /*NULL */ {_fals, _fals, _fals, _fals, _fals, _absn, _absn, _erro, _fals, _true, _true}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _fals, _absn}, + /*INT */ {le_b_ii, le_b_if, _fals, le_b_xs, le_b_xs, _fals, _fals, lete, lete, _true, _absn}, + /*FLOAT */ {le_b_fi, le_b_ff, _fals, le_b_xs, le_b_xs, _fals, _fals, lete, lete, _true, _absn}, + /*BOOL */ {_fals, _fals, le_b_bb, _fals, _fals, _fals, _fals, lete, lete, _true, _absn}, + /*VOID */ {le_b_sx, le_b_sx, _fals, le_b_ss, le_b_ss, _fals, _fals, lete, lete, _true, _absn}, + /*STRING */ {le_b_sx, le_b_sx, _fals, le_b_ss, le_b_ss, _fals, _fals, lete, lete, _true, _absn}, + /*ARRAY */ {_fals, _fals, _fals, _fals, _fals, lete, _fals, lete, lete, _absn, _absn}, + /*MAP */ {_fals, _fals, 
_fals, _fals, _fals, _fals, lete, lete, lete, _absn, _absn}, + /*FUNC */ {lete, lete, lete, lete, lete, lete, lete, lete, lete, lete, lete}, + /*ERROR */ {lete, lete, lete, lete, lete, lete, lete, lete, lete, _true, lete}, + /*NULL */ {_fals, _fals, _fals, _fals, _fals, _absn, _absn, lete, _fals, _true, _true}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, lete, lete, _fals, _absn}, +} + +func cmpte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("<=>", input1, input2) } var cmp_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {cmp_b_ii, cmp_b_if, _less, cmp_b_xs, cmp_b_xs, _less, _less, _erro, _erro, _true, _absn}, - /*FLOAT */ {cmp_b_fi, cmp_b_ff, _less, cmp_b_xs, cmp_b_xs, _less, _less, _erro, _erro, _true, _absn}, - /*BOOL */ {_more, _more, cmp_b_bb, _less, _less, _less, _less, _erro, _erro, _true, _absn}, - /*VOID */ {cmp_b_sx, cmp_b_sx, _more, cmp_b_ss, cmp_b_ss, _less, _less, _erro, _erro, _true, _absn}, - /*STRING */ {cmp_b_sx, cmp_b_sx, _more, cmp_b_ss, cmp_b_ss, _less, _less, _erro, _erro, _true, _absn}, - /*ARRAY */ {_more, _more, _more, _more, _more, _erro, _less, _erro, _erro, _absn, _absn}, - /*MAP */ {_more, _more, _more, _more, _more, _more, _erro, _erro, _erro, _absn, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _true, _erro}, - /*NULL */ {_more, _more, _more, _more, _more, _absn, _absn, _erro, _more, _same, _true}, - /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _erro, _more, _absn}, + /*INT */ {cmp_b_ii, cmp_b_if, _less, cmp_b_xs, cmp_b_xs, _less, _less, cmpte, cmpte, _true, _absn}, + /*FLOAT */ {cmp_b_fi, cmp_b_ff, _less, cmp_b_xs, cmp_b_xs, _less, _less, cmpte, cmpte, _true, _absn}, + /*BOOL */ {_more, _more, cmp_b_bb, _less, _less, _less, _less, cmpte, 
cmpte, _true, _absn}, + /*VOID */ {cmp_b_sx, cmp_b_sx, _more, cmp_b_ss, cmp_b_ss, _less, _less, cmpte, cmpte, _true, _absn}, + /*STRING */ {cmp_b_sx, cmp_b_sx, _more, cmp_b_ss, cmp_b_ss, _less, _less, cmpte, cmpte, _true, _absn}, + /*ARRAY */ {_more, _more, _more, _more, _more, cmpte, _less, cmpte, cmpte, _absn, _absn}, + /*MAP */ {_more, _more, _more, _more, _more, _more, cmpte, cmpte, cmpte, _absn, _absn}, + /*FUNC */ {cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte}, + /*ERROR */ {cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, cmpte, _true, cmpte}, + /*NULL */ {_more, _more, _more, _more, _more, _absn, _absn, cmpte, _more, _same, _true}, + /*ABSENT */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, cmpte, cmpte, _more, _absn}, } func BIF_equals(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/collections.go b/internal/pkg/bifs/collections.go index 6dd523b6a..5278b5b6a 100644 --- a/internal/pkg/bifs/collections.go +++ b/internal/pkg/bifs/collections.go @@ -65,6 +65,10 @@ func depth_from_scalar(input1 *mlrval.Mlrval) *mlrval.Mlrval { // if this is defined statically. So, we use a "package init" function. 
var depth_dispositions = [mlrval.MT_DIM]UnaryFunc{} +func depth_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("depth", input1) +} + func init() { depth_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ depth_from_scalar, @@ -74,8 +78,8 @@ func init() { /*STRING */ depth_from_scalar, /*ARRAY */ depth_from_array, /*MAP */ depth_from_map, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ depth_te, + /*ERROR */ depth_te, /*NULL */ _zero1, /*ABSENT */ _absn1, } @@ -134,6 +138,10 @@ func leafcount_from_scalar(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(1) } +func leafcount_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("leafcount", input1) +} + var leafcount_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ leafcount_from_scalar, /*FLOAT */ leafcount_from_scalar, @@ -142,8 +150,8 @@ var leafcount_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*STRING */ leafcount_from_scalar, /*ARRAY */ leafcount_from_array, /*MAP */ leafcount_from_map, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*FUNC */ leafcount_te, + /*ERROR */ leafcount_te, /*NULL */ _zero1, /*ABSENT */ _absn1, } @@ -158,7 +166,7 @@ func has_key_in_array(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FALSE } if !input2.IsInt() { - return mlrval.ERROR + return mlrval.FromNotIntError("haskey", input2) } arrayval := input1.AcquireArrayValue() _, ok := unaliasArrayIndex(&arrayval, int(input2.AcquireIntValue())) @@ -169,7 +177,7 @@ func has_key_in_map(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if input2.IsString() || input2.IsInt() { return mlrval.FromBool(input1.AcquireMapValue().Has(input2.String())) } else { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("haskey", input2, "string or int") } } @@ -179,7 +187,7 @@ func BIF_haskey(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } else if input1.IsMap() { return has_key_in_map(input1, input2) } else { - return mlrval.ERROR + return 
mlrval.FromNotCollectionError("haskey", input1) } } @@ -204,10 +212,10 @@ func BIF_concat(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { // ================================================================ func BIF_mapselect(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { if len(mlrvals) < 1 { - return mlrval.ERROR + return mlrval.FromErrorString("mapselect: received a zero-length array as input") } if !mlrvals[0].IsMap() { - return mlrval.ERROR + return mlrval.FromNotMapError("mapselect", mlrvals[0]) } oldmap := mlrvals[0].AcquireMapValue() newMap := mlrval.NewMlrmap() @@ -223,11 +231,11 @@ func BIF_mapselect(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { if element.IsString() { newKeys[element.AcquireStringValue()] = true } else { - return mlrval.ERROR + return mlrval.FromNotStringError("mapselect", element) } } } else { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("mapselect", selectArg, "string, int, or array") } } @@ -245,10 +253,10 @@ func BIF_mapselect(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { // ---------------------------------------------------------------- func BIF_mapexcept(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { if len(mlrvals) < 1 { - return mlrval.ERROR + return mlrval.FromErrorString("mapexcept: received a zero-length array as input") } if !mlrvals[0].IsMap() { - return mlrval.ERROR + return mlrval.FromNotMapError("mapexcept", mlrvals[0]) } newMap := mlrvals[0].AcquireMapValue().Copy() @@ -262,11 +270,11 @@ func BIF_mapexcept(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { if element.IsString() { newMap.Remove(element.AcquireStringValue()) } else { - return mlrval.ERROR + return mlrval.FromNotStringError("mapselect", element) } } } else { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("mapexcept", exceptArg, "string, int, or array") } } @@ -282,13 +290,13 @@ func BIF_mapsum(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { return mlrvals[0] } if mlrvals[0].Type() != mlrval.MT_MAP { - return mlrval.ERROR + return 
mlrval.FromNotMapError("mapsum", mlrvals[0]) } newMap := mlrvals[0].AcquireMapValue().Copy() for _, otherMapArg := range mlrvals[1:] { if otherMapArg.Type() != mlrval.MT_MAP { - return mlrval.ERROR + return mlrval.FromNotMapError("mapsum", otherMapArg) } for pe := otherMapArg.AcquireMapValue().Head; pe != nil; pe = pe.Next { @@ -308,13 +316,13 @@ func BIF_mapdiff(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { return mlrvals[0] } if !mlrvals[0].IsMap() { - return mlrval.ERROR + return mlrval.FromNotMapError("mapdiff", mlrvals[0]) } newMap := mlrvals[0].AcquireMapValue().Copy() for _, otherMapArg := range mlrvals[1:] { if !otherMapArg.IsMap() { - return mlrval.ERROR + return mlrval.FromNotMapError("mapdiff", otherMapArg) } for pe := otherMapArg.AcquireMapValue().Head; pe != nil; pe = pe.Next { @@ -330,7 +338,7 @@ func BIF_mapdiff(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { // joink({"a":3,"b":4,"c":5}, ",") -> "a,b,c" func BIF_joink(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("joink", input2) } fieldSeparator := input2.AcquireStringValue() if input1.IsMap() { @@ -357,7 +365,7 @@ func BIF_joink(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromString(buffer.String()) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("joink", input1) } } @@ -366,7 +374,7 @@ func BIF_joink(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // joinv({"a":3,"b":4,"c":5}, ",") -> "3,4,5" func BIF_joinv(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("joinv", input2) } fieldSeparator := input2.AcquireStringValue() @@ -393,7 +401,7 @@ func BIF_joinv(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromString(buffer.String()) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("joinv", input1) } } @@ -402,11 +410,11 @@ func BIF_joinv(input1, input2 *mlrval.Mlrval) 
*mlrval.Mlrval { // joinkv({"a":3,"b":4,"c":5}, "=", ",") -> "a=3,b=4,c=5" func BIF_joinkv(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("joinkv", input2) } pairSeparator := input2.AcquireStringValue() if !input3.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("joinkv", input3) } fieldSeparator := input3.AcquireStringValue() @@ -438,7 +446,7 @@ func BIF_joinkv(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromString(buffer.String()) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("joinkv", input1) } } @@ -446,14 +454,14 @@ func BIF_joinkv(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { // splitkv("a=3,b=4,c=5", "=", ",") -> {"a":3,"b":4,"c":5} func BIF_splitkv(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkv", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkv", input2) } pairSeparator := input2.AcquireStringValue() if !input3.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkv", input3) } fieldSeparator := input3.AcquireStringValue() @@ -481,14 +489,14 @@ func BIF_splitkv(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { // splitkvx("a=3,b=4,c=5", "=", ",") -> {"a":"3","b":"4","c":"5"} func BIF_splitkvx(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkvx", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkvx", input2) } pairSeparator := input2.AcquireStringValue() if !input3.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitkvx", input3) } fieldSeparator := input3.AcquireStringValue() @@ -517,10 +525,10 @@ func BIF_splitkvx(input1, input2, input3 
*mlrval.Mlrval) *mlrval.Mlrval { // splitnv("a,b,c", ",") -> {"1":"a","2":"b","3":"c"} func BIF_splitnv(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitnv", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitnv", input2) } output := mlrval.FromMap(mlrval.NewMlrmap()) @@ -539,10 +547,10 @@ func BIF_splitnv(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // splitnvx("3,4,5", ",") -> {"1":"3","2":"4","3":"5"} func BIF_splitnvx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitnvx", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitnvx", input2) } output := mlrval.FromMap(mlrval.NewMlrmap()) @@ -561,10 +569,10 @@ func BIF_splitnvx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // splita("3,4,5", ",") -> [3,4,5] func BIF_splita(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splita", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splita", input2) } fieldSeparator := input2.AcquireStringValue() @@ -585,10 +593,10 @@ func BIF_splita(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // e.g. 
splitax("3,4,5", ",") -> ["3","4","5"] func BIF_splitax(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitax", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("splitax", input2) } input := input1.AcquireStringValue() fieldSeparator := input2.AcquireStringValue() @@ -632,7 +640,7 @@ func BIF_get_keys(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromArray(arrayval) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("get_keys", input1) } } @@ -657,14 +665,14 @@ func BIF_get_values(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromArray(arrayval) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("get_values", input1) } } // ---------------------------------------------------------------- func BIF_append(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsArray() { - return mlrval.ERROR + return mlrval.FromNotArrayError("append", input1) } output := input1.Copy() @@ -681,11 +689,11 @@ func BIF_append(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_flatten(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if input3.IsMap() || input3.IsArray() { if !input1.IsString() && input1.Type() != mlrval.MT_VOID { - return mlrval.ERROR + return mlrval.FromNotStringError("flatten", input1) } prefix := input1.AcquireStringValue() if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("flatten", input2) } delimiter := input2.AcquireStringValue() @@ -707,7 +715,7 @@ func BIF_flatten_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // unflatten({"a.b.c", ".") is {"a": { "b": { "c": 4}}}. 
func BIF_unflatten(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("unflatten", input2) } if input1.Type() != mlrval.MT_MAP { return input1 @@ -770,12 +778,12 @@ func BIF_json_parse(input1 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsVoid() { return input1 } else if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("json_parse", input1) } else { output := mlrval.FromPending() err := output.UnmarshalJSON([]byte(input1.AcquireStringValue())) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } return output } @@ -784,7 +792,7 @@ func BIF_json_parse(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_json_stringify_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { outputBytes, err := input1.MarshalJSON(mlrval.JSON_SINGLE_LINE, false) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } else { return mlrval.FromString(string(outputBytes)) } @@ -794,7 +802,7 @@ func BIF_json_stringify_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { var jsonFormatting mlrval.TJSONFormatting = mlrval.JSON_SINGLE_LINE useMultiline, ok := input2.GetBoolValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotBooleanError("json_stringify", input2) } if useMultiline { jsonFormatting = mlrval.JSON_MULTILINE @@ -802,7 +810,7 @@ func BIF_json_stringify_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { outputBytes, err := input1.MarshalJSON(jsonFormatting, false) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } else { return mlrval.FromString(string(outputBytes)) } @@ -902,7 +910,8 @@ func MillerSliceAccess( if lowerIndexMlrval.IsVoid() { lowerIndex = 1 } else { - return false, mlrval.ERROR, 0, 0 + e := mlrval.FromNotNamedTypeError("array/map/slice lower index", lowerIndexMlrval, "int or empty") + return false, e, 0, 0 } } upperIndex, ok := upperIndexMlrval.GetIntValue() @@ -910,7 +919,8 @@ func MillerSliceAccess( if 
upperIndexMlrval.IsVoid() { upperIndex = int64(n) } else { - return false, mlrval.ERROR, 0, 0 + e := mlrval.FromNotNamedTypeError("array/map/slice upper index", upperIndexMlrval, "int or empty") + return false, e, 0, 0 } } diff --git a/internal/pkg/bifs/datetime.go b/internal/pkg/bifs/datetime.go index 9ab5d3031..4126078b1 100644 --- a/internal/pkg/bifs/datetime.go +++ b/internal/pkg/bifs/datetime.go @@ -62,22 +62,22 @@ func BIF_sec2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_nsec2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { - intValue, ok := input1.GetIntValue() - if !ok { - return mlrval.ERROR + intValue, errValue := input1.GetIntValueOrError("nsec2gmt") + if errValue != nil { + return errValue } numDecimalPlaces := 0 return mlrval.FromString(lib.Nsec2GMT(intValue, numDecimalPlaces)) } func BIF_sec2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - floatValue, isNumeric := input1.GetNumericToFloatValue() - if !isNumeric { - return input1 + floatValue, errValue := input1.GetNumericToFloatValueOrError("sec2gmt") + if errValue != nil { + return errValue } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + numDecimalPlaces, errValue := input2.GetIntValueOrError("sec2gmt") + if errValue != nil { + return errValue } return mlrval.FromString(lib.Sec2GMT(floatValue, int(numDecimalPlaces))) } @@ -87,9 +87,9 @@ func BIF_nsec2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !ok { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + numDecimalPlaces, errValue := input2.GetIntValueOrError("nsec2gmt") + if errValue != nil { + return errValue } return mlrval.FromString(lib.Nsec2GMT(intValue, int(numDecimalPlaces))) } @@ -117,9 +117,9 @@ func BIF_sec2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + 
numDecimalPlaces, errValue := input2.GetIntValueOrError("sec2localtime") + if errValue != nil { + return errValue } return mlrval.FromString(lib.Sec2LocalTime(floatValue, int(numDecimalPlaces))) } @@ -129,9 +129,9 @@ func BIF_nsec2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !ok { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + numDecimalPlaces, errValue := input2.GetIntValueOrError("nsec2localtime") + if errValue != nil { + return errValue } return mlrval.FromString(lib.Nsec2LocalTime(intValue, int(numDecimalPlaces))) } @@ -141,17 +141,17 @@ func BIF_sec2localtime_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Ml if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + numDecimalPlaces, errValue := input2.GetIntValueOrError("sec2localtime") + if errValue != nil { + return errValue } - locationString, isString := input3.GetStringValue() - if !isString { - return mlrval.ERROR + locationString, errValue := input3.GetStringValueOrError("sec2localtime") + if errValue != nil { + return errValue } location, err := time.LoadLocation(locationString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } return mlrval.FromString(lib.Sec2LocationTime(floatValue, int(numDecimalPlaces), location)) } @@ -161,17 +161,17 @@ func BIF_nsec2localtime_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.M if !isNumeric { return input1 } - numDecimalPlaces, isInt := input2.GetIntValue() - if !isInt { - return mlrval.ERROR + numDecimalPlaces, errValue := input2.GetIntValueOrError("nsec2localtime") + if errValue != nil { + return errValue } - locationString, isString := input3.GetStringValue() - if !isString { - return mlrval.ERROR + locationString, errValue := input3.GetStringValueOrError("nsec2localtime") + if errValue != nil { + return errValue } location, err := time.LoadLocation(locationString) if err != nil { - 
return mlrval.ERROR + return mlrval.FromError(err) } return mlrval.FromString(lib.Nsec2LocationTime(intValue, int(numDecimalPlaces), location)) } @@ -221,28 +221,28 @@ func BIF_nsec2localdate_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // ---------------------------------------------------------------- func BIF_localtime2gmt_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("localtime2gmt", input1) } return BIF_nsec2gmt_unary(BIF_localtime2nsec_unary(input1)) } func BIF_localtime2gmt_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("localtime2gmt", input1) } return BIF_nsec2gmt_unary(BIF_localtime2nsec_binary(input1, input2)) } func BIF_gmt2localtime_unary(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("gmt2localtime2", input1) } return BIF_nsec2localtime_unary(BIF_gmt2nsec(input1)) } func BIF_gmt2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("gmt2localtime2", input1) } return BIF_nsec2localtime_ternary(BIF_gmt2nsec(input1), mlrval.FromInt(0), input2) } @@ -254,57 +254,62 @@ func BIF_gmt2localtime_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { var extensionRegex = regexp.MustCompile("([1-9])S") func BIF_strftime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return strftimeHelper(input1, input2, false, nil) + return strftimeHelper(input1, input2, false, nil, "strftime") } func BIF_strfntime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return strfntimeHelper(input1, input2, false, nil) + return strfntimeHelper(input1, input2, false, nil, "strfntime") } func BIF_strftime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return strftimeHelper(input1, input2, true, nil) + return 
strftimeHelper(input1, input2, true, nil, "strftime_local") } func BIF_strfntime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - return strfntimeHelper(input1, input2, true, nil) + return strfntimeHelper(input1, input2, true, nil, "strfntime_local") } func BIF_strftime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - locationString, isString := input3.GetStringValue() - if !isString { - return mlrval.ERROR + locationString, errValue := input3.GetStringValueOrError("strftime") + if errValue != nil { + return errValue } location, err := time.LoadLocation(locationString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } - return strftimeHelper(input1, input2, true, location) + return strftimeHelper(input1, input2, true, location, "strftime_local") } func BIF_strfntime_local_ternary(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - locationString, isString := input3.GetStringValue() - if !isString { - return mlrval.ERROR + locationString, errValue := input3.GetStringValueOrError("strfntime") + if errValue != nil { + return errValue } location, err := time.LoadLocation(locationString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } - return strfntimeHelper(input1, input2, true, location) + return strfntimeHelper(input1, input2, true, location, "strfntime_local") } -func strftimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time.Location) *mlrval.Mlrval { +func strftimeHelper( + input1, input2 *mlrval.Mlrval, + doLocal bool, + location *time.Location, + funcname string, +) *mlrval.Mlrval { if input1.IsVoid() { return input1 } - epochSeconds, ok := input1.GetNumericToFloatValue() - if !ok { - return mlrval.ERROR + epochSeconds, errValue := input1.GetNumericToFloatValueOrError(funcname) + if errValue != nil { + return errValue } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError(funcname, input2) } // Convert argument1 from float seconds 
since the epoch to a Go time. @@ -329,7 +334,7 @@ func strftimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time. formatter, err := strftime.New(formatString, strftimeExtensions) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } outputString := formatter.FormatString(inputTime) @@ -337,16 +342,21 @@ func strftimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time. return mlrval.FromString(outputString) } -func strfntimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time.Location) *mlrval.Mlrval { +func strfntimeHelper( + input1, input2 *mlrval.Mlrval, + doLocal bool, + location *time.Location, + funcname string, +) *mlrval.Mlrval { if input1.IsVoid() { return input1 } - epochNanoseconds, ok := input1.GetIntValue() - if !ok { - return mlrval.ERROR + epochNanoseconds, errValue := input1.GetIntValueOrError(funcname) + if errValue != nil { + return errValue } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError(funcname, input2) } // Convert argument1 from float seconds since the epoch to a Go time. 
@@ -371,7 +381,7 @@ func strfntimeHelper(input1, input2 *mlrval.Mlrval, doLocal bool, location *time formatter, err := strftime.New(formatString, strftimeExtensions) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } outputString := formatter.FormatString(inputTime) @@ -473,10 +483,10 @@ func BIF_strpntime(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func bif_strptime_unary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNanoseconds bool) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime", input2) } timeString := input1.AcquireStringValue() formatString := input2.AcquireStringValue() @@ -489,7 +499,7 @@ func bif_strptime_unary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNanos t, err = strptime.Parse(timeString, formatString) } if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } if produceNanoseconds { @@ -529,10 +539,10 @@ func BIF_strpntime_local_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func bif_strptime_binary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNanoseconds bool) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime", input2) } timeString := input1.AcquireStringValue() formatString := input2.AcquireStringValue() @@ -545,7 +555,7 @@ func bif_strptime_binary_aux(input1, input2 *mlrval.Mlrval, doLocal, produceNano t, err = strptime.Parse(timeString, formatString) } if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } if produceNanoseconds { @@ -575,13 +585,13 @@ func BIF_localtime2nsec_binary(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func bif_strptime_local_ternary_aux(input1, input2, input3 *mlrval.Mlrval, produceNanoseconds bool) *mlrval.Mlrval { if 
!input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime_local", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime_local", input2) } if !input3.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("strptime_local", input3) } timeString := input1.AcquireStringValue() @@ -590,12 +600,12 @@ func bif_strptime_local_ternary_aux(input1, input2, input3 *mlrval.Mlrval, produ location, err := time.LoadLocation(locationString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } t, err := strptime.ParseLocation(timeString, formatString, location) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } if produceNanoseconds { diff --git a/internal/pkg/bifs/hashing.go b/internal/pkg/bifs/hashing.go index 6c6c0ce39..09552ab0f 100644 --- a/internal/pkg/bifs/hashing.go +++ b/internal/pkg/bifs/hashing.go @@ -12,7 +12,7 @@ import ( func BIF_md5(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("md5", input1) } else { return mlrval.FromString( fmt.Sprintf( @@ -25,7 +25,7 @@ func BIF_md5(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_sha1(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sha1", input1) } else { return mlrval.FromString( fmt.Sprintf( @@ -38,7 +38,7 @@ func BIF_sha1(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_sha256(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sha256", input1) } else { return mlrval.FromString( fmt.Sprintf( @@ -51,7 +51,7 @@ func BIF_sha256(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_sha512(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sha512", input1) } else { return mlrval.FromString( 
fmt.Sprintf( diff --git a/internal/pkg/bifs/mathlib.go b/internal/pkg/bifs/mathlib.go index 21f123ace..b18e0d4ed 100644 --- a/internal/pkg/bifs/mathlib.go +++ b/internal/pkg/bifs/mathlib.go @@ -13,33 +13,33 @@ import ( // ---------------------------------------------------------------- // Return error (unary math-library func) -func _math_unary_erro1(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { - return mlrval.ERROR +func _math_unary_erro1(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary(fname, input1) } // Return absent (unary math-library func) -func _math_unary_absn1(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func _math_unary_absn1(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.ABSENT } // Return null (unary math-library func) -func _math_unary_null1(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func _math_unary_null1(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.NULL } // Return void (unary math-library func) -func _math_unary_void1(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func _math_unary_void1(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.VOID } // ---------------------------------------------------------------- -func math_unary_f_i(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func math_unary_f_i(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.FromFloat(f(float64(input1.AcquireIntValue()))) } -func math_unary_i_i(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func math_unary_i_i(input1 *mlrval.Mlrval, f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.FromInt(int64(f(float64(input1.AcquireIntValue())))) } -func math_unary_f_f(input1 *mlrval.Mlrval, f mathLibUnaryFunc) *mlrval.Mlrval { +func math_unary_f_f(input1 *mlrval.Mlrval, 
f mathLibUnaryFunc, fname string) *mlrval.Mlrval { return mlrval.FromFloat(f(input1.AcquireFloatValue())) } @@ -58,45 +58,75 @@ var mudispo = [mlrval.MT_DIM]mathLibUnaryFuncWrapper{ /*ABSENT */ _math_unary_absn1, } -func BIF_acos(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Acos) } +func BIF_acos(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Acos, "acos") +} func BIF_acosh(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Acosh) + return mudispo[input1.Type()](input1, math.Acosh, "acosh") +} +func BIF_asin(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Asin, "asin") } -func BIF_asin(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Asin) } func BIF_asinh(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Asinh) + return mudispo[input1.Type()](input1, math.Asinh, "asinh") +} +func BIF_atan(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Atan, "atan") } -func BIF_atan(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Atan) } func BIF_atanh(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Atanh) + return mudispo[input1.Type()](input1, math.Atanh, "atanh") +} +func BIF_cbrt(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Cbrt, "atan") +} +func BIF_cos(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Cos, "cos") +} +func BIF_cosh(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Cosh, "cosh") +} +func BIF_erf(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Erf, "erf") +} +func BIF_erfc(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Erfc, "erfc") +} +func BIF_exp(input1 
*mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Exp, "exp") } -func BIF_cbrt(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Cbrt) } -func BIF_cos(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Cos) } -func BIF_cosh(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Cosh) } -func BIF_erf(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Erf) } -func BIF_erfc(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Erfc) } -func BIF_exp(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Exp) } func BIF_expm1(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Expm1) + return mudispo[input1.Type()](input1, math.Expm1, "expm1") } func BIF_invqnorm(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, lib.Invqnorm) + return mudispo[input1.Type()](input1, lib.Invqnorm, "invqnorm") +} +func BIF_log(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Log, "log") } -func BIF_log(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Log) } func BIF_log10(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Log10) + return mudispo[input1.Type()](input1, math.Log10, "log10") } func BIF_log1p(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, math.Log1p) + return mudispo[input1.Type()](input1, math.Log1p, "log1p") } func BIF_qnorm(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return mudispo[input1.Type()](input1, lib.Qnorm) + return mudispo[input1.Type()](input1, lib.Qnorm, "qnorm") +} +func BIF_sin(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Sin, "sin") +} +func BIF_sinh(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return 
mudispo[input1.Type()](input1, math.Sinh, "sinh") +} +func BIF_sqrt(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Sqrt, "sqrt") +} +func BIF_tan(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Tan, "tan") +} +func BIF_tanh(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mudispo[input1.Type()](input1, math.Tanh, "tanh") } -func BIF_sin(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Sin) } -func BIF_sinh(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Sinh) } -func BIF_sqrt(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Sqrt) } -func BIF_tan(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Tan) } -func BIF_tanh(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mudispo[input1.Type()](input1, math.Tanh) } // Disposition vector for unary mathlib functions which are int-preserving var imudispo = [mlrval.MT_DIM]mathLibUnaryFuncWrapper{ @@ -114,17 +144,21 @@ var imudispo = [mlrval.MT_DIM]mathLibUnaryFuncWrapper{ } // Int-preserving -func BIF_abs(input1 *mlrval.Mlrval) *mlrval.Mlrval { return imudispo[input1.Type()](input1, math.Abs) } // xxx +func BIF_abs(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return imudispo[input1.Type()](input1, math.Abs, "abs") +} // xxx func BIF_ceil(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return imudispo[input1.Type()](input1, math.Ceil) + return imudispo[input1.Type()](input1, math.Ceil, "ceil") } // xxx func BIF_floor(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return imudispo[input1.Type()](input1, math.Floor) + return imudispo[input1.Type()](input1, math.Floor, "floor") } // xxx func BIF_round(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return imudispo[input1.Type()](input1, math.Round) -} // xxx -func BIF_sgn(input1 *mlrval.Mlrval) *mlrval.Mlrval { return imudispo[input1.Type()](input1, lib.Sgn) } // xxx + return 
imudispo[input1.Type()](input1, math.Round, "round") +} // xxx +func BIF_sgn(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return imudispo[input1.Type()](input1, lib.Sgn, "sgn") +} // xxx // ================================================================ // Exponentiation: DSL operator '**'. See also @@ -150,19 +184,23 @@ func pow_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(math.Pow(input1.AcquireFloatValue(), input2.AcquireFloatValue())) } +func powte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("**", input1, input2) +} + var pow_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {pow_f_ii, pow_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {pow_f_fi, pow_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {pow_f_ii, pow_f_if, powte, _void, powte, powte, powte, powte, powte, powte, _1___}, + /*FLOAT */ {pow_f_fi, pow_f_ff, powte, _void, powte, powte, powte, powte, powte, powte, _1___}, + /*BOOL */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*VOID */ {_void, _void, powte, _void, powte, powte, powte, powte, powte, powte, _absn}, + /*STRING */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*ARRAY */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*MAP */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*FUNC */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*ERROR */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*NULL */ {powte, powte, powte, powte, powte, powte, powte, powte, powte, powte, _absn}, + /*ABSENT */ {_i0__, _f0__, _absn, _absn, _absn, _absn, _absn, _absn, _absn, _absn, _absn}, } func BIF_pow(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -183,19 +221,23 @@ func atan2_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(math.Atan2(input1.AcquireFloatValue(), input2.AcquireFloatValue())) } +func atan2te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("atan2", input1, input2) +} + var atan2_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {atan2_f_ii, atan2_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {atan2_f_fi, atan2_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {atan2_f_ii, atan2_f_if, atan2te, _void, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _1___}, + /*FLOAT */ {atan2_f_fi, atan2_f_ff, atan2te, _void, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _1___}, + /*BOOL */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*VOID */ {_void, _void, atan2te, _void, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*STRING */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*ARRAY */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*MAP */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*FUNC */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*ERROR */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*NULL */ {atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, atan2te, _absn}, + /*ABSENT */ {_i0__, _f0__, atan2te, _absn, _absn, _absn, _absn, _absn, _absn, _absn, _absn}, } func BIF_atan2(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -220,19 +262,23 @@ func roundm_f_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromFloat(mlr_roundm(input1.AcquireFloatValue(), input2.AcquireFloatValue())) } +func rdmte(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("roundm", input1, input2) +} + var roundm_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ - // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {roundm_f_ii, roundm_f_if, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*FLOAT */ {roundm_f_fi, roundm_f_ff, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _1___}, - /*BOOL */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*VOID */ {_void, _void, _erro, _void, _erro, _absn, _absn, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*ARRAY */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*MAP */ {_absn, _absn, _absn, _absn, _absn, _absn, _absn, _erro, _absn, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_i0__, _f0__, _erro, _absn, _erro, _absn, _absn, _erro, _erro, _absn, _absn}, + // . 
INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT + /*INT */ {roundm_f_ii, roundm_f_if, rdmte, _void, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _1___}, + /*FLOAT */ {roundm_f_fi, roundm_f_ff, rdmte, _void, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _1___}, + /*BOOL */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*VOID */ {_void, _void, rdmte, _void, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*STRING */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*ARRAY */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*MAP */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*FUNC */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*ERROR */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*NULL */ {rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, rdmte, _absn}, + /*ABSENT */ {_i0__, _f0__, _absn, _absn, _absn, _absn, _absn, _absn, _absn, _absn, _absn}, } func BIF_roundm(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -240,6 +286,10 @@ func BIF_roundm(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } // ================================================================ +func logifit_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("logifit", input1, input2) +} + func BIF_logifit(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsLegit() { return input1 @@ -254,15 +304,15 @@ func BIF_logifit(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { // int/float OK; rest not x, xok := input1.GetNumericToFloatValue() if !xok { - return mlrval.ERROR + return logifit_te(input1, input2) } m, mok := input2.GetNumericToFloatValue() if !mok { - return mlrval.ERROR + return logifit_te(input1, input2) } b, bok := input3.GetNumericToFloatValue() if !bok { - return mlrval.ERROR + return 
logifit_te(input1, input2) } return mlrval.FromFloat(1.0 / (1.0 + math.Exp(-m*x-b))) diff --git a/internal/pkg/bifs/random.go b/internal/pkg/bifs/random.go index 3662b00b4..5562b4d30 100644 --- a/internal/pkg/bifs/random.go +++ b/internal/pkg/bifs/random.go @@ -30,10 +30,10 @@ func BIF_urandint(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return input2 } if !input1.IsInt() { - return mlrval.ERROR + return mlrval.FromNotIntError("urandint", input1) } if !input2.IsInt() { - return mlrval.ERROR + return mlrval.FromNotIntError("urandint", input2) } a := input1.AcquireIntValue() @@ -62,10 +62,10 @@ func BIF_urandrange(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { a, aok := input1.GetNumericToFloatValue() b, bok := input2.GetNumericToFloatValue() if !aok { - return mlrval.ERROR + return mlrval.FromNotNumericError("urandrange", input1) } if !bok { - return mlrval.ERROR + return mlrval.FromNotNumericError("urandrange", input2) } return mlrval.FromFloat( a + (b-a)*lib.RandFloat64(), @@ -75,10 +75,10 @@ func BIF_urandrange(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_urandelement(input1 *mlrval.Mlrval) *mlrval.Mlrval { arrayval := input1.GetArray() if arrayval == nil { // not an array - return mlrval.ERROR + return mlrval.FromNotArrayError("urandelement", input1) } if len(arrayval) == 0 { - return mlrval.ERROR + return mlrval.FromErrorString("urandelement: received a zero-length array as input") } // lo is inclusive, hi is exclusive diff --git a/internal/pkg/bifs/regex.go b/internal/pkg/bifs/regex.go index 41e816f16..72a6878b4 100644 --- a/internal/pkg/bifs/regex.go +++ b/internal/pkg/bifs/regex.go @@ -10,17 +10,17 @@ import ( // BIF_ssub implements the ssub function -- no-frills string-replace, no // regexes, no escape sequences. 
func BIF_ssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - return bif_ssub_gssub(input1, input2, input3, false) + return bif_ssub_gssub(input1, input2, input3, false, "ssub") } // BIF_gssub implements the gssub function -- no-frills string-replace, no // regexes, no escape sequences. func BIF_gssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { - return bif_ssub_gssub(input1, input2, input3, true) + return bif_ssub_gssub(input1, input2, input3, true, "gssub") } // bif_ssub_gssub is shared code for BIF_ssub and BIF_gssub. -func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool) *mlrval.Mlrval { +func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool, funcname string) *mlrval.Mlrval { if input1.IsErrorOrAbsent() { return input1 } @@ -31,13 +31,13 @@ func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool) *mlrval.M return input3 } if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError(funcname, input1) } if !input2.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError(funcname, input2) } if !input3.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError(funcname, input3) } if doAll { return mlrval.FromString( @@ -68,13 +68,13 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return input3 } if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sub", input1) } if !input2.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sub", input2) } if !input3.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("sub", input3) } input := input1.AcquireStringValue() @@ -98,13 +98,13 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return input3 } if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("gsub", input1) } if !input2.IsStringOrVoid() { - return mlrval.ERROR + return 
mlrval.FromNotStringError("gsub", input2) } if !input3.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("gsub", input3) } input := input1.AcquireStringValue() @@ -126,7 +126,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml } input1string := input1.String() if !input2.IsStringOrVoid() { - return mlrval.ERROR, nil + return mlrval.FromNotStringError("=~", input2), nil } boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue()) @@ -146,10 +146,10 @@ func BIF_string_does_not_match_regexp(input1, input2 *mlrval.Mlrval) (retval *ml func BIF_regextract(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("!=~", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("!=~", input2) } regex := lib.CompileMillerRegexOrDie(input2.AcquireStringValue()) match := regex.FindStringIndex(input1.AcquireStringValue()) @@ -162,10 +162,10 @@ func BIF_regextract(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_regextract_or_else(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("regextract_or_else", input1) } if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("regextract_or_else", input2) } regex := lib.CompileMillerRegexOrDie(input2.AcquireStringValue()) match := regex.FindStringIndex(input1.AcquireStringValue()) diff --git a/internal/pkg/bifs/relative_time.go b/internal/pkg/bifs/relative_time.go index be07c6301..d6d57b16a 100644 --- a/internal/pkg/bifs/relative_time.go +++ b/internal/pkg/bifs/relative_time.go @@ -10,12 +10,12 @@ import ( func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("dhms2sec", input1) } input := input1.String() if input == "" { - return mlrval.ERROR + 
return mlrval.FromNotStringError("dhms2sec", input1) } negate := false @@ -36,10 +36,10 @@ func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { _, err := fmt.Sscanf(remainingInput, "%d%s", &n, &rest) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } if len(rest) < 1 { - return mlrval.ERROR + return mlrval.FromErrorString("dhms2sec: input too short") } unitPart := rest[0] remainingInput = rest[1:] @@ -54,7 +54,13 @@ func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { case 's': seconds += n default: - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "dhms2sec(\"%s\"): unrecognized unit '%c'", + input1.OriginalString(), + unitPart, + ), + ) } } if negate { @@ -66,12 +72,12 @@ func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_dhms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("dhms2fsec", input1) } input := input1.String() if input == "" { - return mlrval.ERROR + return mlrval.FromNotStringError("dhms2fsec", input1) } negate := false @@ -92,10 +98,10 @@ func BIF_dhms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { _, err := fmt.Sscanf(remainingInput, "%f%s", &f, &rest) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } if len(rest) < 1 { - return mlrval.ERROR + return mlrval.FromErrorString("dhms2fsec: input too short") } unitPart := rest[0] remainingInput = rest[1:] @@ -110,7 +116,13 @@ func BIF_dhms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { case 's': seconds += f default: - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "dhms2fsec(\"%s\"): unrecognized unit '%c'", + input1.OriginalString(), + unitPart, + ), + ) } } if negate { @@ -122,10 +134,10 @@ func BIF_dhms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_hms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("hms2sec", input1) } if 
input1.AcquireStringValue() == "" { - return mlrval.ERROR + return mlrval.FromNotStringError("hms2sec", input1) } var h, m, s int64 @@ -141,12 +153,14 @@ func BIF_hms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { } } - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf("hsm2sec: could not parse input \"%s\"", input1.OriginalString()), + ) } func BIF_hms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsString() { - return mlrval.ERROR + return mlrval.FromNotStringError("hms2fsec", input1) } var h, m int @@ -164,13 +178,15 @@ func BIF_hms2fsec(input1 *mlrval.Mlrval) *mlrval.Mlrval { } } - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf("hsm2fsec: could not parse input \"%s\"", input1.OriginalString()), + ) } func BIF_sec2dhms(input1 *mlrval.Mlrval) *mlrval.Mlrval { isec, ok := input1.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("sec2dhms", input1) } var d, h, m, s int64 @@ -198,7 +214,7 @@ func BIF_sec2dhms(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_sec2hms(input1 *mlrval.Mlrval) *mlrval.Mlrval { isec, ok := input1.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("sec2hms", input1) } sign := "" if isec < 0 { @@ -219,7 +235,7 @@ func BIF_sec2hms(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_fsec2dhms(input1 *mlrval.Mlrval) *mlrval.Mlrval { fsec, ok := input1.GetNumericToFloatValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("fsec2dhms", input1) } sign := int64(1) @@ -269,7 +285,7 @@ func BIF_fsec2dhms(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_fsec2hms(input1 *mlrval.Mlrval) *mlrval.Mlrval { fsec, ok := input1.GetNumericToFloatValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("fsec2hms", input1) } sign := "" diff --git a/internal/pkg/bifs/stats.go b/internal/pkg/bifs/stats.go index 99e1e0ccd..c809f7167 100644 --- a/internal/pkg/bifs/stats.go +++ b/internal/pkg/bifs/stats.go @@ -156,7 +156,7 @@ func BIF_finalize_kurtosis(mn, 
msum, msum2, msum3, msum4 *mlrval.Mlrval) *mlrval // This is a helper function for BIFs which operate only on array or map. // It shorthands what values to return for non-collection inputs. -func check_collection(c *mlrval.Mlrval) (bool, *mlrval.Mlrval) { +func check_collection(c *mlrval.Mlrval, funcname string) (bool, *mlrval.Mlrval) { vtype := c.Type() switch vtype { case mlrval.MT_ARRAY: @@ -165,8 +165,10 @@ func check_collection(c *mlrval.Mlrval) (bool, *mlrval.Mlrval) { return true, c case mlrval.MT_ABSENT: return false, mlrval.ABSENT + case mlrval.MT_ERROR: + return false, c default: - return false, mlrval.ERROR + return false, mlrval.FromNotCollectionError(funcname, c) } } @@ -186,7 +188,7 @@ func collection_sum_of_function( } func BIF_count(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "count") if !ok { return value_if_not } @@ -200,7 +202,7 @@ func BIF_count(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_null_count(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "null_count") if !ok { return value_if_not } @@ -221,7 +223,7 @@ func BIF_null_count(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_distinct_count(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "distinct_count") if !ok { return value_if_not } @@ -243,18 +245,19 @@ func BIF_distinct_count(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_mode(collection *mlrval.Mlrval) *mlrval.Mlrval { - return bif_mode_or_antimode(collection, func(a, b int) bool { return a > b }) + return bif_mode_or_antimode(collection, "mode", func(a, b int) bool { return a > b }) } func BIF_antimode(collection *mlrval.Mlrval) *mlrval.Mlrval { - return bif_mode_or_antimode(collection, func(a, b int) bool { return a < 
b }) + return bif_mode_or_antimode(collection, "antimode", func(a, b int) bool { return a < b }) } func bif_mode_or_antimode( collection *mlrval.Mlrval, + funcname string, cmp func(int, int) bool, ) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, funcname) if !ok { return value_if_not } @@ -316,7 +319,7 @@ func bif_mode_or_antimode( } func BIF_sum(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "sum") if !ok { return value_if_not } @@ -329,7 +332,7 @@ func BIF_sum(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_sum2(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "sum2") if !ok { return value_if_not } @@ -340,7 +343,7 @@ func BIF_sum2(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_sum3(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "sum3") if !ok { return value_if_not } @@ -351,7 +354,7 @@ func BIF_sum3(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_sum4(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "sum4") if !ok { return value_if_not } @@ -363,7 +366,7 @@ func BIF_sum4(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_mean(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "mean") if !ok { return value_if_not } @@ -376,7 +379,7 @@ func BIF_mean(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_meaneb(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "meaneb") if !ok { return 
value_if_not } @@ -387,7 +390,7 @@ func BIF_meaneb(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_variance(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "variance") if !ok { return value_if_not } @@ -398,7 +401,7 @@ func BIF_variance(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_stddev(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "stddev") if !ok { return value_if_not } @@ -409,7 +412,7 @@ func BIF_stddev(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_skewness(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "skewness") if !ok { return value_if_not } @@ -421,7 +424,7 @@ func BIF_skewness(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_kurtosis(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "kurtosis") if !ok { return value_if_not } @@ -434,7 +437,7 @@ func BIF_kurtosis(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_minlen(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "minlen") if !ok { return value_if_not } @@ -446,7 +449,7 @@ func BIF_minlen(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_maxlen(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, "maxlen") if !ok { return value_if_not } @@ -458,7 +461,7 @@ func BIF_maxlen(collection *mlrval.Mlrval) *mlrval.Mlrval { } func BIF_sort_collection(collection *mlrval.Mlrval) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + ok, value_if_not := check_collection(collection, 
"sort_collection") if !ok { return value_if_not } @@ -492,21 +495,21 @@ func BIF_sort_collection(collection *mlrval.Mlrval) *mlrval.Mlrval { func BIF_median( collection *mlrval.Mlrval, ) *mlrval.Mlrval { - return BIF_percentile(collection, mlrval.FromFloat(50.0)) + return bif_percentile_with_options_aux(collection, mlrval.FromFloat(50.0), nil, "median") } func BIF_median_with_options( collection *mlrval.Mlrval, options *mlrval.Mlrval, ) *mlrval.Mlrval { - return BIF_percentile_with_options(collection, mlrval.FromFloat(50.0), options) + return bif_percentile_with_options_aux(collection, mlrval.FromFloat(50.0), options, "median") } func BIF_percentile( collection *mlrval.Mlrval, percentile *mlrval.Mlrval, ) *mlrval.Mlrval { - return BIF_percentile_with_options(collection, percentile, nil) + return bif_percentile_with_options_aux(collection, percentile, nil, "percentile") } func BIF_percentile_with_options( @@ -514,16 +517,14 @@ func BIF_percentile_with_options( percentile *mlrval.Mlrval, options *mlrval.Mlrval, ) *mlrval.Mlrval { - percentiles := mlrval.FromSingletonArray(percentile) - outputs := BIF_percentiles_with_options(collection, percentiles, options) - return outputs.AcquireMapValue().Head.Value + return bif_percentile_with_options_aux(collection, percentile, options, "percentile") } func BIF_percentiles( collection *mlrval.Mlrval, percentiles *mlrval.Mlrval, ) *mlrval.Mlrval { - return BIF_percentiles_with_options(collection, percentiles, nil) + return bif_percentiles_with_options_aux(collection, percentiles, nil, "percentiles") } func BIF_percentiles_with_options( @@ -531,7 +532,34 @@ func BIF_percentiles_with_options( percentiles *mlrval.Mlrval, options *mlrval.Mlrval, ) *mlrval.Mlrval { - ok, value_if_not := check_collection(collection) + return bif_percentiles_with_options_aux(collection, percentiles, options, "percentiles") +} + +func bif_percentile_with_options_aux( + collection *mlrval.Mlrval, + percentile *mlrval.Mlrval, + options *mlrval.Mlrval, + 
funcname string, +) *mlrval.Mlrval { + percentiles := mlrval.FromSingletonArray(percentile) + outputs := bif_percentiles_with_options_aux(collection, percentiles, options, funcname) + + // Check for error/absent returns from the main impl body + ok, value_if_not := check_collection(outputs, funcname) + if !ok { + return value_if_not + } + + return outputs.AcquireMapValue().Head.Value +} + +func bif_percentiles_with_options_aux( + collection *mlrval.Mlrval, + percentiles *mlrval.Mlrval, + options *mlrval.Mlrval, + funcname string, +) *mlrval.Mlrval { + ok, value_if_not := check_collection(collection, funcname) if !ok { return value_if_not } @@ -543,7 +571,7 @@ func BIF_percentiles_with_options( if options != nil { om := options.GetMap() if om == nil { // not a map - return mlrval.ERROR + return type_error_named_argument(funcname, "map", "options", options) } for pe := om.Head; pe != nil; pe = pe.Next { if pe.Key == "array_is_sorted" || pe.Key == "ais" { @@ -552,7 +580,7 @@ func BIF_percentiles_with_options( } else if mlrval.Equals(pe.Value, mlrval.FALSE) { array_is_sorted = false } else { - return mlrval.ERROR + return type_error_named_argument(funcname, "boolean", pe.Key, pe.Value) } } else if pe.Key == "interpolate_linearly" || pe.Key == "il" { if mlrval.Equals(pe.Value, mlrval.TRUE) { @@ -560,7 +588,7 @@ func BIF_percentiles_with_options( } else if mlrval.Equals(pe.Value, mlrval.FALSE) { interpolate_linearly = false } else { - return mlrval.ERROR + return type_error_named_argument(funcname, "boolean", pe.Key, pe.Value) } } else if pe.Key == "output_array_not_map" || pe.Key == "oa" { if mlrval.Equals(pe.Value, mlrval.TRUE) { @@ -568,7 +596,7 @@ func BIF_percentiles_with_options( } else if mlrval.Equals(pe.Value, mlrval.FALSE) { output_array_not_map = false } else { - return mlrval.ERROR + return type_error_named_argument(funcname, "boolean", pe.Key, pe.Value) } } } @@ -577,31 +605,33 @@ func BIF_percentiles_with_options( var sorted_array *mlrval.Mlrval if 
array_is_sorted { if !collection.IsArray() { - return mlrval.ERROR + return mlrval.FromNotArrayError(funcname+" collection", collection) } sorted_array = collection } else { sorted_array = BIF_sort_collection(collection) } - return bif_percentiles( + return bif_percentiles_impl( sorted_array.AcquireArrayValue(), percentiles, interpolate_linearly, output_array_not_map, + funcname, ) } -func bif_percentiles( +func bif_percentiles_impl( sorted_array []*mlrval.Mlrval, percentiles *mlrval.Mlrval, interpolate_linearly bool, output_array_not_map bool, + funcname string, ) *mlrval.Mlrval { ps := percentiles.GetArray() if ps == nil { // not an array - return mlrval.ERROR + return mlrval.FromNotArrayError(funcname+" percentiles", percentiles) } outputs := make([]*mlrval.Mlrval, len(ps)) @@ -609,7 +639,7 @@ func bif_percentiles( for i, _ := range ps { p, ok := ps[i].GetNumericToFloatValue() if !ok { - outputs[i] = mlrval.ERROR.Copy() + outputs[i] = type_error_named_argument(funcname, "numeric", "percentile", ps[i]) } else if len(sorted_array) == 0 { outputs[i] = mlrval.VOID } else { diff --git a/internal/pkg/bifs/stats_test.go b/internal/pkg/bifs/stats_test.go index 0d1276ba1..3fc21c84a 100644 --- a/internal/pkg/bifs/stats_test.go +++ b/internal/pkg/bifs/stats_test.go @@ -72,7 +72,7 @@ func TestBIF_null_count(t *testing.T) { mlrval.FromInt(1), mlrval.FromString("two"), mlrval.FromString(""), // this counts - mlrval.ERROR, + mlrval.FromAnonymousError(), mlrval.ABSENT, mlrval.NULL, // this counts }) diff --git a/internal/pkg/bifs/strings.go b/internal/pkg/bifs/strings.go index 6e1c76511..7ef8019a4 100644 --- a/internal/pkg/bifs/strings.go +++ b/internal/pkg/bifs/strings.go @@ -2,6 +2,7 @@ package bifs import ( "bytes" + "fmt" "regexp" "strconv" "strings" @@ -13,7 +14,7 @@ import ( // ================================================================ func BIF_strlen(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return 
mlrval.FromTypeErrorUnary("strlen", input1) } else { return mlrval.FromInt(lib.UTF8Strlen(input1.AcquireStringValue())) } @@ -42,19 +43,23 @@ func dot_s_xx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromString(input1.String() + input2.String()) } +func dot_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary(".", input1, input2) +} + var dot_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, _erro, _erro, _erro, _erro, _1___, _s1__}, - /*FLOAT */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, _erro, _erro, _erro, _erro, _1___, _s1__}, - /*BOOL */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, _erro, _erro, _erro, _erro, _1___, _s1__}, - /*VOID */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, _erro, _erro, _void, _void}, - /*STRING */ {dot_s_xx, dot_s_xx, dot_s_xx, _1___, dot_s_xx, _erro, _erro, _erro, _erro, _1___, _1___}, - /*ARRAY */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*MAP */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _absn, _absn, _erro, _erro, _erro, _erro}, - /*NULL */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, _erro, _erro, _null, _null}, - /*ABSENT */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, _erro, _erro, _null, _absn}, + /*INT */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, dot_te, dot_te, dot_te, dot_te, _1___, _s1__}, + /*FLOAT */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, dot_te, dot_te, dot_te, dot_te, _1___, _s1__}, + /*BOOL */ {dot_s_xx, dot_s_xx, dot_s_xx, _s1__, dot_s_xx, dot_te, dot_te, dot_te, dot_te, _1___, _s1__}, + /*VOID */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, dot_te, dot_te, _void, _void}, + /*STRING 
*/ {dot_s_xx, dot_s_xx, dot_s_xx, _1___, dot_s_xx, dot_te, dot_te, dot_te, dot_te, _1___, _1___}, + /*ARRAY */ {dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te}, + /*MAP */ {dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te}, + /*FUNC */ {dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te, dot_te}, + /*ERROR */ {dot_te, dot_te, dot_te, dot_te, dot_te, _absn, _absn, dot_te, dot_te, dot_te, dot_te}, + /*NULL */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, dot_te, dot_te, _null, _null}, + /*ABSENT */ {_s2__, _s2__, _s2__, _void, _2___, _absn, _absn, dot_te, dot_te, _null, _absn}, } func BIF_dot(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -70,7 +75,7 @@ func BIF_substr_1_up(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT } if input1.IsError() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("substr1", input1) } sinput := input1.String() @@ -102,7 +107,7 @@ func BIF_substr_0_up(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT } if input1.IsError() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("substr0", input1) } sinput := input1.String() @@ -134,7 +139,7 @@ func BIF_index(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT } if input1.IsError() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("index", input1) } sinput1 := input1.String() sinput2 := input2.String() @@ -157,7 +162,7 @@ func BIF_contains(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.ABSENT } if input1.IsError() { - return mlrval.ERROR + return input1 } return mlrval.FromBool(strings.Contains(input1.String(), input2.String())) @@ -172,13 +177,13 @@ func BIF_truncate(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return input2 } if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("truncate", input1) } if !input2.IsInt() { - return 
mlrval.ERROR + return mlrval.FromTypeErrorUnary("truncate", input2) } if input2.AcquireIntValue() < 0 { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("truncate", input2) } // Handle UTF-8 correctly: len(input1.AcquireStringValue()) will count bytes, not runes. @@ -205,7 +210,7 @@ func BIF_leftpad(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { } if !input2.IsInt() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("leftpad", input2) } inputString := input1.String() @@ -238,7 +243,7 @@ func BIF_rightpad(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { } if !input2.IsInt() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("rightpad", input2) } inputString := input1.String() @@ -353,7 +358,7 @@ func BIF_format(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { } formatString, ok := mlrvals[0].GetStringValue() if !ok { // not a string - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("format", mlrvals[0]) } pieces := lib.SplitString(formatString, "{}") @@ -405,11 +410,11 @@ func BIF_unformatx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func bif_unformat_aux(input1, input2 *mlrval.Mlrval, inferTypes bool) *mlrval.Mlrval { template, ok1 := input1.GetStringValue() if !ok1 { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("unformat", input1) } input, ok2 := input2.GetStringValue() if !ok2 { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("unformat", input2) } templatePieces := strings.Split(template, "{}") @@ -422,7 +427,15 @@ func bif_unformat_aux(input1, input2 *mlrval.Mlrval, inferTypes bool) *mlrval.Ml remaining := input if !strings.HasPrefix(remaining, templatePieces[0]) { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "unformat(\"%s\", \"%s\"): component \"%s\" lacks prefix \"%s\"", + input1.OriginalString(), + input2.OriginalString(), + remaining, + templatePieces[0], + ), + ) } remaining = remaining[len(templatePieces[0]):] templatePieces = templatePieces[1:] @@ -438,7 +451,15 @@ func 
bif_unformat_aux(input1, input2 *mlrval.Mlrval, inferTypes bool) *mlrval.Ml } else { index = strings.Index(remaining, templatePiece) if index < 0 { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "unformat(\"%s\", \"%s\"): component \"%s\" lacks prefix \"%s\"", + input1.OriginalString(), + input2.OriginalString(), + remaining, + templatePiece, + ), + ) } } @@ -466,12 +487,12 @@ func BIF_hexfmt(input1 *mlrval.Mlrval) *mlrval.Mlrval { // ---------------------------------------------------------------- func fmtnum_is(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("fmtnum", input2) } formatString := input2.AcquireStringValue() formatter, err := mlrval.GetFormatter(formatString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } return formatter.Format(input1) @@ -479,12 +500,12 @@ func fmtnum_is(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func fmtnum_fs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("fmtnum", input2) } formatString := input2.AcquireStringValue() formatter, err := mlrval.GetFormatter(formatString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } return formatter.Format(input1) @@ -492,12 +513,12 @@ func fmtnum_fs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { func fmtnum_bs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsString() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("fmtnum", input2) } formatString := input2.AcquireStringValue() formatter, err := mlrval.GetFormatter(formatString) if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } intMv := mlrval.FromInt(lib.BoolToInt(input1.AcquireBoolValue())) @@ -505,19 +526,23 @@ func fmtnum_bs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { return formatter.Format(intMv) } +func fmtnum_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + 
return mlrval.FromTypeErrorBinary("fmtnum", input1, input2) +} + var fmtnum_dispositions = [mlrval.MT_DIM][mlrval.MT_DIM]BinaryFunc{ // . INT FLOAT BOOL VOID STRING ARRAY MAP FUNC ERROR NULL ABSENT - /*INT */ {_erro, _erro, _erro, _erro, fmtnum_is, _erro, _erro, _erro, _erro, _erro, _absn}, - /*FLOAT */ {_erro, _erro, _erro, _erro, fmtnum_fs, _erro, _erro, _erro, _erro, _erro, _absn}, - /*BOOL */ {_erro, _erro, _erro, _erro, fmtnum_bs, _erro, _erro, _erro, _erro, _erro, _absn}, - /*VOID */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*STRING */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ARRAY */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*MAP */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*FUNC */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*ERROR */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro}, - /*NULL */ {_erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _erro, _absn}, - /*ABSENT */ {_absn, _absn, _erro, _absn, _absn, _erro, _erro, _erro, _erro, _absn, _absn}, + /*INT */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_is, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*FLOAT */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_fs, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*BOOL */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_bs, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*VOID */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*STRING */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*ARRAY */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, 
_absn}, + /*MAP */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*FUNC */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te}, + /*ERROR */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te}, + /*NULL */ {fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn}, + /*ABSENT */ {_absn, _absn, fmtnum_te, _absn, _absn, fmtnum_te, fmtnum_te, fmtnum_te, fmtnum_te, _absn, _absn}, } func BIF_fmtnum(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { @@ -549,7 +574,7 @@ func BIF_latin1_to_utf8(input1 *mlrval.Mlrval) *mlrval.Mlrval { if err != nil { // Somewhat arbitrary design decision // return input1 - return mlrval.ERROR + return mlrval.FromError(err) } else { return mlrval.FromString(output) } @@ -566,7 +591,7 @@ func BIF_utf8_to_latin1(input1 *mlrval.Mlrval) *mlrval.Mlrval { if err != nil { // Somewhat arbitrary design decision // return input1 - return mlrval.ERROR + return mlrval.FromError(err) } else { return mlrval.FromString(output) } diff --git a/internal/pkg/bifs/system.go b/internal/pkg/bifs/system.go index 90dedb56f..a3ac73ad4 100644 --- a/internal/pkg/bifs/system.go +++ b/internal/pkg/bifs/system.go @@ -22,7 +22,7 @@ func BIF_os() *mlrval.Mlrval { func BIF_hostname() *mlrval.Mlrval { hostname, err := os.Hostname() if err != nil { - return mlrval.ERROR + return mlrval.FromErrorString("could not retrieve system hostname") } else { return mlrval.FromString(hostname) } @@ -30,7 +30,7 @@ func BIF_hostname() *mlrval.Mlrval { func BIF_system(input1 *mlrval.Mlrval) *mlrval.Mlrval { if !input1.IsStringOrVoid() { - return mlrval.ERROR + return mlrval.FromNotStringError("system", input1) } commandString := input1.AcquireStringValue() @@ -38,7 +38,7 @@ func BIF_system(input1 *mlrval.Mlrval) 
*mlrval.Mlrval { outputBytes, err := exec.Command(shellRunArray[0], shellRunArray[1:]...).Output() if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } outputString := strings.TrimRight(string(outputBytes), "\n") @@ -48,7 +48,7 @@ func BIF_system(input1 *mlrval.Mlrval) *mlrval.Mlrval { func BIF_exec(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { if len(mlrvals) == 0 { - return mlrval.ERROR + return mlrval.FromErrorString("exec: zero-length input given") } cmd := exec.Command(mlrvals[0].String()) @@ -96,7 +96,7 @@ func BIF_exec(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { } if err != nil { - return mlrval.ERROR + return mlrval.FromError(err) } outputString := strings.TrimRight(string(outputBytes), "\n") diff --git a/internal/pkg/bifs/types.go b/internal/pkg/bifs/types.go index b876aa780..b57e0dc84 100644 --- a/internal/pkg/bifs/types.go +++ b/internal/pkg/bifs/types.go @@ -21,7 +21,14 @@ func string_to_int(input1 *mlrval.Mlrval) *mlrval.Mlrval { if ok { return mlrval.FromInt(i) } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "%s: unacceptable value %s with type %s", + "int", + input1.StringMaybeQuoted(), + input1.GetTypeName(), + ), + ) } } @@ -37,16 +44,20 @@ func bool_to_int(input1 *mlrval.Mlrval) *mlrval.Mlrval { } } +func to_int_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("int", input1) +} + var to_int_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ _1u___, /*FLOAT */ float_to_int, /*BOOL */ bool_to_int, /*VOID */ _void1, /*STRING */ string_to_int, - /*ARRAY */ _erro1, - /*MAP */ _erro1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*ARRAY */ to_int_te, + /*MAP */ to_int_te, + /*FUNC */ to_int_te, + /*ERROR */ to_int_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -61,7 +72,14 @@ func string_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if ok { return mlrval.FromInt(i) } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "%s: unacceptable value %s with 
type %s", + "int", + input1.StringMaybeQuoted(), + input1.GetTypeName(), + ), + ) } } @@ -81,23 +99,27 @@ func bool_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } } +func to_int_with_base_te(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorBinary("int", input1, input2) +} + var to_int_with_base_dispositions = [mlrval.MT_DIM]BinaryFunc{ /*INT */ int_to_int_with_base, /*FLOAT */ float_to_int_with_base, /*BOOL */ bool_to_int_with_base, /*VOID */ _void, /*STRING */ string_to_int_with_base, - /*ARRAY */ _erro, - /*MAP */ _erro, - /*FUNC */ _erro, - /*ERROR */ _erro, + /*ARRAY */ to_int_with_base_te, + /*MAP */ to_int_with_base_te, + /*FUNC */ to_int_with_base_te, + /*ERROR */ to_int_with_base_te, /*NULL */ _null, /*ABSENT */ _absn, } func BIF_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { if !input2.IsInt() { - return mlrval.ERROR + return mlrval.FromTypeErrorBinary("int", input1, input2) } return to_int_with_base_dispositions[input1.Type()](input1, input2) } @@ -108,7 +130,14 @@ func string_to_float(input1 *mlrval.Mlrval) *mlrval.Mlrval { if ok { return mlrval.FromFloat(f) } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "%s: unacceptable value %s with type %s", + "float", + input1.StringMaybeQuoted(), + input1.GetTypeName(), + ), + ) } } @@ -124,16 +153,20 @@ func bool_to_float(input1 *mlrval.Mlrval) *mlrval.Mlrval { } } +func to_float_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("float", input1) +} + var to_float_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ int_to_float, /*FLOAT */ _1u___, /*BOOL */ bool_to_float, /*VOID */ _void1, /*STRING */ string_to_float, - /*ARRAY */ _erro1, - /*MAP */ _erro1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*ARRAY */ to_float_te, + /*MAP */ to_float_te, + /*FUNC */ to_float_te, + /*ERROR */ to_float_te, /*NULL */ _null1, /*ABSENT */ _absn1, } @@ -148,7 +181,14 @@ func string_to_boolean(input1 *mlrval.Mlrval) 
*mlrval.Mlrval { if ok { return mlrval.FromBool(b) } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "%s: unacceptable value %s with type %s", + "boolean", + input1.StringMaybeQuoted(), + input1.GetTypeName(), + ), + ) } } @@ -160,16 +200,20 @@ func float_to_bool(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromBool(input1.AcquireFloatValue() != 0.0) } +func to_boolean_te(input1 *mlrval.Mlrval) *mlrval.Mlrval { + return mlrval.FromTypeErrorUnary("boolean", input1) +} + var to_boolean_dispositions = [mlrval.MT_DIM]UnaryFunc{ /*INT */ int_to_bool, /*FLOAT */ float_to_bool, /*BOOL */ _1u___, /*VOID */ _void1, /*STRING */ string_to_boolean, - /*ARRAY */ _erro1, - /*MAP */ _erro1, - /*FUNC */ _erro1, - /*ERROR */ _erro1, + /*ARRAY */ to_boolean_te, + /*MAP */ to_boolean_te, + /*FUNC */ to_boolean_te, + /*ERROR */ to_boolean_te, /*NULL */ _null1, /*ABSENT */ _absn1, } diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go index 0ee362f2b..013deb582 100644 --- a/internal/pkg/cli/option_parse.go +++ b/internal/pkg/cli/option_parse.go @@ -2711,6 +2711,15 @@ var MiscFlagSection = FlagSection{ infoPrinter: MiscPrintInfo, flags: []Flag{ + { + name: "-x", + help: "If any record has an error value in it, report it and stop the process. The default is to print the field value as `(error)` and continue.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + options.WriterOptions.FailOnDataError = true + *pargi += 1 + }, + }, + { name: "-n", help: "Process no input files, nor standard input either. Useful for `mlr put` with `begin`/`end` statements only. (Same as `--from /dev/null`.) 
Also useful in `mlr -n put -v '...'` for analyzing abstract syntax trees (if that's your thing).", diff --git a/internal/pkg/cli/option_types.go b/internal/pkg/cli/option_types.go index 06f71ad27..b70d4a2f7 100644 --- a/internal/pkg/cli/option_types.go +++ b/internal/pkg/cli/option_types.go @@ -135,6 +135,9 @@ type TWriterOptions struct { // For floating-point numbers: "" means use the Go default. FPOFMT string + + // Fatal the process when error data in a given record is about to be output. + FailOnDataError bool } // ---------------------------------------------------------------- diff --git a/internal/pkg/climain/mlrcli_parse.go b/internal/pkg/climain/mlrcli_parse.go index 31e72408f..e68b5902e 100644 --- a/internal/pkg/climain/mlrcli_parse.go +++ b/internal/pkg/climain/mlrcli_parse.go @@ -261,6 +261,10 @@ func parseCommandLinePassTwo( options.WriterOptions.FPOFMT = mlr_ofmt } + if os.Getenv("MLR_FAIL_ON_DATA_ERROR") != "" { + options.WriterOptions.FailOnDataError = true + } + recordTransformers = make([]transformers.IRecordTransformer, 0) err = nil ignoresInput := false diff --git a/internal/pkg/dsl/cst/builtin_functions.go b/internal/pkg/dsl/cst/builtin_functions.go index 44acdfc0f..dfc1b4596 100644 --- a/internal/pkg/dsl/cst/builtin_functions.go +++ b/internal/pkg/dsl/cst/builtin_functions.go @@ -752,7 +752,7 @@ func (node *LogicalANDOperatorNode) Evaluate( aout := node.a.Evaluate(state) atype := aout.Type() if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("&&", aout, "absent or boolean") } if atype == mlrval.MT_ABSENT { return mlrval.ABSENT @@ -768,7 +768,7 @@ func (node *LogicalANDOperatorNode) Evaluate( bout := node.b.Evaluate(state) btype := bout.Type() if !(btype == mlrval.MT_ABSENT || btype == mlrval.MT_BOOL) { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") } if btype == mlrval.MT_ABSENT { return mlrval.ABSENT @@ -800,7 +800,7 @@ func 
(node *LogicalOROperatorNode) Evaluate( aout := node.a.Evaluate(state) atype := aout.Type() if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("||", aout, "absent or boolean") } if atype == mlrval.MT_ABSENT { return mlrval.ABSENT @@ -816,7 +816,7 @@ func (node *LogicalOROperatorNode) Evaluate( bout := node.b.Evaluate(state) btype := bout.Type() if !(btype == mlrval.MT_ABSENT || btype == mlrval.MT_BOOL) { - return mlrval.ERROR + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") } if btype == mlrval.MT_ABSENT { return mlrval.ABSENT @@ -884,7 +884,7 @@ func (node *StandardTernaryOperatorNode) Evaluate( boolValue, isBool := aout.GetBoolValue() if !isBool { - return mlrval.ERROR + return mlrval.FromNotBooleanError("?:", aout) } // Short-circuit: defer evaluation unless needed @@ -908,10 +908,10 @@ func (node *StandardTernaryOperatorNode) Evaluate( func BinaryShortCircuitPlaceholder(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { lib.InternalCodingErrorPanic("Short-circuting was not correctly implemented") - return mlrval.ERROR // not reached + return nil // not reached } func TernaryShortCircuitPlaceholder(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { lib.InternalCodingErrorPanic("Short-circuting was not correctly implemented") - return mlrval.ERROR // not reached + return nil // not reached } diff --git a/internal/pkg/dsl/cst/collections.go b/internal/pkg/dsl/cst/collections.go index bf117b00e..66d63788f 100644 --- a/internal/pkg/dsl/cst/collections.go +++ b/internal/pkg/dsl/cst/collections.go @@ -6,6 +6,8 @@ package cst import ( + "fmt" + "github.com/johnkerl/miller/internal/pkg/bifs" "github.com/johnkerl/miller/internal/pkg/dsl" "github.com/johnkerl/miller/internal/pkg/lib" @@ -97,14 +99,28 @@ func (node *ArrayOrMapIndexAccessNode) Evaluate( } else if baseMlrval.IsStringOrVoid() { mindex, isInt := indexMlrval.GetIntValue() if !isInt { - return mlrval.ERROR + return 
mlrval.FromError( + fmt.Errorf( + "unacceptable non-int index value %s of type %s on base value %s", + indexMlrval.StringMaybeQuoted(), + indexMlrval.GetTypeName(), + baseMlrval.StringMaybeQuoted(), + ), + ) } // Handle UTF-8 correctly: len(input1.printrep) will count bytes, not runes. runes := []rune(baseMlrval.String()) // Miller uses 1-up, and negatively aliased, indexing for strings and arrays. zindex, inBounds := mlrval.UnaliasArrayLengthIndex(len(runes), int(mindex)) if !inBounds { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "cannot index base string %s of length %d with out-of-bounds index %d", + baseMlrval.StringMaybeQuoted(), + len(runes), + int(mindex), + ), + ) } return mlrval.FromString(string(runes[zindex])) @@ -112,7 +128,13 @@ func (node *ArrayOrMapIndexAccessNode) Evaluate( // For strict mode, absence should be detected on the baseMlrval and indexMlrval evaluators. return mlrval.ABSENT } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "cannot index base value %s of type %s, which is not array, map, or string", + baseMlrval.StringMaybeQuoted(), + baseMlrval.GetTypeName(), + ), + ) } } @@ -171,7 +193,13 @@ func (node *ArraySliceAccessNode) Evaluate( } array := baseMlrval.GetArray() if array == nil { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "cannot slice base value %s with non-array type %s", + baseMlrval.StringMaybeQuoted(), + baseMlrval.GetTypeName(), + ), + ) } n := len(array) @@ -236,7 +264,7 @@ func (node *PositionalFieldNameNode) Evaluate( index, ok := indexMlrval.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("$[[...]]", indexMlrval) } name, ok := state.Inrec.GetNameAtPositionalIndex(index) @@ -282,7 +310,7 @@ func (node *PositionalFieldValueNode) Evaluate( index, ok := indexMlrval.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("$[[...]]", indexMlrval) } retval := state.Inrec.GetWithPositionalIndex(index) @@ -338,7 
+366,7 @@ func (node *ArrayOrMapPositionalNameAccessNode) Evaluate( index, ok := indexMlrval.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("$[[...]]", indexMlrval) } if baseMlrval.IsArray() { @@ -363,7 +391,13 @@ func (node *ArrayOrMapPositionalNameAccessNode) Evaluate( return mlrval.ABSENT } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "cannot index base value %s of type %s, which is not array, map, or string", + baseMlrval.StringMaybeQuoted(), + baseMlrval.GetTypeName(), + ), + ) } } @@ -412,7 +446,7 @@ func (node *ArrayOrMapPositionalValueAccessNode) Evaluate( index, ok := indexMlrval.GetIntValue() if !ok { - return mlrval.ERROR + return mlrval.FromNotIntError("$[[...]]", indexMlrval) } if baseMlrval.IsArray() { @@ -434,7 +468,13 @@ func (node *ArrayOrMapPositionalValueAccessNode) Evaluate( return mlrval.ABSENT } else { - return mlrval.ERROR + return mlrval.FromError( + fmt.Errorf( + "cannot index base value %s of type %s, which is not array, map, or string", + baseMlrval.StringMaybeQuoted(), + baseMlrval.GetTypeName(), + ), + ) } } diff --git a/internal/pkg/dsl/cst/env.go b/internal/pkg/dsl/cst/env.go index 24e2647bb..73e37c343 100644 --- a/internal/pkg/dsl/cst/env.go +++ b/internal/pkg/dsl/cst/env.go @@ -41,7 +41,7 @@ func (node *EnvironmentVariableNode) Evaluate( return mlrval.ABSENT.StrictModeCheck(state.StrictMode, "ENV[(absent)]") } if !name.IsString() { - return mlrval.ERROR + return mlrval.FromTypeErrorUnary("ENV[]", name) } return mlrval.FromString(os.Getenv(name.String())) diff --git a/internal/pkg/dsl/cst/hofs.go b/internal/pkg/dsl/cst/hofs.go index e47f32ce1..9257d7f46 100644 --- a/internal/pkg/dsl/cst/hofs.go +++ b/internal/pkg/dsl/cst/hofs.go @@ -207,7 +207,7 @@ func SelectHOF( } else if input1.IsMap() { return selectMap(input1, input2, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("select", input1) } } @@ -216,9 +216,9 @@ func selectArray( input2 
*mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() + inputArray, errVal := input1.GetArrayValueOrError("select") if inputArray == nil { // not an array - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "select") @@ -252,9 +252,9 @@ func selectMap( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("select") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "select") @@ -298,7 +298,7 @@ func ApplyHOF( } else if input1.IsMap() { return applyMap(input1, input2, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("apply", input1) } } @@ -307,9 +307,9 @@ func applyArray( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() - if inputArray == nil { // not an array - return mlrval.ERROR + inputArray, errVal := input1.GetArrayValueOrError("apply") + if inputArray == nil { + return errVal } isFunctionOrDie(input2, "apply") @@ -334,9 +334,9 @@ func applyMap( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("apply") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "apply") @@ -369,7 +369,7 @@ func ReduceHOF( } else if input1.IsMap() { return reduceMap(input1, input2, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("reduce", input1) } } @@ -378,9 +378,9 @@ func reduceArray( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() - if inputArray == nil { // not an array - return mlrval.ERROR + inputArray, errVal := input1.GetArrayValueOrError("reduce") + if inputArray == nil { + return errVal } isFunctionOrDie(input2, "reduce") @@ -408,9 +408,9 @@ func reduceMap( input2 *mlrval.Mlrval, state *runtime.State, ) 
*mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("reduce") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "reduce") @@ -449,7 +449,7 @@ func FoldHOF( } else if input1.IsMap() { return foldMap(input1, input2, input3, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("fold", input1) } } @@ -459,9 +459,9 @@ func foldArray( input3 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() - if inputArray == nil { // not an array - return mlrval.ERROR + inputArray, errVal := input1.GetArrayValueOrError("fold") + if inputArray == nil { + return errVal } isFunctionOrDie(input2, "fold") @@ -486,9 +486,9 @@ func foldMap( input3 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("fold") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "fold") @@ -528,7 +528,7 @@ func SortHOF( } else if inputs[0].IsMap() { return sortM(inputs[0], "") } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("sort", inputs[0]) } } else if inputs[1].IsStringOrVoid() { @@ -537,7 +537,7 @@ func SortHOF( } else if inputs[0].IsMap() { return sortM(inputs[0], inputs[1].String()) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("sort", inputs[0]) } } else if inputs[1].IsFunction() { @@ -546,7 +546,7 @@ func SortHOF( } else if inputs[0].IsMap() { return sortMF(inputs[0], inputs[1], state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("sort", inputs[0]) } } else { @@ -555,7 +555,9 @@ func SortHOF( ) os.Exit(1) } - return mlrval.ERROR + // Not reached + lib.InternalCodingErrorIf(true) + return nil } // ---------------------------------------------------------------- @@ -600,10 +602,10 @@ func sortA( input1 *mlrval.Mlrval, flags string, ) *mlrval.Mlrval { - if 
input1.GetArray() == nil { // not an array - return mlrval.ERROR + temp, errVal := input1.GetArrayValueOrError("sort") + if temp == nil { // not an array + return errVal } - output := input1.Copy() // byMapValue is ignored for sorting arrays @@ -677,19 +679,19 @@ func sortM( input1 *mlrval.Mlrval, flags string, ) *mlrval.Mlrval { - inmap := input1.GetMap() - if inmap == nil { // not a map - return mlrval.ERROR + inputMap, errVal := input1.GetMapValueOrError("sort") + if inputMap == nil { // not a map + return errVal } // Get sort-flags, if provided sortType, reverse, byMapValue := decodeSortFlags(flags) // Copy the entries to an array for sorting. - n := inmap.FieldCount + n := inputMap.FieldCount entries := make([]mlrval.MlrmapEntryForArray, n) i := 0 - for pe := inmap.Head; pe != nil; pe = pe.Next { + for pe := inputMap.Head; pe != nil; pe = pe.Next { entries[i].Key = pe.Key entries[i].Value = pe.Value // pointer alias for now until new map at end of this function i++ @@ -838,13 +840,11 @@ func sortAF( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() + inputArray, errVal := input1.GetArrayValueOrError("select") if inputArray == nil { // not an array - return mlrval.ERROR - } - if !input2.IsFunction() { - return mlrval.ERROR + return errVal } + isFunctionOrDie(input2, "sort") hofSpace := getHOFSpace(input2, 2, "sort", "array") udfCallsite := hofSpace.udfCallsite @@ -881,13 +881,11 @@ func sortMF( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("sort") if inputMap == nil { // not a map - return mlrval.ERROR - } - if !input2.IsFunction() { - return mlrval.ERROR + return errVal } + isFunctionOrDie(input2, "sort") pairsArray := inputMap.ToPairsArray() @@ -936,7 +934,7 @@ func AnyHOF( } else if input1.IsMap() { return anyMap(input1, input2, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("any", 
input1) } } @@ -945,9 +943,9 @@ func anyArray( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() + inputArray, errVal := input1.GetArrayValueOrError("any") if inputArray == nil { // not an array - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "any") @@ -981,9 +979,9 @@ func anyMap( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("any") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "any") @@ -1028,7 +1026,7 @@ func EveryHOF( } else if input1.IsMap() { return everyMap(input1, input2, state) } else { - return mlrval.ERROR + return mlrval.FromNotCollectionError("every", input1) } } @@ -1037,9 +1035,9 @@ func everyArray( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputArray := input1.GetArray() + inputArray, errVal := input1.GetArrayValueOrError("every") if inputArray == nil { // not an array - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "every") @@ -1073,9 +1071,9 @@ func everyMap( input2 *mlrval.Mlrval, state *runtime.State, ) *mlrval.Mlrval { - inputMap := input1.GetMap() + inputMap, errVal := input1.GetMapValueOrError("every") if inputMap == nil { // not a map - return mlrval.ERROR + return errVal } isFunctionOrDie(input2, "every") diff --git a/internal/pkg/dsl/cst/udf.go b/internal/pkg/dsl/cst/udf.go index aafc2bd1b..83c1a5b09 100644 --- a/internal/pkg/dsl/cst/udf.go +++ b/internal/pkg/dsl/cst/udf.go @@ -250,12 +250,12 @@ func (site *UDFCallsite) EvaluateWithArguments( // being MT_ERROR should be mapped to MT_ERROR here (nominally, // data-dependent). But error-return could be something not data-dependent. 
if err != nil { - err = udf.signature.typeGatedReturnValue.Check(mlrval.ERROR) - if err != nil { - fmt.Fprint(os.Stderr, err) + err2 := udf.signature.typeGatedReturnValue.Check(mlrval.FromError(err)) + if err2 != nil { + fmt.Fprint(os.Stderr, err2) os.Exit(1) } - return mlrval.ERROR + return mlrval.FromError(err) } // Fell off end of function with no return diff --git a/internal/pkg/lib/logger.go b/internal/pkg/lib/logger.go index 31876cb91..5868dccf5 100644 --- a/internal/pkg/lib/logger.go +++ b/internal/pkg/lib/logger.go @@ -31,9 +31,11 @@ func InternalCodingErrorIf(condition bool) { "(unknown)", ) } - // Uncomment this and re-run if you want to get a stack trace to get the + // Use this and re-run if you want to get a stack trace to get the // call-tree that led to the indicated file/line: - // panic("eek") + if os.Getenv("MLR_PANIC_ON_INTERNAL_ERROR") != "" { + panic("Here is the stack trace") + } os.Exit(1) } @@ -61,9 +63,11 @@ func InternalCodingErrorWithMessageIf(condition bool, message string) { message, ) } - // Uncomment this and re-run if you want to get a stack trace to get the + // use this and re-run if you want to get a stack trace to get the // call-tree that led to the indicated file/line: - // panic("eek") + if os.Getenv("MLR_PANIC_ON_INTERNAL_ERROR") != "" { + panic("Here is the stack trace") + } os.Exit(1) } diff --git a/internal/pkg/mlrval/mlrmap_json.go b/internal/pkg/mlrval/mlrmap_json.go index 7b2628ed7..726c38fb7 100644 --- a/internal/pkg/mlrval/mlrmap_json.go +++ b/internal/pkg/mlrval/mlrmap_json.go @@ -153,7 +153,7 @@ func (entry *MlrmapEntry) JSONStringifyInPlace( ) { outputBytes, err := entry.Value.MarshalJSON(jsonFormatting, false) if err != nil { - entry.Value = ERROR + entry.Value = FromError(err) } else { entry.Value = FromString(string(outputBytes)) } @@ -165,7 +165,7 @@ func (entry *MlrmapEntry) JSONParseInPlace() { input := entry.Value.String() err := entry.Value.UnmarshalJSON([]byte(input)) if err != nil { - entry.Value = ERROR 
+ entry.Value = FromError(err) } } diff --git a/internal/pkg/mlrval/mlrval_cmp_test.go b/internal/pkg/mlrval/mlrval_cmp_test.go index 0700f8715..151083449 100644 --- a/internal/pkg/mlrval/mlrval_cmp_test.go +++ b/internal/pkg/mlrval/mlrval_cmp_test.go @@ -34,7 +34,7 @@ var orderedMlrvals = []*Mlrval{ // FromMap(NewMlrmap()), // TODO: - ERROR, + FromErrorString("error text goes here"), NULL, ABSENT, } diff --git a/internal/pkg/mlrval/mlrval_collections.go b/internal/pkg/mlrval/mlrval_collections.go index 5e009aff2..1e33b552b 100644 --- a/internal/pkg/mlrval/mlrval_collections.go +++ b/internal/pkg/mlrval/mlrval_collections.go @@ -81,10 +81,10 @@ import ( // TODO: copy-reduction refactor func (mv *Mlrval) ArrayGet(mindex *Mlrval) Mlrval { if !mv.IsArray() { - return *ERROR + return *FromNotArrayError("array [] base", mv) } if !mindex.IsInt() { - return *ERROR + return *FromNotIntError("array [] index", mindex) } arrayval := mv.intf.([]*Mlrval) value := arrayGetAliased(&arrayval, int(mindex.intf.(int64))) @@ -223,12 +223,12 @@ func (mv *Mlrval) ArrayAppend(value *Mlrval) { // ================================================================ func (mv *Mlrval) MapGet(key *Mlrval) Mlrval { if !mv.IsMap() { - return *ERROR + return *FromNotMapError("map[]", mv) } mval, err := mv.intf.(*Mlrmap).GetWithMlrvalIndex(key) - if err != nil { // xxx maybe error-return in the API - return *ERROR + if err != nil { + return *FromError(err) } if mval == nil { return *ABSENT diff --git a/internal/pkg/mlrval/mlrval_constants.go b/internal/pkg/mlrval/mlrval_constants.go index 3cc09d98e..d54e119c8 100644 --- a/internal/pkg/mlrval/mlrval_constants.go +++ b/internal/pkg/mlrval/mlrval_constants.go @@ -41,12 +41,6 @@ var VOID = &Mlrval{ printrepValid: true, } -var ERROR = &Mlrval{ - mvtype: MT_ERROR, - printrep: ERROR_PRINTREP, - printrepValid: true, -} - var NULL = &Mlrval{ mvtype: MT_NULL, printrep: "null", diff --git a/internal/pkg/mlrval/mlrval_get.go b/internal/pkg/mlrval/mlrval_get.go 
index 68d15e866..c88d8d36a 100644 --- a/internal/pkg/mlrval/mlrval_get.go +++ b/internal/pkg/mlrval/mlrval_get.go @@ -23,6 +23,14 @@ func (mv *Mlrval) GetStringValue() (stringValue string, isString bool) { } } +func (mv *Mlrval) GetStringValueOrError(funcname string) (stringValue string, errValue *Mlrval) { + if mv.Type() == MT_STRING || mv.Type() == MT_VOID { + return mv.printrep, nil + } else { + return "", FromNotStringError(funcname, mv) + } +} + func (mv *Mlrval) GetIntValue() (intValue int64, isInt bool) { if mv.Type() == MT_INT { return mv.intf.(int64), true @@ -31,6 +39,14 @@ func (mv *Mlrval) GetIntValue() (intValue int64, isInt bool) { } } +func (mv *Mlrval) GetIntValueOrError(funcname string) (intValue int64, errValue *Mlrval) { + if mv.Type() == MT_INT { + return mv.intf.(int64), nil + } else { + return -999, FromNotIntError(funcname, mv) + } +} + func (mv *Mlrval) GetFloatValue() (floatValue float64, isFloat bool) { if mv.Type() == MT_FLOAT { return mv.intf.(float64), true @@ -49,6 +65,16 @@ func (mv *Mlrval) GetNumericToFloatValue() (floatValue float64, isFloat bool) { } } +func (mv *Mlrval) GetNumericToFloatValueOrError(funcname string) (floatValue float64, errValue *Mlrval) { + if mv.Type() == MT_FLOAT { + return mv.intf.(float64), nil + } else if mv.Type() == MT_INT { + return float64(mv.intf.(int64)), nil + } else { + return -888.0, FromNotNumericError(funcname, mv) + } +} + func (mv *Mlrval) GetNumericNegativeorDie() bool { floatValue, ok := mv.GetNumericToFloatValue() lib.InternalCodingErrorIf(!ok) @@ -71,6 +97,14 @@ func (mv *Mlrval) GetArray() []*Mlrval { } } +func (mv *Mlrval) GetArrayValueOrError(funcname string) (ok []*Mlrval, errValue *Mlrval) { + if mv.IsArray() { + return mv.intf.([]*Mlrval), nil + } else { + return nil, FromNotArrayError(funcname, mv) + } +} + func (mv *Mlrval) GetMap() *Mlrmap { if mv.IsMap() { return mv.intf.(*Mlrmap) @@ -79,6 +113,14 @@ func (mv *Mlrval) GetMap() *Mlrmap { } } +func (mv *Mlrval) 
GetMapValueOrError(funcname string) (ok *Mlrmap, errValue *Mlrval) { + if mv.IsMap() { + return mv.intf.(*Mlrmap), nil + } else { + return nil, FromNotMapError(funcname, mv) + } +} + func (mv *Mlrval) GetFunction() interface{} { if mv.Type() == MT_FUNC { return mv.intf diff --git a/internal/pkg/mlrval/mlrval_is.go b/internal/pkg/mlrval/mlrval_is.go index 900b0e985..d1593776e 100644 --- a/internal/pkg/mlrval/mlrval_is.go +++ b/internal/pkg/mlrval/mlrval_is.go @@ -23,6 +23,14 @@ func (mv *Mlrval) IsError() bool { return mv.Type() == MT_ERROR } +func (mv *Mlrval) GetError() (bool, error) { + if mv.Type() == MT_ERROR { + return true, mv.err + } else { + return false, nil + } +} + // TODO: comment no JIT-infer here -- absent is non-inferrable and we needn't take the expense of JIT. func (mv *Mlrval) IsAbsent() bool { return mv.mvtype == MT_ABSENT diff --git a/internal/pkg/mlrval/mlrval_is_test.go b/internal/pkg/mlrval/mlrval_is_test.go index 47c14fffe..d3f183403 100644 --- a/internal/pkg/mlrval/mlrval_is_test.go +++ b/internal/pkg/mlrval/mlrval_is_test.go @@ -11,7 +11,7 @@ import ( ) func TestIsLegit(t *testing.T) { - assert.False(t, ERROR.IsLegit()) + assert.False(t, FromErrorString("foo").IsLegit()) assert.False(t, ABSENT.IsLegit()) assert.False(t, NULL.IsLegit()) assert.True(t, FromString("").IsLegit()) @@ -24,35 +24,35 @@ func TestIsLegit(t *testing.T) { } func TestIsErrorOrAbsent(t *testing.T) { - assert.True(t, ERROR.IsErrorOrAbsent()) + assert.True(t, FromErrorString("foo").IsErrorOrAbsent()) assert.True(t, ABSENT.IsErrorOrAbsent()) assert.False(t, NULL.IsErrorOrAbsent()) assert.False(t, FromString("").IsErrorOrAbsent()) } func TestIsError(t *testing.T) { - assert.True(t, ERROR.IsError()) + assert.True(t, FromErrorString("foo").IsError()) assert.False(t, ABSENT.IsError()) assert.False(t, NULL.IsError()) assert.False(t, FromString("").IsError()) } func TestIsAbsent(t *testing.T) { - assert.False(t, ERROR.IsAbsent()) + assert.False(t, 
FromErrorString("foo").IsAbsent()) assert.True(t, ABSENT.IsAbsent()) assert.False(t, NULL.IsAbsent()) assert.False(t, FromString("").IsAbsent()) } func TestIsNull(t *testing.T) { - assert.False(t, ERROR.IsNull()) + assert.False(t, FromErrorString("foo").IsNull()) assert.False(t, ABSENT.IsNull()) assert.True(t, NULL.IsNull()) assert.False(t, FromString("").IsNull()) } func TestIsVoid(t *testing.T) { - assert.False(t, ERROR.IsVoid()) + assert.False(t, FromErrorString("foo").IsVoid()) assert.False(t, ABSENT.IsVoid()) assert.False(t, NULL.IsVoid()) assert.True(t, FromString("").IsVoid()) @@ -63,7 +63,7 @@ func TestIsVoid(t *testing.T) { } func TestIsEmptyString(t *testing.T) { - assert.False(t, ERROR.IsEmptyString()) + assert.False(t, FromErrorString("foo").IsEmptyString()) assert.False(t, ABSENT.IsEmptyString()) assert.False(t, NULL.IsEmptyString()) assert.True(t, FromString("").IsEmptyString()) @@ -74,7 +74,7 @@ func TestIsEmptyString(t *testing.T) { } func TestIsString(t *testing.T) { - assert.False(t, ERROR.IsString()) + assert.False(t, FromErrorString("foo").IsString()) assert.False(t, ABSENT.IsString()) assert.False(t, NULL.IsString()) assert.False(t, FromString("").IsString()) @@ -89,7 +89,7 @@ func TestIsString(t *testing.T) { } func TestIsStringOrVoid(t *testing.T) { - assert.False(t, ERROR.IsStringOrVoid()) + assert.False(t, FromErrorString("foo").IsStringOrVoid()) assert.False(t, ABSENT.IsStringOrVoid()) assert.False(t, NULL.IsStringOrVoid()) assert.True(t, FromString("").IsStringOrVoid()) diff --git a/internal/pkg/mlrval/mlrval_new.go b/internal/pkg/mlrval/mlrval_new.go index a46bc73a0..474dcb807 100644 --- a/internal/pkg/mlrval/mlrval_new.go +++ b/internal/pkg/mlrval/mlrval_new.go @@ -5,7 +5,8 @@ package mlrval import ( - //"errors" + "errors" + "fmt" "github.com/johnkerl/miller/internal/pkg/lib" ) @@ -31,6 +32,115 @@ func FromDeferredType(input string) *Mlrval { } } +func FromError(err error) *Mlrval { + return &Mlrval{ + mvtype: MT_ERROR, + err: err, + 
printrep: ERROR_PRINTREP, + printrepValid: true, + } +} + +func FromErrorString(err string) *Mlrval { + return &Mlrval{ + mvtype: MT_ERROR, + err: errors.New(err), + printrep: ERROR_PRINTREP, + printrepValid: true, + } +} + +func FromAnonymousError() *Mlrval { + return &Mlrval{ + mvtype: MT_ERROR, + printrep: ERROR_PRINTREP, + printrepValid: true, + } +} + +func FromTypeErrorUnary(funcname string, v *Mlrval) *Mlrval { + return FromError( + fmt.Errorf( + "%s: unacceptable type %s with value %s", + funcname, + v.GetTypeName(), + v.StringMaybeQuoted(), + ), + ) +} + +func FromTypeErrorBinary(funcname string, v, input2 *Mlrval) *Mlrval { + return FromError( + fmt.Errorf( + "%s: unacceptable types %s, %s with values %s, %s", + funcname, + v.GetTypeName(), + input2.GetTypeName(), + v.StringMaybeQuoted(), + input2.StringMaybeQuoted(), + ), + ) +} + +func FromTypeErrorTernary(funcname string, v, input2, input3 *Mlrval) *Mlrval { + return FromError( + fmt.Errorf( + "%s: unacceptable types %s, %s, %s with values %s, %s, %s", + funcname, + v.GetTypeName(), + input2.GetTypeName(), + input3.GetTypeName(), + v.StringMaybeQuoted(), + input2.StringMaybeQuoted(), + input3.StringMaybeQuoted(), + ), + ) +} + +func FromNotStringError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "string") +} + +func FromNotBooleanError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "boolean") +} + +func FromNotIntError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "int") +} + +func FromNotNumericError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "int or float") +} + +func FromNotArrayError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "array") +} + +func FromNotMapError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "map") +} + +func FromNotCollectionError(funcname string, v *Mlrval) 
*Mlrval { + return FromNotNamedTypeError(funcname, v, "array or map") +} + +func FromNotFunctionError(funcname string, v *Mlrval) *Mlrval { + return FromNotNamedTypeError(funcname, v, "function") +} + +func FromNotNamedTypeError(funcname string, v *Mlrval, expected_type_name string) *Mlrval { + return FromError( + fmt.Errorf( + "%s: unacceptable non-array value %s with type %s; needed type %s", + funcname, + v.StringMaybeQuoted(), + v.GetTypeName(), + expected_type_name, + ), + ) +} + // TODO: comment non-JIT context like mlr put -s. // TODO: comment re inferBool. func FromInferredType(input string) *Mlrval { diff --git a/internal/pkg/mlrval/mlrval_output.go b/internal/pkg/mlrval/mlrval_output.go index 7b354b359..d864806b3 100644 --- a/internal/pkg/mlrval/mlrval_output.go +++ b/internal/pkg/mlrval/mlrval_output.go @@ -44,6 +44,16 @@ func (mv *Mlrval) OriginalString() string { } } +// StringMaybeQuoted Returns strings double-quoted; all else not. +func (mv *Mlrval) StringMaybeQuoted() string { + output := mv.String() + if mv.mvtype == MT_VOID || mv.mvtype == MT_STRING { + return `"` + output + `"` + } else { + return output + } +} + // See mlrval.go for more about JIT-formatting of string backings func (mv *Mlrval) setPrintRep() { if !mv.printrepValid { diff --git a/internal/pkg/mlrval/mlrval_sort_test.go b/internal/pkg/mlrval/mlrval_sort_test.go index 8cf515346..9e1f45ac8 100644 --- a/internal/pkg/mlrval/mlrval_sort_test.go +++ b/internal/pkg/mlrval/mlrval_sort_test.go @@ -37,7 +37,7 @@ func TestComparators(t *testing.T) { assert.Equal(t, 0, LexicalAscendingComparator(bfalse, bfalse)) assert.Equal(t, -1, LexicalAscendingComparator(bfalse, btrue)) assert.Equal(t, -1, LexicalAscendingComparator(sabc, sdef)) - assert.Equal(t, 0, LexicalAscendingComparator(ERROR, ERROR)) + assert.Equal(t, 0, LexicalAscendingComparator(FromErrorString("foo"), FromErrorString("foo"))) assert.Equal(t, 0, LexicalAscendingComparator(ABSENT, ABSENT)) // - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - @@ -48,30 +48,30 @@ func TestComparators(t *testing.T) { assert.Equal(t, 1, NumericAscendingComparator(btrue, bfalse)) - assert.Equal(t, 0, NumericAscendingComparator(ERROR, ERROR)) + assert.Equal(t, 0, NumericAscendingComparator(FromErrorString("foo"), FromErrorString("foo"))) assert.Equal(t, 0, NumericAscendingComparator(ABSENT, ABSENT)) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Across-type lexical comparisons - assert.Equal(t, -1, LexicalAscendingComparator(i10, btrue)) // "10" < "true" - assert.Equal(t, -1, LexicalAscendingComparator(i10, sabc)) // "10" < "abc" - assert.Equal(t, 1, LexicalAscendingComparator(i10, ERROR)) // "10" > "(error)" + assert.Equal(t, -1, LexicalAscendingComparator(i10, btrue)) // "10" < "true" + assert.Equal(t, -1, LexicalAscendingComparator(i10, sabc)) // "10" < "abc" + assert.Equal(t, 1, LexicalAscendingComparator(i10, FromErrorString("foo"))) // "10" > "(error)" - assert.Equal(t, 1, LexicalAscendingComparator(bfalse, sabc)) // "false" > "abc" - assert.Equal(t, 1, LexicalAscendingComparator(bfalse, ERROR)) // "false" > "(error)" + assert.Equal(t, 1, LexicalAscendingComparator(bfalse, sabc)) // "false" > "abc" + assert.Equal(t, 1, LexicalAscendingComparator(bfalse, FromErrorString("foo"))) // "false" > "(error)" // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Across-type numeric comparisons assert.Equal(t, -1, NumericAscendingComparator(i10, btrue)) assert.Equal(t, -1, NumericAscendingComparator(i10, sabc)) - assert.Equal(t, -1, NumericAscendingComparator(i10, ERROR)) + assert.Equal(t, -1, NumericAscendingComparator(i10, FromErrorString("foo"))) assert.Equal(t, -1, NumericAscendingComparator(i10, ABSENT)) assert.Equal(t, -1, NumericAscendingComparator(bfalse, sabc)) - assert.Equal(t, -1, NumericAscendingComparator(bfalse, ERROR)) + assert.Equal(t, -1, NumericAscendingComparator(bfalse, FromErrorString("foo"))) assert.Equal(t, -1, 
NumericAscendingComparator(bfalse, ABSENT)) - assert.Equal(t, -1, NumericAscendingComparator(ERROR, ABSENT)) + assert.Equal(t, -1, NumericAscendingComparator(FromErrorString("foo"), ABSENT)) } diff --git a/internal/pkg/mlrval/mlrval_type.go b/internal/pkg/mlrval/mlrval_type.go index ace7805dd..e47f73b8e 100644 --- a/internal/pkg/mlrval/mlrval_type.go +++ b/internal/pkg/mlrval/mlrval_type.go @@ -56,6 +56,7 @@ package mlrval type Mlrval struct { printrep string intf interface{} + err error // Payload for MT_ERROR types printrepValid bool // Enumeration for string / int / float / boolean / etc. // I would call this "type" not "mvtype" but "type" is a keyword in Go. diff --git a/internal/pkg/output/channel_writer.go b/internal/pkg/output/channel_writer.go index 061583ed5..ea7ed814d 100644 --- a/internal/pkg/output/channel_writer.go +++ b/internal/pkg/output/channel_writer.go @@ -3,6 +3,8 @@ package output import ( "bufio" "container/list" + "fmt" + "os" "github.com/johnkerl/miller/internal/pkg/cli" "github.com/johnkerl/miller/internal/pkg/types" @@ -13,19 +15,26 @@ func ChannelWriter( recordWriter IRecordWriter, writerOptions *cli.TWriterOptions, doneChannel chan<- bool, + dataProcessingErrorChannel chan<- bool, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) { for { recordsAndContexts := <-writerChannel - done := channelWriterHandleBatch( + done, errored := channelWriterHandleBatch( recordsAndContexts, recordWriter, writerOptions, + dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout, ) + if errored { + dataProcessingErrorChannel <- true + doneChannel <- true + break + } if done { doneChannel <- true break @@ -39,9 +48,10 @@ func channelWriterHandleBatch( recordsAndContexts *list.List, recordWriter IRecordWriter, writerOptions *cli.TWriterOptions, + dataProcessingErrorChannel chan<- bool, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) bool { +) (done bool, errored bool) { for e := recordsAndContexts.Front(); e != nil; e = e.Next() 
{ recordAndContext := e.Value.(*types.RecordAndContext) @@ -56,6 +66,33 @@ func channelWriterHandleBatch( if !recordAndContext.EndOfStream { record := recordAndContext.Record + + // XXX more + // XXX also make sure this results in exit 1 & goroutine cleanup + if writerOptions.FailOnDataError { + ok := true + for pe := record.Head; pe != nil; pe = pe.Next { + if pe.Value.IsError() { + context := recordAndContext.Context + fmt.Fprintf(os.Stderr, "mlr: data error at NR=%d FNR=%d FILENAME=%s\n", + context.NR, context.FNR, context.FILENAME, + ) + is, err := pe.Value.GetError() + if is { + if err != nil { + fmt.Fprintf(os.Stderr, "mlr: field %s: %v\n", pe.Key, err) + } else { + fmt.Fprintf(os.Stderr, "mlr: field %s\n", pe.Key) + } + ok = false + } + } + } + if !ok { + return true, true + } + } + if record != nil { recordWriter.Write(record, bufferedOutputStream, outputIsStdout) } @@ -75,8 +112,8 @@ func channelWriterHandleBatch( // records before printing any, since it needs to compute max width // down columns. recordWriter.Write(nil, bufferedOutputStream, outputIsStdout) - return true + return true, false } } - return false + return false, false } diff --git a/internal/pkg/output/file_output_handlers.go b/internal/pkg/output/file_output_handlers.go index cd7c3f896..c7cf6f483 100644 --- a/internal/pkg/output/file_output_handlers.go +++ b/internal/pkg/output/file_output_handlers.go @@ -15,6 +15,7 @@ package output import ( "bufio" "container/list" + "errors" "fmt" "io" "os" @@ -213,10 +214,11 @@ type FileOutputHandler struct { // lazily created on WriteRecord. The record-writer / channel parts are // called only by WriteRecrod which is called by emit and tee variants; // print and dump variants call WriteString. 
- recordWriterOptions *cli.TWriterOptions - recordWriter IRecordWriter - recordOutputChannel chan *list.List // list of *types.RecordAndContext - recordDoneChannel chan bool + recordWriterOptions *cli.TWriterOptions + recordWriter IRecordWriter + recordOutputChannel chan *list.List // list of *types.RecordAndContext + recordDoneChannel chan bool + recordErroredChannel chan bool } func newOutputHandlerCommon( @@ -231,10 +233,11 @@ func newOutputHandlerCommon( bufferedOutputStream: bufio.NewWriter(handle), closeable: closeable, - recordWriterOptions: recordWriterOptions, - recordWriter: nil, - recordOutputChannel: nil, - recordDoneChannel: nil, + recordWriterOptions: recordWriterOptions, + recordWriter: nil, + recordOutputChannel: nil, + recordDoneChannel: nil, + recordErroredChannel: nil, } } @@ -368,12 +371,14 @@ func (handler *FileOutputHandler) setUpRecordWriter() error { handler.recordOutputChannel = make(chan *list.List, 1) // list of *types.RecordAndContext handler.recordDoneChannel = make(chan bool, 1) + handler.recordErroredChannel = make(chan bool, 1) go ChannelWriter( handler.recordOutputChannel, handler.recordWriter, handler.recordWriterOptions, handler.recordDoneChannel, + handler.recordErroredChannel, handler.bufferedOutputStream, false, // outputIsStdout ) @@ -382,7 +387,9 @@ func (handler *FileOutputHandler) setUpRecordWriter() error { } // ---------------------------------------------------------------- -func (handler *FileOutputHandler) Close() error { +func (handler *FileOutputHandler) Close() (retval error) { + retval = nil + if handler.recordOutputChannel != nil { // TODO: see if we need a real context emptyContext := types.Context{} @@ -392,6 +399,10 @@ func (handler *FileOutputHandler) Close() error { done := false for !done { select { + case _ = <-handler.recordErroredChannel: + done = true + retval = errors.New("exiting due to data error") // details already printed + break case _ = <-handler.recordDoneChannel: done = true break @@ -399,10 
+410,14 @@ func (handler *FileOutputHandler) Close() error { } } + if retval != nil { + return retval + } + handler.bufferedOutputStream.Flush() if handler.closeable { return handler.handle.Close() - } else { + } else { // e.g. stdout return nil } } diff --git a/internal/pkg/stream/stream.go b/internal/pkg/stream/stream.go index 60be62a17..351eda06d 100644 --- a/internal/pkg/stream/stream.go +++ b/internal/pkg/stream/stream.go @@ -3,6 +3,7 @@ package stream import ( "bufio" "container/list" + "errors" "io" "github.com/johnkerl/miller/internal/pkg/cli" @@ -67,8 +68,9 @@ func Stream( // We're done when a fatal error is registered on input (file not found, // etc) or when the record-writer has written all its output. We use // channels to communicate both of these conditions. - errorChannel := make(chan error, 1) + inputErrorChannel := make(chan error, 1) doneWritingChannel := make(chan bool, 1) + dataProcessingErrorChannel := make(chan bool, 1) // For mlr head, so a transformer can communicate it will disregard all // further input. It writes this back upstream, and that is passed back to @@ -81,17 +83,22 @@ func Stream( // error or end-of-processing happens. 
bufferedOutputStream := bufio.NewWriter(outputStream) - go recordReader.Read(fileNames, *initialContext, readerChannel, errorChannel, readerDownstreamDoneChannel) + go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel) go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers, writerChannel, options) go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel, - bufferedOutputStream, outputIsStdout) + dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout) + var retval error done := false for !done { select { - case err := <-errorChannel: - return err + case ierr := <-inputErrorChannel: + retval = ierr + break + case _ = <-dataProcessingErrorChannel: + retval = errors.New("exiting due to data error") // details already printed + break case _ = <-doneWritingChannel: done = true break @@ -100,5 +107,5 @@ func Stream( bufferedOutputStream.Flush() - return nil + return retval } diff --git a/internal/pkg/terminals/help/entry.go b/internal/pkg/terminals/help/entry.go index 17130fe26..6598c48b3 100644 --- a/internal/pkg/terminals/help/entry.go +++ b/internal/pkg/terminals/help/entry.go @@ -488,7 +488,7 @@ func helpTypeArithmeticInfo() { mlrval.FromFloat(2.5), mlrval.VOID, mlrval.ABSENT, - mlrval.ERROR, + mlrval.FromAnonymousError(), } n := len(mlrvals) diff --git a/internal/pkg/terminals/repl/dsl.go b/internal/pkg/terminals/repl/dsl.go index a499966fc..87d6500f0 100644 --- a/internal/pkg/terminals/repl/dsl.go +++ b/internal/pkg/terminals/repl/dsl.go @@ -89,6 +89,13 @@ func (repl *Repl) handleDSLStringAux( filterExpression := repl.runtimeState.FilterExpression if filterExpression.IsNull() { // nothing to print + } else if filterExpression.IsError() { + _, err := filterExpression.GetError() + if err == nil { // No supporting information + fmt.Printf("\"%s\"\n", filterExpression.String()) + } else { + fmt.Printf("%v\n", err) + } } else 
if filterExpression.IsStringOrVoid() { fmt.Printf("\"%s\"\n", filterExpression.String()) } else { diff --git a/internal/pkg/transformers/fraction.go b/internal/pkg/transformers/fraction.go index 275450449..9327ce208 100644 --- a/internal/pkg/transformers/fraction.go +++ b/internal/pkg/transformers/fraction.go @@ -2,6 +2,7 @@ package transformers import ( "container/list" + "errors" "fmt" "os" "strings" @@ -269,7 +270,11 @@ func (tr *TransformerFraction) Transform( outputValue = bifs.BIF_divide(numerator, denominator) outputValue = bifs.BIF_times(outputValue, tr.multiplier) } else { - outputValue = mlrval.ERROR + outputValue = mlrval.FromError( + errors.New( + "mlr fraction: division by zero", + ), + ) } outrec.PutCopy( diff --git a/internal/pkg/transformers/latin1_to_utf8.go b/internal/pkg/transformers/latin1_to_utf8.go index ade2b2b1c..0783b6279 100644 --- a/internal/pkg/transformers/latin1_to_utf8.go +++ b/internal/pkg/transformers/latin1_to_utf8.go @@ -106,7 +106,7 @@ func (tr *TransformerLatin1ToUTF8) Transform( if err == nil { pe.Value = mlrval.FromString(output) } else { - pe.Value = mlrval.ERROR + pe.Value = mlrval.FromError(err) } } } diff --git a/internal/pkg/transformers/utf8_to_latin1.go b/internal/pkg/transformers/utf8_to_latin1.go index bafed1e59..e3f09210d 100644 --- a/internal/pkg/transformers/utf8_to_latin1.go +++ b/internal/pkg/transformers/utf8_to_latin1.go @@ -106,7 +106,7 @@ func (tr *TransformerUTF8ToLatin1) Transform( if err == nil { pe.Value = mlrval.FromString(output) } else { - pe.Value = mlrval.ERROR + pe.Value = mlrval.FromError(err) } } } diff --git a/internal/pkg/transformers/utils/percentile_keeper.go b/internal/pkg/transformers/utils/percentile_keeper.go index d496abdd7..b89f8831b 100644 --- a/internal/pkg/transformers/utils/percentile_keeper.go +++ b/internal/pkg/transformers/utils/percentile_keeper.go @@ -149,7 +149,9 @@ func (keeper *PercentileKeeper) EmitNamed(name string) *mlrval.Mlrval { } } else { - return mlrval.ERROR + return 
mlrval.FromError( + fmt.Errorf(`stats1: unrecognized percentilename "%s"`, name), + ) } } diff --git a/man/manpage.txt b/man/manpage.txt index a91bfe4a2..de6ca2f57 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -582,6 +582,9 @@ MILLER(1) MILLER(1) -s {file name} Take command-line flags from file name. For more information please see https://miller.readthedocs.io/en/latest/scripting/. + -x If any record has an error value in it, report it and + stop the process. The default is to print the field + value as `(error)` and continue. 1mOUTPUT-COLORIZATION FLAGS0m Miller uses colors to highlight outputs. You can specify color preferences. diff --git a/man/mlr.1 b/man/mlr.1 index 95cbcaf7c..bda00b93a 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -701,6 +701,9 @@ These are flags which don't fit into any other category. -s {file name} Take command-line flags from file name. For more information please see https://miller.readthedocs.io/en/latest/scripting/. +-x If any record has an error value in it, report it and + stop the process. The default is to print the field + value as `(error)` and continue. 
.fi .if n \{\ .RE diff --git a/test/cases/dsl-gmt-date-time-functions/0002/expout b/test/cases/dsl-gmt-date-time-functions/0002/expout index 62c36ca09..f50b94f32 100644 --- a/test/cases/dsl-gmt-date-time-functions/0002/expout +++ b/test/cases/dsl-gmt-date-time-functions/0002/expout @@ -21,6 +21,6 @@ n,sec,gmt 20,2000000000.99900007,2033-05-18T03:33:20.9Z 21,2000000000.99999905,2033-05-18T03:33:20.9Z 22,2000000001.00000000,2033-05-18T03:33:21.0Z -23,, -24,x,x -25,123x,123x +23,,(error) +24,x,(error) +25,123x,(error) diff --git a/test/cases/dsl-gmt-date-time-functions/0003/expout b/test/cases/dsl-gmt-date-time-functions/0003/expout index 54b93e2c8..cd68f6448 100644 --- a/test/cases/dsl-gmt-date-time-functions/0003/expout +++ b/test/cases/dsl-gmt-date-time-functions/0003/expout @@ -21,6 +21,6 @@ n,sec,gmt 20,2000000000.99900007,2033-05-18T03:33:20.999Z 21,2000000000.99999905,2033-05-18T03:33:20.999Z 22,2000000001.00000000,2033-05-18T03:33:21.000Z -23,, -24,x,x -25,123x,123x +23,,(error) +24,x,(error) +25,123x,(error) diff --git a/test/cases/dsl-gmt-date-time-functions/0004/expout b/test/cases/dsl-gmt-date-time-functions/0004/expout index b48a06fe6..462f7f4c8 100644 --- a/test/cases/dsl-gmt-date-time-functions/0004/expout +++ b/test/cases/dsl-gmt-date-time-functions/0004/expout @@ -21,6 +21,6 @@ n,sec,gmt 20,2000000000.99900007,2033-05-18T03:33:20.999000Z 21,2000000000.99999905,2033-05-18T03:33:20.999999Z 22,2000000001.00000000,2033-05-18T03:33:21.000000Z -23,, -24,x,x -25,123x,123x +23,,(error) +24,x,(error) +25,123x,(error) diff --git a/test/cases/io-spec-tsv/0004/cmd b/test/cases/io-spec-tsv/0004/cmd index 735d4f3df..a49dff7b0 100644 --- a/test/cases/io-spec-tsv/0004/cmd +++ b/test/cases/io-spec-tsv/0004/cmd @@ -1 +1 @@ -mlr --itsv --ojson cat ${CASEDIR}/single-column-with-blank.json +mlr --itsv --ojson cat ${CASEDIR}/single-column-with-blank.tsv diff --git a/test/cases/io-spec-tsv/0004/experr b/test/cases/io-spec-tsv/0004/experr index 750e0a578..77ead78b2 100644 
--- a/test/cases/io-spec-tsv/0004/experr +++ b/test/cases/io-spec-tsv/0004/experr @@ -1,2 +1,2 @@ -mlr: mlr: TSV header/data length mismatch 1 != 0 at filename test/cases/io-spec-tsv/0004/single-column-with-blank.json line 4. +mlr: mlr: TSV header/data length mismatch 1 != 0 at filename test/cases/io-spec-tsv/0004/single-column-with-blank.tsv line 4. . diff --git a/test/cases/io-spec-tsv/0004/expout b/test/cases/io-spec-tsv/0004/expout index e69de29bb..f741e40d4 100644 --- a/test/cases/io-spec-tsv/0004/expout +++ b/test/cases/io-spec-tsv/0004/expout @@ -0,0 +1,8 @@ +[ +{ + "a": 1 +}, +{ + "a": 2 +} +] diff --git a/test/cases/io-spec-tsv/0004/single-column-with-blank.json b/test/cases/io-spec-tsv/0004/single-column-with-blank.tsv similarity index 100% rename from test/cases/io-spec-tsv/0004/single-column-with-blank.json rename to test/cases/io-spec-tsv/0004/single-column-with-blank.tsv diff --git a/xtodo.txt b/xtodo.txt new file mode 100644 index 000000000..e3dab2ea5 --- /dev/null +++ b/xtodo.txt @@ -0,0 +1,70 @@ +---------------------------------------------------------------- +* look at: mr -vvv test/cases/io-spec-tsv/0004/cmd + +---------------------------------------------------------------- + +func (keeper *PercentileKeeper) EmitNamed(name string) *mlrval.Mlrval { + if name == "min" { + return keeper.EmitNonInterpolated(0.0) + } else if name == "p25" { + return keeper.EmitNonInterpolated(25.0) + } else if name == "median" { + return keeper.EmitNonInterpolated(50.0) + } else if name == "p75" { + return keeper.EmitNonInterpolated(75.0) + } else if name == "max" { + return keeper.EmitNonInterpolated(100.0) + + } else if name == "iqr" { + p25 := keeper.EmitNonInterpolated(25.0) + p75 := keeper.EmitNonInterpolated(75.0) + if p25.IsNumeric() && p75.IsNumeric() { + return bifs.BIF_minus_binary(p75, p25) + } else { + return mlrval.VOID + } + + } else if name == "lof" { + p25 := keeper.EmitNonInterpolated(25.0) + iqr := keeper.EmitNamed("iqr") + if p25.IsNumeric() && 
iqr.IsNumeric() { + return bifs.BIF_minus_binary(p25, bifs.BIF_times(fenceOuterK, iqr)) + } else { + return mlrval.VOID + } + + } else if name == "lif" { + p25 := keeper.EmitNonInterpolated(25.0) + iqr := keeper.EmitNamed("iqr") + if p25.IsNumeric() && iqr.IsNumeric() { + return bifs.BIF_minus_binary(p25, bifs.BIF_times(fenceInnerK, iqr)) + } else { + return mlrval.VOID + } + + } else if name == "uif" { + p75 := keeper.EmitNonInterpolated(75.0) + iqr := keeper.EmitNamed("iqr") + if p75.IsNumeric() && iqr.IsNumeric() { + return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceInnerK, iqr)) + } else { + return mlrval.VOID + } + + } else if name == "uof" { + p75 := keeper.EmitNonInterpolated(75.0) + iqr := keeper.EmitNamed("iqr") + if p75.IsNumeric() && iqr.IsNumeric() { + return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceOuterK, iqr)) + } else { + return mlrval.VOID + } + + } else { + return mlrval.FromError( + errors.New( + "stats1: unrecognized + ), + ) + } +} From acc10cdc37716fd408e7e495fd82534e5ccce117 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 31 Aug 2023 09:00:29 -0400 Subject: [PATCH 054/456] miller 6.9.0 --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/reference-verbs.md | 4 ++-- internal/pkg/version/version.go | 2 +- man/manpage.txt | 4 ++-- man/mlr.1 | 6 +++--- miller.spec | 5 ++++- 7 files changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index c4a7b3856..0d22cfdab 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -50,7 +50,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.8.0-dev. + manpage documents mlr 6.9.0. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3645,5 +3645,5 @@ MILLER(1) MILLER(1) - 2023-08-30 MILLER(1) + 2023-08-31 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index de6ca2f57..ccf519ced 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.8.0-dev. + manpage documents mlr 6.9.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3624,4 +3624,4 @@ MILLER(1) MILLER(1) - 2023-08-30 MILLER(1) + 2023-08-31 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 2b7e9501f..89bbc2b71 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3807,8 +3807,8 @@ max zee zee 10000 0.999952670371898 0.9999648 iqr - - 5000 0.5015156280035271 0.5118661397595003 lof - - -12499 -1.2578765057782637 -1.2834617140383442 lif - - -4999 -0.5056030637729731 -0.5156625043990937 -uif - - 10001 0.9989438202376082 1.0199359148794074 -uof - - 17501 1.751217262242899 1.787735124518658 +uif - - 15001 1.5004594482411353 1.5318020546389077 +uof - - 22501 2.252732890246426 2.2996012642781585
diff --git a/internal/pkg/version/version.go b/internal/pkg/version/version.go
index 3072110fa..47daeaa6d 100644
--- a/internal/pkg/version/version.go
+++ b/internal/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.8.0-dev"
+var STRING string = "6.9.0"
diff --git a/man/manpage.txt b/man/manpage.txt
index de6ca2f57..ccf519ced 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -29,7 +29,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.8.0-dev.
+       manpage documents mlr 6.9.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -3624,4 +3624,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-30                         MILLER(1)
+                                  2023-08-31                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index bda00b93a..b8794a352 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-08-30
+.\"      Date: 2023-08-31
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-08-30" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-08-31" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the
 array, then Miller's natural data structure is the insertion-ordered hash map.
 This encompasses a variety of data formats, including but not limited to the
 familiar CSV, TSV, and JSON.  (Miller can handle positionally-indexed data as
-a special case.) This manpage documents mlr 6.8.0-dev.
+a special case.) This manpage documents mlr 6.9.0.
 .SH "EXAMPLES"
 .sp
 
diff --git a/miller.spec b/miller.spec
index b6c1c727e..ba30cd94d 100644
--- a/miller.spec
+++ b/miller.spec
@@ -1,6 +1,6 @@
 Summary: Name-indexed data processing tool
 Name: miller
-Version: 6.8.0
+Version: 6.9.0
 Release: 1%{?dist}
 License: BSD
 Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz
@@ -36,6 +36,9 @@ make install
 %{_mandir}/man1/mlr.1*
 
 %changelog
+* Thu Aug 31 2023 John Kerl  - 6.9.0-1
+- 6.9.0 release
+
 * Sun Jun 4 2023 John Kerl  - 6.8.0-1
 - 6.8.0 release
 

From 640dbdc7307adc8b21f3b0d0ae5a4ecb8698a15c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Thu, 31 Aug 2023 10:15:29 -0400
Subject: [PATCH 055/456] Remove `replacements` from `.goreleaser.yaml` (#1376)

---
 .goreleaser.yml | 10 ++++------
 go.mod          |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/.goreleaser.yml b/.goreleaser.yml
index 92c394fb0..1810aacc9 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -20,7 +20,7 @@ builds:
     goos:
       - linux
       - windows
-      - darwin
+      - macos
       - freebsd
       - aix
     goarch:
@@ -38,11 +38,11 @@ builds:
     ignore:
       - goos: linux
         goarch: ppc64
-      - goos: darwin
+      - goos: macos
         goarch: arm
-      - goos: darwin
+      - goos: macos
         goarch: ppc64le
-      - goos: darwin
+      - goos: macos
         goarch: s390x
       - goos: windows
         goarch: arm64
@@ -70,8 +70,6 @@ archives:
     format_overrides:
       - goos: windows
         format: zip
-    replacements:
-      darwin: macos
     name_template: '{{ .ProjectName }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}'
     files:
       - LICENSE.txt
diff --git a/go.mod b/go.mod
index 2373dea14..55aee8cfd 100644
--- a/go.mod
+++ b/go.mod
@@ -20,6 +20,7 @@ require (
 	github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
 	github.com/johnkerl/lumin v1.0.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
+	github.com/klauspost/compress v1.16.7
 	github.com/lestrrat-go/strftime v1.0.6
 	github.com/mattn/go-isatty v0.0.19
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
@@ -34,7 +35,6 @@ require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/felixge/fgprof v0.9.3 // indirect
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
-	github.com/klauspost/compress v1.16.7 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect

From 80bb82df6b7ab5e4ed3b3633574bdea60db05c1b Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Thu, 31 Aug 2023 10:51:07 -0400
Subject: [PATCH 056/456] `macos` -> `darwin` in `.goreleaser.yml` (#1377)

---
 .goreleaser.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.goreleaser.yml b/.goreleaser.yml
index 1810aacc9..479bf3701 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -20,7 +20,7 @@ builds:
     goos:
       - linux
       - windows
-      - macos
+      - darwin
       - freebsd
       - aix
     goarch:
@@ -38,11 +38,11 @@ builds:
     ignore:
       - goos: linux
         goarch: ppc64
-      - goos: macos
+      - goos: darwin
         goarch: arm
-      - goos: macos
+      - goos: darwin
         goarch: ppc64le
-      - goos: macos
+      - goos: darwin
         goarch: s390x
       - goos: windows
         goarch: arm64

From 717189b6b14bd7e7bbcb556a459bc44da9f9c321 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Sep 2023 17:21:23 -0400
Subject: [PATCH 057/456] Bump golang.org/x/text from 0.12.0 to 0.13.0 (#1382)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.12.0 to 0.13.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.12.0...v0.13.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 55aee8cfd..4d84faf00 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/stretchr/testify v1.8.4
 	golang.org/x/sys v0.11.0
 	golang.org/x/term v0.11.0
-	golang.org/x/text v0.12.0
+	golang.org/x/text v0.13.0
 )
 
 require (
diff --git a/go.sum b/go.sum
index 84593de57..8191b742e 100644
--- a/go.sum
+++ b/go.sum
@@ -44,8 +44,8 @@ golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
 golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
 golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
-golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc=
-golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From 9ab9c2f4e8d428d7f3f307349a3702cdc13eaf6e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Sep 2023 17:22:41 -0400
Subject: [PATCH 058/456] Bump golang.org/x/sys from 0.11.0 to 0.12.0 (#1381)

Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.11.0 to 0.12.0.
- [Commits](https://github.com/golang/sys/compare/v0.11.0...v0.12.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 4d84faf00..ef2c50f96 100644
--- a/go.mod
+++ b/go.mod
@@ -26,7 +26,7 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/sys v0.11.0
+	golang.org/x/sys v0.12.0
 	golang.org/x/term v0.11.0
 	golang.org/x/text v0.13.0
 )
diff --git a/go.sum b/go.sum
index 8191b742e..e9f1f893b 100644
--- a/go.sum
+++ b/go.sum
@@ -40,8 +40,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
-golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
 golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=

From 67bd565a537924cb4fc75ef364acb72b3011a61d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Sep 2023 20:29:51 -0400
Subject: [PATCH 059/456] Bump golang.org/x/term from 0.11.0 to 0.12.0 (#1380)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.11.0 to 0.12.0.
- [Commits](https://github.com/golang/term/compare/v0.11.0...v0.12.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index ef2c50f96..886c54ea0 100644
--- a/go.mod
+++ b/go.mod
@@ -27,7 +27,7 @@ require (
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
 	golang.org/x/sys v0.12.0
-	golang.org/x/term v0.11.0
+	golang.org/x/term v0.12.0
 	golang.org/x/text v0.13.0
 )
 
diff --git a/go.sum b/go.sum
index e9f1f893b..cf44078f7 100644
--- a/go.sum
+++ b/go.sum
@@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
 golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
-golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
+golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From 587b7ce313bb10526b3ddfaa68b28f0813cfb6fd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 5 Sep 2023 09:36:21 -0400
Subject: [PATCH 060/456] Bump actions/checkout from 3.6.0 to 4.0.0 (#1383)

Bumps [actions/checkout](https://github.com/actions/checkout) from 3.6.0 to 4.0.0.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/f43a0e5ff2bd294095638e18286ca9a3d1956744...3df4ab11eba7bda6032a0b82a6bb43b11571feac)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 63dc1f217..4039af88c 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
+      uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index e7f0285b0..e5b497011 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 9b2de2ff9..75879111b 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
+    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
 
     - name: Set up Go
       uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 31bcaa825..5a8693df6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
         with:
           fetch-depth: 0
 

From 2b77328b0fa34e0eb0c8e4897988833ee2a14b9e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 6 Sep 2023 09:45:47 -0400
Subject: [PATCH 061/456] Bump goreleaser/goreleaser-action from 4.4.0 to 4.6.0
 (#1385)

Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 4.4.0 to 4.6.0.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/3fa32b8bb5620a2c1afe798654bbad59f9da4906...5fdedb94abba051217030cc86d4523cf3f02243d)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 5a8693df6..b83666ee6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -40,7 +40,7 @@ jobs:
 
       # https://goreleaser.com/ci/actions/
       - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@3fa32b8bb5620a2c1afe798654bbad59f9da4906
+        uses: goreleaser/goreleaser-action@5fdedb94abba051217030cc86d4523cf3f02243d
         #if: startsWith(github.ref, 'refs/tags/v')
         with:
           version: latest

From 5519179122ca0839e0c5106c278f36917e8c5989 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 7 Sep 2023 09:09:23 -0400
Subject: [PATCH 062/456] Bump actions/upload-artifact from 3.1.2 to 3.1.3
 (#1387)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.2 to 3.1.3.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/0b7f8abb1508181956e8e162db84b466c27e18ce...a8a3f3ad30e3422c9c7b888a15615d19a852ae32)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 75879111b..415d462cd 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+    - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 93b7c8eac071ea2059b17653ebdfceb469c1ae1d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 8 Sep 2023 10:01:26 -0400
Subject: [PATCH 063/456] Bump actions/cache from 3.3.1 to 3.3.2 (#1390)

Bumps [actions/cache](https://github.com/actions/cache) from 3.3.1 to 3.3.2.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8...704facf57e6136b1bc63b828d79edcd491f0ee84)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b83666ee6..6db5dd970 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
+        uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84
         with:
           path: |
             ~/.cache/go-build

From 268a96d0025c66568ee2429fc1928c804801ee5c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 10 Sep 2023 17:15:13 -0400
Subject: [PATCH 064/456] Export library code in `pkg/` (#1391)

* Export library code in `pkg/`

* new doc page
---
 Makefile                                      |  22 +-
 README-dev.md                                 |  50 ++---
 README.md                                     |   2 +-
 cmd/experiments/colors/main.go                |   2 +-
 cmd/mlr/main.go                               |   2 +-
 cmd/scan/main.go                              |   2 +-
 cmd/sizes/main.go                             |   2 +-
 docs/mkdocs.yml                               |   1 +
 docs/src/build.md                             |   2 +-
 docs/src/build.md.in                          |   2 +-
 docs/src/how-to-release.md                    |   4 +-
 docs/src/how-to-release.md.in                 |   4 +-
 docs/src/manpage.md                           |   2 +-
 docs/src/manpage.txt                          |   2 +-
 docs/src/miller-as-library.md                 | 202 ++++++++++++++++++
 docs/src/miller-as-library.md.in              |  54 +++++
 docs/src/miller-as-library/main1.go           |  15 ++
 docs/src/miller-as-library/main2.go           | 111 ++++++++++
 internal/pkg/cli/README.md                    |   4 -
 man/manpage.txt                               |   2 +-
 man/mlr.1                                     |   4 +-
 {internal/pkg => pkg}/README.md               |   0
 {internal/pkg => pkg}/auxents/auxents.go      |   0
 {internal/pkg => pkg}/auxents/doc.go          |   0
 {internal/pkg => pkg}/auxents/hex.go          |   0
 {internal/pkg => pkg}/auxents/lecat.go        |   0
 {internal/pkg => pkg}/auxents/termcvt.go      |   0
 {internal/pkg => pkg}/auxents/unhex.go        |   0
 {internal/pkg => pkg}/bifs/arithmetic.go      |   4 +-
 {internal/pkg => pkg}/bifs/arithmetic_test.go |   2 +-
 {internal/pkg => pkg}/bifs/base.go            |   6 +-
 {internal/pkg => pkg}/bifs/bits.go            |   2 +-
 {internal/pkg => pkg}/bifs/bits_test.go       |   2 +-
 {internal/pkg => pkg}/bifs/booleans.go        |   2 +-
 {internal/pkg => pkg}/bifs/cmp.go             |   4 +-
 {internal/pkg => pkg}/bifs/collections.go     |   4 +-
 .../pkg => pkg}/bifs/collections_test.go      |   2 +-
 {internal/pkg => pkg}/bifs/datetime.go        |   6 +-
 {internal/pkg => pkg}/bifs/hashing.go         |   2 +-
 {internal/pkg => pkg}/bifs/hashing_test.go    |   2 +-
 {internal/pkg => pkg}/bifs/mathlib.go         |   4 +-
 {internal/pkg => pkg}/bifs/percentiles.go     |   2 +-
 {internal/pkg => pkg}/bifs/random.go          |   4 +-
 {internal/pkg => pkg}/bifs/regex.go           |   4 +-
 {internal/pkg => pkg}/bifs/relative_time.go   |   2 +-
 {internal/pkg => pkg}/bifs/stats.go           |   4 +-
 {internal/pkg => pkg}/bifs/stats_test.go      |   2 +-
 {internal/pkg => pkg}/bifs/strings.go         |   4 +-
 {internal/pkg => pkg}/bifs/system.go          |   6 +-
 {internal/pkg => pkg}/bifs/types.go           |   6 +-
 pkg/cli/README.md                             |   4 +
 {internal/pkg => pkg}/cli/doc.go              |   0
 {internal/pkg => pkg}/cli/flag_types.go       |   6 +-
 .../pkg => pkg}/cli/flatten_unflatten.go      |   0
 {internal/pkg => pkg}/cli/mlrcli_util.go      |   0
 {internal/pkg => pkg}/cli/option_parse.go     |   6 +-
 {internal/pkg => pkg}/cli/option_types.go     |   2 +-
 {internal/pkg => pkg}/cli/separators.go       |   0
 {internal/pkg => pkg}/cli/verb_utils.go       |   2 +-
 {internal/pkg => pkg}/climain/README.md       |   4 +-
 {internal/pkg => pkg}/climain/doc.go          |   0
 {internal/pkg => pkg}/climain/mlrcli_mlrrc.go |   2 +-
 {internal/pkg => pkg}/climain/mlrcli_parse.go |  14 +-
 .../pkg => pkg}/climain/mlrcli_shebang.go     |   2 +-
 {internal/pkg => pkg}/colorizer/README.md     |   2 +-
 {internal/pkg => pkg}/colorizer/colorizer.go  |   0
 {internal/pkg => pkg}/colorizer/doc.go        |   0
 {internal/pkg => pkg}/dsl/README.md           |   2 +-
 {internal/pkg => pkg}/dsl/ast_build.go        |   4 +-
 {internal/pkg => pkg}/dsl/ast_print.go        |   0
 {internal/pkg => pkg}/dsl/ast_types.go        |   2 +-
 {internal/pkg => pkg}/dsl/cst/README.md       |   2 +-
 {internal/pkg => pkg}/dsl/cst/assignments.go  |   6 +-
 {internal/pkg => pkg}/dsl/cst/block_exit.go   |   6 +-
 {internal/pkg => pkg}/dsl/cst/blocks.go       |   6 +-
 .../dsl/cst/builtin_function_manager.go       |   6 +-
 .../pkg => pkg}/dsl/cst/builtin_functions.go  |  10 +-
 {internal/pkg => pkg}/dsl/cst/collections.go  |  10 +-
 {internal/pkg => pkg}/dsl/cst/cond.go         |  10 +-
 {internal/pkg => pkg}/dsl/cst/doc.go          |   0
 {internal/pkg => pkg}/dsl/cst/dump.go         |  10 +-
 {internal/pkg => pkg}/dsl/cst/emit1.go        |   8 +-
 {internal/pkg => pkg}/dsl/cst/emit_emitp.go   |  14 +-
 {internal/pkg => pkg}/dsl/cst/emitf.go        |  12 +-
 {internal/pkg => pkg}/dsl/cst/env.go          |   8 +-
 {internal/pkg => pkg}/dsl/cst/evaluable.go    |   8 +-
 {internal/pkg => pkg}/dsl/cst/filter.go       |   6 +-
 {internal/pkg => pkg}/dsl/cst/for.go          |  10 +-
 {internal/pkg => pkg}/dsl/cst/functions.go    |   4 +-
 {internal/pkg => pkg}/dsl/cst/hofs.go         |   8 +-
 {internal/pkg => pkg}/dsl/cst/if.go           |  10 +-
 .../pkg => pkg}/dsl/cst/keyword_usage.go      |   4 +-
 {internal/pkg => pkg}/dsl/cst/leaves.go       |   8 +-
 {internal/pkg => pkg}/dsl/cst/lvalues.go      |   8 +-
 {internal/pkg => pkg}/dsl/cst/print.go        |  10 +-
 {internal/pkg => pkg}/dsl/cst/root.go         |  14 +-
 {internal/pkg => pkg}/dsl/cst/signature.go    |   2 +-
 {internal/pkg => pkg}/dsl/cst/statements.go   |   2 +-
 {internal/pkg => pkg}/dsl/cst/subroutines.go  |   4 +-
 {internal/pkg => pkg}/dsl/cst/tee.go          |  12 +-
 {internal/pkg => pkg}/dsl/cst/types.go        |   8 +-
 {internal/pkg => pkg}/dsl/cst/udf.go          |  10 +-
 {internal/pkg => pkg}/dsl/cst/uds.go          |  10 +-
 {internal/pkg => pkg}/dsl/cst/validate.go     |   4 +-
 {internal/pkg => pkg}/dsl/cst/warn.go         |   4 +-
 {internal/pkg => pkg}/dsl/cst/while.go        |   8 +-
 {internal/pkg => pkg}/dsl/doc.go              |   0
 {internal/pkg => pkg}/dsl/token.go            |   2 +-
 {internal/pkg => pkg}/entrypoint/README.md    |   0
 {internal/pkg => pkg}/entrypoint/doc.go       |   0
 .../pkg => pkg}/entrypoint/entrypoint.go      |  14 +-
 {internal/pkg => pkg}/go-csv/LICENSE          |   0
 {internal/pkg => pkg}/go-csv/README.md        |   0
 {internal/pkg => pkg}/go-csv/csv_reader.go    |   0
 {internal/pkg => pkg}/go-csv/csv_writer.go    |   0
 {internal/pkg => pkg}/input/README.md         |   0
 {internal/pkg => pkg}/input/doc.go            |   0
 .../pkg => pkg}/input/pseudo_reader_gen.go    |   8 +-
 {internal/pkg => pkg}/input/record_reader.go  |   6 +-
 .../input/record_reader_benchmark_test.go     |   4 +-
 .../pkg => pkg}/input/record_reader_csv.go    |  10 +-
 .../input/record_reader_csvlite.go            |   8 +-
 .../input/record_reader_dkvp_nidx.go          |   8 +-
 .../input/record_reader_dkvp_test.go          |   2 +-
 .../input/record_reader_factory.go            |   2 +-
 .../pkg => pkg}/input/record_reader_json.go   |   8 +-
 .../pkg => pkg}/input/record_reader_tsv.go    |   8 +-
 .../pkg => pkg}/input/record_reader_xtab.go   |   8 +-
 {internal/pkg => pkg}/lib/README.md           |   0
 {internal/pkg => pkg}/lib/doc.go              |   0
 {internal/pkg => pkg}/lib/docurl.go           |   0
 {internal/pkg => pkg}/lib/file_readers.go     |   2 +-
 {internal/pkg => pkg}/lib/getoptify.go        |   0
 {internal/pkg => pkg}/lib/halfpipe.go         |   2 +-
 {internal/pkg => pkg}/lib/latin1.go           |   0
 {internal/pkg => pkg}/lib/latin1_test.go      |   0
 {internal/pkg => pkg}/lib/logger.go           |   0
 {internal/pkg => pkg}/lib/mlrmath.go          |   0
 {internal/pkg => pkg}/lib/ordered_map.go      |   0
 {internal/pkg => pkg}/lib/paragraph.go        |   0
 {internal/pkg => pkg}/lib/rand.go             |   0
 {internal/pkg => pkg}/lib/readfiles.go        |   2 +-
 {internal/pkg => pkg}/lib/regex.go            |   0
 {internal/pkg => pkg}/lib/regex_test.go       |   0
 {internal/pkg => pkg}/lib/stats.go            |   0
 {internal/pkg => pkg}/lib/time.go             |   0
 {internal/pkg => pkg}/lib/time_test.go        |   0
 {internal/pkg => pkg}/lib/tsv_codec.go        |   0
 {internal/pkg => pkg}/lib/tsv_codec_test.go   |   0
 {internal/pkg => pkg}/lib/unbackslash.go      |   0
 {internal/pkg => pkg}/lib/unbackslash_test.go |   0
 {internal/pkg => pkg}/lib/util.go             |   0
 {internal/pkg => pkg}/mlrval/mlrmap.go        |   0
 .../pkg => pkg}/mlrval/mlrmap_accessors.go    |   2 +-
 .../mlrval/mlrmap_accessors_test.go           |   0
 .../mlrval/mlrmap_flatten_unflatten.go        |   2 +-
 {internal/pkg => pkg}/mlrval/mlrmap_json.go   |   4 +-
 .../pkg => pkg}/mlrval/mlrmap_new_test.go     |   0
 {internal/pkg => pkg}/mlrval/mlrmap_print.go  |   0
 .../pkg => pkg}/mlrval/mlrval_accessors.go    |   2 +-
 .../mlrval/mlrval_benchmark_test.go           |   2 +-
 {internal/pkg => pkg}/mlrval/mlrval_cmp.go    |   2 +-
 .../pkg => pkg}/mlrval/mlrval_cmp_test.go     |   0
 .../pkg => pkg}/mlrval/mlrval_collections.go  |   2 +-
 .../pkg => pkg}/mlrval/mlrval_constants.go    |   0
 {internal/pkg => pkg}/mlrval/mlrval_copy.go   |   0
 {internal/pkg => pkg}/mlrval/mlrval_format.go |   0
 .../pkg => pkg}/mlrval/mlrval_format_test.go  |   0
 {internal/pkg => pkg}/mlrval/mlrval_get.go    |   2 +-
 .../pkg => pkg}/mlrval/mlrval_get_test.go     |   0
 {internal/pkg => pkg}/mlrval/mlrval_infer.go  |   2 +-
 .../pkg => pkg}/mlrval/mlrval_infer_test.go   |   0
 {internal/pkg => pkg}/mlrval/mlrval_is.go     |   2 +-
 .../pkg => pkg}/mlrval/mlrval_is_test.go      |   0
 {internal/pkg => pkg}/mlrval/mlrval_json.go   |   4 +-
 {internal/pkg => pkg}/mlrval/mlrval_new.go    |   4 +-
 .../pkg => pkg}/mlrval/mlrval_new_test.go     |   0
 {internal/pkg => pkg}/mlrval/mlrval_output.go |   0
 .../pkg => pkg}/mlrval/mlrval_output_test.go  |   0
 {internal/pkg => pkg}/mlrval/mlrval_sort.go   |   0
 .../pkg => pkg}/mlrval/mlrval_sort_test.go    |   0
 {internal/pkg => pkg}/mlrval/mlrval_type.go   |   0
 .../pkg => pkg}/mlrval/mlrval_type_test.go    |   0
 {internal/pkg => pkg}/output/README.md        |   0
 .../pkg => pkg}/output/channel_writer.go      |   4 +-
 {internal/pkg => pkg}/output/doc.go           |   0
 .../output/file_output_handlers.go            |   6 +-
 {internal/pkg => pkg}/output/record_writer.go |   2 +-
 .../pkg => pkg}/output/record_writer_csv.go   |   6 +-
 .../output/record_writer_csv_colorizer.go     |   2 +-
 .../output/record_writer_csvlite.go           |   6 +-
 .../pkg => pkg}/output/record_writer_dkvp.go  |   6 +-
 .../output/record_writer_factory.go           |   2 +-
 .../pkg => pkg}/output/record_writer_json.go  |   4 +-
 .../output/record_writer_markdown.go          |   6 +-
 .../pkg => pkg}/output/record_writer_nidx.go  |   4 +-
 .../output/record_writer_pprint.go            |   6 +-
 .../pkg => pkg}/output/record_writer_tsv.go   |   8 +-
 .../pkg => pkg}/output/record_writer_xtab.go  |   6 +-
 {internal/pkg => pkg}/parsing/README.md       |   4 +-
 {internal/pkg => pkg}/parsing/doc.go          |   0
 .../pkg => pkg}/parsing/errors.go.template    |   6 +-
 {internal/pkg => pkg}/parsing/errors/doc.go   |   2 +-
 .../pkg => pkg}/parsing/errors/errors.go      |   6 +-
 {internal/pkg => pkg}/parsing/lexer/acttab.go |   2 +-
 {internal/pkg => pkg}/parsing/lexer/doc.go    |   2 +-
 {internal/pkg => pkg}/parsing/lexer/lexer.go  |   2 +-
 .../parsing/lexer/transitiontable.go          |   0
 {internal/pkg => pkg}/parsing/mlr.bnf         |   4 +-
 .../pkg => pkg}/parsing/parser/action.go      |   0
 .../pkg => pkg}/parsing/parser/actiontable.go |   0
 .../pkg => pkg}/parsing/parser/context.go     |   0
 {internal/pkg => pkg}/parsing/parser/doc.go   |   2 +-
 .../pkg => pkg}/parsing/parser/gototable.go   |   0
 .../pkg => pkg}/parsing/parser/parser.go      |   4 +-
 .../parsing/parser/productionstable.go        |   2 +-
 .../pkg => pkg}/parsing/token/context.go      |   0
 {internal/pkg => pkg}/parsing/token/doc.go    |   2 +-
 {internal/pkg => pkg}/parsing/token/token.go  |   0
 {internal/pkg => pkg}/parsing/util/doc.go     |   2 +-
 {internal/pkg => pkg}/parsing/util/litconv.go |   0
 {internal/pkg => pkg}/parsing/util/rune.go    |   0
 .../pkg => pkg}/pbnjay-strptime/README.md     |   0
 .../pkg => pkg}/pbnjay-strptime/strptime.go   |   0
 .../pbnjay-strptime/strptime_test.go          |   0
 {internal/pkg => pkg}/platform/README.md      |   0
 .../pkg => pkg}/platform/diff_notwindows.go   |   0
 .../pkg => pkg}/platform/diff_windows.go      |   0
 {internal/pkg => pkg}/platform/doc.go         |   0
 .../platform/getargs_notwindows.go            |   0
 .../pkg => pkg}/platform/getargs_windows.go   |   0
 .../platform/shellrun_notwindows.go           |   0
 .../pkg => pkg}/platform/shellrun_windows.go  |   0
 .../platform/terminal_notwindows.go           |   0
 .../pkg => pkg}/platform/terminal_windows.go  |   0
 {internal/pkg => pkg}/runtime/README.md       |   0
 {internal/pkg => pkg}/runtime/doc.go          |   0
 {internal/pkg => pkg}/runtime/stack.go        |   6 +-
 {internal/pkg => pkg}/runtime/state.go        |   8 +-
 {internal/pkg => pkg}/scan/digits.go          |   0
 {internal/pkg => pkg}/scan/digits_test.go     |   0
 {internal/pkg => pkg}/scan/doc.go             |   0
 {internal/pkg => pkg}/scan/find.go            |   0
 .../pkg => pkg}/scan/find_benchmark_test.go   |   2 +-
 {internal/pkg => pkg}/scan/find_test.go       |   0
 {internal/pkg => pkg}/scan/type.go            |   0
 {internal/pkg => pkg}/scan/type_test.go       |   0
 {internal/pkg => pkg}/stream/README.md        |   0
 {internal/pkg => pkg}/stream/doc.go           |   0
 {internal/pkg => pkg}/stream/stream.go        |  10 +-
 {internal/pkg => pkg}/terminals/doc.go        |   0
 {internal/pkg => pkg}/terminals/help/doc.go   |   0
 {internal/pkg => pkg}/terminals/help/entry.go |  14 +-
 .../pkg => pkg}/terminals/regtest/README.md   |   0
 .../pkg => pkg}/terminals/regtest/doc.go      |   0
 .../pkg => pkg}/terminals/regtest/entry.go    |   0
 .../pkg => pkg}/terminals/regtest/invoker.go  |   4 +-
 .../terminals/regtest/regtester.go            |   4 +-
 .../pkg => pkg}/terminals/repl/README.md      |   0
 {internal/pkg => pkg}/terminals/repl/doc.go   |   0
 {internal/pkg => pkg}/terminals/repl/dsl.go   |   6 +-
 {internal/pkg => pkg}/terminals/repl/entry.go |   2 +-
 .../pkg => pkg}/terminals/repl/prompt.go      |   6 +-
 .../pkg => pkg}/terminals/repl/session.go     |  16 +-
 {internal/pkg => pkg}/terminals/repl/types.go |  10 +-
 {internal/pkg => pkg}/terminals/repl/verbs.go |  12 +-
 {internal/pkg => pkg}/terminals/terminals.go  |   8 +-
 {internal/pkg => pkg}/transformers/README.md  |   0
 .../transformers/aaa_chain_transformer.go     |   4 +-
 .../transformers/aaa_record_transformer.go    |   4 +-
 .../transformers/aaa_transformer_table.go     |   4 +-
 {internal/pkg => pkg}/transformers/altkv.go   |   6 +-
 {internal/pkg => pkg}/transformers/bar.go     |   6 +-
 .../pkg => pkg}/transformers/bootstrap.go     |   6 +-
 {internal/pkg => pkg}/transformers/case.go    |   8 +-
 {internal/pkg => pkg}/transformers/cat.go     |   6 +-
 {internal/pkg => pkg}/transformers/check.go   |   4 +-
 .../transformers/clean_whitespace.go          |   8 +-
 {internal/pkg => pkg}/transformers/count.go   |   8 +-
 .../pkg => pkg}/transformers/count_similar.go |   8 +-
 {internal/pkg => pkg}/transformers/cut.go     |   8 +-
 .../pkg => pkg}/transformers/decimate.go      |   4 +-
 {internal/pkg => pkg}/transformers/doc.go     |   0
 .../pkg => pkg}/transformers/fill_down.go     |   6 +-
 .../pkg => pkg}/transformers/fill_empty.go    |   6 +-
 {internal/pkg => pkg}/transformers/flatten.go |   6 +-
 .../pkg => pkg}/transformers/format_values.go |   6 +-
 .../pkg => pkg}/transformers/fraction.go      |  10 +-
 {internal/pkg => pkg}/transformers/gap.go     |   6 +-
 {internal/pkg => pkg}/transformers/grep.go    |   4 +-
 .../pkg => pkg}/transformers/group_by.go      |   6 +-
 .../pkg => pkg}/transformers/group_like.go    |   6 +-
 {internal/pkg => pkg}/transformers/gsub.go    |   8 +-
 .../pkg => pkg}/transformers/having_fields.go |   6 +-
 {internal/pkg => pkg}/transformers/head.go    |   4 +-
 .../pkg => pkg}/transformers/histogram.go     |   8 +-
 {internal/pkg => pkg}/transformers/join.go    |  12 +-
 .../pkg => pkg}/transformers/json_parse.go    |   6 +-
 .../transformers/json_stringify.go            |   8 +-
 {internal/pkg => pkg}/transformers/label.go   |   6 +-
 .../transformers/latin1_to_utf8.go            |   8 +-
 .../pkg => pkg}/transformers/merge_fields.go  |   8 +-
 .../transformers/most_or_least_frequent.go    |   8 +-
 {internal/pkg => pkg}/transformers/nest.go    |   8 +-
 {internal/pkg => pkg}/transformers/nothing.go |   4 +-
 .../pkg => pkg}/transformers/put_or_filter.go |  14 +-
 .../pkg => pkg}/transformers/regularize.go    |   8 +-
 .../transformers/remove_empty_columns.go      |   6 +-
 {internal/pkg => pkg}/transformers/rename.go  |   6 +-
 {internal/pkg => pkg}/transformers/reorder.go |   8 +-
 {internal/pkg => pkg}/transformers/repeat.go  |   4 +-
 {internal/pkg => pkg}/transformers/reshape.go |   8 +-
 {internal/pkg => pkg}/transformers/sample.go  |   6 +-
 {internal/pkg => pkg}/transformers/sec2gmt.go |   8 +-
 .../pkg => pkg}/transformers/sec2gmtdate.go   |   8 +-
 {internal/pkg => pkg}/transformers/seqgen.go  |   8 +-
 {internal/pkg => pkg}/transformers/shuffle.go |   6 +-
 .../transformers/skip_trivial_records.go      |   4 +-
 {internal/pkg => pkg}/transformers/sort.go    |   8 +-
 .../transformers/sort_within_records.go       |   4 +-
 {internal/pkg => pkg}/transformers/split.go   |   8 +-
 {internal/pkg => pkg}/transformers/ssub.go    |   8 +-
 {internal/pkg => pkg}/transformers/stats1.go  |  10 +-
 {internal/pkg => pkg}/transformers/stats2.go  |  10 +-
 {internal/pkg => pkg}/transformers/step.go    |  12 +-
 {internal/pkg => pkg}/transformers/sub.go     |   8 +-
 {internal/pkg => pkg}/transformers/summary.go |  10 +-
 {internal/pkg => pkg}/transformers/tac.go     |   4 +-
 {internal/pkg => pkg}/transformers/tail.go    |   6 +-
 {internal/pkg => pkg}/transformers/tee.go     |   6 +-
 .../pkg => pkg}/transformers/template.go      |   8 +-
 {internal/pkg => pkg}/transformers/top.go     |  10 +-
 .../pkg => pkg}/transformers/unflatten.go     |   6 +-
 {internal/pkg => pkg}/transformers/uniq.go    |   8 +-
 {internal/pkg => pkg}/transformers/unspace.go |   6 +-
 .../pkg => pkg}/transformers/unsparsify.go    |   8 +-
 .../transformers/utf8_to_latin1.go            |   8 +-
 .../pkg => pkg}/transformers/utils/README.md  |   0
 .../pkg => pkg}/transformers/utils/doc.go     |   0
 .../transformers/utils/join_bucket.go         |   2 +-
 .../transformers/utils/join_bucket_keeper.go  |  10 +-
 .../transformers/utils/percentile_keeper.go   |   4 +-
 .../transformers/utils/stats1_accumulators.go |   6 +-
 .../transformers/utils/stats2_accumulators.go |   4 +-
 .../transformers/utils/top_keeper.go          |   4 +-
 .../transformers/utils/window_keeper.go       |   2 +-
 .../transformers/utils/window_keeper_test.go  |   0
 {internal/pkg => pkg}/types/README.md         |   0
 {internal/pkg => pkg}/types/context.go        |   2 +-
 {internal/pkg => pkg}/types/doc.go            |   0
 .../pkg => pkg}/types/indexed-lvalues.md      |   0
 {internal/pkg => pkg}/types/mlrval_typing.go  |   2 +-
 {internal/pkg => pkg}/version/doc.go          |   0
 {internal/pkg => pkg}/version/version.go      |   0
 regression_test.go                            |   4 +-
 scripts/mcountlines                           |   8 +-
 todo.txt                                      |   2 +-
 tools/build-dsl                               |  22 +-
 358 files changed, 1076 insertions(+), 693 deletions(-)
 create mode 100644 docs/src/miller-as-library.md
 create mode 100644 docs/src/miller-as-library.md.in
 create mode 100644 docs/src/miller-as-library/main1.go
 create mode 100644 docs/src/miller-as-library/main2.go
 delete mode 100644 internal/pkg/cli/README.md
 rename {internal/pkg => pkg}/README.md (100%)
 rename {internal/pkg => pkg}/auxents/auxents.go (100%)
 rename {internal/pkg => pkg}/auxents/doc.go (100%)
 rename {internal/pkg => pkg}/auxents/hex.go (100%)
 rename {internal/pkg => pkg}/auxents/lecat.go (100%)
 rename {internal/pkg => pkg}/auxents/termcvt.go (100%)
 rename {internal/pkg => pkg}/auxents/unhex.go (100%)
 rename {internal/pkg => pkg}/bifs/arithmetic.go (99%)
 rename {internal/pkg => pkg}/bifs/arithmetic_test.go (98%)
 rename {internal/pkg => pkg}/bifs/base.go (98%)
 rename {internal/pkg => pkg}/bifs/bits.go (99%)
 rename {internal/pkg => pkg}/bifs/bits_test.go (87%)
 rename {internal/pkg => pkg}/bifs/booleans.go (95%)
 rename {internal/pkg => pkg}/bifs/cmp.go (99%)
 rename {internal/pkg => pkg}/bifs/collections.go (99%)
 rename {internal/pkg => pkg}/bifs/collections_test.go (98%)
 rename {internal/pkg => pkg}/bifs/datetime.go (99%)
 rename {internal/pkg => pkg}/bifs/hashing.go (95%)
 rename {internal/pkg => pkg}/bifs/hashing_test.go (91%)
 rename {internal/pkg => pkg}/bifs/mathlib.go (99%)
 rename {internal/pkg => pkg}/bifs/percentiles.go (99%)
 rename {internal/pkg => pkg}/bifs/random.go (94%)
 rename {internal/pkg => pkg}/bifs/regex.go (98%)
 rename {internal/pkg => pkg}/bifs/relative_time.go (99%)
 rename {internal/pkg => pkg}/bifs/stats.go (99%)
 rename {internal/pkg => pkg}/bifs/stats_test.go (99%)
 rename {internal/pkg => pkg}/bifs/strings.go (99%)
 rename {internal/pkg => pkg}/bifs/system.go (93%)
 rename {internal/pkg => pkg}/bifs/types.go (98%)
 create mode 100644 pkg/cli/README.md
 rename {internal/pkg => pkg}/cli/doc.go (100%)
 rename {internal/pkg => pkg}/cli/flag_types.go (99%)
 rename {internal/pkg => pkg}/cli/flatten_unflatten.go (100%)
 rename {internal/pkg => pkg}/cli/mlrcli_util.go (100%)
 rename {internal/pkg => pkg}/cli/option_parse.go (99%)
 rename {internal/pkg => pkg}/cli/option_types.go (99%)
 rename {internal/pkg => pkg}/cli/separators.go (100%)
 rename {internal/pkg => pkg}/cli/verb_utils.go (98%)
 rename {internal/pkg => pkg}/climain/README.md (51%)
 rename {internal/pkg => pkg}/climain/doc.go (100%)
 rename {internal/pkg => pkg}/climain/mlrcli_mlrrc.go (98%)
 rename {internal/pkg => pkg}/climain/mlrcli_parse.go (97%)
 rename {internal/pkg => pkg}/climain/mlrcli_shebang.go (98%)
 rename {internal/pkg => pkg}/colorizer/README.md (58%)
 rename {internal/pkg => pkg}/colorizer/colorizer.go (100%)
 rename {internal/pkg => pkg}/colorizer/doc.go (100%)
 rename {internal/pkg => pkg}/dsl/README.md (94%)
 rename {internal/pkg => pkg}/dsl/ast_build.go (98%)
 rename {internal/pkg => pkg}/dsl/ast_print.go (100%)
 rename {internal/pkg => pkg}/dsl/ast_types.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/README.md (93%)
 rename {internal/pkg => pkg}/dsl/cst/assignments.go (94%)
 rename {internal/pkg => pkg}/dsl/cst/block_exit.go (94%)
 rename {internal/pkg => pkg}/dsl/cst/blocks.go (96%)
 rename {internal/pkg => pkg}/dsl/cst/builtin_function_manager.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/builtin_functions.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/collections.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/cond.go (88%)
 rename {internal/pkg => pkg}/dsl/cst/doc.go (100%)
 rename {internal/pkg => pkg}/dsl/cst/dump.go (96%)
 rename {internal/pkg => pkg}/dsl/cst/emit1.go (92%)
 rename {internal/pkg => pkg}/dsl/cst/emit_emitp.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/emitf.go (95%)
 rename {internal/pkg => pkg}/dsl/cst/env.go (87%)
 rename {internal/pkg => pkg}/dsl/cst/evaluable.go (96%)
 rename {internal/pkg => pkg}/dsl/cst/filter.go (95%)
 rename {internal/pkg => pkg}/dsl/cst/for.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/functions.go (97%)
 rename {internal/pkg => pkg}/dsl/cst/hofs.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/if.go (94%)
 rename {internal/pkg => pkg}/dsl/cst/keyword_usage.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/leaves.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/lvalues.go (99%)
 rename {internal/pkg => pkg}/dsl/cst/print.go (97%)
 rename {internal/pkg => pkg}/dsl/cst/root.go (97%)
 rename {internal/pkg => pkg}/dsl/cst/signature.go (95%)
 rename {internal/pkg => pkg}/dsl/cst/statements.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/subroutines.go (96%)
 rename {internal/pkg => pkg}/dsl/cst/tee.go (94%)
 rename {internal/pkg => pkg}/dsl/cst/types.go (95%)
 rename {internal/pkg => pkg}/dsl/cst/udf.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/uds.go (97%)
 rename {internal/pkg => pkg}/dsl/cst/validate.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/warn.go (98%)
 rename {internal/pkg => pkg}/dsl/cst/while.go (95%)
 rename {internal/pkg => pkg}/dsl/doc.go (100%)
 rename {internal/pkg => pkg}/dsl/token.go (87%)
 rename {internal/pkg => pkg}/entrypoint/README.md (100%)
 rename {internal/pkg => pkg}/entrypoint/doc.go (100%)
 rename {internal/pkg => pkg}/entrypoint/entrypoint.go (94%)
 rename {internal/pkg => pkg}/go-csv/LICENSE (100%)
 rename {internal/pkg => pkg}/go-csv/README.md (100%)
 rename {internal/pkg => pkg}/go-csv/csv_reader.go (100%)
 rename {internal/pkg => pkg}/go-csv/csv_writer.go (100%)
 rename {internal/pkg => pkg}/input/README.md (100%)
 rename {internal/pkg => pkg}/input/doc.go (100%)
 rename {internal/pkg => pkg}/input/pseudo_reader_gen.go (96%)
 rename {internal/pkg => pkg}/input/record_reader.go (96%)
 rename {internal/pkg => pkg}/input/record_reader_benchmark_test.go (92%)
 rename {internal/pkg => pkg}/input/record_reader_csv.go (97%)
 rename {internal/pkg => pkg}/input/record_reader_csvlite.go (98%)
 rename {internal/pkg => pkg}/input/record_reader_dkvp_nidx.go (96%)
 rename {internal/pkg => pkg}/input/record_reader_dkvp_test.go (97%)
 rename {internal/pkg => pkg}/input/record_reader_factory.go (95%)
 rename {internal/pkg => pkg}/input/record_reader_json.go (97%)
 rename {internal/pkg => pkg}/input/record_reader_tsv.go (98%)
 rename {internal/pkg => pkg}/input/record_reader_xtab.go (98%)
 rename {internal/pkg => pkg}/lib/README.md (100%)
 rename {internal/pkg => pkg}/lib/doc.go (100%)
 rename {internal/pkg => pkg}/lib/docurl.go (100%)
 rename {internal/pkg => pkg}/lib/file_readers.go (99%)
 rename {internal/pkg => pkg}/lib/getoptify.go (100%)
 rename {internal/pkg => pkg}/lib/halfpipe.go (97%)
 rename {internal/pkg => pkg}/lib/latin1.go (100%)
 rename {internal/pkg => pkg}/lib/latin1_test.go (100%)
 rename {internal/pkg => pkg}/lib/logger.go (100%)
 rename {internal/pkg => pkg}/lib/mlrmath.go (100%)
 rename {internal/pkg => pkg}/lib/ordered_map.go (100%)
 rename {internal/pkg => pkg}/lib/paragraph.go (100%)
 rename {internal/pkg => pkg}/lib/rand.go (100%)
 rename {internal/pkg => pkg}/lib/readfiles.go (97%)
 rename {internal/pkg => pkg}/lib/regex.go (100%)
 rename {internal/pkg => pkg}/lib/regex_test.go (100%)
 rename {internal/pkg => pkg}/lib/stats.go (100%)
 rename {internal/pkg => pkg}/lib/time.go (100%)
 rename {internal/pkg => pkg}/lib/time_test.go (100%)
 rename {internal/pkg => pkg}/lib/tsv_codec.go (100%)
 rename {internal/pkg => pkg}/lib/tsv_codec_test.go (100%)
 rename {internal/pkg => pkg}/lib/unbackslash.go (100%)
 rename {internal/pkg => pkg}/lib/unbackslash_test.go (100%)
 rename {internal/pkg => pkg}/lib/util.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrmap.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_accessors.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_accessors_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_flatten_unflatten.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_json.go (98%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_new_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrmap_print.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_accessors.go (98%)
 rename {internal/pkg => pkg}/mlrval/mlrval_benchmark_test.go (86%)
 rename {internal/pkg => pkg}/mlrval/mlrval_cmp.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrval_cmp_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_collections.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrval_constants.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_copy.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_format.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_format_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_get.go (98%)
 rename {internal/pkg => pkg}/mlrval/mlrval_get_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_infer.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrval_infer_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_is.go (98%)
 rename {internal/pkg => pkg}/mlrval/mlrval_is_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_json.go (99%)
 rename {internal/pkg => pkg}/mlrval/mlrval_new.go (98%)
 rename {internal/pkg => pkg}/mlrval/mlrval_new_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_output.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_output_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_sort.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_sort_test.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_type.go (100%)
 rename {internal/pkg => pkg}/mlrval/mlrval_type_test.go (100%)
 rename {internal/pkg => pkg}/output/README.md (100%)
 rename {internal/pkg => pkg}/output/channel_writer.go (96%)
 rename {internal/pkg => pkg}/output/doc.go (100%)
 rename {internal/pkg => pkg}/output/file_output_handlers.go (98%)
 rename {internal/pkg => pkg}/output/record_writer.go (92%)
 rename {internal/pkg => pkg}/output/record_writer_csv.go (94%)
 rename {internal/pkg => pkg}/output/record_writer_csv_colorizer.go (99%)
 rename {internal/pkg => pkg}/output/record_writer_csvlite.go (93%)
 rename {internal/pkg => pkg}/output/record_writer_dkvp.go (87%)
 rename {internal/pkg => pkg}/output/record_writer_factory.go (94%)
 rename {internal/pkg => pkg}/output/record_writer_json.go (96%)
 rename {internal/pkg => pkg}/output/record_writer_markdown.go (93%)
 rename {internal/pkg => pkg}/output/record_writer_nidx.go (88%)
 rename {internal/pkg => pkg}/output/record_writer_pprint.go (98%)
 rename {internal/pkg => pkg}/output/record_writer_tsv.go (92%)
 rename {internal/pkg => pkg}/output/record_writer_xtab.go (96%)
 rename {internal/pkg => pkg}/parsing/README.md (87%)
 rename {internal/pkg => pkg}/parsing/doc.go (100%)
 rename {internal/pkg => pkg}/parsing/errors.go.template (92%)
 rename {internal/pkg => pkg}/parsing/errors/doc.go (68%)
 rename {internal/pkg => pkg}/parsing/errors/errors.go (92%)
 rename {internal/pkg => pkg}/parsing/lexer/acttab.go (99%)
 rename {internal/pkg => pkg}/parsing/lexer/doc.go (67%)
 rename {internal/pkg => pkg}/parsing/lexer/lexer.go (99%)
 rename {internal/pkg => pkg}/parsing/lexer/transitiontable.go (100%)
 rename {internal/pkg => pkg}/parsing/mlr.bnf (99%)
 rename {internal/pkg => pkg}/parsing/parser/action.go (100%)
 rename {internal/pkg => pkg}/parsing/parser/actiontable.go (100%)
 rename {internal/pkg => pkg}/parsing/parser/context.go (100%)
 rename {internal/pkg => pkg}/parsing/parser/doc.go (68%)
 rename {internal/pkg => pkg}/parsing/parser/gototable.go (100%)
 rename {internal/pkg => pkg}/parsing/parser/parser.go (97%)
 rename {internal/pkg => pkg}/parsing/parser/productionstable.go (99%)
 rename {internal/pkg => pkg}/parsing/token/context.go (100%)
 rename {internal/pkg => pkg}/parsing/token/doc.go (67%)
 rename {internal/pkg => pkg}/parsing/token/token.go (100%)
 rename {internal/pkg => pkg}/parsing/util/doc.go (67%)
 rename {internal/pkg => pkg}/parsing/util/litconv.go (100%)
 rename {internal/pkg => pkg}/parsing/util/rune.go (100%)
 rename {internal/pkg => pkg}/pbnjay-strptime/README.md (100%)
 rename {internal/pkg => pkg}/pbnjay-strptime/strptime.go (100%)
 rename {internal/pkg => pkg}/pbnjay-strptime/strptime_test.go (100%)
 rename {internal/pkg => pkg}/platform/README.md (100%)
 rename {internal/pkg => pkg}/platform/diff_notwindows.go (100%)
 rename {internal/pkg => pkg}/platform/diff_windows.go (100%)
 rename {internal/pkg => pkg}/platform/doc.go (100%)
 rename {internal/pkg => pkg}/platform/getargs_notwindows.go (100%)
 rename {internal/pkg => pkg}/platform/getargs_windows.go (100%)
 rename {internal/pkg => pkg}/platform/shellrun_notwindows.go (100%)
 rename {internal/pkg => pkg}/platform/shellrun_windows.go (100%)
 rename {internal/pkg => pkg}/platform/terminal_notwindows.go (100%)
 rename {internal/pkg => pkg}/platform/terminal_windows.go (100%)
 rename {internal/pkg => pkg}/runtime/README.md (100%)
 rename {internal/pkg => pkg}/runtime/doc.go (100%)
 rename {internal/pkg => pkg}/runtime/stack.go (98%)
 rename {internal/pkg => pkg}/runtime/state.go (89%)
 rename {internal/pkg => pkg}/scan/digits.go (100%)
 rename {internal/pkg => pkg}/scan/digits_test.go (100%)
 rename {internal/pkg => pkg}/scan/doc.go (100%)
 rename {internal/pkg => pkg}/scan/find.go (100%)
 rename {internal/pkg => pkg}/scan/find_benchmark_test.go (94%)
 rename {internal/pkg => pkg}/scan/find_test.go (100%)
 rename {internal/pkg => pkg}/scan/type.go (100%)
 rename {internal/pkg => pkg}/scan/type_test.go (100%)
 rename {internal/pkg => pkg}/stream/README.md (100%)
 rename {internal/pkg => pkg}/stream/doc.go (100%)
 rename {internal/pkg => pkg}/stream/stream.go (94%)
 rename {internal/pkg => pkg}/terminals/doc.go (100%)
 rename {internal/pkg => pkg}/terminals/help/doc.go (100%)
 rename {internal/pkg => pkg}/terminals/help/entry.go (98%)
 rename {internal/pkg => pkg}/terminals/regtest/README.md (100%)
 rename {internal/pkg => pkg}/terminals/regtest/doc.go (100%)
 rename {internal/pkg => pkg}/terminals/regtest/entry.go (100%)
 rename {internal/pkg => pkg}/terminals/regtest/invoker.go (97%)
 rename {internal/pkg => pkg}/terminals/regtest/regtester.go (99%)
 rename {internal/pkg => pkg}/terminals/repl/README.md (100%)
 rename {internal/pkg => pkg}/terminals/repl/doc.go (100%)
 rename {internal/pkg => pkg}/terminals/repl/dsl.go (96%)
 rename {internal/pkg => pkg}/terminals/repl/entry.go (99%)
 rename {internal/pkg => pkg}/terminals/repl/prompt.go (92%)
 rename {internal/pkg => pkg}/terminals/repl/session.go (94%)
 rename {internal/pkg => pkg}/terminals/repl/types.go (88%)
 rename {internal/pkg => pkg}/terminals/repl/verbs.go (99%)
 rename {internal/pkg => pkg}/terminals/terminals.go (90%)
 rename {internal/pkg => pkg}/transformers/README.md (100%)
 rename {internal/pkg => pkg}/transformers/aaa_chain_transformer.go (99%)
 rename {internal/pkg => pkg}/transformers/aaa_record_transformer.go (95%)
 rename {internal/pkg => pkg}/transformers/aaa_transformer_table.go (96%)
 rename {internal/pkg => pkg}/transformers/altkv.go (95%)
 rename {internal/pkg => pkg}/transformers/bar.go (98%)
 rename {internal/pkg => pkg}/transformers/bootstrap.go (97%)
 rename {internal/pkg => pkg}/transformers/case.go (97%)
 rename {internal/pkg => pkg}/transformers/cat.go (97%)
 rename {internal/pkg => pkg}/transformers/check.go (97%)
 rename {internal/pkg => pkg}/transformers/clean_whitespace.go (96%)
 rename {internal/pkg => pkg}/transformers/count.go (97%)
 rename {internal/pkg => pkg}/transformers/count_similar.go (95%)
 rename {internal/pkg => pkg}/transformers/cut.go (97%)
 rename {internal/pkg => pkg}/transformers/decimate.go (97%)
 rename {internal/pkg => pkg}/transformers/doc.go (100%)
 rename {internal/pkg => pkg}/transformers/fill_down.go (97%)
 rename {internal/pkg => pkg}/transformers/fill_empty.go (95%)
 rename {internal/pkg => pkg}/transformers/flatten.go (97%)
 rename {internal/pkg => pkg}/transformers/format_values.go (97%)
 rename {internal/pkg => pkg}/transformers/fraction.go (97%)
 rename {internal/pkg => pkg}/transformers/gap.go (97%)
 rename {internal/pkg => pkg}/transformers/grep.go (97%)
 rename {internal/pkg => pkg}/transformers/group_by.go (96%)
 rename {internal/pkg => pkg}/transformers/group_like.go (95%)
 rename {internal/pkg => pkg}/transformers/gsub.go (94%)
 rename {internal/pkg => pkg}/transformers/having_fields.go (98%)
 rename {internal/pkg => pkg}/transformers/head.go (98%)
 rename {internal/pkg => pkg}/transformers/histogram.go (98%)
 rename {internal/pkg => pkg}/transformers/join.go (98%)
 rename {internal/pkg => pkg}/transformers/json_parse.go (97%)
 rename {internal/pkg => pkg}/transformers/json_stringify.go (96%)
 rename {internal/pkg => pkg}/transformers/label.go (95%)
 rename {internal/pkg => pkg}/transformers/latin1_to_utf8.go (93%)
 rename {internal/pkg => pkg}/transformers/merge_fields.go (98%)
 rename {internal/pkg => pkg}/transformers/most_or_least_frequent.go (97%)
 rename {internal/pkg => pkg}/transformers/nest.go (98%)
 rename {internal/pkg => pkg}/transformers/nothing.go (96%)
 rename {internal/pkg => pkg}/transformers/put_or_filter.go (97%)
 rename {internal/pkg => pkg}/transformers/regularize.go (94%)
 rename {internal/pkg => pkg}/transformers/remove_empty_columns.go (96%)
 rename {internal/pkg => pkg}/transformers/rename.go (98%)
 rename {internal/pkg => pkg}/transformers/reorder.go (97%)
 rename {internal/pkg => pkg}/transformers/repeat.go (98%)
 rename {internal/pkg => pkg}/transformers/reshape.go (98%)
 rename {internal/pkg => pkg}/transformers/sample.go (97%)
 rename {internal/pkg => pkg}/transformers/sec2gmt.go (95%)
 rename {internal/pkg => pkg}/transformers/sec2gmtdate.go (94%)
 rename {internal/pkg => pkg}/transformers/seqgen.go (96%)
 rename {internal/pkg => pkg}/transformers/shuffle.go (96%)
 rename {internal/pkg => pkg}/transformers/skip_trivial_records.go (96%)
 rename {internal/pkg => pkg}/transformers/sort.go (98%)
 rename {internal/pkg => pkg}/transformers/sort_within_records.go (97%)
 rename {internal/pkg => pkg}/transformers/split.go (98%)
 rename {internal/pkg => pkg}/transformers/ssub.go (94%)
 rename {internal/pkg => pkg}/transformers/stats1.go (98%)
 rename {internal/pkg => pkg}/transformers/stats2.go (98%)
 rename {internal/pkg => pkg}/transformers/step.go (99%)
 rename {internal/pkg => pkg}/transformers/sub.go (94%)
 rename {internal/pkg => pkg}/transformers/summary.go (98%)
 rename {internal/pkg => pkg}/transformers/tac.go (96%)
 rename {internal/pkg => pkg}/transformers/tail.go (96%)
 rename {internal/pkg => pkg}/transformers/tee.go (97%)
 rename {internal/pkg => pkg}/transformers/template.go (95%)
 rename {internal/pkg => pkg}/transformers/top.go (97%)
 rename {internal/pkg => pkg}/transformers/unflatten.go (97%)
 rename {internal/pkg => pkg}/transformers/uniq.go (98%)
 rename {internal/pkg => pkg}/transformers/unspace.go (97%)
 rename {internal/pkg => pkg}/transformers/unsparsify.go (96%)
 rename {internal/pkg => pkg}/transformers/utf8_to_latin1.go (93%)
 rename {internal/pkg => pkg}/transformers/utils/README.md (100%)
 rename {internal/pkg => pkg}/transformers/utils/doc.go (100%)
 rename {internal/pkg => pkg}/transformers/utils/join_bucket.go (92%)
 rename {internal/pkg => pkg}/transformers/utils/join_bucket_keeper.go (98%)
 rename {internal/pkg => pkg}/transformers/utils/percentile_keeper.go (97%)
 rename {internal/pkg => pkg}/transformers/utils/stats1_accumulators.go (99%)
 rename {internal/pkg => pkg}/transformers/utils/stats2_accumulators.go (99%)
 rename {internal/pkg => pkg}/transformers/utils/top_keeper.go (96%)
 rename {internal/pkg => pkg}/transformers/utils/window_keeper.go (97%)
 rename {internal/pkg => pkg}/transformers/utils/window_keeper_test.go (100%)
 rename {internal/pkg => pkg}/types/README.md (100%)
 rename {internal/pkg => pkg}/types/context.go (98%)
 rename {internal/pkg => pkg}/types/doc.go (100%)
 rename {internal/pkg => pkg}/types/indexed-lvalues.md (100%)
 rename {internal/pkg => pkg}/types/mlrval_typing.go (97%)
 rename {internal/pkg => pkg}/version/doc.go (100%)
 rename {internal/pkg => pkg}/version/version.go (100%)

diff --git a/Makefile b/Makefile
index 60c7ce578..fb374cb91 100644
--- a/Makefile
+++ b/Makefile
@@ -30,25 +30,25 @@ install: build
 # ----------------------------------------------------------------
 # Unit tests (small number)
 unit-test ut: build
-	go test github.com/johnkerl/miller/internal/pkg/...
+	go test github.com/johnkerl/miller/pkg/...
 
 ut-lib:build
-	go test github.com/johnkerl/miller/internal/pkg/lib...
+	go test github.com/johnkerl/miller/pkg/lib...
 ut-scan:build
-	go test github.com/johnkerl/miller/internal/pkg/scan/...
+	go test github.com/johnkerl/miller/pkg/scan/...
 ut-mlv:build
-	go test github.com/johnkerl/miller/internal/pkg/mlrval/...
+	go test github.com/johnkerl/miller/pkg/mlrval/...
 ut-bifs:build
-	go test github.com/johnkerl/miller/internal/pkg/bifs/...
+	go test github.com/johnkerl/miller/pkg/bifs/...
 ut-input:build
-	go test github.com/johnkerl/miller/internal/pkg/input/...
+	go test github.com/johnkerl/miller/pkg/input/...
 
 bench:build
-	go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/...
+	go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/...
 bench-mlv:build
-	go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/mlrval/...
+	go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/mlrval/...
 bench-input:build
-	go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/input/...
+	go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/input/...
 
 # ----------------------------------------------------------------
 # Regression tests (large number)
@@ -56,7 +56,7 @@ bench-input:build
 # See ./regression_test.go for information on how to get more details
 # for debugging.  TL;DR is for CI jobs, we have 'go test -v'; for
 # interactive use, instead of 'go test -v' simply use 'mlr regtest
-# -vvv' or 'mlr regtest -s 20'. See also internal/pkg/terminals/regtest.
+# -vvv' or 'mlr regtest -s 20'. See also pkg/terminals/regtest.
 regression-test: build
 	go test -v regression_test.go
 
@@ -65,7 +65,7 @@ regression-test: build
 # go fmt ./... finds experimental C files which we want to ignore.
 fmt format:
 	-go fmt ./cmd/...
-	-go fmt ./internal/pkg/...
+	-go fmt ./pkg/...
 	-go fmt ./regression_test.go
 
 # ----------------------------------------------------------------
diff --git a/README-dev.md b/README-dev.md
index 6c3b5ca8c..0e363db5c 100644
--- a/README-dev.md
+++ b/README-dev.md
@@ -61,10 +61,10 @@ During the coding of Miller, I've been guided by the following:
   * Names of files, variables, functions, etc. should be fully spelled out (e.g. `NewEvaluableLeafNode`), except for a small number of most-used names where a longer name would cause unnecessary line-wraps (e.g. `Mlrval` instead of `MillerValue` since this appears very very often).
   * Code should not be too clever. This includes some reasonable amounts of code duplication from time to time, to keep things inline, rather than lasagna code.
   * Things should be transparent.  For example, the `-v` in `mlr -n put -v '$y = 3 + 0.1 * $x'` shows you the abstract syntax tree derived from the DSL expression.
-  * Comments should be robust with respect to reasonably anticipated changes. For example, one package should cross-link to another in its comments, but I try to avoid mentioning specific filenames too much in the comments and README files since these may change over time. I make an exception for stable points such as [cmd/mlr/main.go](./cmd/mlr/main.go), [mlr.bnf](./internal/pkg/parsing/mlr.bnf), [stream.go](./internal/pkg/stream/stream.go), etc.
+  * Comments should be robust with respect to reasonably anticipated changes. For example, one package should cross-link to another in its comments, but I try to avoid mentioning specific filenames too much in the comments and README files since these may change over time. I make an exception for stable points such as [cmd/mlr/main.go](./cmd/mlr/main.go), [mlr.bnf](./pkg/parsing/mlr.bnf), [stream.go](./pkg/stream/stream.go), etc.
 * *Miller should be pleasant to write.*
   * It should be quick to answer the question *Did I just break anything?* -- hence `mlr regtest` functionality.
-  * It should be quick to find out what to do next as you iteratively develop -- see for example [cst/README.md](./internal/pkg/dsl/cst/README.md).
+  * It should be quick to find out what to do next as you iteratively develop -- see for example [cst/README.md](./pkg/dsl/cst/README.md).
 * *The language should be an asset, not a liability.*
   * One of the reasons I chose Go is that (personally anyway) I find it to be reasonably efficient, well-supported with standard libraries, straightforward, and fun.  I hope you enjoy it as much as I have.
 
@@ -83,10 +83,10 @@ sequence of key-value pairs. The basic **stream** operation is:
 
 So, in broad overview, the key packages are:
 
-* [internal/pkg/stream](./internal/pkg/stream) -- connect input -> transforms -> output via Go channels
-* [internal/pkg/input](./internal/pkg/input) -- read input records
-* [internal/pkg/transformers](./internal/pkg/transformers) -- transform input records to output records
-* [internal/pkg/output](./internal/pkg/output) -- write output records
+* [pkg/stream](./pkg/stream) -- connect input -> transforms -> output via Go channels
+* [pkg/input](./pkg/input) -- read input records
+* [pkg/transformers](./pkg/transformers) -- transform input records to output records
+* [pkg/output](./pkg/output) -- write output records
 * The rest are details to support this.
 
 ## Directory-structure details
@@ -98,7 +98,7 @@ So, in broad overview, the key packages are:
     * This package defines the grammar for Miller's domain-specific language (DSL) for the Miller `put` and `filter` verbs. And, GOCC is a joy to use. :)
     * It is used on the terms of its open-source license.
   * [golang.org/x/term](https://pkg.go.dev/golang.org/x/term):
-    * Just a one-line Miller callsite for is-a-terminal checking for the [Miller REPL](./internal/pkg/terminals/repl/README.md).
+    * Just a one-line Miller callsite for is-a-terminal checking for the [Miller REPL](./pkg/terminals/repl/README.md).
     * It is used on the terms of its open-source license.
 * See also [./go.mod](go.mod). Setup:
   * `go get github.com/goccmack/gocc`
@@ -106,22 +106,22 @@ So, in broad overview, the key packages are:
 
 ### Miller per se
 
-* The main entry point is [cmd/mlr/main.go](./cmd/mlr/main.go); everything else in [internal/pkg](./internal/pkg).
-* [internal/pkg/entrypoint](./internal/pkg/entrypoint): All the usual contents of `main()` are here, for ease of testing.
-* [internal/pkg/platform](./internal/pkg/platform): Platform-dependent code, which as of early 2021 is the command-line parser. Handling single quotes and double quotes is different on Windows unless particular care is taken, which is what this package does.
-* [internal/pkg/lib](./internal/pkg/lib):
-  * Implementation of the [`Mlrval`](./internal/pkg/types/mlrval.go) datatype which includes string/int/float/boolean/void/absent/error types. These are used for record values, as well as expression/variable values in the Miller `put`/`filter` DSL. See also below for more details.
-  * [`Mlrmap`](./internal/pkg/types/mlrmap.go) is the sequence of key-value pairs which represents a Miller record. The key-lookup mechanism is optimized for Miller read/write usage patterns -- please see [mlrmap.go](./internal/pkg/types/mlrmap.go) for more details.
-  * [`context`](./internal/pkg/types/context.go) supports AWK-like variables such as `FILENAME`, `NF`, `NR`, and so on.
-* [internal/pkg/cli](./internal/pkg/cli) is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer-chain of `put` then `filter`, and a JSON record-writer.
-* [internal/pkg/climain](./internal/pkg/climain) contains a layer which invokes `internal/pkg/cli`, which was split out to avoid a Go package-import cycle.
-* [internal/pkg/stream](./internal/pkg/stream) is as above -- it uses Go channels to pipe together file-reads, to record-reading/parsing, to a chain of record-transformers, to record-writing/formatting, to terminal standard output.
-* [internal/pkg/input](./internal/pkg/input) is as above -- one record-reader type per supported input file format, and a factory method.
-* [internal/pkg/output](./internal/pkg/output) is as above -- one record-writer type per supported output file format, and a factory method.
-* [internal/pkg/transformers](./internal/pkg/transformers) contains the abstract record-transformer interface datatype, as well as the Go-channel chaining mechanism for piping one transformer into the next. It also contains all the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on.
-* [internal/pkg/parsing](./internal/pkg/parsing) contains a single source file, `mlr.bnf`, which is the lexical/semantic grammar file for the Miller `put`/`filter` DSL using the GOCC framework. All subdirectories of `internal/pkg/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. If you need to edit `mlr.bnf`, please use [tools/build-dsl](./tools/build-dsl) to autogenerate Go code from it (using the GOCC tool). (This takes several minutes to run.)
-* [internal/pkg/dsl](./internal/pkg/dsl) contains [`ast_types.go`](internal/pkg/dsl/ast_types.go) which is the abstract syntax tree datatype shared between GOCC and Miller. I didn't use a `internal/pkg/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle.
-* [internal/pkg/dsl/cst](./internal/pkg/dsl/cst) is the concrete syntax tree, constructed from an AST produced by GOCC. The CST is what is actually executed on every input record when you do things like `$z = $x * 0.3 * $y`. Please see the [internal/pkg/dsl/cst/README.md](./internal/pkg/dsl/cst/README.md) for more information.
+* The main entry point is [cmd/mlr/main.go](./cmd/mlr/main.go); everything else in [pkg](./pkg).
+* [pkg/entrypoint](./pkg/entrypoint): All the usual contents of `main()` are here, for ease of testing.
+* [pkg/platform](./pkg/platform): Platform-dependent code, which as of early 2021 is the command-line parser. Handling single quotes and double quotes is different on Windows unless particular care is taken, which is what this package does.
+* [pkg/lib](./pkg/lib):
+  * Implementation of the [`Mlrval`](./pkg/mlrval) datatype which includes string/int/float/boolean/void/absent/error types. These are used for record values, as well as expression/variable values in the Miller `put`/`filter` DSL. See also below for more details.
+  * [`Mlrmap`](./pkg/mlrval/mlrmap.go) is the sequence of key-value pairs which represents a Miller record. The key-lookup mechanism is optimized for Miller read/write usage patterns -- please see [mlrmap.go](./pkg/mlrval/mlrmap.go) for more details.
+  * [`context`](./pkg/types/context.go) supports AWK-like variables such as `FILENAME`, `NF`, `NR`, and so on.
+* [pkg/cli](./pkg/cli) is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer-chain of `put` then `filter`, and a JSON record-writer.
+* [pkg/climain](./pkg/climain) contains a layer which invokes `pkg/cli`, which was split out to avoid a Go package-import cycle.
+* [pkg/stream](./pkg/stream) is as above -- it uses Go channels to pipe together file-reads, to record-reading/parsing, to a chain of record-transformers, to record-writing/formatting, to terminal standard output.
+* [pkg/input](./pkg/input) is as above -- one record-reader type per supported input file format, and a factory method.
+* [pkg/output](./pkg/output) is as above -- one record-writer type per supported output file format, and a factory method.
+* [pkg/transformers](./pkg/transformers) contains the abstract record-transformer interface datatype, as well as the Go-channel chaining mechanism for piping one transformer into the next. It also contains all the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on.
+* [pkg/parsing](./pkg/parsing) contains a single source file, `mlr.bnf`, which is the lexical/semantic grammar file for the Miller `put`/`filter` DSL using the GOCC framework. All subdirectories of `pkg/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. If you need to edit `mlr.bnf`, please use [tools/build-dsl](./tools/build-dsl) to autogenerate Go code from it (using the GOCC tool). (This takes several minutes to run.)
+* [pkg/dsl](./pkg/dsl) contains [`ast_types.go`](pkg/dsl/ast_types.go) which is the abstract syntax tree datatype shared between GOCC and Miller. I didn't use a `pkg/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle.
+* [pkg/dsl/cst](./pkg/dsl/cst) is the concrete syntax tree, constructed from an AST produced by GOCC. The CST is what is actually executed on every input record when you do things like `$z = $x * 0.3 * $y`. Please see the [pkg/dsl/cst/README.md](./pkg/dsl/cst/README.md) for more information.
 
 ## Nil-record conventions
 
@@ -153,7 +153,7 @@ nil through the reader/transformer/writer sequence.
 
 ## More about mlrvals
 
-[`Mlrval`](./internal/pkg/types/mlrval.go) is the datatype of record values, as well as expression/variable values in the Miller `put`/`filter` DSL. It includes string/int/float/boolean/void/absent/error types, not unlike PHP's `zval`.
+[`Mlrval`](./pkg/mlrval) is the datatype of record values, as well as expression/variable values in the Miller `put`/`filter` DSL. It includes string/int/float/boolean/void/absent/error types, not unlike PHP's `zval`.
 
 * Miller's `absent` type is like Javascript's `undefined` -- it's for times when there is no such key, as in a DSL expression `$out = $foo` when the input record is `$x=3,y=4` -- there is no `$foo` so `$foo` has `absent` type. Nothing is written to the `$out` field in this case. See also [here](https://miller.readthedocs.io/en/latest/reference-main-null-data) for more information.
 * Miller's `void` type is like Javascript's `null` -- it's for times when there is a key with no value, as in `$out = $x` when the input record is `$x=,$y=4`. This is an overlap with `string` type, since a void value looks like an empty string. I've gone back and forth on this (including when I was writing the C implementation) -- whether to retain `void` as a distinct type from empty-string, or not. I ended up keeping it as it made the `Mlrval` logic easier to understand.
@@ -161,7 +161,7 @@ nil through the reader/transformer/writer sequence.
 * Miller's number handling makes auto-overflow from int to float transparent, while preserving the possibility of 64-bit bitwise arithmetic.
   * This is different from JavaScript, which has only double-precision floats and thus no support for 64-bit numbers (note however that there is now [`BigInt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt)).
   * This is also different from C and Go, wherein casts are necessary -- without which int arithmetic overflows.
-  * See also [here](https://miller.readthedocs.io/en/latest/reference-main-arithmetic) for the semantics of Miller arithmetic, which the [`Mlrval`](./internal/pkg/types/mlrval.go) class implements.
+  * See also [here](https://miller.readthedocs.io/en/latest/reference-main-arithmetic) for the semantics of Miller arithmetic, which the [`Mlrval`](./pkg/mlrval) class implements.
 
 ## Performance optimizations
 
diff --git a/README.md b/README.md
index c1a1977b4..0b9cd76e3 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ See also [building from source](https://miller.readthedocs.io/en/latest/build.ht
   * You can do `./configure --prefix=/some/install/path` before `make install` if you want to install somewhere other than `/usr/local`.
 * Without `make`:
   * To build: `go build github.com/johnkerl/miller/cmd/mlr`.
-  * To run tests: `go test github.com/johnkerl/miller/internal/pkg/...` and `mlr regtest`.
+  * To run tests: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest`.
   * To install: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr`.
 * See also the doc page on [building from source](https://miller.readthedocs.io/en/latest/build).
 * For more developer information please see [README-dev.md](./README-dev.md).
diff --git a/cmd/experiments/colors/main.go b/cmd/experiments/colors/main.go
index 305fa6062..5f5093eee 100644
--- a/cmd/experiments/colors/main.go
+++ b/cmd/experiments/colors/main.go
@@ -3,7 +3,7 @@ package main
 
 import (
 	"fmt"
-	"github.com/johnkerl/miller/internal/pkg/colorizer"
+	"github.com/johnkerl/miller/pkg/colorizer"
 )
 
 const boldString = "\u001b[1m"
diff --git a/cmd/mlr/main.go b/cmd/mlr/main.go
index 23ed5b5b6..3e37bdca1 100644
--- a/cmd/mlr/main.go
+++ b/cmd/mlr/main.go
@@ -11,7 +11,7 @@ import (
 	"strings"
 	"time"
 
-	"github.com/johnkerl/miller/internal/pkg/entrypoint"
+	"github.com/johnkerl/miller/pkg/entrypoint"
 	"github.com/pkg/profile" // for trace.out
 )
 
diff --git a/cmd/scan/main.go b/cmd/scan/main.go
index c185b8752..f93e0226e 100644
--- a/cmd/scan/main.go
+++ b/cmd/scan/main.go
@@ -8,7 +8,7 @@ import (
 	"fmt"
 	"os"
 
-	"github.com/johnkerl/miller/internal/pkg/scan"
+	"github.com/johnkerl/miller/pkg/scan"
 )
 
 func main() {
diff --git a/cmd/sizes/main.go b/cmd/sizes/main.go
index 82a4905b0..5ae6209cc 100644
--- a/cmd/sizes/main.go
+++ b/cmd/sizes/main.go
@@ -11,7 +11,7 @@ package main
 import (
 	"fmt"
 
-	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/pkg/mlrval"
 )
 
 func main() {
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 5ea29ed16..287d929c7 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -109,6 +109,7 @@ nav:
     - "Auxiliary commands": "reference-main-auxiliary-commands.md"
     - "Manual page": "manpage.md"
     - "Building from source": "build.md"
+    - "Miller as a library": "miller-as-library.md"
     - "How to create a new release": "how-to-release.md"
     - "Documents for previous releases": "release-docs.md"
     - "Glossary": "glossary.md"
diff --git a/docs/src/build.md b/docs/src/build.md
index e2a6c59f9..0b6787898 100644
--- a/docs/src/build.md
+++ b/docs/src/build.md
@@ -33,7 +33,7 @@ Two-clause BSD license [https://github.com/johnkerl/miller/blob/master/LICENSE.t
 * `make` creates the `./mlr` (or `.\mlr.exe` on Windows) executable
     * Without `make`: `go build github.com/johnkerl/miller/cmd/mlr`
 * `make check` runs tests
-    * Without `make`: `go test github.com/johnkerl/miller/internal/pkg/...` and `mlr regtest`
+    * Without `make`: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest`
 * `make install` installs the `mlr` executable and the `mlr` manpage
     * Without make: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr`
 
diff --git a/docs/src/build.md.in b/docs/src/build.md.in
index b01464d20..5138c9b8f 100644
--- a/docs/src/build.md.in
+++ b/docs/src/build.md.in
@@ -17,7 +17,7 @@ Two-clause BSD license [https://github.com/johnkerl/miller/blob/master/LICENSE.t
 * `make` creates the `./mlr` (or `.\mlr.exe` on Windows) executable
     * Without `make`: `go build github.com/johnkerl/miller/cmd/mlr`
 * `make check` runs tests
-    * Without `make`: `go test github.com/johnkerl/miller/internal/pkg/...` and `mlr regtest`
+    * Without `make`: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest`
 * `make install` installs the `mlr` executable and the `mlr` manpage
     * Without make: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr`
 
diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md
index 4c7e97628..d8675cdbc 100644
--- a/docs/src/how-to-release.md
+++ b/docs/src/how-to-release.md
@@ -22,7 +22,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
 
 * Update version found in `mlr --version` and `man mlr`:
 
-    * Edit `internal/pkg/version/version.go` from `6.2.0-dev` to `6.3.0`.
+    * Edit `pkg/version/version.go` from `6.2.0-dev` to `6.3.0`.
     * Edit `miller.spec`: `Version`, and `changelog` entry
     * Run `make dev` in the Miller repo base directory
     * The ordering in this makefile rule is important: the first build creates `mlr`; the second runs `mlr` to create `manpage.txt`; the third includes `manpage.txt` into one of its outputs.
@@ -69,6 +69,6 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
 
 * Afterwork:
 
-    * Edit `internal/pkg/version/version.go` to change version from `6.3.0` to `6.3.0-dev`.
+    * Edit `pkg/version/version.go` to change version from `6.3.0` to `6.3.0-dev`.
     * `make dev`
     * Commit and push.
diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in
index ad26704cf..2754a2bfd 100644
--- a/docs/src/how-to-release.md.in
+++ b/docs/src/how-to-release.md.in
@@ -6,7 +6,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
 
 * Update version found in `mlr --version` and `man mlr`:
 
-    * Edit `internal/pkg/version/version.go` from `6.2.0-dev` to `6.3.0`.
+    * Edit `pkg/version/version.go` from `6.2.0-dev` to `6.3.0`.
     * Edit `miller.spec`: `Version`, and `changelog` entry
     * Run `make dev` in the Miller repo base directory
     * The ordering in this makefile rule is important: the first build creates `mlr`; the second runs `mlr` to create `manpage.txt`; the third includes `manpage.txt` into one of its outputs.
@@ -53,6 +53,6 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
 
 * Afterwork:
 
-    * Edit `internal/pkg/version/version.go` to change version from `6.3.0` to `6.3.0-dev`.
+    * Edit `pkg/version/version.go` to change version from `6.3.0` to `6.3.0-dev`.
     * `make dev`
     * Commit and push.
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 0d22cfdab..0037efeb2 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -3645,5 +3645,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-08-31                         MILLER(1)
+                                  2023-09-10                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index ccf519ced..3087f568f 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3624,4 +3624,4 @@ MILLER(1) MILLER(1) - 2023-08-31 MILLER(1) + 2023-09-10 MILLER(1) diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md new file mode 100644 index 000000000..49c381138 --- /dev/null +++ b/docs/src/miller-as-library.md @@ -0,0 +1,202 @@ + +
+ +Quick links: +  +Flags +  +Verbs +  +Functions +  +Glossary +  +Release docs + +
+# Miller as a library + +Very initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source +code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects. + +## Setup + +``` +$ mkdir use-mlr + +$ cd use-mlr + +$ go mod init github.com/johnkerl/miller-library-example +go: creating new go.mod: module github.com/johnkerl/miller-library-example + +# One of: +$ go get github.com/johnkerl/miller +$ go get github.com/johnkerl/miller@0f27a39a9f92d4c633dd29d99ad203e95a484dd3 +# Etc. + +$ go mod tidy +``` + +## One example use + +
+package main
+
+import (
+	"fmt"
+
+	"github.com/johnkerl/miller/pkg/bifs"
+	"github.com/johnkerl/miller/pkg/mlrval"
+)
+
+func main() { // raises 2 to the 60th power with Miller's built-in pow and prints the result
+	a := mlrval.FromInt(2) // base
+	b := mlrval.FromInt(60) // exponent
+	c := bifs.BIF_pow(a, b)
+	fmt.Println(c.String()) // the page's sample run prints 1152921504606846976
+}
+
+ +``` +$ go build main1.go +$ ./main1 +1152921504606846976 +``` + +Or simply: +``` +$ go run main1.go +1152921504606846976 +``` + +## Another example use + +
+package main
+
+import (
+	"bufio"
+	"container/list"
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/johnkerl/miller/pkg/cli"
+	"github.com/johnkerl/miller/pkg/input"
+	"github.com/johnkerl/miller/pkg/output"
+	"github.com/johnkerl/miller/pkg/transformers"
+	"github.com/johnkerl/miller/pkg/types"
+)
+
+func convert_csv_to_json(fileNames []string) error { // reads fileNames as CSV and writes the records to stdout as JSON
+	options := &cli.TOptions{
+		ReaderOptions: cli.TReaderOptions{
+			InputFileFormat: "csv",
+			IFS:             ",",
+			IRS:             "\n",
+			RecordsPerBatch: 1,
+		},
+		WriterOptions: cli.TWriterOptions{
+			OutputFileFormat: "json",
+		},
+	}
+	outputStream := os.Stdout
+	outputIsStdout := true
+
+	// Since Go is concurrent, the context struct needs to be duplicated and
+	// passed through the channels along with each record.
+	initialContext := types.NewContext()
+
+	// Instantiate the record-reader.
+	// RecordsPerBatch is tracked separately from ReaderOptions since join/repl
+	// may use batch size of 1.
+	recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch)
+	if err != nil {
+		return err
+	}
+
+	// Instantiate the record-writer.
+	recordWriter, err := output.Create(&options.WriterOptions)
+	if err != nil {
+		return err
+	}
+
+	cat, err := transformers.NewTransformerCat( // the whole transformer chain is one `cat` with every option switched off
+		false, // doCounters bool,
+		"",    // counterFieldName string,
+		nil,   // groupByFieldNames []string,
+		false, // doFileName bool,
+		false, // doFileNum bool,
+	)
+	if err != nil {
+		return err
+	}
+	recordTransformers := []transformers.IRecordTransformer{cat}
+
+	// Set up the reader-to-transformer and transformer-to-writer channels.
+	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
+	writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext
+
+	// Errors on input (file not found, etc) and data-processing errors are
+	// recorded as they arrive; the wait loop below finishes only once the
+	// record-writer reports that it is done. Channels carry all three signals.
+	inputErrorChannel := make(chan error, 1)
+	doneWritingChannel := make(chan bool, 1)
+	dataProcessingErrorChannel := make(chan bool, 1)
+
+	readerDownstreamDoneChannel := make(chan bool, 1)
+
+	// Start the reader, transformer, and writer, each in its own goroutine.
+	// Let them run until the writer signals end-of-processing.
+	bufferedOutputStream := bufio.NewWriter(outputStream)
+
+	go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
+	go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
+		writerChannel, options)
+	go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
+		dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)
+
+	var retval error // most recent input or data error received before the writer finished; nil if none
+	done := false
+	for !done { // only the doneWritingChannel case sets done
+		select {
+		case ierr := <-inputErrorChannel:
+			retval = ierr
+			break // leaves the select only, not the for loop
+		case _ = <-dataProcessingErrorChannel:
+			retval = errors.New("exiting due to data error") // details already printed
+			break // leaves the select only, not the for loop
+		case _ = <-doneWritingChannel:
+			done = true
+			break
+		}
+	}
+
+	bufferedOutputStream.Flush() // push whatever is still buffered out to outputStream before returning
+
+	return retval
+}
+
+func main() { // treats every command-line argument as the name of a CSV file to convert
+	err := convert_csv_to_json(os.Args[1:])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", err) // NOTE(review): the error is reported on stderr but the process still exits with status 0
+	}
+}
+
+ +
+host,status
+apoapsis.east.our.org,up
+nadir.west.our.org,down
+
+ +``` +$ go build main2.go +$ ./main2 data/hostnames.csv +{"host": "apoapsis.east.our.org", "status": "up"} +{"host": "nadir.west.our.org", "status": "down"} +``` + + + diff --git a/docs/src/miller-as-library.md.in b/docs/src/miller-as-library.md.in new file mode 100644 index 000000000..b7051165b --- /dev/null +++ b/docs/src/miller-as-library.md.in @@ -0,0 +1,54 @@ +# Miller as a library + +Very initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source +code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects. + +## Setup + +``` +$ mkdir use-mlr + +$ cd use-mlr + +$ go mod init github.com/johnkerl/miller-library-example +go: creating new go.mod: module github.com/johnkerl/miller-library-example + +# One of: +$ go get github.com/johnkerl/miller +$ go get github.com/johnkerl/miller@0f27a39a9f92d4c633dd29d99ad203e95a484dd3 +# Etc. + +$ go mod tidy +``` + +## One example use + +GENMD-INCLUDE-ESCAPED(miller-as-library/main1.go) + +``` +$ go build main1.go +$ ./main1 +1152921504606846976 +``` + +Or simply: +``` +$ go run main1.go +1152921504606846976 +``` + +## Another example use + +GENMD-INCLUDE-ESCAPED(miller-as-library/main2.go) + +GENMD-INCLUDE-ESCAPED(data/hostnames.csv) + +``` +$ go build main2.go +$ ./main2 data/hostnames.csv +{"host": "apoapsis.east.our.org", "status": "up"} +{"host": "nadir.west.our.org", "status": "down"} +``` + + + diff --git a/docs/src/miller-as-library/main1.go b/docs/src/miller-as-library/main1.go new file mode 100644 index 000000000..c56f5a0db --- /dev/null +++ b/docs/src/miller-as-library/main1.go @@ -0,0 +1,15 @@ +package main + +import ( + "fmt" + + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/mlrval" +) + +func main() { + a := mlrval.FromInt(2) + b := mlrval.FromInt(60) + c := bifs.BIF_pow(a, b) + fmt.Println(c.String()) +} diff --git a/docs/src/miller-as-library/main2.go b/docs/src/miller-as-library/main2.go new file mode 100644 index
000000000..07d4be50e --- /dev/null +++ b/docs/src/miller-as-library/main2.go @@ -0,0 +1,111 @@ +package main + +import ( + "bufio" + "container/list" + "errors" + "fmt" + "os" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/pkg/types" +) + +func convert_csv_to_json(fileNames []string) error { + options := &cli.TOptions{ + ReaderOptions: cli.TReaderOptions{ + InputFileFormat: "csv", + IFS: ",", + IRS: "\n", + RecordsPerBatch: 1, + }, + WriterOptions: cli.TWriterOptions{ + OutputFileFormat: "json", + }, + } + outputStream := os.Stdout + outputIsStdout := true + + // Since Go is concurrent, the context struct needs to be duplicated and + // passed through the channels along with each record. + initialContext := types.NewContext() + + // Instantiate the record-reader. + // RecordsPerBatch is tracked separately from ReaderOptions since join/repl + // may use batch size of 1. + recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch) + if err != nil { + return err + } + + // Instantiate the record-writer + recordWriter, err := output.Create(&options.WriterOptions) + if err != nil { + return err + } + + cat, err := transformers.NewTransformerCat( + false, // doCounters bool, + "", // counterFieldName string, + nil, // groupByFieldNames []string, + false, // doFileName bool, + false, // doFileNum bool, + ) + if err != nil { + return err + } + recordTransformers := []transformers.IRecordTransformer{cat} + + // Set up the reader-to-transformer and transformer-to-writer channels. + readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext + writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext + + // We're done when a fatal error is registered on input (file not found, + // etc) or when the record-writer has written all its output. 
We use + // channels to communicate both of these conditions. + inputErrorChannel := make(chan error, 1) + doneWritingChannel := make(chan bool, 1) + dataProcessingErrorChannel := make(chan bool, 1) + + readerDownstreamDoneChannel := make(chan bool, 1) + + // Start the reader, transformer, and writer. Let them run until fatal input + // error or end-of-processing happens. + bufferedOutputStream := bufio.NewWriter(outputStream) + + go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel) + go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers, + writerChannel, options) + go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel, + dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout) + + var retval error + done := false + for !done { + select { + case ierr := <-inputErrorChannel: + retval = ierr + break + case _ = <-dataProcessingErrorChannel: + retval = errors.New("exiting due to data error") // details already printed + break + case _ = <-doneWritingChannel: + done = true + break + } + } + + bufferedOutputStream.Flush() + + return retval +} + +func main() { + err := convert_csv_to_json(os.Args[1:]) + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + } +} diff --git a/internal/pkg/cli/README.md b/internal/pkg/cli/README.md deleted file mode 100644 index eb6f48300..000000000 --- a/internal/pkg/cli/README.md +++ /dev/null @@ -1,4 +0,0 @@ -Datatypes for parsing the Miller command line, and the flags table. - -* `internal/pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer. 
-* `internal/pkg/cli` contains datatypes and the flags table for the CLI-parser, which was split out to avoid a Go package-import cycle. diff --git a/man/manpage.txt b/man/manpage.txt index ccf519ced..3087f568f 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3624,4 +3624,4 @@ MILLER(1) MILLER(1) - 2023-08-31 MILLER(1) + 2023-09-10 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index b8794a352..a98daa906 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-08-31 +.\" Date: 2023-09-10 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-08-31" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-09-10" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/internal/pkg/README.md b/pkg/README.md similarity index 100% rename from internal/pkg/README.md rename to pkg/README.md diff --git a/internal/pkg/auxents/auxents.go b/pkg/auxents/auxents.go similarity index 100% rename from internal/pkg/auxents/auxents.go rename to pkg/auxents/auxents.go diff --git a/internal/pkg/auxents/doc.go b/pkg/auxents/doc.go similarity index 100% rename from internal/pkg/auxents/doc.go rename to pkg/auxents/doc.go diff --git a/internal/pkg/auxents/hex.go b/pkg/auxents/hex.go similarity index 100% rename from internal/pkg/auxents/hex.go rename to pkg/auxents/hex.go diff --git a/internal/pkg/auxents/lecat.go b/pkg/auxents/lecat.go similarity index 100% rename from internal/pkg/auxents/lecat.go rename to pkg/auxents/lecat.go diff --git a/internal/pkg/auxents/termcvt.go b/pkg/auxents/termcvt.go similarity index 100% rename from internal/pkg/auxents/termcvt.go rename to pkg/auxents/termcvt.go diff --git a/internal/pkg/auxents/unhex.go b/pkg/auxents/unhex.go similarity index 100% rename from internal/pkg/auxents/unhex.go rename to 
pkg/auxents/unhex.go diff --git a/internal/pkg/bifs/arithmetic.go b/pkg/bifs/arithmetic.go similarity index 99% rename from internal/pkg/bifs/arithmetic.go rename to pkg/bifs/arithmetic.go index 7728270ec..f5a2b853e 100644 --- a/internal/pkg/bifs/arithmetic.go +++ b/pkg/bifs/arithmetic.go @@ -4,8 +4,8 @@ import ( "fmt" "math" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ================================================================ diff --git a/internal/pkg/bifs/arithmetic_test.go b/pkg/bifs/arithmetic_test.go similarity index 98% rename from internal/pkg/bifs/arithmetic_test.go rename to pkg/bifs/arithmetic_test.go index 2890c37e7..76efd45ea 100644 --- a/internal/pkg/bifs/arithmetic_test.go +++ b/pkg/bifs/arithmetic_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func TestBIF_plus_unary(t *testing.T) { diff --git a/internal/pkg/bifs/base.go b/pkg/bifs/base.go similarity index 98% rename from internal/pkg/bifs/base.go rename to pkg/bifs/base.go index c0bf0e810..28aa0d6bf 100644 --- a/internal/pkg/bifs/base.go +++ b/pkg/bifs/base.go @@ -50,9 +50,9 @@ package bifs import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // Function-pointer type for zary functions. 
diff --git a/internal/pkg/bifs/bits.go b/pkg/bifs/bits.go similarity index 99% rename from internal/pkg/bifs/bits.go rename to pkg/bifs/bits.go index 7fb786307..5ed8cc20e 100644 --- a/internal/pkg/bifs/bits.go +++ b/pkg/bifs/bits.go @@ -1,7 +1,7 @@ package bifs import ( - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) // ================================================================ diff --git a/internal/pkg/bifs/bits_test.go b/pkg/bifs/bits_test.go similarity index 87% rename from internal/pkg/bifs/bits_test.go rename to pkg/bifs/bits_test.go index bc88698a1..96718e00d 100644 --- a/internal/pkg/bifs/bits_test.go +++ b/pkg/bifs/bits_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func TestBIF_bitcount(t *testing.T) { diff --git a/internal/pkg/bifs/booleans.go b/pkg/bifs/booleans.go similarity index 95% rename from internal/pkg/bifs/booleans.go rename to pkg/bifs/booleans.go index da77c6199..c0b3bc3db 100644 --- a/internal/pkg/bifs/booleans.go +++ b/pkg/bifs/booleans.go @@ -5,7 +5,7 @@ package bifs import ( - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func BIF_logical_NOT(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/cmp.go b/pkg/bifs/cmp.go similarity index 99% rename from internal/pkg/bifs/cmp.go rename to pkg/bifs/cmp.go index 7d31cb7ee..832feab57 100644 --- a/internal/pkg/bifs/cmp.go +++ b/pkg/bifs/cmp.go @@ -5,8 +5,8 @@ package bifs import ( - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/internal/pkg/bifs/collections.go b/pkg/bifs/collections.go similarity index 99% rename from internal/pkg/bifs/collections.go 
rename to pkg/bifs/collections.go index 5278b5b6a..a734ee451 100644 --- a/internal/pkg/bifs/collections.go +++ b/pkg/bifs/collections.go @@ -5,8 +5,8 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ================================================================ diff --git a/internal/pkg/bifs/collections_test.go b/pkg/bifs/collections_test.go similarity index 98% rename from internal/pkg/bifs/collections_test.go rename to pkg/bifs/collections_test.go index 417ae3534..16ffba8c6 100644 --- a/internal/pkg/bifs/collections_test.go +++ b/pkg/bifs/collections_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func TestBIF_length(t *testing.T) { diff --git a/internal/pkg/bifs/datetime.go b/pkg/bifs/datetime.go similarity index 99% rename from internal/pkg/bifs/datetime.go rename to pkg/bifs/datetime.go index 4126078b1..9fa11e6b0 100644 --- a/internal/pkg/bifs/datetime.go +++ b/pkg/bifs/datetime.go @@ -5,11 +5,11 @@ import ( "regexp" "time" - strptime "github.com/johnkerl/miller/internal/pkg/pbnjay-strptime" + strptime "github.com/johnkerl/miller/pkg/pbnjay-strptime" "github.com/lestrrat-go/strftime" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) const ISO8601_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" diff --git a/internal/pkg/bifs/hashing.go b/pkg/bifs/hashing.go similarity index 95% rename from internal/pkg/bifs/hashing.go rename to pkg/bifs/hashing.go index 09552ab0f..e2d09d1e4 100644 --- a/internal/pkg/bifs/hashing.go +++ b/pkg/bifs/hashing.go @@ -7,7 +7,7 @@ import ( "crypto/sha512" "fmt" - "github.com/johnkerl/miller/internal/pkg/mlrval" + 
"github.com/johnkerl/miller/pkg/mlrval" ) func BIF_md5(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/hashing_test.go b/pkg/bifs/hashing_test.go similarity index 91% rename from internal/pkg/bifs/hashing_test.go rename to pkg/bifs/hashing_test.go index 3e736c0a0..6b44028be 100644 --- a/internal/pkg/bifs/hashing_test.go +++ b/pkg/bifs/hashing_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func TestBIF_md5(t *testing.T) { diff --git a/internal/pkg/bifs/mathlib.go b/pkg/bifs/mathlib.go similarity index 99% rename from internal/pkg/bifs/mathlib.go rename to pkg/bifs/mathlib.go index b18e0d4ed..b415cb809 100644 --- a/internal/pkg/bifs/mathlib.go +++ b/pkg/bifs/mathlib.go @@ -7,8 +7,8 @@ package bifs import ( "math" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/bifs/percentiles.go b/pkg/bifs/percentiles.go similarity index 99% rename from internal/pkg/bifs/percentiles.go rename to pkg/bifs/percentiles.go index 087e7f200..cecb98aec 100644 --- a/internal/pkg/bifs/percentiles.go +++ b/pkg/bifs/percentiles.go @@ -3,7 +3,7 @@ package bifs import ( "math" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func GetPercentileLinearlyInterpolated( diff --git a/internal/pkg/bifs/random.go b/pkg/bifs/random.go similarity index 94% rename from internal/pkg/bifs/random.go rename to pkg/bifs/random.go index 5562b4d30..c85509da6 100644 --- a/internal/pkg/bifs/random.go +++ b/pkg/bifs/random.go @@ -3,8 +3,8 @@ package bifs import ( "math" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + 
"github.com/johnkerl/miller/pkg/mlrval" ) func BIF_urand() *mlrval.Mlrval { diff --git a/internal/pkg/bifs/regex.go b/pkg/bifs/regex.go similarity index 98% rename from internal/pkg/bifs/regex.go rename to pkg/bifs/regex.go index 72a6878b4..52cab9ac5 100644 --- a/internal/pkg/bifs/regex.go +++ b/pkg/bifs/regex.go @@ -3,8 +3,8 @@ package bifs import ( "strings" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // BIF_ssub implements the ssub function -- no-frills string-replace, no diff --git a/internal/pkg/bifs/relative_time.go b/pkg/bifs/relative_time.go similarity index 99% rename from internal/pkg/bifs/relative_time.go rename to pkg/bifs/relative_time.go index d6d57b16a..f36258ffe 100644 --- a/internal/pkg/bifs/relative_time.go +++ b/pkg/bifs/relative_time.go @@ -5,7 +5,7 @@ import ( "math" "strings" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/stats.go b/pkg/bifs/stats.go similarity index 99% rename from internal/pkg/bifs/stats.go rename to pkg/bifs/stats.go index c809f7167..ff3531a31 100644 --- a/internal/pkg/bifs/stats.go +++ b/pkg/bifs/stats.go @@ -4,8 +4,8 @@ import ( "math" "sort" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/bifs/stats_test.go b/pkg/bifs/stats_test.go similarity index 99% rename from internal/pkg/bifs/stats_test.go rename to pkg/bifs/stats_test.go index 3fc21c84a..735ceab83 100644 --- a/internal/pkg/bifs/stats_test.go +++ b/pkg/bifs/stats_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/assert" - 
"github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) func stats_test_array(n int) *mlrval.Mlrval { diff --git a/internal/pkg/bifs/strings.go b/pkg/bifs/strings.go similarity index 99% rename from internal/pkg/bifs/strings.go rename to pkg/bifs/strings.go index 7ef8019a4..cd68ee480 100644 --- a/internal/pkg/bifs/strings.go +++ b/pkg/bifs/strings.go @@ -7,8 +7,8 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ================================================================ diff --git a/internal/pkg/bifs/system.go b/pkg/bifs/system.go similarity index 93% rename from internal/pkg/bifs/system.go rename to pkg/bifs/system.go index a3ac73ad4..d56f0bb66 100644 --- a/internal/pkg/bifs/system.go +++ b/pkg/bifs/system.go @@ -6,9 +6,9 @@ import ( "runtime" "strings" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/platform" - "github.com/johnkerl/miller/internal/pkg/version" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/platform" + "github.com/johnkerl/miller/pkg/version" ) func BIF_version() *mlrval.Mlrval { diff --git a/internal/pkg/bifs/types.go b/pkg/bifs/types.go similarity index 98% rename from internal/pkg/bifs/types.go rename to pkg/bifs/types.go index b57e0dc84..87ee80448 100644 --- a/internal/pkg/bifs/types.go +++ b/pkg/bifs/types.go @@ -5,9 +5,9 @@ import ( "math" "os" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/pkg/cli/README.md b/pkg/cli/README.md new file mode 100644 index 
000000000..f08cc1aba --- /dev/null +++ b/pkg/cli/README.md @@ -0,0 +1,4 @@ +Datatypes for parsing the Miller command line, and the flags table. + +* `pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer. +* `pkg/cli` contains datatypes and the flags table for the CLI-parser, which was split out to avoid a Go package-import cycle. diff --git a/internal/pkg/cli/doc.go b/pkg/cli/doc.go similarity index 100% rename from internal/pkg/cli/doc.go rename to pkg/cli/doc.go diff --git a/internal/pkg/cli/flag_types.go b/pkg/cli/flag_types.go similarity index 99% rename from internal/pkg/cli/flag_types.go rename to pkg/cli/flag_types.go index bcb86e290..590487d43 100644 --- a/internal/pkg/cli/flag_types.go +++ b/pkg/cli/flag_types.go @@ -18,7 +18,7 @@ // o Autogenerating webdocs (mkdocs). // // * For these reasons, flags are organized into tables; for documentation -// purposes, flags are organized into sections (see internal/pkg/cli/option_parse.go). +// purposes, flags are organized into sections (see pkg/cli/option_parse.go). // // * The Flag struct separates out flag name (e.g. `--csv`), any alternate // names (e.g. 
`-c`), any arguments the flag may take, a help string, and a @@ -42,8 +42,8 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/cli/flatten_unflatten.go b/pkg/cli/flatten_unflatten.go similarity index 100% rename from internal/pkg/cli/flatten_unflatten.go rename to pkg/cli/flatten_unflatten.go diff --git a/internal/pkg/cli/mlrcli_util.go b/pkg/cli/mlrcli_util.go similarity index 100% rename from internal/pkg/cli/mlrcli_util.go rename to pkg/cli/mlrcli_util.go diff --git a/internal/pkg/cli/option_parse.go b/pkg/cli/option_parse.go similarity index 99% rename from internal/pkg/cli/option_parse.go rename to pkg/cli/option_parse.go index 013deb582..7f838a096 100644 --- a/internal/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -13,9 +13,9 @@ import ( "github.com/mattn/go-isatty" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // FinalizeReaderOptions does a few things. 
diff --git a/internal/pkg/cli/option_types.go b/pkg/cli/option_types.go similarity index 99% rename from internal/pkg/cli/option_types.go rename to pkg/cli/option_types.go index b70d4a2f7..d959e0c52 100644 --- a/internal/pkg/cli/option_types.go +++ b/pkg/cli/option_types.go @@ -9,7 +9,7 @@ package cli import ( "regexp" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) type TCommentHandling int diff --git a/internal/pkg/cli/separators.go b/pkg/cli/separators.go similarity index 100% rename from internal/pkg/cli/separators.go rename to pkg/cli/separators.go diff --git a/internal/pkg/cli/verb_utils.go b/pkg/cli/verb_utils.go similarity index 98% rename from internal/pkg/cli/verb_utils.go rename to pkg/cli/verb_utils.go index 9dda91ebd..421af9af8 100644 --- a/internal/pkg/cli/verb_utils.go +++ b/pkg/cli/verb_utils.go @@ -9,7 +9,7 @@ import ( "os" "strconv" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // For flags with values, e.g. ["-n" "10"], while we're looking at the "-n" this let us see if the "10" slot exists. diff --git a/internal/pkg/climain/README.md b/pkg/climain/README.md similarity index 51% rename from internal/pkg/climain/README.md rename to pkg/climain/README.md index a1b380667..1cd6b186d 100644 --- a/internal/pkg/climain/README.md +++ b/pkg/climain/README.md @@ -1,5 +1,5 @@ Logic for parsing the Miller command line. -* `internal/pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer. -* `internal/pkg/cli` contains datatypes for the CLI-parser, which was split out to avoid a Go package-import cycle. 
+* `pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer. +* `pkg/cli` contains datatypes for the CLI-parser, which was split out to avoid a Go package-import cycle. * I don't use the Go [`flag`](https://golang.org/pkg/flag/) package. The `flag` package is quite fine; Miller's command-line processing is multi-purpose between serving CLI needs per se as well as for manpage/docfile generation, and I found it simplest to roll my own command-line handling here. More importantly, some Miller verbs such as ``sort`` take flags more than once -- ``mlr sort -f field1 -n field2 -f field3`` -- which is not supported by the `flag` package. diff --git a/internal/pkg/climain/doc.go b/pkg/climain/doc.go similarity index 100% rename from internal/pkg/climain/doc.go rename to pkg/climain/doc.go diff --git a/internal/pkg/climain/mlrcli_mlrrc.go b/pkg/climain/mlrcli_mlrrc.go similarity index 98% rename from internal/pkg/climain/mlrcli_mlrrc.go rename to pkg/climain/mlrcli_mlrrc.go index beb4aa45c..d3c5c1401 100644 --- a/internal/pkg/climain/mlrcli_mlrrc.go +++ b/pkg/climain/mlrcli_mlrrc.go @@ -8,7 +8,7 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) // loadMlrrcOrDie rule: If $MLRRC is set, use it and only it. 
Otherwise try diff --git a/internal/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go similarity index 97% rename from internal/pkg/climain/mlrcli_parse.go rename to pkg/climain/mlrcli_parse.go index e68b5902e..9e8679eef 100644 --- a/internal/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -74,13 +74,13 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/terminals" - "github.com/johnkerl/miller/internal/pkg/terminals/help" - "github.com/johnkerl/miller/internal/pkg/transformers" - "github.com/johnkerl/miller/internal/pkg/version" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/terminals" + "github.com/johnkerl/miller/pkg/terminals/help" + "github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/pkg/version" ) // ParseCommandLine is the entrypoint for handling the Miller command line: diff --git a/internal/pkg/climain/mlrcli_shebang.go b/pkg/climain/mlrcli_shebang.go similarity index 98% rename from internal/pkg/climain/mlrcli_shebang.go rename to pkg/climain/mlrcli_shebang.go index e465ed2b7..b05643bce 100644 --- a/internal/pkg/climain/mlrcli_shebang.go +++ b/pkg/climain/mlrcli_shebang.go @@ -6,7 +6,7 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" shellquote "github.com/kballard/go-shellquote" ) diff --git a/internal/pkg/colorizer/README.md b/pkg/colorizer/README.md similarity index 58% rename from internal/pkg/colorizer/README.md rename to pkg/colorizer/README.md index 8ac92b0fe..b507e1936 100644 --- a/internal/pkg/colorizer/README.md +++ b/pkg/colorizer/README.md @@ -1,4 +1,4 @@ ANSI colors for key/value color highlighting, test pass/fail, etc. 
-Not placed in the internal/pkg/platform directory since these don't check the build-for +Not placed in the pkg/platform directory since these don't check the build-for platform but rather simply the TERM environment variable. diff --git a/internal/pkg/colorizer/colorizer.go b/pkg/colorizer/colorizer.go similarity index 100% rename from internal/pkg/colorizer/colorizer.go rename to pkg/colorizer/colorizer.go diff --git a/internal/pkg/colorizer/doc.go b/pkg/colorizer/doc.go similarity index 100% rename from internal/pkg/colorizer/doc.go rename to pkg/colorizer/doc.go diff --git a/internal/pkg/dsl/README.md b/pkg/dsl/README.md similarity index 94% rename from internal/pkg/dsl/README.md rename to pkg/dsl/README.md index 53ff79eb3..923f413db 100644 --- a/internal/pkg/dsl/README.md +++ b/pkg/dsl/README.md @@ -97,5 +97,5 @@ tree is executed once on every data record. # Source directories/files -* The AST logic is in `./ast*.go`. I didn't use a `internal/pkg/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle. +* The AST logic is in `./ast*.go`. I didn't use a `pkg/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle. * The CST logic is in [`./cst`](./cst). Please see [cst/README.md](./cst/README.md) for more information. 
diff --git a/internal/pkg/dsl/ast_build.go b/pkg/dsl/ast_build.go similarity index 98% rename from internal/pkg/dsl/ast_build.go rename to pkg/dsl/ast_build.go index 47f3a583d..f417998f6 100644 --- a/internal/pkg/dsl/ast_build.go +++ b/pkg/dsl/ast_build.go @@ -8,8 +8,8 @@ package dsl import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/parsing/token" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/ast_print.go b/pkg/dsl/ast_print.go similarity index 100% rename from internal/pkg/dsl/ast_print.go rename to pkg/dsl/ast_print.go diff --git a/internal/pkg/dsl/ast_types.go b/pkg/dsl/ast_types.go similarity index 99% rename from internal/pkg/dsl/ast_types.go rename to pkg/dsl/ast_types.go index 825b338e9..6856ce830 100644 --- a/internal/pkg/dsl/ast_types.go +++ b/pkg/dsl/ast_types.go @@ -5,7 +5,7 @@ package dsl import ( - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/README.md b/pkg/dsl/cst/README.md similarity index 93% rename from internal/pkg/dsl/cst/README.md rename to pkg/dsl/cst/README.md index 02dc6f73a..d1936faf9 100644 --- a/internal/pkg/dsl/cst/README.md +++ b/pkg/dsl/cst/README.md @@ -11,7 +11,7 @@ See [../dsl/README.md](../README.md) for more information about Miller's use of Go is a strongly typed language, but the AST is polymorphic. This results in if/else or switch statements as an AST is walked. -Also, when we modify code, there can be changes in the [BNF grammar](../../parsing/mlr.bnf) not yet reflected in the [AST](../../internal/pkg/dsl/ast_types.go). Likewise, there can be AST changes not yet reflected here. (Example: you are partway through adding a new binary operator to the grammar.) 
+Also, when we modify code, there can be changes in the [BNF grammar](../../parsing/mlr.bnf) not yet reflected in the [AST](../../pkg/dsl/ast_types.go). Likewise, there can be AST changes not yet reflected here. (Example: you are partway through adding a new binary operator to the grammar.) As a result, throughout the code, there are error checks which may seem redundant but which are in place to make incremental development more pleasant and robust. diff --git a/internal/pkg/dsl/cst/assignments.go b/pkg/dsl/cst/assignments.go similarity index 94% rename from internal/pkg/dsl/cst/assignments.go rename to pkg/dsl/cst/assignments.go index 41540ed20..129ec850c 100644 --- a/internal/pkg/dsl/cst/assignments.go +++ b/pkg/dsl/cst/assignments.go @@ -5,9 +5,9 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/runtime" ) // ================================================================ diff --git a/internal/pkg/dsl/cst/block_exit.go b/pkg/dsl/cst/block_exit.go similarity index 94% rename from internal/pkg/dsl/cst/block_exit.go rename to pkg/dsl/cst/block_exit.go index 3a912a990..b52b363cb 100644 --- a/internal/pkg/dsl/cst/block_exit.go +++ b/pkg/dsl/cst/block_exit.go @@ -8,9 +8,9 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/blocks.go b/pkg/dsl/cst/blocks.go similarity index 96% rename from internal/pkg/dsl/cst/blocks.go rename to pkg/dsl/cst/blocks.go index 6eb805b11..d51c70d75 100644 --- 
a/internal/pkg/dsl/cst/blocks.go +++ b/pkg/dsl/cst/blocks.go @@ -6,9 +6,9 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go similarity index 99% rename from internal/pkg/dsl/cst/builtin_function_manager.go rename to pkg/dsl/cst/builtin_function_manager.go index e9154dc60..7fbf60f3d 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -19,9 +19,9 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) type TFunctionClass string diff --git a/internal/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go similarity index 99% rename from internal/pkg/dsl/cst/builtin_functions.go rename to pkg/dsl/cst/builtin_functions.go index dfc1b4596..26f1dacce 100644 --- a/internal/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // 
---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/collections.go b/pkg/dsl/cst/collections.go similarity index 98% rename from internal/pkg/dsl/cst/collections.go rename to pkg/dsl/cst/collections.go index 66d63788f..1dcee4daf 100644 --- a/internal/pkg/dsl/cst/collections.go +++ b/pkg/dsl/cst/collections.go @@ -8,11 +8,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/cond.go b/pkg/dsl/cst/cond.go similarity index 88% rename from internal/pkg/dsl/cst/cond.go rename to pkg/dsl/cst/cond.go index cd5f0c128..f7f0063e3 100644 --- a/internal/pkg/dsl/cst/cond.go +++ b/pkg/dsl/cst/cond.go @@ -8,11 +8,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/parsing/token" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/runtime" ) type CondBlockNode struct { diff --git a/internal/pkg/dsl/cst/doc.go b/pkg/dsl/cst/doc.go similarity index 100% rename from internal/pkg/dsl/cst/doc.go rename to pkg/dsl/cst/doc.go diff --git a/internal/pkg/dsl/cst/dump.go b/pkg/dsl/cst/dump.go similarity index 96% rename from internal/pkg/dsl/cst/dump.go 
rename to pkg/dsl/cst/dump.go index a53e43a1b..14070527d 100644 --- a/internal/pkg/dsl/cst/dump.go +++ b/pkg/dsl/cst/dump.go @@ -21,11 +21,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/internal/pkg/dsl/cst/emit1.go b/pkg/dsl/cst/emit1.go similarity index 92% rename from internal/pkg/dsl/cst/emit1.go rename to pkg/dsl/cst/emit1.go index 5a43105c7..a4996e312 100644 --- a/internal/pkg/dsl/cst/emit1.go +++ b/pkg/dsl/cst/emit1.go @@ -22,10 +22,10 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) type Emit1StatementNode struct { diff --git a/internal/pkg/dsl/cst/emit_emitp.go b/pkg/dsl/cst/emit_emitp.go similarity index 98% rename from internal/pkg/dsl/cst/emit_emitp.go rename to pkg/dsl/cst/emit_emitp.go index 43e87243f..3552f023d 100644 --- a/internal/pkg/dsl/cst/emit_emitp.go +++ b/pkg/dsl/cst/emit_emitp.go @@ -41,13 +41,13 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - 
"github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/internal/pkg/dsl/cst/emitf.go b/pkg/dsl/cst/emitf.go similarity index 95% rename from internal/pkg/dsl/cst/emitf.go rename to pkg/dsl/cst/emitf.go index 213149e86..97aebfe98 100644 --- a/internal/pkg/dsl/cst/emitf.go +++ b/pkg/dsl/cst/emitf.go @@ -8,12 +8,12 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/internal/pkg/dsl/cst/env.go b/pkg/dsl/cst/env.go similarity index 87% rename from internal/pkg/dsl/cst/env.go rename to pkg/dsl/cst/env.go index 73e37c343..c2f038f2d 100644 --- a/internal/pkg/dsl/cst/env.go +++ b/pkg/dsl/cst/env.go @@ -10,10 +10,10 @@ package cst import ( "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + 
"github.com/johnkerl/miller/pkg/runtime" ) type EnvironmentVariableNode struct { diff --git a/internal/pkg/dsl/cst/evaluable.go b/pkg/dsl/cst/evaluable.go similarity index 96% rename from internal/pkg/dsl/cst/evaluable.go rename to pkg/dsl/cst/evaluable.go index d46cab5b1..9440537ba 100644 --- a/internal/pkg/dsl/cst/evaluable.go +++ b/pkg/dsl/cst/evaluable.go @@ -10,10 +10,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/filter.go b/pkg/dsl/cst/filter.go similarity index 95% rename from internal/pkg/dsl/cst/filter.go rename to pkg/dsl/cst/filter.go index dfab5ceee..4a4d3984e 100644 --- a/internal/pkg/dsl/cst/filter.go +++ b/pkg/dsl/cst/filter.go @@ -19,9 +19,9 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/for.go b/pkg/dsl/cst/for.go similarity index 99% rename from internal/pkg/dsl/cst/for.go rename to pkg/dsl/cst/for.go index 3e9b7e309..34d6b808d 100644 --- a/internal/pkg/dsl/cst/for.go +++ b/pkg/dsl/cst/for.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/parsing/token" - 
"github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/functions.go b/pkg/dsl/cst/functions.go similarity index 97% rename from internal/pkg/dsl/cst/functions.go rename to pkg/dsl/cst/functions.go index d601a0644..5aca6d397 100644 --- a/internal/pkg/dsl/cst/functions.go +++ b/pkg/dsl/cst/functions.go @@ -9,8 +9,8 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/hofs.go b/pkg/dsl/cst/hofs.go similarity index 99% rename from internal/pkg/dsl/cst/hofs.go rename to pkg/dsl/cst/hofs.go index 9257d7f46..87edc01b2 100644 --- a/internal/pkg/dsl/cst/hofs.go +++ b/pkg/dsl/cst/hofs.go @@ -14,12 +14,12 @@ import ( "github.com/facette/natsort" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) -// Most function types are in the github.com/johnkerl/miller/internal/pkg/types package. These types, though, +// Most function types are in the github.com/johnkerl/miller/pkg/types package. These types, though, // include functions which need to access CST state in order to call back to // user-defined functions. To avoid a package-cycle dependency, they are // defined here. 
diff --git a/internal/pkg/dsl/cst/if.go b/pkg/dsl/cst/if.go similarity index 94% rename from internal/pkg/dsl/cst/if.go rename to pkg/dsl/cst/if.go index 40da46c7a..b947c7f6e 100644 --- a/internal/pkg/dsl/cst/if.go +++ b/pkg/dsl/cst/if.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/parsing/token" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/keyword_usage.go b/pkg/dsl/cst/keyword_usage.go similarity index 99% rename from internal/pkg/dsl/cst/keyword_usage.go rename to pkg/dsl/cst/keyword_usage.go index 23340ccf8..c5bec8f7a 100644 --- a/internal/pkg/dsl/cst/keyword_usage.go +++ b/pkg/dsl/cst/keyword_usage.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go similarity index 98% rename from internal/pkg/dsl/cst/leaves.go rename to pkg/dsl/cst/leaves.go index 41b9cef3a..382525e52 100644 --- a/internal/pkg/dsl/cst/leaves.go +++ b/pkg/dsl/cst/leaves.go @@ -8,10 +8,10 @@ import ( "fmt" "math" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + 
"github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/lvalues.go b/pkg/dsl/cst/lvalues.go similarity index 99% rename from internal/pkg/dsl/cst/lvalues.go rename to pkg/dsl/cst/lvalues.go index 6be69e88b..073c5d991 100644 --- a/internal/pkg/dsl/cst/lvalues.go +++ b/pkg/dsl/cst/lvalues.go @@ -9,10 +9,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/print.go b/pkg/dsl/cst/print.go similarity index 97% rename from internal/pkg/dsl/cst/print.go rename to pkg/dsl/cst/print.go index a7318c398..8c68593be 100644 --- a/internal/pkg/dsl/cst/print.go +++ b/pkg/dsl/cst/print.go @@ -9,11 +9,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/root.go b/pkg/dsl/cst/root.go similarity index 97% rename from internal/pkg/dsl/cst/root.go rename to pkg/dsl/cst/root.go index 979c82ad1..f48ebc330 100644 --- a/internal/pkg/dsl/cst/root.go +++ b/pkg/dsl/cst/root.go 
@@ -11,13 +11,13 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/parsing/lexer" - "github.com/johnkerl/miller/internal/pkg/parsing/parser" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/parsing/lexer" + "github.com/johnkerl/miller/pkg/parsing/parser" + "github.com/johnkerl/miller/pkg/runtime" ) // NewEmptyRoot sets up an empty CST, before ingesting any DSL strings. For diff --git a/internal/pkg/dsl/cst/signature.go b/pkg/dsl/cst/signature.go similarity index 95% rename from internal/pkg/dsl/cst/signature.go rename to pkg/dsl/cst/signature.go index aeac8e80a..1ee554763 100644 --- a/internal/pkg/dsl/cst/signature.go +++ b/pkg/dsl/cst/signature.go @@ -6,7 +6,7 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/statements.go b/pkg/dsl/cst/statements.go similarity index 98% rename from internal/pkg/dsl/cst/statements.go rename to pkg/dsl/cst/statements.go index 1ea5ca026..ce42cb854 100644 --- a/internal/pkg/dsl/cst/statements.go +++ b/pkg/dsl/cst/statements.go @@ -8,7 +8,7 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" + "github.com/johnkerl/miller/pkg/dsl" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/subroutines.go b/pkg/dsl/cst/subroutines.go similarity index 96% rename from internal/pkg/dsl/cst/subroutines.go rename to pkg/dsl/cst/subroutines.go index b91326a81..6c1b76dae 100644 --- 
a/internal/pkg/dsl/cst/subroutines.go +++ b/pkg/dsl/cst/subroutines.go @@ -9,8 +9,8 @@ package cst import ( - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/tee.go b/pkg/dsl/cst/tee.go similarity index 94% rename from internal/pkg/dsl/cst/tee.go rename to pkg/dsl/cst/tee.go index b76ac4320..df9b8ff0b 100644 --- a/internal/pkg/dsl/cst/tee.go +++ b/pkg/dsl/cst/tee.go @@ -7,12 +7,12 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/types.go b/pkg/dsl/cst/types.go similarity index 95% rename from internal/pkg/dsl/cst/types.go rename to pkg/dsl/cst/types.go index e9a606afd..4fa935edf 100644 --- a/internal/pkg/dsl/cst/types.go +++ b/pkg/dsl/cst/types.go @@ -7,10 +7,10 @@ package cst import ( "container/list" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" ) // 
---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go similarity index 98% rename from internal/pkg/dsl/cst/udf.go rename to pkg/dsl/cst/udf.go index 83c1a5b09..9be4bf59c 100644 --- a/internal/pkg/dsl/cst/udf.go +++ b/pkg/dsl/cst/udf.go @@ -8,11 +8,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/uds.go b/pkg/dsl/cst/uds.go similarity index 97% rename from internal/pkg/dsl/cst/uds.go rename to pkg/dsl/cst/uds.go index 944db7a11..3a72e4c23 100644 --- a/internal/pkg/dsl/cst/uds.go +++ b/pkg/dsl/cst/uds.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/validate.go b/pkg/dsl/cst/validate.go similarity index 98% rename from internal/pkg/dsl/cst/validate.go rename to pkg/dsl/cst/validate.go index e5e4746ef..305c1bf7f 100644 --- a/internal/pkg/dsl/cst/validate.go +++ b/pkg/dsl/cst/validate.go @@ -9,8 +9,8 @@ package cst import ( 
"fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/warn.go b/pkg/dsl/cst/warn.go similarity index 98% rename from internal/pkg/dsl/cst/warn.go rename to pkg/dsl/cst/warn.go index a66874ee9..55850c8b1 100644 --- a/internal/pkg/dsl/cst/warn.go +++ b/pkg/dsl/cst/warn.go @@ -11,8 +11,8 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/dsl/cst/while.go b/pkg/dsl/cst/while.go similarity index 95% rename from internal/pkg/dsl/cst/while.go rename to pkg/dsl/cst/while.go index 538c1f153..4e088df6c 100644 --- a/internal/pkg/dsl/cst/while.go +++ b/pkg/dsl/cst/while.go @@ -7,10 +7,10 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/parsing/token" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/runtime" ) // ================================================================ diff --git a/internal/pkg/dsl/doc.go b/pkg/dsl/doc.go similarity index 100% rename from internal/pkg/dsl/doc.go rename to pkg/dsl/doc.go diff --git a/internal/pkg/dsl/token.go b/pkg/dsl/token.go similarity index 87% rename from internal/pkg/dsl/token.go rename to pkg/dsl/token.go index 1cf624d4e..ff79d26e0 100644 --- a/internal/pkg/dsl/token.go +++ b/pkg/dsl/token.go @@ -3,7 +3,7 @@ package dsl import ( "fmt" - 
"github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) // TokenToLocationInfo is used to track runtime errors back to source-code locations in DSL diff --git a/internal/pkg/entrypoint/README.md b/pkg/entrypoint/README.md similarity index 100% rename from internal/pkg/entrypoint/README.md rename to pkg/entrypoint/README.md diff --git a/internal/pkg/entrypoint/doc.go b/pkg/entrypoint/doc.go similarity index 100% rename from internal/pkg/entrypoint/doc.go rename to pkg/entrypoint/doc.go diff --git a/internal/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go similarity index 94% rename from internal/pkg/entrypoint/entrypoint.go rename to pkg/entrypoint/entrypoint.go index f48a8f10b..7f7fab711 100644 --- a/internal/pkg/entrypoint/entrypoint.go +++ b/pkg/entrypoint/entrypoint.go @@ -11,13 +11,13 @@ import ( "os" "path" - "github.com/johnkerl/miller/internal/pkg/auxents" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/climain" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/platform" - "github.com/johnkerl/miller/internal/pkg/stream" - "github.com/johnkerl/miller/internal/pkg/transformers" + "github.com/johnkerl/miller/pkg/auxents" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/climain" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/platform" + "github.com/johnkerl/miller/pkg/stream" + "github.com/johnkerl/miller/pkg/transformers" ) type MainReturn struct { diff --git a/internal/pkg/go-csv/LICENSE b/pkg/go-csv/LICENSE similarity index 100% rename from internal/pkg/go-csv/LICENSE rename to pkg/go-csv/LICENSE diff --git a/internal/pkg/go-csv/README.md b/pkg/go-csv/README.md similarity index 100% rename from internal/pkg/go-csv/README.md rename to pkg/go-csv/README.md diff --git a/internal/pkg/go-csv/csv_reader.go b/pkg/go-csv/csv_reader.go similarity index 100% rename from 
internal/pkg/go-csv/csv_reader.go rename to pkg/go-csv/csv_reader.go diff --git a/internal/pkg/go-csv/csv_writer.go b/pkg/go-csv/csv_writer.go similarity index 100% rename from internal/pkg/go-csv/csv_writer.go rename to pkg/go-csv/csv_writer.go diff --git a/internal/pkg/input/README.md b/pkg/input/README.md similarity index 100% rename from internal/pkg/input/README.md rename to pkg/input/README.md diff --git a/internal/pkg/input/doc.go b/pkg/input/doc.go similarity index 100% rename from internal/pkg/input/doc.go rename to pkg/input/doc.go diff --git a/internal/pkg/input/pseudo_reader_gen.go b/pkg/input/pseudo_reader_gen.go similarity index 96% rename from internal/pkg/input/pseudo_reader_gen.go rename to pkg/input/pseudo_reader_gen.go index 2808ce451..6479cb4d7 100644 --- a/internal/pkg/input/pseudo_reader_gen.go +++ b/pkg/input/pseudo_reader_gen.go @@ -4,10 +4,10 @@ import ( "container/list" "fmt" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type PseudoReaderGen struct { diff --git a/internal/pkg/input/record_reader.go b/pkg/input/record_reader.go similarity index 96% rename from internal/pkg/input/record_reader.go rename to pkg/input/record_reader.go index b56a25ceb..280201936 100644 --- a/internal/pkg/input/record_reader.go +++ b/pkg/input/record_reader.go @@ -10,9 +10,9 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) const CSV_BOM = "\xef\xbb\xbf" diff --git 
a/internal/pkg/input/record_reader_benchmark_test.go b/pkg/input/record_reader_benchmark_test.go similarity index 92% rename from internal/pkg/input/record_reader_benchmark_test.go rename to pkg/input/record_reader_benchmark_test.go index 93ce89857..9d2352983 100644 --- a/internal/pkg/input/record_reader_benchmark_test.go +++ b/pkg/input/record_reader_benchmark_test.go @@ -5,10 +5,10 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/input/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/input/... func BenchmarkDKVPParse(b *testing.B) { readerOptions := &cli.TReaderOptions{ diff --git a/internal/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go similarity index 97% rename from internal/pkg/input/record_reader_csv.go rename to pkg/input/record_reader_csv.go index fc1da8dc7..b2c2f1853 100644 --- a/internal/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -8,12 +8,12 @@ import ( "strconv" "strings" - csv "github.com/johnkerl/miller/internal/pkg/go-csv" + csv "github.com/johnkerl/miller/pkg/go-csv" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go similarity index 98% rename from internal/pkg/input/record_reader_csvlite.go rename to pkg/input/record_reader_csvlite.go index 630f78657..222064358 100644 --- a/internal/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go 
@@ -25,10 +25,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // recordBatchGetterCSV points to either an explicit-CSV-header or diff --git a/internal/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go similarity index 96% rename from internal/pkg/input/record_reader_dkvp_nidx.go rename to pkg/input/record_reader_dkvp_nidx.go index d7f83dd76..5cd92f77d 100644 --- a/internal/pkg/input/record_reader_dkvp_nidx.go +++ b/pkg/input/record_reader_dkvp_nidx.go @@ -8,10 +8,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // splitter_DKVP_NIDX is a function type for the one bit of code differing diff --git a/internal/pkg/input/record_reader_dkvp_test.go b/pkg/input/record_reader_dkvp_test.go similarity index 97% rename from internal/pkg/input/record_reader_dkvp_test.go rename to pkg/input/record_reader_dkvp_test.go index 101cfc83d..77e0e557f 100644 --- a/internal/pkg/input/record_reader_dkvp_test.go +++ b/pkg/input/record_reader_dkvp_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) func TestRecordFromDKVPLine(t *testing.T) { diff --git a/internal/pkg/input/record_reader_factory.go b/pkg/input/record_reader_factory.go similarity index 95% rename from 
internal/pkg/input/record_reader_factory.go rename to pkg/input/record_reader_factory.go index 2a501831b..e8328fd51 100644 --- a/internal/pkg/input/record_reader_factory.go +++ b/pkg/input/record_reader_factory.go @@ -3,7 +3,7 @@ package input import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) func Create(readerOptions *cli.TReaderOptions, recordsPerBatch int64) (IRecordReader, error) { diff --git a/internal/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go similarity index 97% rename from internal/pkg/input/record_reader_json.go rename to pkg/input/record_reader_json.go index 7cdd02fb9..27b9b8e2c 100644 --- a/internal/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -9,10 +9,10 @@ import ( "encoding/json" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordReaderJSON struct { diff --git a/internal/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go similarity index 98% rename from internal/pkg/input/record_reader_tsv.go rename to pkg/input/record_reader_tsv.go index 7b496e725..d3b9d75a3 100644 --- a/internal/pkg/input/record_reader_tsv.go +++ b/pkg/input/record_reader_tsv.go @@ -7,10 +7,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // recordBatchGetterTSV points to either an explicit-TSV-header or diff 
--git a/internal/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go similarity index 98% rename from internal/pkg/input/record_reader_xtab.go rename to pkg/input/record_reader_xtab.go index 1cb8ff79e..0cfc74b25 100644 --- a/internal/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -8,10 +8,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type iXTABPairSplitter interface { diff --git a/internal/pkg/lib/README.md b/pkg/lib/README.md similarity index 100% rename from internal/pkg/lib/README.md rename to pkg/lib/README.md diff --git a/internal/pkg/lib/doc.go b/pkg/lib/doc.go similarity index 100% rename from internal/pkg/lib/doc.go rename to pkg/lib/doc.go diff --git a/internal/pkg/lib/docurl.go b/pkg/lib/docurl.go similarity index 100% rename from internal/pkg/lib/docurl.go rename to pkg/lib/docurl.go diff --git a/internal/pkg/lib/file_readers.go b/pkg/lib/file_readers.go similarity index 99% rename from internal/pkg/lib/file_readers.go rename to pkg/lib/file_readers.go index 151120040..a348ff900 100644 --- a/internal/pkg/lib/file_readers.go +++ b/pkg/lib/file_readers.go @@ -129,7 +129,7 @@ func openPrepipedHandleForRead( // single-quote and double-quote with backslack double-quote, then wrapping the // entire result in initial and final single-quote. // -// TODO: test on Windows. Maybe needs move to internal/pkg/platform. +// TODO: test on Windows. Maybe needs move to pkg/platform. 
func escapeFileNameForPopen(filename string) string { var buffer bytes.Buffer foundQuote := false diff --git a/internal/pkg/lib/getoptify.go b/pkg/lib/getoptify.go similarity index 100% rename from internal/pkg/lib/getoptify.go rename to pkg/lib/getoptify.go diff --git a/internal/pkg/lib/halfpipe.go b/pkg/lib/halfpipe.go similarity index 97% rename from internal/pkg/lib/halfpipe.go rename to pkg/lib/halfpipe.go index ab8eb7f11..276b2893b 100644 --- a/internal/pkg/lib/halfpipe.go +++ b/pkg/lib/halfpipe.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/platform" + "github.com/johnkerl/miller/pkg/platform" ) // OpenOutboundHalfPipe returns a handle to a process. Writing to that handle diff --git a/internal/pkg/lib/latin1.go b/pkg/lib/latin1.go similarity index 100% rename from internal/pkg/lib/latin1.go rename to pkg/lib/latin1.go diff --git a/internal/pkg/lib/latin1_test.go b/pkg/lib/latin1_test.go similarity index 100% rename from internal/pkg/lib/latin1_test.go rename to pkg/lib/latin1_test.go diff --git a/internal/pkg/lib/logger.go b/pkg/lib/logger.go similarity index 100% rename from internal/pkg/lib/logger.go rename to pkg/lib/logger.go diff --git a/internal/pkg/lib/mlrmath.go b/pkg/lib/mlrmath.go similarity index 100% rename from internal/pkg/lib/mlrmath.go rename to pkg/lib/mlrmath.go diff --git a/internal/pkg/lib/ordered_map.go b/pkg/lib/ordered_map.go similarity index 100% rename from internal/pkg/lib/ordered_map.go rename to pkg/lib/ordered_map.go diff --git a/internal/pkg/lib/paragraph.go b/pkg/lib/paragraph.go similarity index 100% rename from internal/pkg/lib/paragraph.go rename to pkg/lib/paragraph.go diff --git a/internal/pkg/lib/rand.go b/pkg/lib/rand.go similarity index 100% rename from internal/pkg/lib/rand.go rename to pkg/lib/rand.go diff --git a/internal/pkg/lib/readfiles.go b/pkg/lib/readfiles.go similarity index 97% rename from internal/pkg/lib/readfiles.go rename to pkg/lib/readfiles.go index 
920db7955..53ce49cc2 100644 --- a/internal/pkg/lib/readfiles.go +++ b/pkg/lib/readfiles.go @@ -10,7 +10,7 @@ import ( "os" "strings" - csv "github.com/johnkerl/miller/internal/pkg/go-csv" + csv "github.com/johnkerl/miller/pkg/go-csv" ) // LoadStringsFromFileOrDir calls LoadStringFromFile if path exists and is a diff --git a/internal/pkg/lib/regex.go b/pkg/lib/regex.go similarity index 100% rename from internal/pkg/lib/regex.go rename to pkg/lib/regex.go diff --git a/internal/pkg/lib/regex_test.go b/pkg/lib/regex_test.go similarity index 100% rename from internal/pkg/lib/regex_test.go rename to pkg/lib/regex_test.go diff --git a/internal/pkg/lib/stats.go b/pkg/lib/stats.go similarity index 100% rename from internal/pkg/lib/stats.go rename to pkg/lib/stats.go diff --git a/internal/pkg/lib/time.go b/pkg/lib/time.go similarity index 100% rename from internal/pkg/lib/time.go rename to pkg/lib/time.go diff --git a/internal/pkg/lib/time_test.go b/pkg/lib/time_test.go similarity index 100% rename from internal/pkg/lib/time_test.go rename to pkg/lib/time_test.go diff --git a/internal/pkg/lib/tsv_codec.go b/pkg/lib/tsv_codec.go similarity index 100% rename from internal/pkg/lib/tsv_codec.go rename to pkg/lib/tsv_codec.go diff --git a/internal/pkg/lib/tsv_codec_test.go b/pkg/lib/tsv_codec_test.go similarity index 100% rename from internal/pkg/lib/tsv_codec_test.go rename to pkg/lib/tsv_codec_test.go diff --git a/internal/pkg/lib/unbackslash.go b/pkg/lib/unbackslash.go similarity index 100% rename from internal/pkg/lib/unbackslash.go rename to pkg/lib/unbackslash.go diff --git a/internal/pkg/lib/unbackslash_test.go b/pkg/lib/unbackslash_test.go similarity index 100% rename from internal/pkg/lib/unbackslash_test.go rename to pkg/lib/unbackslash_test.go diff --git a/internal/pkg/lib/util.go b/pkg/lib/util.go similarity index 100% rename from internal/pkg/lib/util.go rename to pkg/lib/util.go diff --git a/internal/pkg/mlrval/mlrmap.go b/pkg/mlrval/mlrmap.go similarity index 100% 
rename from internal/pkg/mlrval/mlrmap.go rename to pkg/mlrval/mlrmap.go diff --git a/internal/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go similarity index 99% rename from internal/pkg/mlrval/mlrmap_accessors.go rename to pkg/mlrval/mlrmap_accessors.go index 9ad8e5999..befb5f726 100644 --- a/internal/pkg/mlrval/mlrmap_accessors.go +++ b/pkg/mlrval/mlrmap_accessors.go @@ -5,7 +5,7 @@ import ( "fmt" "strconv" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // IsEmpty determines if a map is empty. diff --git a/internal/pkg/mlrval/mlrmap_accessors_test.go b/pkg/mlrval/mlrmap_accessors_test.go similarity index 100% rename from internal/pkg/mlrval/mlrmap_accessors_test.go rename to pkg/mlrval/mlrmap_accessors_test.go diff --git a/internal/pkg/mlrval/mlrmap_flatten_unflatten.go b/pkg/mlrval/mlrmap_flatten_unflatten.go similarity index 99% rename from internal/pkg/mlrval/mlrmap_flatten_unflatten.go rename to pkg/mlrval/mlrmap_flatten_unflatten.go index bb91901b7..8e48ba515 100644 --- a/internal/pkg/mlrval/mlrmap_flatten_unflatten.go +++ b/pkg/mlrval/mlrmap_flatten_unflatten.go @@ -25,7 +25,7 @@ package mlrval import ( "strings" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/mlrval/mlrmap_json.go b/pkg/mlrval/mlrmap_json.go similarity index 98% rename from internal/pkg/mlrval/mlrmap_json.go rename to pkg/mlrval/mlrmap_json.go index 726c38fb7..d185efc99 100644 --- a/internal/pkg/mlrval/mlrmap_json.go +++ b/pkg/mlrval/mlrmap_json.go @@ -7,8 +7,8 @@ package mlrval import ( "bytes" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/mlrval/mlrmap_new_test.go 
b/pkg/mlrval/mlrmap_new_test.go similarity index 100% rename from internal/pkg/mlrval/mlrmap_new_test.go rename to pkg/mlrval/mlrmap_new_test.go diff --git a/internal/pkg/mlrval/mlrmap_print.go b/pkg/mlrval/mlrmap_print.go similarity index 100% rename from internal/pkg/mlrval/mlrmap_print.go rename to pkg/mlrval/mlrmap_print.go diff --git a/internal/pkg/mlrval/mlrval_accessors.go b/pkg/mlrval/mlrval_accessors.go similarity index 98% rename from internal/pkg/mlrval/mlrval_accessors.go rename to pkg/mlrval/mlrval_accessors.go index a33a2f441..f788cc35b 100644 --- a/internal/pkg/mlrval/mlrval_accessors.go +++ b/pkg/mlrval/mlrval_accessors.go @@ -3,7 +3,7 @@ package mlrval import ( "strconv" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) func (mv *Mlrval) GetArrayLength() (int, bool) { diff --git a/internal/pkg/mlrval/mlrval_benchmark_test.go b/pkg/mlrval/mlrval_benchmark_test.go similarity index 86% rename from internal/pkg/mlrval/mlrval_benchmark_test.go rename to pkg/mlrval/mlrval_benchmark_test.go index cb8a1f6ee..8d7c576b2 100644 --- a/internal/pkg/mlrval/mlrval_benchmark_test.go +++ b/pkg/mlrval/mlrval_benchmark_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/mlrval/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/mlrval/... 
func BenchmarkFromDeferredType(b *testing.B) { for i := 0; i < b.N; i++ { diff --git a/internal/pkg/mlrval/mlrval_cmp.go b/pkg/mlrval/mlrval_cmp.go similarity index 99% rename from internal/pkg/mlrval/mlrval_cmp.go rename to pkg/mlrval/mlrval_cmp.go index c9968d65a..cebd3af25 100644 --- a/internal/pkg/mlrval/mlrval_cmp.go +++ b/pkg/mlrval/mlrval_cmp.go @@ -14,7 +14,7 @@ package mlrval import ( - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) type CmpFuncBool func(input1, input2 *Mlrval) bool diff --git a/internal/pkg/mlrval/mlrval_cmp_test.go b/pkg/mlrval/mlrval_cmp_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_cmp_test.go rename to pkg/mlrval/mlrval_cmp_test.go diff --git a/internal/pkg/mlrval/mlrval_collections.go b/pkg/mlrval/mlrval_collections.go similarity index 99% rename from internal/pkg/mlrval/mlrval_collections.go rename to pkg/mlrval/mlrval_collections.go index 1e33b552b..5f4e305a6 100644 --- a/internal/pkg/mlrval/mlrval_collections.go +++ b/pkg/mlrval/mlrval_collections.go @@ -74,7 +74,7 @@ import ( "os" "strconv" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // ================================================================ diff --git a/internal/pkg/mlrval/mlrval_constants.go b/pkg/mlrval/mlrval_constants.go similarity index 100% rename from internal/pkg/mlrval/mlrval_constants.go rename to pkg/mlrval/mlrval_constants.go diff --git a/internal/pkg/mlrval/mlrval_copy.go b/pkg/mlrval/mlrval_copy.go similarity index 100% rename from internal/pkg/mlrval/mlrval_copy.go rename to pkg/mlrval/mlrval_copy.go diff --git a/internal/pkg/mlrval/mlrval_format.go b/pkg/mlrval/mlrval_format.go similarity index 100% rename from internal/pkg/mlrval/mlrval_format.go rename to pkg/mlrval/mlrval_format.go diff --git a/internal/pkg/mlrval/mlrval_format_test.go b/pkg/mlrval/mlrval_format_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_format_test.go 
rename to pkg/mlrval/mlrval_format_test.go diff --git a/internal/pkg/mlrval/mlrval_get.go b/pkg/mlrval/mlrval_get.go similarity index 98% rename from internal/pkg/mlrval/mlrval_get.go rename to pkg/mlrval/mlrval_get.go index c88d8d36a..2eb6bfb66 100644 --- a/internal/pkg/mlrval/mlrval_get.go +++ b/pkg/mlrval/mlrval_get.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // It's essential that we use mv.Type() not mv.mvtype, or use an Is...() diff --git a/internal/pkg/mlrval/mlrval_get_test.go b/pkg/mlrval/mlrval_get_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_get_test.go rename to pkg/mlrval/mlrval_get_test.go diff --git a/internal/pkg/mlrval/mlrval_infer.go b/pkg/mlrval/mlrval_infer.go similarity index 99% rename from internal/pkg/mlrval/mlrval_infer.go rename to pkg/mlrval/mlrval_infer.go index cf28bd260..ada3792bd 100644 --- a/internal/pkg/mlrval/mlrval_infer.go +++ b/pkg/mlrval/mlrval_infer.go @@ -3,7 +3,7 @@ package mlrval import ( "strconv" - "github.com/johnkerl/miller/internal/pkg/scan" + "github.com/johnkerl/miller/pkg/scan" ) // TODO: comment no infer-bool from data files. Always false in this path. 
diff --git a/internal/pkg/mlrval/mlrval_infer_test.go b/pkg/mlrval/mlrval_infer_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_infer_test.go rename to pkg/mlrval/mlrval_infer_test.go diff --git a/internal/pkg/mlrval/mlrval_is.go b/pkg/mlrval/mlrval_is.go similarity index 98% rename from internal/pkg/mlrval/mlrval_is.go rename to pkg/mlrval/mlrval_is.go index d1593776e..5b3fcd9e8 100644 --- a/internal/pkg/mlrval/mlrval_is.go +++ b/pkg/mlrval/mlrval_is.go @@ -1,7 +1,7 @@ package mlrval import ( - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // It's essential that we use mv.Type() not mv.mvtype since types are diff --git a/internal/pkg/mlrval/mlrval_is_test.go b/pkg/mlrval/mlrval_is_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_is_test.go rename to pkg/mlrval/mlrval_is_test.go diff --git a/internal/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go similarity index 99% rename from internal/pkg/mlrval/mlrval_json.go rename to pkg/mlrval/mlrval_json.go index 0b60d0ee5..c657815ec 100644 --- a/internal/pkg/mlrval/mlrval_json.go +++ b/pkg/mlrval/mlrval_json.go @@ -14,8 +14,8 @@ import ( "fmt" "io" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) const JSON_INDENT_STRING string = " " diff --git a/internal/pkg/mlrval/mlrval_new.go b/pkg/mlrval/mlrval_new.go similarity index 98% rename from internal/pkg/mlrval/mlrval_new.go rename to pkg/mlrval/mlrval_new.go index 474dcb807..eafea9afd 100644 --- a/internal/pkg/mlrval/mlrval_new.go +++ b/pkg/mlrval/mlrval_new.go @@ -8,7 +8,7 @@ import ( "errors" "fmt" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // TODO: comment for JSON-scanner context. 
@@ -309,7 +309,7 @@ func (mv *Mlrval) SetFromPrevalidatedBoolString(input string, boolval bool) *Mlr // The user-defined function is of type 'interface{}' here to avoid what would // otherwise be a package-dependency cycle between this package and -// github.com/johnkerl/miller/internal/pkg/dsl/cst. +// github.com/johnkerl/miller/pkg/dsl/cst. // // Nominally the name argument is the user-specified name if `func f(a, b) { // ... }`, or some autogenerated UUID like `fl0052` if `func (a, b) { ... }`. diff --git a/internal/pkg/mlrval/mlrval_new_test.go b/pkg/mlrval/mlrval_new_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_new_test.go rename to pkg/mlrval/mlrval_new_test.go diff --git a/internal/pkg/mlrval/mlrval_output.go b/pkg/mlrval/mlrval_output.go similarity index 100% rename from internal/pkg/mlrval/mlrval_output.go rename to pkg/mlrval/mlrval_output.go diff --git a/internal/pkg/mlrval/mlrval_output_test.go b/pkg/mlrval/mlrval_output_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_output_test.go rename to pkg/mlrval/mlrval_output_test.go diff --git a/internal/pkg/mlrval/mlrval_sort.go b/pkg/mlrval/mlrval_sort.go similarity index 100% rename from internal/pkg/mlrval/mlrval_sort.go rename to pkg/mlrval/mlrval_sort.go diff --git a/internal/pkg/mlrval/mlrval_sort_test.go b/pkg/mlrval/mlrval_sort_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_sort_test.go rename to pkg/mlrval/mlrval_sort_test.go diff --git a/internal/pkg/mlrval/mlrval_type.go b/pkg/mlrval/mlrval_type.go similarity index 100% rename from internal/pkg/mlrval/mlrval_type.go rename to pkg/mlrval/mlrval_type.go diff --git a/internal/pkg/mlrval/mlrval_type_test.go b/pkg/mlrval/mlrval_type_test.go similarity index 100% rename from internal/pkg/mlrval/mlrval_type_test.go rename to pkg/mlrval/mlrval_type_test.go diff --git a/internal/pkg/output/README.md b/pkg/output/README.md similarity index 100% rename from internal/pkg/output/README.md rename 
to pkg/output/README.md diff --git a/internal/pkg/output/channel_writer.go b/pkg/output/channel_writer.go similarity index 96% rename from internal/pkg/output/channel_writer.go rename to pkg/output/channel_writer.go index ea7ed814d..6805ad890 100644 --- a/internal/pkg/output/channel_writer.go +++ b/pkg/output/channel_writer.go @@ -6,8 +6,8 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) func ChannelWriter( diff --git a/internal/pkg/output/doc.go b/pkg/output/doc.go similarity index 100% rename from internal/pkg/output/doc.go rename to pkg/output/doc.go diff --git a/internal/pkg/output/file_output_handlers.go b/pkg/output/file_output_handlers.go similarity index 98% rename from internal/pkg/output/file_output_handlers.go rename to pkg/output/file_output_handlers.go index c7cf6f483..d21cfb812 100644 --- a/internal/pkg/output/file_output_handlers.go +++ b/pkg/output/file_output_handlers.go @@ -20,9 +20,9 @@ import ( "io" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/internal/pkg/output/record_writer.go b/pkg/output/record_writer.go similarity index 92% rename from internal/pkg/output/record_writer.go rename to pkg/output/record_writer.go index 30b17badb..37d8a7780 100644 --- a/internal/pkg/output/record_writer.go +++ b/pkg/output/record_writer.go @@ -3,7 +3,7 @@ package output import ( "bufio" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) // IRecordWriter is the abstract interface for all record-writers. 
They are diff --git a/internal/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go similarity index 94% rename from internal/pkg/output/record_writer_csv.go rename to pkg/output/record_writer_csv.go index 076c6778c..fd4801d29 100644 --- a/internal/pkg/output/record_writer_csv.go +++ b/pkg/output/record_writer_csv.go @@ -5,10 +5,10 @@ import ( "fmt" "strings" - csv "github.com/johnkerl/miller/internal/pkg/go-csv" + csv "github.com/johnkerl/miller/pkg/go-csv" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterCSV struct { diff --git a/internal/pkg/output/record_writer_csv_colorizer.go b/pkg/output/record_writer_csv_colorizer.go similarity index 99% rename from internal/pkg/output/record_writer_csv_colorizer.go rename to pkg/output/record_writer_csv_colorizer.go index 43434e643..9cb103b6a 100644 --- a/internal/pkg/output/record_writer_csv_colorizer.go +++ b/pkg/output/record_writer_csv_colorizer.go @@ -47,7 +47,7 @@ import ( "strings" "unicode/utf8" - "github.com/johnkerl/miller/internal/pkg/colorizer" + "github.com/johnkerl/miller/pkg/colorizer" ) var errInvalidDelim = errors.New("csv: invalid field or comment delimiter") diff --git a/internal/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go similarity index 93% rename from internal/pkg/output/record_writer_csvlite.go rename to pkg/output/record_writer_csvlite.go index 6acf0976c..251cf9580 100644 --- a/internal/pkg/output/record_writer_csvlite.go +++ b/pkg/output/record_writer_csvlite.go @@ -4,9 +4,9 @@ import ( "bufio" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterCSVLite 
struct { diff --git a/internal/pkg/output/record_writer_dkvp.go b/pkg/output/record_writer_dkvp.go similarity index 87% rename from internal/pkg/output/record_writer_dkvp.go rename to pkg/output/record_writer_dkvp.go index 674388130..bc60868ca 100644 --- a/internal/pkg/output/record_writer_dkvp.go +++ b/pkg/output/record_writer_dkvp.go @@ -3,9 +3,9 @@ package output import ( "bufio" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterDKVP struct { diff --git a/internal/pkg/output/record_writer_factory.go b/pkg/output/record_writer_factory.go similarity index 94% rename from internal/pkg/output/record_writer_factory.go rename to pkg/output/record_writer_factory.go index a48c68f99..b706f21bd 100644 --- a/internal/pkg/output/record_writer_factory.go +++ b/pkg/output/record_writer_factory.go @@ -3,7 +3,7 @@ package output import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) func Create(writerOptions *cli.TWriterOptions) (IRecordWriter, error) { diff --git a/internal/pkg/output/record_writer_json.go b/pkg/output/record_writer_json.go similarity index 96% rename from internal/pkg/output/record_writer_json.go rename to pkg/output/record_writer_json.go index 3ee176852..578e9f8ba 100644 --- a/internal/pkg/output/record_writer_json.go +++ b/pkg/output/record_writer_json.go @@ -5,8 +5,8 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/output/record_writer_markdown.go b/pkg/output/record_writer_markdown.go similarity index 93% rename from 
internal/pkg/output/record_writer_markdown.go rename to pkg/output/record_writer_markdown.go index c7bdf280d..2688c2962 100644 --- a/internal/pkg/output/record_writer_markdown.go +++ b/pkg/output/record_writer_markdown.go @@ -4,9 +4,9 @@ import ( "bufio" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterMarkdown struct { diff --git a/internal/pkg/output/record_writer_nidx.go b/pkg/output/record_writer_nidx.go similarity index 88% rename from internal/pkg/output/record_writer_nidx.go rename to pkg/output/record_writer_nidx.go index a0e350958..d3babd35a 100644 --- a/internal/pkg/output/record_writer_nidx.go +++ b/pkg/output/record_writer_nidx.go @@ -3,8 +3,8 @@ package output import ( "bufio" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterNIDX struct { diff --git a/internal/pkg/output/record_writer_pprint.go b/pkg/output/record_writer_pprint.go similarity index 98% rename from internal/pkg/output/record_writer_pprint.go rename to pkg/output/record_writer_pprint.go index f714e86fa..79d49b316 100644 --- a/internal/pkg/output/record_writer_pprint.go +++ b/pkg/output/record_writer_pprint.go @@ -7,9 +7,9 @@ import ( "strings" "unicode/utf8" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterPPRINT struct { diff --git a/internal/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go similarity index 92% rename from 
internal/pkg/output/record_writer_tsv.go rename to pkg/output/record_writer_tsv.go index 3a7b53953..149ac9530 100644 --- a/internal/pkg/output/record_writer_tsv.go +++ b/pkg/output/record_writer_tsv.go @@ -5,10 +5,10 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) type RecordWriterTSV struct { diff --git a/internal/pkg/output/record_writer_xtab.go b/pkg/output/record_writer_xtab.go similarity index 96% rename from internal/pkg/output/record_writer_xtab.go rename to pkg/output/record_writer_xtab.go index a13536c1e..9093935e9 100644 --- a/internal/pkg/output/record_writer_xtab.go +++ b/pkg/output/record_writer_xtab.go @@ -5,9 +5,9 @@ import ( "fmt" "unicode/utf8" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/parsing/README.md b/pkg/parsing/README.md similarity index 87% rename from internal/pkg/parsing/README.md rename to pkg/parsing/README.md index 58ae6926e..161e5c75d 100644 --- a/internal/pkg/parsing/README.md +++ b/pkg/parsing/README.md @@ -4,7 +4,7 @@ lexical/semantic grammar file for the Miller `put`/`filter` DSL framework, there would be separate `mlr.l` and `mlr.y` files; using GOCC, there is a single `mlr.bnf` file.) -All subdirectories of `internal/pkg/parsing/` are autogen code created by GOCC's +All subdirectories of `pkg/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. 
They are nonetheless committed to source control, since running GOCC takes quite a bit longer than the `go build` does, and the BNF file doesn't often change. (_BNF_ is for _Backus-Naur Form_ which is the @@ -12,7 +12,7 @@ phrasing of the grammar file that GOCC support.) See the top-level `miller/go` build scripts for how to rerun GOCC. As of this writing, it's ``` -gocc -o internal/pkg/parsing internal/pkg/parsing/mlr.bnf +gocc -o pkg/parsing pkg/parsing/mlr.bnf ```` as invoked from the repo base directory -- however you should instead use diff --git a/internal/pkg/parsing/doc.go b/pkg/parsing/doc.go similarity index 100% rename from internal/pkg/parsing/doc.go rename to pkg/parsing/doc.go diff --git a/internal/pkg/parsing/errors.go.template b/pkg/parsing/errors.go.template similarity index 92% rename from internal/pkg/parsing/errors.go.template rename to pkg/parsing/errors.go.template index da89c0bac..5c39ede1d 100644 --- a/internal/pkg/parsing/errors.go.template +++ b/pkg/parsing/errors.go.template @@ -3,8 +3,8 @@ // over the top of GOCC codegen so that we can customize handling of error // messages. 
// -// Source: internal/pkg/parsing/errors.go.template -// Destination: internal/pkg/parsing/errors/errors.go +// Source: pkg/parsing/errors.go.template +// Destination: pkg/parsing/errors/errors.go // ================================================================ package errors @@ -13,7 +13,7 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) type ErrorSymbol interface { diff --git a/internal/pkg/parsing/errors/doc.go b/pkg/parsing/errors/doc.go similarity index 68% rename from internal/pkg/parsing/errors/doc.go rename to pkg/parsing/errors/doc.go index b3a2ff9c7..2d1f13f3f 100644 --- a/internal/pkg/parsing/errors/doc.go +++ b/pkg/parsing/errors/doc.go @@ -1,3 +1,3 @@ // Package errors contains source code autogenerated by GOCC from the Miller DSL's -// grammar file internal/pkg/parsing/mlr.bnf. +// grammar file pkg/parsing/mlr.bnf. package errors diff --git a/internal/pkg/parsing/errors/errors.go b/pkg/parsing/errors/errors.go similarity index 92% rename from internal/pkg/parsing/errors/errors.go rename to pkg/parsing/errors/errors.go index da89c0bac..5c39ede1d 100644 --- a/internal/pkg/parsing/errors/errors.go +++ b/pkg/parsing/errors/errors.go @@ -3,8 +3,8 @@ // over the top of GOCC codegen so that we can customize handling of error // messages. 
// -// Source: internal/pkg/parsing/errors.go.template -// Destination: internal/pkg/parsing/errors/errors.go +// Source: pkg/parsing/errors.go.template +// Destination: pkg/parsing/errors/errors.go // ================================================================ package errors @@ -13,7 +13,7 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) type ErrorSymbol interface { diff --git a/internal/pkg/parsing/lexer/acttab.go b/pkg/parsing/lexer/acttab.go similarity index 99% rename from internal/pkg/parsing/lexer/acttab.go rename to pkg/parsing/lexer/acttab.go index 080936766..18917879f 100644 --- a/internal/pkg/parsing/lexer/acttab.go +++ b/pkg/parsing/lexer/acttab.go @@ -5,7 +5,7 @@ package lexer import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) type ActionTable [NumStates]ActionRow diff --git a/internal/pkg/parsing/lexer/doc.go b/pkg/parsing/lexer/doc.go similarity index 67% rename from internal/pkg/parsing/lexer/doc.go rename to pkg/parsing/lexer/doc.go index 8381497d4..ed55d4bd2 100644 --- a/internal/pkg/parsing/lexer/doc.go +++ b/pkg/parsing/lexer/doc.go @@ -1,3 +1,3 @@ // Package lexer contains source code autogenerated by GOCC from the Miller DSL's -// grammar file internal/pkg/parsing/mlr.bnf. +// grammar file pkg/parsing/mlr.bnf. 
package lexer diff --git a/internal/pkg/parsing/lexer/lexer.go b/pkg/parsing/lexer/lexer.go similarity index 99% rename from internal/pkg/parsing/lexer/lexer.go rename to pkg/parsing/lexer/lexer.go index 18fd8ea7a..d106a08a3 100644 --- a/internal/pkg/parsing/lexer/lexer.go +++ b/pkg/parsing/lexer/lexer.go @@ -6,7 +6,7 @@ import ( "os" "unicode/utf8" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + "github.com/johnkerl/miller/pkg/parsing/token" ) const ( diff --git a/internal/pkg/parsing/lexer/transitiontable.go b/pkg/parsing/lexer/transitiontable.go similarity index 100% rename from internal/pkg/parsing/lexer/transitiontable.go rename to pkg/parsing/lexer/transitiontable.go diff --git a/internal/pkg/parsing/mlr.bnf b/pkg/parsing/mlr.bnf similarity index 99% rename from internal/pkg/parsing/mlr.bnf rename to pkg/parsing/mlr.bnf index 6f987c827..39d6c0c3b 100644 --- a/internal/pkg/parsing/mlr.bnf +++ b/pkg/parsing/mlr.bnf @@ -42,7 +42,7 @@ // interface{}/error since they are meant for nesting as arguments here // within this file. // -// * Please see internal/pkg/dsl/ast*.go for more about what the <<...>> +// * Please see pkg/dsl/ast*.go for more about what the <<...>> // code here is calling. 
// ================================================================ @@ -347,7 +347,7 @@ panic : '%' '%' '%' 'p' 'a' 'n' 'i' 'c' '%' '%' '%' ; // ================================================================ // Import the AST/ASTNode types and functions -<< import "github.com/johnkerl/miller/internal/pkg/dsl" >> +<< import "github.com/johnkerl/miller/pkg/dsl" >> // ================================================================ // TOP-LEVEL PRODUCTION RULE FOR THE MILLER DSL diff --git a/internal/pkg/parsing/parser/action.go b/pkg/parsing/parser/action.go similarity index 100% rename from internal/pkg/parsing/parser/action.go rename to pkg/parsing/parser/action.go diff --git a/internal/pkg/parsing/parser/actiontable.go b/pkg/parsing/parser/actiontable.go similarity index 100% rename from internal/pkg/parsing/parser/actiontable.go rename to pkg/parsing/parser/actiontable.go diff --git a/internal/pkg/parsing/parser/context.go b/pkg/parsing/parser/context.go similarity index 100% rename from internal/pkg/parsing/parser/context.go rename to pkg/parsing/parser/context.go diff --git a/internal/pkg/parsing/parser/doc.go b/pkg/parsing/parser/doc.go similarity index 68% rename from internal/pkg/parsing/parser/doc.go rename to pkg/parsing/parser/doc.go index e67ea07a5..031a6d91c 100644 --- a/internal/pkg/parsing/parser/doc.go +++ b/pkg/parsing/parser/doc.go @@ -1,3 +1,3 @@ // Package parser contains source code autogenerated by GOCC from the Miller DSL's -// grammar file internal/pkg/parsing/mlr.bnf. +// grammar file pkg/parsing/mlr.bnf. 
package parser diff --git a/internal/pkg/parsing/parser/gototable.go b/pkg/parsing/parser/gototable.go similarity index 100% rename from internal/pkg/parsing/parser/gototable.go rename to pkg/parsing/parser/gototable.go diff --git a/internal/pkg/parsing/parser/parser.go b/pkg/parsing/parser/parser.go similarity index 97% rename from internal/pkg/parsing/parser/parser.go rename to pkg/parsing/parser/parser.go index 8402d1bed..444e9f495 100644 --- a/internal/pkg/parsing/parser/parser.go +++ b/pkg/parsing/parser/parser.go @@ -6,8 +6,8 @@ import ( "fmt" "strings" - parseError "github.com/johnkerl/miller/internal/pkg/parsing/errors" - "github.com/johnkerl/miller/internal/pkg/parsing/token" + parseError "github.com/johnkerl/miller/pkg/parsing/errors" + "github.com/johnkerl/miller/pkg/parsing/token" ) const ( diff --git a/internal/pkg/parsing/parser/productionstable.go b/pkg/parsing/parser/productionstable.go similarity index 99% rename from internal/pkg/parsing/parser/productionstable.go rename to pkg/parsing/parser/productionstable.go index bd84651b2..f4b61fd50 100644 --- a/internal/pkg/parsing/parser/productionstable.go +++ b/pkg/parsing/parser/productionstable.go @@ -2,7 +2,7 @@ package parser -import "github.com/johnkerl/miller/internal/pkg/dsl" +import "github.com/johnkerl/miller/pkg/dsl" type ( ProdTab [numProductions]ProdTabEntry diff --git a/internal/pkg/parsing/token/context.go b/pkg/parsing/token/context.go similarity index 100% rename from internal/pkg/parsing/token/context.go rename to pkg/parsing/token/context.go diff --git a/internal/pkg/parsing/token/doc.go b/pkg/parsing/token/doc.go similarity index 67% rename from internal/pkg/parsing/token/doc.go rename to pkg/parsing/token/doc.go index a41d41816..77899ed79 100644 --- a/internal/pkg/parsing/token/doc.go +++ b/pkg/parsing/token/doc.go @@ -1,3 +1,3 @@ // Package token contains source code autogenerated by GOCC from the Miller DSL's -// grammar file internal/pkg/parsing/mlr.bnf. 
+// grammar file pkg/parsing/mlr.bnf. package token diff --git a/internal/pkg/parsing/token/token.go b/pkg/parsing/token/token.go similarity index 100% rename from internal/pkg/parsing/token/token.go rename to pkg/parsing/token/token.go diff --git a/internal/pkg/parsing/util/doc.go b/pkg/parsing/util/doc.go similarity index 67% rename from internal/pkg/parsing/util/doc.go rename to pkg/parsing/util/doc.go index af6cd78c2..1a61fda12 100644 --- a/internal/pkg/parsing/util/doc.go +++ b/pkg/parsing/util/doc.go @@ -1,3 +1,3 @@ // Package util contains source code autogenerated by GOCC from the Miller DSL's -// grammar file internal/pkg/parsing/mlr.bnf. +// grammar file pkg/parsing/mlr.bnf. package util diff --git a/internal/pkg/parsing/util/litconv.go b/pkg/parsing/util/litconv.go similarity index 100% rename from internal/pkg/parsing/util/litconv.go rename to pkg/parsing/util/litconv.go diff --git a/internal/pkg/parsing/util/rune.go b/pkg/parsing/util/rune.go similarity index 100% rename from internal/pkg/parsing/util/rune.go rename to pkg/parsing/util/rune.go diff --git a/internal/pkg/pbnjay-strptime/README.md b/pkg/pbnjay-strptime/README.md similarity index 100% rename from internal/pkg/pbnjay-strptime/README.md rename to pkg/pbnjay-strptime/README.md diff --git a/internal/pkg/pbnjay-strptime/strptime.go b/pkg/pbnjay-strptime/strptime.go similarity index 100% rename from internal/pkg/pbnjay-strptime/strptime.go rename to pkg/pbnjay-strptime/strptime.go diff --git a/internal/pkg/pbnjay-strptime/strptime_test.go b/pkg/pbnjay-strptime/strptime_test.go similarity index 100% rename from internal/pkg/pbnjay-strptime/strptime_test.go rename to pkg/pbnjay-strptime/strptime_test.go diff --git a/internal/pkg/platform/README.md b/pkg/platform/README.md similarity index 100% rename from internal/pkg/platform/README.md rename to pkg/platform/README.md diff --git a/internal/pkg/platform/diff_notwindows.go b/pkg/platform/diff_notwindows.go similarity index 100% rename from 
internal/pkg/platform/diff_notwindows.go rename to pkg/platform/diff_notwindows.go diff --git a/internal/pkg/platform/diff_windows.go b/pkg/platform/diff_windows.go similarity index 100% rename from internal/pkg/platform/diff_windows.go rename to pkg/platform/diff_windows.go diff --git a/internal/pkg/platform/doc.go b/pkg/platform/doc.go similarity index 100% rename from internal/pkg/platform/doc.go rename to pkg/platform/doc.go diff --git a/internal/pkg/platform/getargs_notwindows.go b/pkg/platform/getargs_notwindows.go similarity index 100% rename from internal/pkg/platform/getargs_notwindows.go rename to pkg/platform/getargs_notwindows.go diff --git a/internal/pkg/platform/getargs_windows.go b/pkg/platform/getargs_windows.go similarity index 100% rename from internal/pkg/platform/getargs_windows.go rename to pkg/platform/getargs_windows.go diff --git a/internal/pkg/platform/shellrun_notwindows.go b/pkg/platform/shellrun_notwindows.go similarity index 100% rename from internal/pkg/platform/shellrun_notwindows.go rename to pkg/platform/shellrun_notwindows.go diff --git a/internal/pkg/platform/shellrun_windows.go b/pkg/platform/shellrun_windows.go similarity index 100% rename from internal/pkg/platform/shellrun_windows.go rename to pkg/platform/shellrun_windows.go diff --git a/internal/pkg/platform/terminal_notwindows.go b/pkg/platform/terminal_notwindows.go similarity index 100% rename from internal/pkg/platform/terminal_notwindows.go rename to pkg/platform/terminal_notwindows.go diff --git a/internal/pkg/platform/terminal_windows.go b/pkg/platform/terminal_windows.go similarity index 100% rename from internal/pkg/platform/terminal_windows.go rename to pkg/platform/terminal_windows.go diff --git a/internal/pkg/runtime/README.md b/pkg/runtime/README.md similarity index 100% rename from internal/pkg/runtime/README.md rename to pkg/runtime/README.md diff --git a/internal/pkg/runtime/doc.go b/pkg/runtime/doc.go similarity index 100% rename from 
internal/pkg/runtime/doc.go rename to pkg/runtime/doc.go diff --git a/internal/pkg/runtime/stack.go b/pkg/runtime/stack.go similarity index 98% rename from internal/pkg/runtime/stack.go rename to pkg/runtime/stack.go index a5cf2499e..b32cd06dd 100644 --- a/internal/pkg/runtime/stack.go +++ b/pkg/runtime/stack.go @@ -29,9 +29,9 @@ import ( "container/list" "fmt" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ================================================================ diff --git a/internal/pkg/runtime/state.go b/pkg/runtime/state.go similarity index 89% rename from internal/pkg/runtime/state.go rename to pkg/runtime/state.go index 8c3a6caca..e94fd4ce5 100644 --- a/internal/pkg/runtime/state.go +++ b/pkg/runtime/state.go @@ -9,10 +9,10 @@ package runtime import ( "container/list" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type State struct { diff --git a/internal/pkg/scan/digits.go b/pkg/scan/digits.go similarity index 100% rename from internal/pkg/scan/digits.go rename to pkg/scan/digits.go diff --git a/internal/pkg/scan/digits_test.go b/pkg/scan/digits_test.go similarity index 100% rename from internal/pkg/scan/digits_test.go rename to pkg/scan/digits_test.go diff --git a/internal/pkg/scan/doc.go b/pkg/scan/doc.go similarity index 100% rename from internal/pkg/scan/doc.go rename to pkg/scan/doc.go diff --git a/internal/pkg/scan/find.go b/pkg/scan/find.go similarity index 100% rename from internal/pkg/scan/find.go rename to 
pkg/scan/find.go diff --git a/internal/pkg/scan/find_benchmark_test.go b/pkg/scan/find_benchmark_test.go similarity index 94% rename from internal/pkg/scan/find_benchmark_test.go rename to pkg/scan/find_benchmark_test.go index 0d023a25f..e905bb735 100644 --- a/internal/pkg/scan/find_benchmark_test.go +++ b/pkg/scan/find_benchmark_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/internal/pkg/scan/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/scan/... func BenchmarkFromNormalCases(b *testing.B) { diff --git a/internal/pkg/scan/find_test.go b/pkg/scan/find_test.go similarity index 100% rename from internal/pkg/scan/find_test.go rename to pkg/scan/find_test.go diff --git a/internal/pkg/scan/type.go b/pkg/scan/type.go similarity index 100% rename from internal/pkg/scan/type.go rename to pkg/scan/type.go diff --git a/internal/pkg/scan/type_test.go b/pkg/scan/type_test.go similarity index 100% rename from internal/pkg/scan/type_test.go rename to pkg/scan/type_test.go diff --git a/internal/pkg/stream/README.md b/pkg/stream/README.md similarity index 100% rename from internal/pkg/stream/README.md rename to pkg/stream/README.md diff --git a/internal/pkg/stream/doc.go b/pkg/stream/doc.go similarity index 100% rename from internal/pkg/stream/doc.go rename to pkg/stream/doc.go diff --git a/internal/pkg/stream/stream.go b/pkg/stream/stream.go similarity index 94% rename from internal/pkg/stream/stream.go rename to pkg/stream/stream.go index 351eda06d..9f2cbe805 100644 --- a/internal/pkg/stream/stream.go +++ b/pkg/stream/stream.go @@ -6,11 +6,11 @@ import ( "errors" "io" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/input" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/transformers" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + 
"github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/pkg/types" ) // Since Go is concurrent, the context struct (AWK-like variables such as diff --git a/internal/pkg/terminals/doc.go b/pkg/terminals/doc.go similarity index 100% rename from internal/pkg/terminals/doc.go rename to pkg/terminals/doc.go diff --git a/internal/pkg/terminals/help/doc.go b/pkg/terminals/help/doc.go similarity index 100% rename from internal/pkg/terminals/help/doc.go rename to pkg/terminals/help/doc.go diff --git a/internal/pkg/terminals/help/entry.go b/pkg/terminals/help/entry.go similarity index 98% rename from internal/pkg/terminals/help/entry.go rename to pkg/terminals/help/entry.go index 6598c48b3..3b25b58b5 100644 --- a/internal/pkg/terminals/help/entry.go +++ b/pkg/terminals/help/entry.go @@ -10,13 +10,13 @@ import ( "github.com/mattn/go-isatty" - "github.com/johnkerl/miller/internal/pkg/auxents" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers" + "github.com/johnkerl/miller/pkg/auxents" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers" ) // ================================================================ diff --git a/internal/pkg/terminals/regtest/README.md b/pkg/terminals/regtest/README.md similarity index 100% rename from internal/pkg/terminals/regtest/README.md rename to pkg/terminals/regtest/README.md diff --git a/internal/pkg/terminals/regtest/doc.go b/pkg/terminals/regtest/doc.go similarity index 100% rename from 
internal/pkg/terminals/regtest/doc.go rename to pkg/terminals/regtest/doc.go diff --git a/internal/pkg/terminals/regtest/entry.go b/pkg/terminals/regtest/entry.go similarity index 100% rename from internal/pkg/terminals/regtest/entry.go rename to pkg/terminals/regtest/entry.go diff --git a/internal/pkg/terminals/regtest/invoker.go b/pkg/terminals/regtest/invoker.go similarity index 97% rename from internal/pkg/terminals/regtest/invoker.go rename to pkg/terminals/regtest/invoker.go index a1cf4a921..febbbbfa3 100644 --- a/internal/pkg/terminals/regtest/invoker.go +++ b/pkg/terminals/regtest/invoker.go @@ -6,8 +6,8 @@ import ( "os/exec" "strings" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/platform" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/platform" ) // RunMillerCommand runs a string like 'mlr cat foo.dat', with specified mlr diff --git a/internal/pkg/terminals/regtest/regtester.go b/pkg/terminals/regtest/regtester.go similarity index 99% rename from internal/pkg/terminals/regtest/regtester.go rename to pkg/terminals/regtest/regtester.go index 34550053b..ec58bdf73 100644 --- a/internal/pkg/terminals/regtest/regtester.go +++ b/pkg/terminals/regtest/regtester.go @@ -64,8 +64,8 @@ import ( "runtime" "strings" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) const CmdName = "cmd" diff --git a/internal/pkg/terminals/repl/README.md b/pkg/terminals/repl/README.md similarity index 100% rename from internal/pkg/terminals/repl/README.md rename to pkg/terminals/repl/README.md diff --git a/internal/pkg/terminals/repl/doc.go b/pkg/terminals/repl/doc.go similarity index 100% rename from internal/pkg/terminals/repl/doc.go rename to pkg/terminals/repl/doc.go diff --git a/internal/pkg/terminals/repl/dsl.go b/pkg/terminals/repl/dsl.go similarity index 96% rename from 
internal/pkg/terminals/repl/dsl.go rename to pkg/terminals/repl/dsl.go index 87d6500f0..8f3a2a046 100644 --- a/internal/pkg/terminals/repl/dsl.go +++ b/pkg/terminals/repl/dsl.go @@ -23,9 +23,9 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/terminals/repl/entry.go b/pkg/terminals/repl/entry.go similarity index 99% rename from internal/pkg/terminals/repl/entry.go rename to pkg/terminals/repl/entry.go index 28f873fd7..d2403ccdf 100644 --- a/internal/pkg/terminals/repl/entry.go +++ b/pkg/terminals/repl/entry.go @@ -27,7 +27,7 @@ import ( "path" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/pkg/cli" ) // ================================================================ diff --git a/internal/pkg/terminals/repl/prompt.go b/pkg/terminals/repl/prompt.go similarity index 92% rename from internal/pkg/terminals/repl/prompt.go rename to pkg/terminals/repl/prompt.go index 7c0341881..bfcb46d2c 100644 --- a/internal/pkg/terminals/repl/prompt.go +++ b/pkg/terminals/repl/prompt.go @@ -11,9 +11,9 @@ import ( "golang.org/x/term" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/version" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/version" ) const ENV_PRIMARY_PROMPT = "MLR_REPL_PS1" diff --git a/internal/pkg/terminals/repl/session.go b/pkg/terminals/repl/session.go similarity index 94% rename from internal/pkg/terminals/repl/session.go rename to pkg/terminals/repl/session.go index 98b77aa74..03ef0f6b4 100644 --- 
a/internal/pkg/terminals/repl/session.go +++ b/pkg/terminals/repl/session.go @@ -25,14 +25,14 @@ import ( "strings" "syscall" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/input" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/terminals/repl/types.go b/pkg/terminals/repl/types.go similarity index 88% rename from internal/pkg/terminals/repl/types.go rename to pkg/terminals/repl/types.go index ea20658a6..b0da1b9d3 100644 --- a/internal/pkg/terminals/repl/types.go +++ b/pkg/terminals/repl/types.go @@ -9,11 +9,11 @@ import ( "container/list" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/input" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/runtime" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/runtime" ) // ================================================================ diff --git a/internal/pkg/terminals/repl/verbs.go b/pkg/terminals/repl/verbs.go similarity index 99% rename from internal/pkg/terminals/repl/verbs.go rename to pkg/terminals/repl/verbs.go 
index a47e93d53..92d9046ff 100644 --- a/internal/pkg/terminals/repl/verbs.go +++ b/pkg/terminals/repl/verbs.go @@ -10,12 +10,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/terminals/terminals.go b/pkg/terminals/terminals.go similarity index 90% rename from internal/pkg/terminals/terminals.go rename to pkg/terminals/terminals.go index 91f0fecf4..78ec0dd3f 100644 --- a/internal/pkg/terminals/terminals.go +++ b/pkg/terminals/terminals.go @@ -10,10 +10,10 @@ import ( "os" "runtime" - "github.com/johnkerl/miller/internal/pkg/terminals/help" - "github.com/johnkerl/miller/internal/pkg/terminals/regtest" - "github.com/johnkerl/miller/internal/pkg/terminals/repl" - "github.com/johnkerl/miller/internal/pkg/version" + "github.com/johnkerl/miller/pkg/terminals/help" + "github.com/johnkerl/miller/pkg/terminals/regtest" + "github.com/johnkerl/miller/pkg/terminals/repl" + "github.com/johnkerl/miller/pkg/version" ) // tTerminalMain is a function-pointer type for the entrypoint handler for a given terminal, diff --git a/internal/pkg/transformers/README.md b/pkg/transformers/README.md similarity index 100% rename from internal/pkg/transformers/README.md rename to pkg/transformers/README.md diff --git a/internal/pkg/transformers/aaa_chain_transformer.go b/pkg/transformers/aaa_chain_transformer.go similarity index 99% rename from 
internal/pkg/transformers/aaa_chain_transformer.go rename to pkg/transformers/aaa_chain_transformer.go index 81e954e50..e367ab883 100644 --- a/internal/pkg/transformers/aaa_chain_transformer.go +++ b/pkg/transformers/aaa_chain_transformer.go @@ -3,8 +3,8 @@ package transformers import ( "container/list" "fmt" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" "os" ) diff --git a/internal/pkg/transformers/aaa_record_transformer.go b/pkg/transformers/aaa_record_transformer.go similarity index 95% rename from internal/pkg/transformers/aaa_record_transformer.go rename to pkg/transformers/aaa_record_transformer.go index 4a8dfaa43..1f9bae7dd 100644 --- a/internal/pkg/transformers/aaa_record_transformer.go +++ b/pkg/transformers/aaa_record_transformer.go @@ -4,8 +4,8 @@ import ( "container/list" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // IRecordTransformer is the interface satisfied by all transformers, i.e., diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/pkg/transformers/aaa_transformer_table.go similarity index 96% rename from internal/pkg/transformers/aaa_transformer_table.go rename to pkg/transformers/aaa_transformer_table.go index ece90a858..ed98af07f 100644 --- a/internal/pkg/transformers/aaa_transformer_table.go +++ b/pkg/transformers/aaa_transformer_table.go @@ -5,8 +5,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/colorizer" - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/altkv.go b/pkg/transformers/altkv.go similarity index 95% rename from 
internal/pkg/transformers/altkv.go rename to pkg/transformers/altkv.go index af3c4f02b..a97c3127e 100644 --- a/internal/pkg/transformers/altkv.go +++ b/pkg/transformers/altkv.go @@ -7,9 +7,9 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/bar.go b/pkg/transformers/bar.go similarity index 98% rename from internal/pkg/transformers/bar.go rename to pkg/transformers/bar.go index d7157da54..0aaafd8f1 100644 --- a/internal/pkg/transformers/bar.go +++ b/pkg/transformers/bar.go @@ -7,9 +7,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) const barDefaultFillString = "*" diff --git a/internal/pkg/transformers/bootstrap.go b/pkg/transformers/bootstrap.go similarity index 97% rename from internal/pkg/transformers/bootstrap.go rename to pkg/transformers/bootstrap.go index 8447fb171..9450a425e 100644 --- a/internal/pkg/transformers/bootstrap.go +++ b/pkg/transformers/bootstrap.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/case.go b/pkg/transformers/case.go similarity index 97% rename from 
internal/pkg/transformers/case.go rename to pkg/transformers/case.go index f25a68f96..4d02617c4 100644 --- a/internal/pkg/transformers/case.go +++ b/pkg/transformers/case.go @@ -9,10 +9,10 @@ import ( "golang.org/x/text/cases" "golang.org/x/text/language" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/cat.go b/pkg/transformers/cat.go similarity index 97% rename from internal/pkg/transformers/cat.go rename to pkg/transformers/cat.go index 045b57a73..c065aa536 100644 --- a/internal/pkg/transformers/cat.go +++ b/pkg/transformers/cat.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/check.go b/pkg/transformers/check.go similarity index 97% rename from internal/pkg/transformers/check.go rename to pkg/transformers/check.go index 0301dad7d..9f3600190 100644 --- a/internal/pkg/transformers/check.go +++ b/pkg/transformers/check.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/clean_whitespace.go 
b/pkg/transformers/clean_whitespace.go similarity index 96% rename from internal/pkg/transformers/clean_whitespace.go rename to pkg/transformers/clean_whitespace.go index c5c9bf51c..3ffdd3862 100644 --- a/internal/pkg/transformers/clean_whitespace.go +++ b/pkg/transformers/clean_whitespace.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/count.go b/pkg/transformers/count.go similarity index 97% rename from internal/pkg/transformers/count.go rename to pkg/transformers/count.go index 91c73ee0d..623855ac9 100644 --- a/internal/pkg/transformers/count.go +++ b/pkg/transformers/count.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/count_similar.go b/pkg/transformers/count_similar.go similarity index 95% rename from internal/pkg/transformers/count_similar.go rename to pkg/transformers/count_similar.go index 9707216de..2fafe2d70 100644 --- a/internal/pkg/transformers/count_similar.go +++ b/pkg/transformers/count_similar.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - 
"github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/cut.go b/pkg/transformers/cut.go similarity index 97% rename from internal/pkg/transformers/cut.go rename to pkg/transformers/cut.go index 35c22bbcd..f3039f65a 100644 --- a/internal/pkg/transformers/cut.go +++ b/pkg/transformers/cut.go @@ -7,10 +7,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/decimate.go b/pkg/transformers/decimate.go similarity index 97% rename from internal/pkg/transformers/decimate.go rename to pkg/transformers/decimate.go index 9446c0502..8535c1d4b 100644 --- a/internal/pkg/transformers/decimate.go +++ b/pkg/transformers/decimate.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/doc.go b/pkg/transformers/doc.go similarity index 100% rename from internal/pkg/transformers/doc.go rename to pkg/transformers/doc.go diff --git a/internal/pkg/transformers/fill_down.go b/pkg/transformers/fill_down.go similarity index 97% rename from 
internal/pkg/transformers/fill_down.go rename to pkg/transformers/fill_down.go index 88b78ef9b..b9ab079dc 100644 --- a/internal/pkg/transformers/fill_down.go +++ b/pkg/transformers/fill_down.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/fill_empty.go b/pkg/transformers/fill_empty.go similarity index 95% rename from internal/pkg/transformers/fill_empty.go rename to pkg/transformers/fill_empty.go index 1b6f93193..e893ef334 100644 --- a/internal/pkg/transformers/fill_empty.go +++ b/pkg/transformers/fill_empty.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/flatten.go b/pkg/transformers/flatten.go similarity index 97% rename from internal/pkg/transformers/flatten.go rename to pkg/transformers/flatten.go index 479df8a9c..a2f4d1a97 100644 --- a/internal/pkg/transformers/flatten.go +++ b/pkg/transformers/flatten.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git 
a/internal/pkg/transformers/format_values.go b/pkg/transformers/format_values.go similarity index 97% rename from internal/pkg/transformers/format_values.go rename to pkg/transformers/format_values.go index fc0372e83..d91041c27 100644 --- a/internal/pkg/transformers/format_values.go +++ b/pkg/transformers/format_values.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/fraction.go b/pkg/transformers/fraction.go similarity index 97% rename from internal/pkg/transformers/fraction.go rename to pkg/transformers/fraction.go index 9327ce208..fb90ab2bf 100644 --- a/internal/pkg/transformers/fraction.go +++ b/pkg/transformers/fraction.go @@ -7,11 +7,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/gap.go b/pkg/transformers/gap.go similarity index 97% rename from internal/pkg/transformers/gap.go rename to pkg/transformers/gap.go index 2a8e8536c..345780f4e 100644 --- a/internal/pkg/transformers/gap.go +++ b/pkg/transformers/gap.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - 
"github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/grep.go b/pkg/transformers/grep.go similarity index 97% rename from internal/pkg/transformers/grep.go rename to pkg/transformers/grep.go index 5f3f217fd..6e8c8f183 100644 --- a/internal/pkg/transformers/grep.go +++ b/pkg/transformers/grep.go @@ -7,8 +7,8 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/group_by.go b/pkg/transformers/group_by.go similarity index 96% rename from internal/pkg/transformers/group_by.go rename to pkg/transformers/group_by.go index aa51e3bd0..f210969ad 100644 --- a/internal/pkg/transformers/group_by.go +++ b/pkg/transformers/group_by.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/group_like.go b/pkg/transformers/group_like.go similarity index 95% rename from internal/pkg/transformers/group_like.go rename to pkg/transformers/group_like.go index 860ae8b80..a7ede9a59 100644 --- a/internal/pkg/transformers/group_like.go +++ b/pkg/transformers/group_like.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + 
"github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/gsub.go b/pkg/transformers/gsub.go similarity index 94% rename from internal/pkg/transformers/gsub.go rename to pkg/transformers/gsub.go index 550aeda5a..0b188505b 100644 --- a/internal/pkg/transformers/gsub.go +++ b/pkg/transformers/gsub.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/having_fields.go b/pkg/transformers/having_fields.go similarity index 98% rename from internal/pkg/transformers/having_fields.go rename to pkg/transformers/having_fields.go index 1a8442a8b..b17e6cecc 100644 --- a/internal/pkg/transformers/having_fields.go +++ b/pkg/transformers/having_fields.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) type tHavingFieldsCriterion int diff --git a/internal/pkg/transformers/head.go b/pkg/transformers/head.go similarity index 98% rename from internal/pkg/transformers/head.go rename to pkg/transformers/head.go index 3209db5b2..4626ddead 100644 --- a/internal/pkg/transformers/head.go +++ b/pkg/transformers/head.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - 
"github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/histogram.go b/pkg/transformers/histogram.go similarity index 98% rename from internal/pkg/transformers/histogram.go rename to pkg/transformers/histogram.go index 39ae3aa62..a7d6241fb 100644 --- a/internal/pkg/transformers/histogram.go +++ b/pkg/transformers/histogram.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/join.go b/pkg/transformers/join.go similarity index 98% rename from internal/pkg/transformers/join.go rename to pkg/transformers/join.go index 51c73cccb..3d8547a12 100644 --- a/internal/pkg/transformers/join.go +++ b/pkg/transformers/join.go @@ -6,12 +6,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/input" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/json_parse.go 
b/pkg/transformers/json_parse.go similarity index 97% rename from internal/pkg/transformers/json_parse.go rename to pkg/transformers/json_parse.go index 64904a653..1a00ccf08 100644 --- a/internal/pkg/transformers/json_parse.go +++ b/pkg/transformers/json_parse.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/json_stringify.go b/pkg/transformers/json_stringify.go similarity index 96% rename from internal/pkg/transformers/json_stringify.go rename to pkg/transformers/json_stringify.go index 71e3d4d76..ca515f0a0 100644 --- a/internal/pkg/transformers/json_stringify.go +++ b/pkg/transformers/json_stringify.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/label.go b/pkg/transformers/label.go similarity index 95% rename from internal/pkg/transformers/label.go rename to pkg/transformers/label.go index 876aeb7b0..b14be1b6d 100644 --- a/internal/pkg/transformers/label.go +++ b/pkg/transformers/label.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + 
"github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/latin1_to_utf8.go b/pkg/transformers/latin1_to_utf8.go similarity index 93% rename from internal/pkg/transformers/latin1_to_utf8.go rename to pkg/transformers/latin1_to_utf8.go index 0783b6279..b3dca48b5 100644 --- a/internal/pkg/transformers/latin1_to_utf8.go +++ b/pkg/transformers/latin1_to_utf8.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go similarity index 98% rename from internal/pkg/transformers/merge_fields.go rename to pkg/transformers/merge_fields.go index fe48d5e8d..de1a555c3 100644 --- a/internal/pkg/transformers/merge_fields.go +++ b/pkg/transformers/merge_fields.go @@ -7,10 +7,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/most_or_least_frequent.go b/pkg/transformers/most_or_least_frequent.go similarity index 97% rename from internal/pkg/transformers/most_or_least_frequent.go rename to 
pkg/transformers/most_or_least_frequent.go index 2e58999eb..1381e81d9 100644 --- a/internal/pkg/transformers/most_or_least_frequent.go +++ b/pkg/transformers/most_or_least_frequent.go @@ -7,10 +7,10 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/nest.go b/pkg/transformers/nest.go similarity index 98% rename from internal/pkg/transformers/nest.go rename to pkg/transformers/nest.go index c8e98d486..bced869df 100644 --- a/internal/pkg/transformers/nest.go +++ b/pkg/transformers/nest.go @@ -9,10 +9,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/nothing.go b/pkg/transformers/nothing.go similarity index 96% rename from internal/pkg/transformers/nothing.go rename to pkg/transformers/nothing.go index 3652ad83b..9c5b72f1a 100644 --- a/internal/pkg/transformers/nothing.go +++ b/pkg/transformers/nothing.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- 
diff --git a/internal/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go similarity index 97% rename from internal/pkg/transformers/put_or_filter.go rename to pkg/transformers/put_or_filter.go index 6a3b35341..1437c8b15 100644 --- a/internal/pkg/transformers/put_or_filter.go +++ b/pkg/transformers/put_or_filter.go @@ -6,13 +6,13 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/dsl" - "github.com/johnkerl/miller/internal/pkg/dsl/cst" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/runtime" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/pkg/dsl/cst" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/regularize.go b/pkg/transformers/regularize.go similarity index 94% rename from internal/pkg/transformers/regularize.go rename to pkg/transformers/regularize.go index 9b6f98882..c75d261f0 100644 --- a/internal/pkg/transformers/regularize.go +++ b/pkg/transformers/regularize.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/remove_empty_columns.go b/pkg/transformers/remove_empty_columns.go similarity index 96% 
rename from internal/pkg/transformers/remove_empty_columns.go rename to pkg/transformers/remove_empty_columns.go index 1baf22a8e..ce9b5a5dc 100644 --- a/internal/pkg/transformers/remove_empty_columns.go +++ b/pkg/transformers/remove_empty_columns.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/rename.go b/pkg/transformers/rename.go similarity index 98% rename from internal/pkg/transformers/rename.go rename to pkg/transformers/rename.go index 1878b86d1..e5f0658b8 100644 --- a/internal/pkg/transformers/rename.go +++ b/pkg/transformers/rename.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/reorder.go b/pkg/transformers/reorder.go similarity index 97% rename from internal/pkg/transformers/reorder.go rename to pkg/transformers/reorder.go index 26e1b226b..216dd714d 100644 --- a/internal/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + 
"github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/repeat.go b/pkg/transformers/repeat.go similarity index 98% rename from internal/pkg/transformers/repeat.go rename to pkg/transformers/repeat.go index a4021d4da..eab1725f4 100644 --- a/internal/pkg/transformers/repeat.go +++ b/pkg/transformers/repeat.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) type tRepeatCountSource int diff --git a/internal/pkg/transformers/reshape.go b/pkg/transformers/reshape.go similarity index 98% rename from internal/pkg/transformers/reshape.go rename to pkg/transformers/reshape.go index f82c16dab..4c0cffc07 100644 --- a/internal/pkg/transformers/reshape.go +++ b/pkg/transformers/reshape.go @@ -34,10 +34,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/sample.go b/pkg/transformers/sample.go similarity index 97% rename from internal/pkg/transformers/sample.go rename to pkg/transformers/sample.go index 62482c4b8..b1eef576b 100644 --- a/internal/pkg/transformers/sample.go +++ b/pkg/transformers/sample.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + 
"github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/sec2gmt.go b/pkg/transformers/sec2gmt.go similarity index 95% rename from internal/pkg/transformers/sec2gmt.go rename to pkg/transformers/sec2gmt.go index 4aa34bbd8..0dfbbe5df 100644 --- a/internal/pkg/transformers/sec2gmt.go +++ b/pkg/transformers/sec2gmt.go @@ -5,10 +5,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/sec2gmtdate.go b/pkg/transformers/sec2gmtdate.go similarity index 94% rename from internal/pkg/transformers/sec2gmtdate.go rename to pkg/transformers/sec2gmtdate.go index dc1aca7e0..d09defffa 100644 --- a/internal/pkg/transformers/sec2gmtdate.go +++ b/pkg/transformers/sec2gmtdate.go @@ -5,10 +5,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/seqgen.go b/pkg/transformers/seqgen.go similarity index 96% rename from internal/pkg/transformers/seqgen.go rename to pkg/transformers/seqgen.go index 409596aa2..2d1c3b188 100644 --- a/internal/pkg/transformers/seqgen.go +++ b/pkg/transformers/seqgen.go @@ -6,10 +6,10 @@ import ( "os" "strings" - 
"github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/shuffle.go b/pkg/transformers/shuffle.go similarity index 96% rename from internal/pkg/transformers/shuffle.go rename to pkg/transformers/shuffle.go index 8a7cfc46a..7aad23641 100644 --- a/internal/pkg/transformers/shuffle.go +++ b/pkg/transformers/shuffle.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/skip_trivial_records.go b/pkg/transformers/skip_trivial_records.go similarity index 96% rename from internal/pkg/transformers/skip_trivial_records.go rename to pkg/transformers/skip_trivial_records.go index 85c5bac21..91c8bd242 100644 --- a/internal/pkg/transformers/skip_trivial_records.go +++ b/pkg/transformers/skip_trivial_records.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/sort.go b/pkg/transformers/sort.go similarity index 98% rename from internal/pkg/transformers/sort.go rename to pkg/transformers/sort.go index 429314ddd..d28009ae4 100644 --- 
a/internal/pkg/transformers/sort.go +++ b/pkg/transformers/sort.go @@ -48,10 +48,10 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/sort_within_records.go b/pkg/transformers/sort_within_records.go similarity index 97% rename from internal/pkg/transformers/sort_within_records.go rename to pkg/transformers/sort_within_records.go index d2af984ca..398ca5372 100644 --- a/internal/pkg/transformers/sort_within_records.go +++ b/pkg/transformers/sort_within_records.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/split.go b/pkg/transformers/split.go similarity index 98% rename from internal/pkg/transformers/split.go rename to pkg/transformers/split.go index 50d415918..7834931e9 100644 --- a/internal/pkg/transformers/split.go +++ b/pkg/transformers/split.go @@ -7,10 +7,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git 
a/internal/pkg/transformers/ssub.go b/pkg/transformers/ssub.go similarity index 94% rename from internal/pkg/transformers/ssub.go rename to pkg/transformers/ssub.go index bd8e54247..a31864711 100644 --- a/internal/pkg/transformers/ssub.go +++ b/pkg/transformers/ssub.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/stats1.go b/pkg/transformers/stats1.go similarity index 98% rename from internal/pkg/transformers/stats1.go rename to pkg/transformers/stats1.go index ade693989..b58129691 100644 --- a/internal/pkg/transformers/stats1.go +++ b/pkg/transformers/stats1.go @@ -8,11 +8,11 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/stats2.go b/pkg/transformers/stats2.go similarity index 98% rename from internal/pkg/transformers/stats2.go rename to pkg/transformers/stats2.go index c8f163911..a30141021 100644 --- a/internal/pkg/transformers/stats2.go +++ b/pkg/transformers/stats2.go @@ -6,11 +6,11 @@ import ( "os" "strings" - 
"github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/step.go b/pkg/transformers/step.go similarity index 99% rename from internal/pkg/transformers/step.go rename to pkg/transformers/step.go index b40aaeed6..56539cb90 100644 --- a/internal/pkg/transformers/step.go +++ b/pkg/transformers/step.go @@ -73,12 +73,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // For EWMA diff --git a/internal/pkg/transformers/sub.go b/pkg/transformers/sub.go similarity index 94% rename from internal/pkg/transformers/sub.go rename to pkg/transformers/sub.go index eee778362..1c96b45fc 100644 --- a/internal/pkg/transformers/sub.go +++ b/pkg/transformers/sub.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + 
"github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/summary.go b/pkg/transformers/summary.go similarity index 98% rename from internal/pkg/transformers/summary.go rename to pkg/transformers/summary.go index 6b6134c8b..1b1db3df4 100644 --- a/internal/pkg/transformers/summary.go +++ b/pkg/transformers/summary.go @@ -6,11 +6,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/tac.go b/pkg/transformers/tac.go similarity index 96% rename from internal/pkg/transformers/tac.go rename to pkg/transformers/tac.go index 5075a3efb..83ccd6876 100644 --- a/internal/pkg/transformers/tac.go +++ b/pkg/transformers/tac.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/tail.go b/pkg/transformers/tail.go similarity index 96% rename from internal/pkg/transformers/tail.go rename to pkg/transformers/tail.go index 0081ae51d..dcba44b67 100644 --- a/internal/pkg/transformers/tail.go +++ b/pkg/transformers/tail.go @@ -6,9 +6,9 @@ import ( "os" "strings" 
- "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/tee.go b/pkg/transformers/tee.go similarity index 97% rename from internal/pkg/transformers/tee.go rename to pkg/transformers/tee.go index 04bd65648..9e944df17 100644 --- a/internal/pkg/transformers/tee.go +++ b/pkg/transformers/tee.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/output" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/template.go b/pkg/transformers/template.go similarity index 95% rename from internal/pkg/transformers/template.go rename to pkg/transformers/template.go index 152f0878e..392f96377 100644 --- a/internal/pkg/transformers/template.go +++ b/pkg/transformers/template.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/top.go b/pkg/transformers/top.go similarity index 97% rename from internal/pkg/transformers/top.go rename to pkg/transformers/top.go index 3cdb1dc57..70119731b 100644 --- 
a/internal/pkg/transformers/top.go +++ b/pkg/transformers/top.go @@ -6,11 +6,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/transformers/utils" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/transformers/utils" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/unflatten.go b/pkg/transformers/unflatten.go similarity index 97% rename from internal/pkg/transformers/unflatten.go rename to pkg/transformers/unflatten.go index af0a3a904..d1e02a52c 100644 --- a/internal/pkg/transformers/unflatten.go +++ b/pkg/transformers/unflatten.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/uniq.go b/pkg/transformers/uniq.go similarity index 98% rename from internal/pkg/transformers/uniq.go rename to pkg/transformers/uniq.go index a3dd68ef4..f28e6c854 100644 --- a/internal/pkg/transformers/uniq.go +++ b/pkg/transformers/uniq.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + 
"github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/unspace.go b/pkg/transformers/unspace.go similarity index 97% rename from internal/pkg/transformers/unspace.go rename to pkg/transformers/unspace.go index 20e2b3365..eb6253025 100644 --- a/internal/pkg/transformers/unspace.go +++ b/pkg/transformers/unspace.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/unsparsify.go b/pkg/transformers/unsparsify.go similarity index 96% rename from internal/pkg/transformers/unsparsify.go rename to pkg/transformers/unsparsify.go index 72e6c8626..467b83dac 100644 --- a/internal/pkg/transformers/unsparsify.go +++ b/pkg/transformers/unsparsify.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utf8_to_latin1.go b/pkg/transformers/utf8_to_latin1.go similarity index 93% rename from internal/pkg/transformers/utf8_to_latin1.go rename to pkg/transformers/utf8_to_latin1.go index e3f09210d..fb658562a 100644 --- a/internal/pkg/transformers/utf8_to_latin1.go +++ b/pkg/transformers/utf8_to_latin1.go @@ -6,10 +6,10 @@ import ( "os" "strings" - 
"github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/README.md b/pkg/transformers/utils/README.md similarity index 100% rename from internal/pkg/transformers/utils/README.md rename to pkg/transformers/utils/README.md diff --git a/internal/pkg/transformers/utils/doc.go b/pkg/transformers/utils/doc.go similarity index 100% rename from internal/pkg/transformers/utils/doc.go rename to pkg/transformers/utils/doc.go diff --git a/internal/pkg/transformers/utils/join_bucket.go b/pkg/transformers/utils/join_bucket.go similarity index 92% rename from internal/pkg/transformers/utils/join_bucket.go rename to pkg/transformers/utils/join_bucket.go index 92b169541..4e26e2538 100644 --- a/internal/pkg/transformers/utils/join_bucket.go +++ b/pkg/transformers/utils/join_bucket.go @@ -7,7 +7,7 @@ package utils import ( "container/list" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/join_bucket_keeper.go b/pkg/transformers/utils/join_bucket_keeper.go similarity index 98% rename from internal/pkg/transformers/utils/join_bucket_keeper.go rename to pkg/transformers/utils/join_bucket_keeper.go index 6f0e307b9..df218cc3d 100644 --- a/internal/pkg/transformers/utils/join_bucket_keeper.go +++ b/pkg/transformers/utils/join_bucket_keeper.go @@ -113,11 +113,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/cli" - "github.com/johnkerl/miller/internal/pkg/input" - 
"github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/percentile_keeper.go b/pkg/transformers/utils/percentile_keeper.go similarity index 97% rename from internal/pkg/transformers/utils/percentile_keeper.go rename to pkg/transformers/utils/percentile_keeper.go index b89f8831b..0aebdc709 100644 --- a/internal/pkg/transformers/utils/percentile_keeper.go +++ b/pkg/transformers/utils/percentile_keeper.go @@ -8,8 +8,8 @@ import ( "fmt" "sort" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/mlrval" ) type PercentileKeeper struct { diff --git a/internal/pkg/transformers/utils/stats1_accumulators.go b/pkg/transformers/utils/stats1_accumulators.go similarity index 99% rename from internal/pkg/transformers/utils/stats1_accumulators.go rename to pkg/transformers/utils/stats1_accumulators.go index c984ed922..02756a9a3 100644 --- a/internal/pkg/transformers/utils/stats1_accumulators.go +++ b/pkg/transformers/utils/stats1_accumulators.go @@ -9,9 +9,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/internal/pkg/bifs" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/stats2_accumulators.go b/pkg/transformers/utils/stats2_accumulators.go similarity index 99% rename 
from internal/pkg/transformers/utils/stats2_accumulators.go rename to pkg/transformers/utils/stats2_accumulators.go index 05274b21f..84fcdc030 100644 --- a/internal/pkg/transformers/utils/stats2_accumulators.go +++ b/pkg/transformers/utils/stats2_accumulators.go @@ -9,8 +9,8 @@ import ( "math" "os" - "github.com/johnkerl/miller/internal/pkg/lib" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/top_keeper.go b/pkg/transformers/utils/top_keeper.go similarity index 96% rename from internal/pkg/transformers/utils/top_keeper.go rename to pkg/transformers/utils/top_keeper.go index 1a536e5cd..3a3ce9e4e 100644 --- a/internal/pkg/transformers/utils/top_keeper.go +++ b/pkg/transformers/utils/top_keeper.go @@ -5,8 +5,8 @@ package utils import ( - "github.com/johnkerl/miller/internal/pkg/mlrval" - "github.com/johnkerl/miller/internal/pkg/types" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/transformers/utils/window_keeper.go b/pkg/transformers/utils/window_keeper.go similarity index 97% rename from internal/pkg/transformers/utils/window_keeper.go rename to pkg/transformers/utils/window_keeper.go index 7c213ad71..2de875020 100644 --- a/internal/pkg/transformers/utils/window_keeper.go +++ b/pkg/transformers/utils/window_keeper.go @@ -1,7 +1,7 @@ package utils import ( - "github.com/johnkerl/miller/internal/pkg/lib" + "github.com/johnkerl/miller/pkg/lib" ) // WindowKeeper is a sliding-window container, nominally for use by mlr step, diff --git a/internal/pkg/transformers/utils/window_keeper_test.go b/pkg/transformers/utils/window_keeper_test.go similarity index 100% rename from internal/pkg/transformers/utils/window_keeper_test.go rename to 
pkg/transformers/utils/window_keeper_test.go diff --git a/internal/pkg/types/README.md b/pkg/types/README.md similarity index 100% rename from internal/pkg/types/README.md rename to pkg/types/README.md diff --git a/internal/pkg/types/context.go b/pkg/types/context.go similarity index 98% rename from internal/pkg/types/context.go rename to pkg/types/context.go index a0771efa3..08ba3cbb6 100644 --- a/internal/pkg/types/context.go +++ b/pkg/types/context.go @@ -5,7 +5,7 @@ import ( "container/list" "strconv" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) // Since Go is concurrent, the context struct (AWK-like variables such as diff --git a/internal/pkg/types/doc.go b/pkg/types/doc.go similarity index 100% rename from internal/pkg/types/doc.go rename to pkg/types/doc.go diff --git a/internal/pkg/types/indexed-lvalues.md b/pkg/types/indexed-lvalues.md similarity index 100% rename from internal/pkg/types/indexed-lvalues.md rename to pkg/types/indexed-lvalues.md diff --git a/internal/pkg/types/mlrval_typing.go b/pkg/types/mlrval_typing.go similarity index 97% rename from internal/pkg/types/mlrval_typing.go rename to pkg/types/mlrval_typing.go index 85ad30d48..e3c68b5f8 100644 --- a/internal/pkg/types/mlrval_typing.go +++ b/pkg/types/mlrval_typing.go @@ -8,7 +8,7 @@ package types import ( "fmt" - "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/internal/pkg/version/doc.go b/pkg/version/doc.go similarity index 100% rename from internal/pkg/version/doc.go rename to pkg/version/doc.go diff --git a/internal/pkg/version/version.go b/pkg/version/version.go similarity index 100% rename from internal/pkg/version/version.go rename to pkg/version/version.go diff --git a/regression_test.go b/regression_test.go index 1e819f2f8..5657d17bc 100644 --- a/regression_test.go +++ b/regression_test.go @@ -5,13 +5,13 @@ 
import ( "os" "testing" - "github.com/johnkerl/miller/internal/pkg/terminals/regtest" + "github.com/johnkerl/miller/pkg/terminals/regtest" ) // TestRegression is a familiar entry point for regression testing. Miller // regression tests are more flexibly invoked via 'mlr regtest'. However here // is a standard location so people can get at them via 'go test'. Please see -// (as of this writing) internal/pkg/terminals/regtest for the Miller regtest package. +// (as of this writing) pkg/terminals/regtest for the Miller regtest package. func TestRegression(t *testing.T) { // How much detail to show? There are thousands of cases, organized into a // few hundred top-level directories under ./test/cases. diff --git a/scripts/mcountlines b/scripts/mcountlines index 3c16c481f..7a9cfd90f 100755 --- a/scripts/mcountlines +++ b/scripts/mcountlines @@ -1,14 +1,14 @@ #!/bin/bash wc -l \ - $(find internal -name \*.go | grep -v internal/pkg/parsing) \ - internal/pkg/parsing/mlr.bnf \ + $(find -name \*.go | grep -v pkg/parsing) \ + pkg/parsing/mlr.bnf \ | sort -n echo wc -c \ - $(find internal -name \*.go | grep -v internal/pkg/parsing) \ - internal/pkg/parsing/mlr.bnf \ + $(find -name \*.go | grep -v pkg/parsing) \ + pkg/parsing/mlr.bnf \ | sort -n \ | tail -n 5 diff --git a/todo.txt b/todo.txt index eea98acf3..5507dd73e 100644 --- a/todo.txt +++ b/todo.txt @@ -280,7 +280,7 @@ DOC w contact re https://jsonlines.org/on_the_web/ * verslink old relnotes * single UT, hard to invoke w/ new full go.mod path - go test $(ls internal/pkg/lib/*.go|grep -v test) internal/pkg/lib/unbackslash_test.go + go test $(ls pkg/lib/*.go|grep -v test) pkg/lib/unbackslash_test.go etc * file-formats: NIDX link to headerless CSV * window.mlr, window2.mlr -> doc somewhere diff --git a/tools/build-dsl b/tools/build-dsl index 90854230f..e2a6186d2 100755 --- a/tools/build-dsl +++ b/tools/build-dsl @@ -35,26 +35,26 @@ if [ ! 
-x "$bingocc" ]; then exit 1 fi -rm -f internal/pkg/parsing/*.txt +rm -f pkg/parsing/*.txt if [ "$verbose" = "true" ]; then - lr1="internal/pkg/parsing/LR1_conflicts.txt" - $bingocc -v -o ./internal/pkg/parsing -p mlr/internal/pkg/parsing internal/pkg/parsing/mlr.bnf || expand -2 $lr1 + lr1="pkg/parsing/LR1_conflicts.txt" + $bingocc -v -o ./pkg/parsing -p mlr/pkg/parsing pkg/parsing/mlr.bnf || expand -2 $lr1 else - $bingocc -o ./internal/pkg/parsing -p mlr/internal/pkg/parsing internal/pkg/parsing/mlr.bnf + $bingocc -o ./pkg/parsing -p mlr/pkg/parsing pkg/parsing/mlr.bnf fi # Code-gen directories: -# internal/pkg/parsing/errors/ -# internal/pkg/parsing/lexer/ -# internal/pkg/parsing/parser/ -# internal/pkg/parsing/token/ -# internal/pkg/parsing/util/ +# pkg/parsing/errors/ +# pkg/parsing/lexer/ +# pkg/parsing/parser/ +# pkg/parsing/token/ +# pkg/parsing/util/ # Override GOCC codegen with customized error handling -cp internal/pkg/parsing/errors.go.template internal/pkg/parsing/errors/errors.go +cp pkg/parsing/errors.go.template pkg/parsing/errors/errors.go # We might need a manual replace of os.ReadFile by ioutil.ReadFile in autogen code. Note we don't # use latest-and-greatest Go compiler version in our go.mod since we want to build on Centos which # can be trailing-edge in that regard. 
-for x in internal/pkg/parsing/*/*.go; do gofmt -w $x; done +for x in pkg/parsing/*/*.go; do gofmt -w $x; done From 03eed305f9c676441ddd8069ad13200a52311817 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 10 Sep 2023 17:23:50 -0400 Subject: [PATCH 065/456] doc tweaks --- docs/src/miller-as-library.md | 5 +++++ docs/src/miller-as-library.md.in | 5 +++++ docs/src/release-docs.md | 1 + docs/src/release-docs.md.in | 1 + 4 files changed, 12 insertions(+) diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md index 49c381138..cf2149165 100644 --- a/docs/src/miller-as-library.md +++ b/docs/src/miller-as-library.md @@ -19,6 +19,11 @@ Quick links: Very initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects. +Caveat emptor: Miller's backward-compatibility guarantees are at the CLI level; API is not guaranteed stable. +For this reason, please be careful with your version pins. + +I'm happy to discuss this new area further at the [discussions page](https://github.com/johnkerl/miller/discussions). + ## Setup ``` diff --git a/docs/src/miller-as-library.md.in b/docs/src/miller-as-library.md.in index b7051165b..f5bda1b00 100644 --- a/docs/src/miller-as-library.md.in +++ b/docs/src/miller-as-library.md.in @@ -3,6 +3,11 @@ Very initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects. +Caveat emptor: Miller's backward-compatibility guarantees are at the CLI level; API is not guaranteed stable. +For this reason, please be careful with your version pins. + +I'm happy to discuss this new area further at the [discussions page](https://github.com/johnkerl/miller/discussions). 
+ ## Setup ``` diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index 868e25337..4b9380d72 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -23,6 +23,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| +main | [main branch](https://miller.readthedocs.io/en/main) | N/A | 6.8.0 | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0) | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) | 6.7.0 | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0) | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) | 6.6.0 | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0) | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) | diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index b98058192..41bd54025 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -7,6 +7,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| +main | [main branch](https://miller.readthedocs.io/en/main) | N/A | 6.8.0 | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0) | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) | 6.7.0 | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0) | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) | 6.6.0 | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0) | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) | From 
39fa3a19bc15ff98487191fb69ac64b84e75d46e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 10 Sep 2023 19:47:42 -0400 Subject: [PATCH 066/456] Better API example (#1392) --- docs/src/miller-as-library.md | 118 ++++++++++++++++------------ docs/src/miller-as-library.md.in | 11 ++- docs/src/miller-as-library/main2.go | 107 +++++++++++++++---------- docs/src/miller-as-library/main3.go | 111 ++++++++++++++++++++++++++ 4 files changed, 249 insertions(+), 98 deletions(-) create mode 100644 docs/src/miller-as-library/main3.go diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md index cf2149165..3b337852f 100644 --- a/docs/src/miller-as-library.md +++ b/docs/src/miller-as-library.md @@ -77,24 +77,41 @@ $ go run main1.go ## Another example use
+// This is an example of using Miller as a library.
 package main
 
 import (
 	"bufio"
 	"container/list"
-	"errors"
 	"fmt"
 	"os"
 
+	"github.com/johnkerl/miller/pkg/bifs"
 	"github.com/johnkerl/miller/pkg/cli"
 	"github.com/johnkerl/miller/pkg/input"
 	"github.com/johnkerl/miller/pkg/output"
-	"github.com/johnkerl/miller/pkg/transformers"
 	"github.com/johnkerl/miller/pkg/types"
 )
 
-func convert_csv_to_json(fileNames []string) error {
-	options := &cli.TOptions{
+// Put your record-processing logic here.
+func custom_record_processor(irac *types.RecordAndContext) (*types.RecordAndContext, error) {
+	irec := irac.Record
+
+	v := irec.Get("i")
+	if v == nil {
+		return nil, fmt.Errorf("did not find key \"i\" at filename %s record number %d",
+			irac.Context.FILENAME, irac.Context.FNR,
+		)
+	}
+	v2 := bifs.BIF_times(v, v)
+	irec.PutReference("i2", v2)
+
+	return irac, nil
+}
+
+// Put your various options here.
+func custom_options() *cli.TOptions {
+	return &cli.TOptions{
 		ReaderOptions: cli.TReaderOptions{
 			InputFileFormat: "csv",
 			IFS:             ",",
@@ -105,6 +122,14 @@ func convert_csv_to_json(fileNames []string) error {
 			OutputFileFormat: "json",
 		},
 	}
+}
+
+// This function you don't need to modify.
+func convert_csv_to_json(
+	fileNames []string,
+	options *cli.TOptions,
+	record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error),
+) error {
 	outputStream := os.Stdout
 	outputIsStdout := true
 
@@ -120,60 +145,55 @@ func convert_csv_to_json(fileNames []string) error {
 		return err
 	}
 
+	// Set up the channels for the record-reader.
+	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
+	inputErrorChannel := make(chan error, 1)
+	// Not needed in this example
+	readerDownstreamDoneChannel := make(chan bool, 1)
+
 	// Instantiate the record-writer
 	recordWriter, err := output.Create(&options.WriterOptions)
 	if err != nil {
 		return err
 	}
-
-	cat, err := transformers.NewTransformerCat(
-		false, // doCounters bool,
-		"",    // counterFieldName string,
-		nil,   // groupByFieldNames []string,
-		false, // doFileName bool,
-		false, // doFileNum bool,
-	)
-	if err != nil {
-		return err
-	}
-	recordTransformers := []transformers.IRecordTransformer{cat}
-
-	// Set up the reader-to-transformer and transformer-to-writer channels.
-	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
-	writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext
-
-	// We're done when a fatal error is registered on input (file not found,
-	// etc) or when the record-writer has written all its output. We use
-	// channels to communicate both of these conditions.
-	inputErrorChannel := make(chan error, 1)
-	doneWritingChannel := make(chan bool, 1)
-	dataProcessingErrorChannel := make(chan bool, 1)
-
-	readerDownstreamDoneChannel := make(chan bool, 1)
-
-	// Start the reader, transformer, and writer. Let them run until fatal input
-	// error or end-of-processing happens.
 	bufferedOutputStream := bufio.NewWriter(outputStream)
 
-	go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
-	go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
-		writerChannel, options)
-	go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
-		dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)
+	// Start the record-reader.
+	go recordReader.Read(
+		fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
 
+	// Loop through the record stream.
 	var retval error
 	done := false
 	for !done {
 		select {
+
 		case ierr := <-inputErrorChannel:
 			retval = ierr
 			break
-		case _ = <-dataProcessingErrorChannel:
-			retval = errors.New("exiting due to data error") // details already printed
-			break
-		case _ = <-doneWritingChannel:
-			done = true
+
+		case iracs := <-readerChannel:
+			// Handle the record batch
+			for e := iracs.Front(); e != nil; e = e.Next() {
+				irac := e.Value.(*types.RecordAndContext)
+				if irac.Record != nil {
+					orac, err := record_processor(irac)
+					if err != nil {
+						retval = err
+						done = true
+						break
+					}
+					recordWriter.Write(orac.Record, bufferedOutputStream, outputIsStdout)
+				}
+				if irac.OutputString != "" {
+					fmt.Fprintln(bufferedOutputStream, irac.OutputString)
+				}
+				if irac.EndOfStream {
+					done = true
+				}
+			}
 			break
+
 		}
 	}
 
@@ -183,7 +203,8 @@ func convert_csv_to_json(fileNames []string) error {
 }
 
 func main() {
-	err := convert_csv_to_json(os.Args[1:])
+	options := custom_options()
+	err := convert_csv_to_json(os.Args[1:], options, custom_record_processor)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 	}
@@ -198,10 +219,10 @@ nadir.west.our.org,down
 
 ```
 $ go build main2.go
-$ ./main2 data/hostnames.csv
-{"host": "apoapsis.east.our.org", "status": "up"}
-{"host": "nadir.west.our.org", "status": "down"}
+$ ./main2 data/small.csv
+{"a": "pan", "b": "pan", "i": 1, "x": 0.3467901443380824, "y": 0.7268028627434533, "i2": 1}
+{"a": "eks", "b": "pan", "i": 2, "x": 0.7586799647899636, "y": 0.5221511083334797, "i2": 4}
+{"a": "wye", "b": "wye", "i": 3, "x": 0.20460330576630303, "y": 0.33831852551664776, "i2": 9}
+{"a": "eks", "b": "wye", "i": 4, "x": 0.38139939387114097, "y": 0.13418874328430463, "i2": 16}
+{"a": "wye", "b": "pan", "i": 5, "x": 0.5732889198020006, "y": 0.8636244699032729, "i2": 25}
 ```
-
-
-
diff --git a/docs/src/miller-as-library.md.in b/docs/src/miller-as-library.md.in
index f5bda1b00..ba950f0cb 100644
--- a/docs/src/miller-as-library.md.in
+++ b/docs/src/miller-as-library.md.in
@@ -50,10 +50,10 @@ GENMD-INCLUDE-ESCAPED(data/hostnames.csv)
 
 ```
 $ go build main2.go
-$ ./main2 data/hostnames.csv
-{"host": "apoapsis.east.our.org", "status": "up"}
-{"host": "nadir.west.our.org", "status": "down"}
+$ ./main2 data/small.csv
+{"a": "pan", "b": "pan", "i": 1, "x": 0.3467901443380824, "y": 0.7268028627434533, "i2": 1}
+{"a": "eks", "b": "pan", "i": 2, "x": 0.7586799647899636, "y": 0.5221511083334797, "i2": 4}
+{"a": "wye", "b": "wye", "i": 3, "x": 0.20460330576630303, "y": 0.33831852551664776, "i2": 9}
+{"a": "eks", "b": "wye", "i": 4, "x": 0.38139939387114097, "y": 0.13418874328430463, "i2": 16}
+{"a": "wye", "b": "pan", "i": 5, "x": 0.5732889198020006, "y": 0.8636244699032729, "i2": 25}
 ```
-
-
-
diff --git a/docs/src/miller-as-library/main2.go b/docs/src/miller-as-library/main2.go
index 07d4be50e..e4be222d5 100644
--- a/docs/src/miller-as-library/main2.go
+++ b/docs/src/miller-as-library/main2.go
@@ -1,21 +1,38 @@
+// This is an example of using Miller as a library.
 package main
 
 import (
 	"bufio"
 	"container/list"
-	"errors"
 	"fmt"
 	"os"
 
+	"github.com/johnkerl/miller/pkg/bifs"
 	"github.com/johnkerl/miller/pkg/cli"
 	"github.com/johnkerl/miller/pkg/input"
 	"github.com/johnkerl/miller/pkg/output"
-	"github.com/johnkerl/miller/pkg/transformers"
 	"github.com/johnkerl/miller/pkg/types"
 )
 
-func convert_csv_to_json(fileNames []string) error {
-	options := &cli.TOptions{
+// Put your record-processing logic here.
+func custom_record_processor(irac *types.RecordAndContext) (*types.RecordAndContext, error) {
+	irec := irac.Record
+
+	v := irec.Get("i")
+	if v == nil {
+		return nil, fmt.Errorf("did not find key \"i\" at filename %s record number %d",
+			irac.Context.FILENAME, irac.Context.FNR,
+		)
+	}
+	v2 := bifs.BIF_times(v, v)
+	irec.PutReference("i2", v2)
+
+	return irac, nil
+}
+
+// Put your various options here.
+func custom_options() *cli.TOptions {
+	return &cli.TOptions{
 		ReaderOptions: cli.TReaderOptions{
 			InputFileFormat: "csv",
 			IFS:             ",",
@@ -26,6 +43,14 @@ func convert_csv_to_json(fileNames []string) error {
 			OutputFileFormat: "json",
 		},
 	}
+}
+
+// This function you don't need to modify.
+func convert_csv_to_json(
+	fileNames []string,
+	options *cli.TOptions,
+	record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error),
+) error {
 	outputStream := os.Stdout
 	outputIsStdout := true
 
@@ -41,60 +66,55 @@ func convert_csv_to_json(fileNames []string) error {
 		return err
 	}
 
+	// Set up the channels for the record-reader.
+	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
+	inputErrorChannel := make(chan error, 1)
+	// Not needed in this example
+	readerDownstreamDoneChannel := make(chan bool, 1)
+
 	// Instantiate the record-writer
 	recordWriter, err := output.Create(&options.WriterOptions)
 	if err != nil {
 		return err
 	}
-
-	cat, err := transformers.NewTransformerCat(
-		false, // doCounters bool,
-		"",    // counterFieldName string,
-		nil,   // groupByFieldNames []string,
-		false, // doFileName bool,
-		false, // doFileNum bool,
-	)
-	if err != nil {
-		return err
-	}
-	recordTransformers := []transformers.IRecordTransformer{cat}
-
-	// Set up the reader-to-transformer and transformer-to-writer channels.
-	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
-	writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext
-
-	// We're done when a fatal error is registered on input (file not found,
-	// etc) or when the record-writer has written all its output. We use
-	// channels to communicate both of these conditions.
-	inputErrorChannel := make(chan error, 1)
-	doneWritingChannel := make(chan bool, 1)
-	dataProcessingErrorChannel := make(chan bool, 1)
-
-	readerDownstreamDoneChannel := make(chan bool, 1)
-
-	// Start the reader, transformer, and writer. Let them run until fatal input
-	// error or end-of-processing happens.
 	bufferedOutputStream := bufio.NewWriter(outputStream)
 
-	go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
-	go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
-		writerChannel, options)
-	go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
-		dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)
+	// Start the record-reader.
+	go recordReader.Read(
+		fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
 
+	// Loop through the record stream.
 	var retval error
 	done := false
 	for !done {
 		select {
+
 		case ierr := <-inputErrorChannel:
 			retval = ierr
 			break
-		case _ = <-dataProcessingErrorChannel:
-			retval = errors.New("exiting due to data error") // details already printed
-			break
-		case _ = <-doneWritingChannel:
-			done = true
+
+		case iracs := <-readerChannel:
+			// Handle the record batch
+			for e := iracs.Front(); e != nil; e = e.Next() {
+				irac := e.Value.(*types.RecordAndContext)
+				if irac.Record != nil {
+					orac, err := record_processor(irac)
+					if err != nil {
+						retval = err
+						done = true
+						break
+					}
+					recordWriter.Write(orac.Record, bufferedOutputStream, outputIsStdout)
+				}
+				if irac.OutputString != "" {
+					fmt.Fprintln(bufferedOutputStream, irac.OutputString)
+				}
+				if irac.EndOfStream {
+					done = true
+				}
+			}
 			break
+
 		}
 	}
 
@@ -104,7 +124,8 @@ func convert_csv_to_json(fileNames []string) error {
 }
 
 func main() {
-	err := convert_csv_to_json(os.Args[1:])
+	options := custom_options()
+	err := convert_csv_to_json(os.Args[1:], options, custom_record_processor)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 	}
diff --git a/docs/src/miller-as-library/main3.go b/docs/src/miller-as-library/main3.go
new file mode 100644
index 000000000..07d4be50e
--- /dev/null
+++ b/docs/src/miller-as-library/main3.go
@@ -0,0 +1,111 @@
+package main
+
+import (
+	"bufio"
+	"container/list"
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/johnkerl/miller/pkg/cli"
+	"github.com/johnkerl/miller/pkg/input"
+	"github.com/johnkerl/miller/pkg/output"
+	"github.com/johnkerl/miller/pkg/transformers"
+	"github.com/johnkerl/miller/pkg/types"
+)
+
+func convert_csv_to_json(fileNames []string) error {
+	options := &cli.TOptions{
+		ReaderOptions: cli.TReaderOptions{
+			InputFileFormat: "csv",
+			IFS:             ",",
+			IRS:             "\n",
+			RecordsPerBatch: 1,
+		},
+		WriterOptions: cli.TWriterOptions{
+			OutputFileFormat: "json",
+		},
+	}
+	outputStream := os.Stdout
+	outputIsStdout := true
+
+	// Since Go is concurrent, the context struct needs to be duplicated and
+	// passed through the channels along with each record.
+	initialContext := types.NewContext()
+
+	// Instantiate the record-reader.
+	// RecordsPerBatch is tracked separately from ReaderOptions since join/repl
+	// may use batch size of 1.
+	recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch)
+	if err != nil {
+		return err
+	}
+
+	// Instantiate the record-writer
+	recordWriter, err := output.Create(&options.WriterOptions)
+	if err != nil {
+		return err
+	}
+
+	cat, err := transformers.NewTransformerCat(
+		false, // doCounters bool,
+		"",    // counterFieldName string,
+		nil,   // groupByFieldNames []string,
+		false, // doFileName bool,
+		false, // doFileNum bool,
+	)
+	if err != nil {
+		return err
+	}
+	recordTransformers := []transformers.IRecordTransformer{cat}
+
+	// Set up the reader-to-transformer and transformer-to-writer channels.
+	readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
+	writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext
+
+	// We're done when a fatal error is registered on input (file not found,
+	// etc) or when the record-writer has written all its output. We use
+	// channels to communicate both of these conditions.
+	inputErrorChannel := make(chan error, 1)
+	doneWritingChannel := make(chan bool, 1)
+	dataProcessingErrorChannel := make(chan bool, 1)
+
+	readerDownstreamDoneChannel := make(chan bool, 1)
+
+	// Start the reader, transformer, and writer. Let them run until fatal input
+	// error or end-of-processing happens.
+	bufferedOutputStream := bufio.NewWriter(outputStream)
+
+	go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
+	go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
+		writerChannel, options)
+	go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
+		dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)
+
+	var retval error
+	done := false
+	for !done {
+		select {
+		case ierr := <-inputErrorChannel:
+			retval = ierr
+			break
+		case _ = <-dataProcessingErrorChannel:
+			retval = errors.New("exiting due to data error") // details already printed
+			break
+		case _ = <-doneWritingChannel:
+			done = true
+			break
+		}
+	}
+
+	bufferedOutputStream.Flush()
+
+	return retval
+}
+
+func main() {
+	err := convert_csv_to_json(os.Args[1:])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", err)
+	}
+}

From 5136507192a0360152bf4fd472d8b7c98959f32f Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 10 Sep 2023 20:01:41 -0400
Subject: [PATCH 067/456] Name-neaten for #1392 (#1393)

---
 docs/src/miller-as-library.md       | 4 ++--
 docs/src/miller-as-library/main2.go | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md
index 3b337852f..c17872ba0 100644
--- a/docs/src/miller-as-library.md
+++ b/docs/src/miller-as-library.md
@@ -125,7 +125,7 @@ func custom_options() *cli.TOptions {
 }
 
 // This function you don't need to modify.
-func convert_csv_to_json(
+func run_custom_processor(
 	fileNames []string,
 	options *cli.TOptions,
 	record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error),
@@ -204,7 +204,7 @@ func convert_csv_to_json(
 
 func main() {
 	options := custom_options()
-	err := convert_csv_to_json(os.Args[1:], options, custom_record_processor)
+	err := run_custom_processor(os.Args[1:], options, custom_record_processor)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 	}
diff --git a/docs/src/miller-as-library/main2.go b/docs/src/miller-as-library/main2.go
index e4be222d5..c460a174a 100644
--- a/docs/src/miller-as-library/main2.go
+++ b/docs/src/miller-as-library/main2.go
@@ -46,7 +46,7 @@ func custom_options() *cli.TOptions {
 }
 
 // This function you don't need to modify.
-func convert_csv_to_json(
+func run_custom_processor(
 	fileNames []string,
 	options *cli.TOptions,
 	record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error),
@@ -125,7 +125,7 @@ func convert_csv_to_json(
 
 func main() {
 	options := custom_options()
-	err := convert_csv_to_json(os.Args[1:], options, custom_record_processor)
+	err := run_custom_processor(os.Args[1:], options, custom_record_processor)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 	}

From 087f4bb4c9200860962752b7c77304a04a5d4460 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Mon, 11 Sep 2023 17:15:37 -0400
Subject: [PATCH 068/456] Include `null` in `any` typemask (#1395)

---
 pkg/mlrval/mlrval_type.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/mlrval/mlrval_type.go b/pkg/mlrval/mlrval_type.go
index e47f73b8e..e823029e0 100644
--- a/pkg/mlrval/mlrval_type.go
+++ b/pkg/mlrval/mlrval_type.go
@@ -168,6 +168,7 @@ const MT_TYPE_MASK_VAR = (1 << MT_INT) |
 	(1 << MT_FLOAT) |
 	(1 << MT_BOOL) |
 	(1 << MT_VOID) |
+	(1 << MT_NULL) |
 	(1 << MT_STRING) |
 	(1 << MT_ARRAY) |
 	(1 << MT_MAP)

From d19b91ec6b179082c64b7c48874920415258d862 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Sep 2023 08:36:06 -0400
Subject: [PATCH 069/456] Bump goreleaser/goreleaser-action from 4.6.0 to 5.0.0
 (#1396)

Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 4.6.0 to 5.0.0.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/5fdedb94abba051217030cc86d4523cf3f02243d...7ec5c2b0c6cdda6e8bbb49444bc797dd33d74dd8)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 6db5dd970..d775bacc0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -40,7 +40,7 @@ jobs:
 
       # https://goreleaser.com/ci/actions/
       - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@5fdedb94abba051217030cc86d4523cf3f02243d
+        uses: goreleaser/goreleaser-action@7ec5c2b0c6cdda6e8bbb49444bc797dd33d74dd8
         #if: startsWith(github.ref, 'refs/tags/v')
         with:
           version: latest

From 654577c7769a24624dd2bc73b66cc13b6df96b76 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 25 Sep 2023 09:02:02 -0400
Subject: [PATCH 070/456] Bump actions/checkout from 4.0.0 to 4.1.0 (#1400)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/3df4ab11eba7bda6032a0b82a6bb43b11571feac...8ade135a41bc03ea155e62e844d188df1ea18608)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 4039af88c..3f0adb907 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+      uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index e5b497011..067eddec2 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 415d462cd..b43afb357 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
 
     - name: Set up Go
       uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d775bacc0..9273c171b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
         with:
           fetch-depth: 0
 

From d785ea3e55013285f1a5cf3d34b101792d386be5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 6 Oct 2023 08:29:25 -0400
Subject: [PATCH 071/456] Bump golang.org/x/term from 0.12.0 to 0.13.0 (#1404)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.12.0 to 0.13.0.
- [Commits](https://github.com/golang/term/compare/v0.12.0...v0.13.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 4 ++--
 go.sum | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/go.mod b/go.mod
index 886c54ea0..f2e130e84 100644
--- a/go.mod
+++ b/go.mod
@@ -26,8 +26,8 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/sys v0.12.0
-	golang.org/x/term v0.12.0
+	golang.org/x/sys v0.13.0
+	golang.org/x/term v0.13.0
 	golang.org/x/text v0.13.0
 )
 
diff --git a/go.sum b/go.sum
index cf44078f7..00a63d52d 100644
--- a/go.sum
+++ b/go.sum
@@ -40,10 +40,10 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
-golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU=
-golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
+golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From a343d0f34c4250d833c30696af9f8d00fb8bfbf2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 17 Oct 2023 09:44:39 -0400
Subject: [PATCH 072/456] Bump github.com/mattn/go-isatty from 0.0.19 to 0.0.20
 (#1411)

Bumps [github.com/mattn/go-isatty](https://github.com/mattn/go-isatty) from 0.0.19 to 0.0.20.
- [Commits](https://github.com/mattn/go-isatty/compare/v0.0.19...v0.0.20)

---
updated-dependencies:
- dependency-name: github.com/mattn/go-isatty
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index f2e130e84..93fc96b81 100644
--- a/go.mod
+++ b/go.mod
@@ -22,7 +22,7 @@ require (
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
 	github.com/klauspost/compress v1.16.7
 	github.com/lestrrat-go/strftime v1.0.6
-	github.com/mattn/go-isatty v0.0.19
+	github.com/mattn/go-isatty v0.0.20
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
diff --git a/go.sum b/go.sum
index 00a63d52d..de6043422 100644
--- a/go.sum
+++ b/go.sum
@@ -21,8 +21,8 @@ github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2t
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ=
 github.com/lestrrat-go/strftime v1.0.6/go.mod h1:f7jQKgV5nnJpYgdEasS+/y7EsTb8ykN2z68n3TtcTaw=
-github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
-github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 h1:NC4H8hewgaktBqMI5yzy6L/Vln5/H7BEziyxaE2fX3Y=
 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4/go.mod h1:eUQxpEiJy001RoaLXrNa5+QQLYiEgmEafwWuA3ppJSo=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=

From 9a8951fc78a45c7bf839beeec1b0621b16ac3777 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 18 Oct 2023 09:36:53 -0400
Subject: [PATCH 073/456] Bump actions/checkout from 4.1.0 to 4.1.1 (#1412)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.0 to 4.1.1.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/8ade135a41bc03ea155e62e844d188df1ea18608...b4ffde65f46336ab88eb53be808477a3936bae11)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 3f0adb907..71448c3a7 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 067eddec2..967a6cbcc 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index b43afb357..2cea8a5b1 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
 
     - name: Set up Go
       uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9273c171b..12d80d58c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
         with:
           fetch-depth: 0
 

From d3798c5aeec07d8e636fcc44d32c0fa5cc220f18 Mon Sep 17 00:00:00 2001
From: Ralph Ursprung <39383228+rursprung@users.noreply.github.com>
Date: Tue, 24 Oct 2023 15:10:18 +0200
Subject: [PATCH 074/456] add `winget` to README (#1414)

@teo-tsirpanis added miller to `winget` with
microsoft/winget-pkgs#123507 (thanks!).
accordingly it should also be mentioned in the README so that people are
aware of it.

fixes #1331
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0b9cd76e3..96603c2f4 100644
--- a/README.md
+++ b/README.md
@@ -64,12 +64,13 @@ There's a good chance you can get Miller pre-built for your system:
 [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller)
 [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller)
 [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller)
+[![winget](https://img.shields.io/badge/distros-winget-red.svg)](https://github.com/microsoft/winget-pkgs/tree/master/manifests/m/Miller/Miller)
 
 |OS|Installation command|
 |---|---|
 |Linux|`yum install miller`
`apt-get install miller`| |Mac|`brew install miller`
`port install miller`| -|Windows|`choco install miller`| +|Windows|`choco install miller`
`winget install Miller.Miller`| See also [README-versions.md](./README-versions.md) for a full list of package versions. Note that long-term-support (LtS) releases will likely be on older versions. From 6aab161cb00ee4b3175700f7ef69d3f04d28ea70 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 24 Oct 2023 09:12:47 -0400 Subject: [PATCH 075/456] neaten README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96603c2f4..be095ed66 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ There's a good chance you can get Miller pre-built for your system: [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller) [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller) [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller) -[![winget](https://img.shields.io/badge/distros-winget-red.svg)](https://github.com/microsoft/winget-pkgs/tree/master/manifests/m/Miller/Miller) +[![WinGet](https://img.shields.io/badge/distros-winget-392f55.svg)](https://github.com/microsoft/winget-pkgs/tree/master/manifests/m/Miller/Miller) |OS|Installation command| |---|---| From 4b34f80f6a5337711bee307460017320303a1dc4 Mon Sep 17 00:00:00 2001 From: Eng Zer Jun Date: Fri, 27 Oct 2023 21:15:12 +0800 Subject: [PATCH 076/456] transformers/grep: avoid allocations with `(*regexp.Regexp).MatchString` (#1416) We should use `(*regexp.Regexp).MatchString` instead of `(*regexp.Regexp).Match([]byte(...))` when matching string to avoid unnecessary `[]byte` conversions and reduce allocations. 
Example benchmark: var grepRegex = regexp.MustCompile("foo.*") func BenchmarkMatch(b *testing.B) { for i := 0; i < b.N; i++ { if match := grepRegex.Match([]byte("foo bar baz")); !match { b.Fail() } } } func BenchmarkMatchString(b *testing.B) { for i := 0; i < b.N; i++ { if match := grepRegex.MatchString("foo bar baz"); !match { b.Fail() } } } goos: linux goarch: amd64 pkg: github.com/johnkerl/miller/pkg/transformers cpu: AMD Ryzen 7 PRO 4750U with Radeon Graphics BenchmarkMatch-16 5700908 210.3 ns/op 16 B/op 1 allocs/op BenchmarkMatchString-16 8006731 156.4 ns/op 0 B/op 0 allocs/op PASS ok github.com/johnkerl/miller/pkg/transformers 2.857s Signed-off-by: Eng Zer Jun --- pkg/transformers/grep.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/transformers/grep.go b/pkg/transformers/grep.go index 6e8c8f183..46d01244c 100644 --- a/pkg/transformers/grep.go +++ b/pkg/transformers/grep.go @@ -168,7 +168,7 @@ func (tr *TransformerGrep) Transform( } else { inrecAsString = inrec.ToDKVPString() } - matches := tr.regexp.Match([]byte(inrecAsString)) + matches := tr.regexp.MatchString(inrecAsString) if tr.invert { if !matches { outputRecordsAndContexts.PushBack(inrecAndContext) From e4882b11ed0246e592fc8fc3939f0777a4d2132a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:35:23 -0500 Subject: [PATCH 077/456] Bump golang.org/x/text from 0.13.0 to 0.14.0 (#1419) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.13.0 to 0.14.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.13.0...v0.14.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 93fc96b81..bf7ae4736 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.13.0 golang.org/x/term v0.13.0 - golang.org/x/text v0.13.0 + golang.org/x/text v0.14.0 ) require ( diff --git a/go.sum b/go.sum index de6043422..9a2cf7687 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From dd12026fbad5980269220f10be5c2df65b799f22 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:35:40 -0500 Subject: [PATCH 078/456] Bump golang.org/x/sys from 0.13.0 to 0.14.0 (#1420) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.13.0 to 0.14.0. 
- [Commits](https://github.com/golang/sys/compare/v0.13.0...v0.14.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index bf7ae4736..e800d98b8 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.13.0 + golang.org/x/sys v0.14.0 golang.org/x/term v0.13.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index 9a2cf7687..19f3a1f9b 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= +golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= From f2a9ae5ca470fdd2c8d7de9470629d46def532e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 Nov 2023 08:26:13 -0500 Subject: [PATCH 079/456] Bump golang.org/x/term 
from 0.13.0 to 0.14.0 (#1423) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.13.0 to 0.14.0. - [Commits](https://github.com/golang/term/compare/v0.13.0...v0.14.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e800d98b8..afc67de8d 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.14.0 - golang.org/x/term v0.13.0 + golang.org/x/term v0.14.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index 19f3a1f9b..a584cff58 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= -golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= +golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 5b6a1d4713a19aa42fe5c2bfae8a1d2ebfec3b5f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 11 Nov 2023 18:58:49 -0500 Subject: [PATCH 080/456] JSONL output does not properly handle keys 
with quotes (#1425) * mlr --l2j, --j2l * make dev for previous commit * fix #1424 * unit-test cases * iterate --- docs/src/data-diving-examples.md | 46 +++++++++++++++--------------- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/reference-verbs.md | 38 ++++++++++++------------ docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 2 +- man/mlr.1 | 4 +-- pkg/cli/option_parse.go | 27 ++++++++++++++++++ pkg/mlrval/mlrmap_json.go | 6 ++-- test/cases/io-jsonl-io/0031/expout | 4 +-- test/cases/io-jsonl-io/0036/cmd | 1 + test/cases/io-jsonl-io/0036/experr | 0 test/cases/io-jsonl-io/0036/expout | 1 + test/cases/io-jsonl-io/0037/cmd | 1 + test/cases/io-jsonl-io/0037/experr | 0 test/cases/io-jsonl-io/0037/expout | 5 ++++ test/input/quoted-keys.json | 1 + 17 files changed, 90 insertions(+), 54 deletions(-) create mode 100644 test/cases/io-jsonl-io/0036/cmd create mode 100644 test/cases/io-jsonl-io/0036/experr create mode 100644 test/cases/io-jsonl-io/0036/expout create mode 100644 test/cases/io-jsonl-io/0037/cmd create mode 100644 test/cases/io-jsonl-io/0037/experr create mode 100644 test/cases/io-jsonl-io/0037/expout create mode 100644 test/input/quoted-keys.json diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193..100716ec2 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 0037efeb2..068cee8c5 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3645,5 +3645,5 @@ MILLER(1) MILLER(1) - 2023-09-10 MILLER(1) + 2023-11-11 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 3087f568f..41dfa078c 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3624,4 +3624,4 @@ MILLER(1) MILLER(1) - 2023-09-10 MILLER(1) + 2023-11-11 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 89bbc2b71..106ad4bf1 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/man/manpage.txt b/man/manpage.txt
index 3087f568f..41dfa078c 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -3624,4 +3624,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-09-10                         MILLER(1)
+                                  2023-11-11                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index a98daa906..9a6c75d5c 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-09-10
+.\"      Date: 2023-11-11
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-09-10" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-11-11" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index 7f838a096..37ee1fcc3 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -1680,6 +1680,21 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--j2l",
+			help: "Use JSON for input, JSONL for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "json"
+				options.WriterOptions.OutputFileFormat = "json"
+				options.WriterOptions.WrapJSONOutputInOuterList = false
+				options.WriterOptions.JSONOutputMultiline = false
+				*pargi += 1
+			},
+		},
+
 		{
 			name: "--j2t",
 			help: "Use JSON for input, TSV for output.",
@@ -1803,6 +1818,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--l2j",
+			help: "Use JSONL for input, JSON for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "json"
+				options.WriterOptions.OutputFileFormat = "json"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--l2n",
 			help: "Use JSON Lines for input, NIDX for output.",
diff --git a/pkg/mlrval/mlrmap_json.go b/pkg/mlrval/mlrmap_json.go
index d185efc99..a985098eb 100644
--- a/pkg/mlrval/mlrmap_json.go
+++ b/pkg/mlrval/mlrmap_json.go
@@ -122,9 +122,9 @@ func (mlrmap *Mlrmap) marshalJSONAuxSingleLine(
 	for pe := mlrmap.Head; pe != nil; pe = pe.Next {
 		// Write the key which is necessarily string-valued in Miller, and in
 		// JSON for that matter :)
-		buffer.WriteString("\"")
-		buffer.WriteString(colorizer.MaybeColorizeKey(pe.Key, outputIsStdout))
-		buffer.WriteString("\": ")
+		encoded := string(millerJSONEncodeString(pe.Key))
+		buffer.WriteString(colorizer.MaybeColorizeKey(encoded, outputIsStdout))
+		buffer.WriteString(": ")
 
 		// Write the value which is a mlrval
 		valueString, err := pe.Value.marshalJSONAux(jsonFormatting, elementNestingDepth+1, outputIsStdout)
diff --git a/test/cases/io-jsonl-io/0031/expout b/test/cases/io-jsonl-io/0031/expout
index 144f10ea8..0abc41834 100644
--- a/test/cases/io-jsonl-io/0031/expout
+++ b/test/cases/io-jsonl-io/0031/expout
@@ -1,2 +1,2 @@
-{"abcdefghi": "jkl\nmno\rpqr\tstu\"vw\\xyz"}
-{"abcdefghi": "jkl\nmno\rpqr\tstu\"vw\\\\xyz"}
+{"abc\bdef\fghi": "jkl\nmno\rpqr\tstu\"vw\\xyz"}
+{"abc\bdef\fghi": "jkl\nmno\rpqr\tstu\"vw\\\\xyz"}
diff --git a/test/cases/io-jsonl-io/0036/cmd b/test/cases/io-jsonl-io/0036/cmd
new file mode 100644
index 000000000..76b1a8219
--- /dev/null
+++ b/test/cases/io-jsonl-io/0036/cmd
@@ -0,0 +1 @@
+mlr --j2l --from test/input/quoted-keys.json cat
diff --git a/test/cases/io-jsonl-io/0036/experr b/test/cases/io-jsonl-io/0036/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-jsonl-io/0036/expout b/test/cases/io-jsonl-io/0036/expout
new file mode 100644
index 000000000..df652c2ae
--- /dev/null
+++ b/test/cases/io-jsonl-io/0036/expout
@@ -0,0 +1 @@
+{"a\"b\"c": "d\"e\"f"}
diff --git a/test/cases/io-jsonl-io/0037/cmd b/test/cases/io-jsonl-io/0037/cmd
new file mode 100644
index 000000000..d4ee3e5d5
--- /dev/null
+++ b/test/cases/io-jsonl-io/0037/cmd
@@ -0,0 +1 @@
+mlr --l2j --from test/input/quoted-keys.json cat
diff --git a/test/cases/io-jsonl-io/0037/experr b/test/cases/io-jsonl-io/0037/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-jsonl-io/0037/expout b/test/cases/io-jsonl-io/0037/expout
new file mode 100644
index 000000000..94495c187
--- /dev/null
+++ b/test/cases/io-jsonl-io/0037/expout
@@ -0,0 +1,5 @@
+[
+{
+  "a\"b\"c": "d\"e\"f"
+}
+]
diff --git a/test/input/quoted-keys.json b/test/input/quoted-keys.json
new file mode 100644
index 000000000..385de2f1c
--- /dev/null
+++ b/test/input/quoted-keys.json
@@ -0,0 +1 @@
+{"a\"b\"c":"d\"e\"f"}

From 2bcf8813d392641ea3dd2fb47b7df92bbb3ecdbb Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 11 Nov 2023 19:09:02 -0500
Subject: [PATCH 081/456] Add a `--files` option (#1426)

* mlr --files

* doc mods
---
 docs/src/keystroke-savers.md         | 10 ++++++
 docs/src/keystroke-savers.md.in      | 10 ++++++
 docs/src/manpage.md                  |  5 ++-
 docs/src/manpage.txt                 |  5 ++-
 docs/src/reference-main-flag-list.md |  1 +
 man/manpage.txt                      |  5 ++-
 man/mlr.1                            |  3 ++
 pkg/cli/option_parse.go              | 47 ++++++++++++++++++++++++++++
 8 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/docs/src/keystroke-savers.md b/docs/src/keystroke-savers.md
index 1cc2485a1..c62231709 100644
--- a/docs/src/keystroke-savers.md
+++ b/docs/src/keystroke-savers.md
@@ -87,6 +87,16 @@ If there's more than one input file, you can use `--mfrom`, then however many fi
 mlr --c2p --mfrom data/*.csv -- sort -n index
 
+Alternatively, you may place filenames within another file, one per line: + +
+cat data/filenames.txt
+
+ +
+mlr --c2p --files data/filenames.txt cat
+
+ ## Shortest flags for CSV, TSV, and JSON The following have even shorter versions: diff --git a/docs/src/keystroke-savers.md.in b/docs/src/keystroke-savers.md.in index b8cb2b3c5..720dfad14 100644 --- a/docs/src/keystroke-savers.md.in +++ b/docs/src/keystroke-savers.md.in @@ -32,6 +32,16 @@ GENMD-SHOW-COMMAND mlr --c2p --mfrom data/*.csv -- sort -n index GENMD-EOF +Alternatively, you may place filenames within another file, one per line: + +GENMD-SHOW-COMMAND +cat data/filenames.txt +GENMD-EOF + +GENMD-SHOW-COMMAND +mlr --c2p --files data/filenames.txt cat +GENMD-EOF + ## Shortest flags for CSV, TSV, and JSON The following have even shorter versions: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 068cee8c5..3a6046919 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -513,6 +513,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. + --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the @@ -3645,5 +3648,5 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 41dfa078c..ef7569711 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -492,6 +492,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. + --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. 
Example: `mlr --from a.dat --from b.dat cat` is the @@ -3624,4 +3627,4 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 0a93e12e2..f9ce597ff 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -266,6 +266,7 @@ These are flags which don't fit into any other category. **Flags:** * `--fflush`: Force buffered output to be written after every output record. The default is flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. The default is a significant performance optimization for large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. +* `--files {filename}`: Use this to specify a file which itself contains, one per line, names of input files. May be used more than once. * `--from {filename}`: Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the same as `mlr cat a.dat b.dat`. * `--hash-records`: This is an internal parameter which normally does not need to be modified. It controls the mechanism by which Miller accesses fields within records. In general --no-hash-records is faster, and is the default. For specific use-cases involving data having many fields, and many of them being processed during a given processing run, --hash-records might offer a slight performance benefit. * `--infer-int-as-float or -A`: Cast all integers in data files to floats. diff --git a/man/manpage.txt b/man/manpage.txt index 41dfa078c..ef7569711 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -492,6 +492,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. 
+ --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the @@ -3624,4 +3627,4 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 9a6c75d5c..c61b2cb67 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -611,6 +611,9 @@ These are flags which don't fit into any other category. large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. +--files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 37ee1fcc3..c9732b025 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -7,7 +7,9 @@ package cli import ( + "bufio" "fmt" + "io" "os" "strings" @@ -2797,6 +2799,51 @@ var MiscFlagSection = FlagSection{ }, }, + { + name: "--files", + arg: "{filename}", + help: "Use this to specify a file which itself contains, one per line, names of input files. 
May be used more than once.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + CheckArgCount(args, *pargi, argc, 2) + + fileName := args[*pargi+1] + handle, err := os.Open(fileName) + if err != nil { + /// XXXX return false + fmt.Fprintln(os.Stderr, "mlr", err) + os.Exit(1) + } + defer handle.Close() + + lineReader := bufio.NewReader(handle) + + eof := false + lineno := 0 + for !eof { + line, err := lineReader.ReadString('\n') + if err == io.EOF { + err = nil + eof = true + break + } + lineno++ + + if err != nil { + fmt.Fprintln(os.Stderr, "mlr", err) + os.Exit(1) + } + + // This is how to do a chomp: + // TODO: handle \r\n with libified solution. + line = strings.TrimRight(line, "\n") + + options.FileNames = append(options.FileNames, line) + } + + *pargi += 2 + }, + }, + { name: "--ofmt", arg: "{format}", From 18a9eaa37739b03e5fe39769d1667fbb63384a1e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 19 Nov 2023 23:53:53 -0500 Subject: [PATCH 082/456] Fix ragged-CSV auto-pad (#1428) --- pkg/input/record_reader_csv.go | 39 ++++++++------------ test/cases/io-multi/0045/expout | 3 +- test/cases/io-ragged-non-rfc-csv/0001/expout | 1 - 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index b2c2f1853..e7135e2fc 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -249,20 +249,22 @@ func (reader *RecordReaderCSV) getRecordBatch( ) errorChannel <- err return - } else { - i := int64(0) - n := lib.IntMin2(nh, nd) - for i = 0; i < n; i++ { - key := reader.header[i] - value := mlrval.FromDeferredType(csvRecord[i]) - _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) - if err != nil { - errorChannel <- err - return - } + } + + i := int64(0) + n := lib.IntMin2(nh, nd) + for i = 0; i < n; i++ { + key := reader.header[i] + value := mlrval.FromDeferredType(csvRecord[i]) + _, err := record.PutReferenceMaybeDedupe(key, value, 
dedupeFieldNames) + if err != nil { + errorChannel <- err + return } - if nh < nd { - // if header shorter than data: use 1-up itoa keys + } + if nh < nd { + // if header shorter than data: use 1-up itoa keys + for i = nh; i < nd; i++ { key := strconv.FormatInt(i+1, 10) value := mlrval.FromDeferredType(csvRecord[i]) _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) @@ -271,17 +273,8 @@ func (reader *RecordReaderCSV) getRecordBatch( return } } - if nh > nd { - // if header longer than data: use "" values - for i = nd; i < nh; i++ { - _, err := record.PutReferenceMaybeDedupe(reader.header[i], mlrval.VOID.Copy(), dedupeFieldNames) - if err != nil { - errorChannel <- err - return - } - } - } } + // if nh > nd: leave it short. This is a job for unsparsify. } context.UpdateForInputRecord() diff --git a/test/cases/io-multi/0045/expout b/test/cases/io-multi/0045/expout index d641fc4fe..7fc515260 100644 --- a/test/cases/io-multi/0045/expout +++ b/test/cases/io-multi/0045/expout @@ -6,8 +6,7 @@ }, { "a": 4, - "b": 5, - "c": "" + "b": 5 }, { "a": 6, diff --git a/test/cases/io-ragged-non-rfc-csv/0001/expout b/test/cases/io-ragged-non-rfc-csv/0001/expout index d83d53baa..46e0a667a 100644 --- a/test/cases/io-ragged-non-rfc-csv/0001/expout +++ b/test/cases/io-ragged-non-rfc-csv/0001/expout @@ -4,7 +4,6 @@ c 3 a 4 b 5 -c a 6 b 7 From 3a3595e404f9e2c1aa37ef20948ada318f5dffd8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 09:25:11 -0500 Subject: [PATCH 083/456] Bump golang.org/x/term from 0.14.0 to 0.15.0 (#1432) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.14.0 to 0.15.0. - [Commits](https://github.com/golang/term/compare/v0.14.0...v0.15.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index afc67de8d..561454200 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.14.0 - golang.org/x/term v0.14.0 + golang.org/x/sys v0.15.0 + golang.org/x/term v0.15.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index a584cff58..25fed86dd 100644 --- a/go.sum +++ b/go.sum @@ -40,10 +40,10 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= -golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= +golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From bae1daf847954dbc5e91b62afeb6de014df37149 Mon Sep 17 
00:00:00 2001 From: John Kerl Date: Sat, 2 Dec 2023 16:00:05 -0500 Subject: [PATCH 084/456] Absent variable on left side of boolean OR (`||`) expression makes it absent (#1434) * Absent-handling with short-circuiting operators `&&` and `||` * add a missing file * artifacts from make dev * type-errors * doc content * artifacts from make dev --- Makefile | 3 + docs/src/data/filenames.txt | 2 + docs/src/manpage.md | 3 +- docs/src/manpage.txt | 3 +- docs/src/online-help.md | 1 + docs/src/record-heterogeneity.md | 7 +- docs/src/reference-main-null-data.md | 45 +++++-- docs/src/reference-main-null-data.md.in | 12 +- man/manpage.txt | 3 +- man/mlr.1 | 5 +- pkg/dsl/cst/builtin_functions.go | 163 ++++++++++++++++-------- pkg/dsl/cst/leaves.go | 18 +++ pkg/terminals/help/entry.go | 75 +++++++++++ 13 files changed, 268 insertions(+), 72 deletions(-) create mode 100644 docs/src/data/filenames.txt diff --git a/Makefile b/Makefile index fb374cb91..fe27b8dd3 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,9 @@ build: @echo "Build complete. The Miller executable is ./mlr (or .\mlr.exe on Windows)." @echo "You can use 'make check' to run tests". +quiet: + @go build github.com/johnkerl/miller/cmd/mlr + # For interactive use, 'mlr regtest' offers more options and transparency. check: unit-test regression-test @echo "Tests complete. 
You can use 'make install' if you like, optionally preceded" diff --git a/docs/src/data/filenames.txt b/docs/src/data/filenames.txt new file mode 100644 index 000000000..466fa4515 --- /dev/null +++ b/docs/src/data/filenames.txt @@ -0,0 +1,2 @@ +data/a.csv +data/b.csv diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 3a6046919..a1cea93f1 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -178,6 +178,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3648,5 +3649,5 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index ef7569711..77ca6785f 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -157,6 +157,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/docs/src/online-help.md b/docs/src/online-help.md index 8318c4f0a..5bbee15a1 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -86,6 +86,7 @@ Other: mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index 5794fc49a..d02a52448 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -154,8 +154,7 @@ with 1) for too-long rows: }, { "a": 4, - "b": 5, - "c": "" + "b": 5 }, { "a": 7, @@ -455,7 +454,9 @@ Miller handles explicit header changes as just shown. If your CSV input contains
 a,b,c
 1,2,3
-4,5,
+
+a,b
+4,5
 
 a,b,c,4
 7,8,9,10
diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md
index 0de7c67c9..63bfffaa9 100644
--- a/docs/src/reference-main-null-data.md
+++ b/docs/src/reference-main-null-data.md
@@ -239,17 +239,44 @@ resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970
 
 ## Arithmetic rules
 
-If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar):
+If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for `+`, `&&`, and `||`. Notes:
+
+* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`.
+* The `&&` and `||` obey _short-circuiting semantics_. That is:
+  * `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
+  * `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls)
+* This means in particular that:
+  * `false && X` is false even if `X` is an error, a non-boolean type, etc.
+  * `true || X` is true even if `X` is an error, a non-boolean type, etc.
 
 
-mlr help type-arithmetic-info
+mlr help type-arithmetic-info-extended
 
-(+)        | 1          2.5       (empty)    (absent)   (error)   
-------     + ------     ------     ------     ------     ------    
-1          | 2          3.5        1          1          (error)   
-2.5        | 3.5        5          2.5        2.5        (error)   
-(empty)    | 1          2.5        (empty)    (absent)   (error)   
-(absent)   | 1          2.5        (absent)   (absent)   (error)   
-(error)    | (error)    (error)    (error)    (error)    (error)   
+(+)        | 1          2.5        true      (empty)    (absent)   (error)   
+------     + ------     ------     ------     ------     ------     ------    
+1          | 2          3.5        (error)    1          1          (error)   
+2.5        | 3.5        5          (error)    2.5        2.5        (error)   
+true       | (error)    (error)    (error)    (error)    (error)    (error)   
+(empty)    | 1          2.5        (error)    (empty)    (absent)   (error)   
+(absent)   | 1          2.5        (error)    (absent)   (absent)   (error)   
+(error)    | (error)    (error)    (error)    (error)    (error)    (error)   
+
+(&&)       | true       false      3         (empty)    (absent)   (error)   
+------     + ------     ------     ------     ------     ------     ------    
+true       | true       false      (error)    (error)    (absent)   (error)   
+false      | false      false      false      false      false      false     
+3          | (error)    (error)    (error)    (error)    (absent)   (error)   
+(empty)    | true       false      (error)    (error)    (absent)   (error)   
+(absent)   | true       false      (error)    (absent)   (absent)   (error)   
+(error)    | (error)    (error)    (error)    (error)    (error)    (error)   
+
+(||)       | true       false      3         (empty)    (absent)   (error)   
+------     + ------     ------     ------     ------     ------     ------    
+true       | true       true       true       true       true       true      
+false      | true       false      (error)    (error)    (absent)   (error)   
+3          | (error)    (error)    (error)    (error)    (absent)   (error)   
+(empty)    | true       false      (error)    (error)    (absent)   (error)   
+(absent)   | true       false      (error)    (absent)   (absent)   (error)   
+(error)    | (error)    (error)    (error)    (error)    (error)    (error)   
 
diff --git a/docs/src/reference-main-null-data.md.in b/docs/src/reference-main-null-data.md.in index 381a46522..087edaa78 100644 --- a/docs/src/reference-main-null-data.md.in +++ b/docs/src/reference-main-null-data.md.in @@ -119,8 +119,16 @@ GENMD-EOF ## Arithmetic rules -If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar): +If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for `+`, `&&`, and `||`. Notes: + +* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`. +* The `&&` and `||` obey _short-circuiting semantics_. That is: + * `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls) + * `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls) +* This means in particular that: + * `false && X` is false even if `X` is an error, a non-boolean type, etc. + * `true || X` is true even if `X` is an error, a non-boolean type, etc. 
GENMD-RUN-COMMAND -mlr help type-arithmetic-info +mlr help type-arithmetic-info-extended GENMD-EOF diff --git a/man/manpage.txt b/man/manpage.txt index ef7569711..77ca6785f 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -157,6 +157,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index c61b2cb67..6bf4d493d 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-11-11 +.\" Date: 2023-12-02 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-11-11" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-12-02" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -192,6 +192,7 @@ Other: mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index 26f1dacce..397e7869c 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -32,7 +32,7 @@ func (root *RootNode) BuildBuiltinFunctionCallsiteNode( if builtinFunctionInfo.hasMultipleArities { // E.g. 
"+" and "-" return root.BuildMultipleArityFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.zaryFunc != nil { - return root.BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo) + return BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.unaryFunc != nil { return root.BuildUnaryFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.unaryFuncWithContext != nil { @@ -89,7 +89,7 @@ type ZaryFunctionCallsiteNode struct { zaryFunc bifs.ZaryFunc } -func (root *RootNode) BuildZaryFunctionCallsiteNode( +func BuildZaryFunctionCallsiteNode( astNode *dsl.ASTNode, builtinFunctionInfo *BuiltinFunctionInfo, ) (IEvaluable, error) { @@ -228,25 +228,25 @@ func (root *RootNode) BuildBinaryFunctionCallsiteNode( // Special short-circuiting cases if builtinFunctionInfo.name == "&&" { - return root.BuildLogicalANDOperatorNode( + return BuildLogicalANDOperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "||" { - return root.BuildLogicalOROperatorNode( + return BuildLogicalOROperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "??" { - return root.BuildAbsentCoalesceOperatorNode( + return BuildAbsentCoalesceOperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "???" 
{ - return root.BuildEmptyCoalesceOperatorNode( + return BuildEmptyCoalesceOperatorNode( evaluable1, evaluable2, ), nil @@ -557,7 +557,7 @@ func (root *RootNode) BuildTernaryFunctionCallsiteNode( // Special short-circuiting case if builtinFunctionInfo.name == "?:" { - return root.BuildStandardTernaryOperatorNode( + return BuildStandardTernaryOperatorNode( evaluable1, evaluable2, evaluable3, @@ -703,7 +703,7 @@ type LogicalANDOperatorNode struct { a, b IEvaluable } -func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode { +func BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode { return &LogicalANDOperatorNode{ a: a, b: b, @@ -712,53 +712,74 @@ func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOp // This is different from most of the evaluator functions in that it does // short-circuiting: since is logical AND, the second argument is not evaluated -// if the first argument is false. +// if the first argument is false. Thus we cannot use disposition matrices. 
// -// Disposition matrix: -// -// { -//a b ERROR ABSENT EMPTY STRING INT FLOAT BOOL -//ERROR : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//ABSENT : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, absent}, -//EMPTY : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//STRING : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//INT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//FLOAT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//BOOL : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, a&&b}, -// } -// -// which without the all-error rows/columns reduces to -// -// { -//a b ABSENT BOOL -//ABSENT : {absent, absent}, -//BOOL : {absent, a&&b}, -// } -// -// So: -// * Evaluate a -// * If a is not absent or bool: return error -// * If a is absent: return absent -// * If a is false: return a -// * Now a is boolean true -// * Evaluate b -// * If b is not absent or bool: return error -// * If b is absent: return absent -// * Return a && b +// * evaluate a +// * if a is error: +// * return a +// * elif a is absent: +// * Evaluate b +// * if b is error: return error +// * elif b is empty or absent: return absent +// * elif b is empty or absent: return absent +// * else: return b +// * elif a is empty: +// * evaluate b +// * if b is error: return error +// * elif b is empty: return empty +// * elif b is absent: return absent +// * else: return b +// * else: +// * return the BIF (using its disposition matrix) + +// mlr help type-arithmetic-info-extended | lumin -c red .error. | lumin -c blue .absent. | lumin -c green .empty. 
func (node *LogicalANDOperatorNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { aout := node.a.Evaluate(state) atype := aout.Type() - if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.FromNotNamedTypeError("&&", aout, "absent or boolean") + + if atype == mlrval.MT_ERROR { + return aout } + if atype == mlrval.MT_ABSENT { - return mlrval.ABSENT + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + return bout } + + if atype == mlrval.MT_VOID { + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + if btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + return bout + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if aout.IsFalse() { - // This means false && bogus type evaluates to true, which is sad but + // This means false && bogus type evaluates to false, which is sad but // which we MUST do in order to not violate the short-circuiting // property. We would have to evaluate b to know if it were error or // not. 
@@ -782,7 +803,7 @@ type LogicalOROperatorNode struct { a, b IEvaluable } -func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode { +func BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode { return &LogicalOROperatorNode{ a: a, b: b, @@ -792,19 +813,54 @@ func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROper // This is different from most of the evaluator functions in that it does // short-circuiting: since is logical OR, the second argument is not evaluated // if the first argument is false. -// -// See the disposition-matrix discussion for LogicalANDOperator. + func (node *LogicalOROperatorNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { aout := node.a.Evaluate(state) atype := aout.Type() - if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.FromNotNamedTypeError("||", aout, "absent or boolean") + + if atype == mlrval.MT_ERROR { + return aout } + if atype == mlrval.MT_ABSENT { - return mlrval.ABSENT + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + return bout } + + if atype == mlrval.MT_VOID { + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + if btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + return bout + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if aout.IsTrue() { // This means true || bogus type 
evaluates to true, which is sad but // which we MUST do in order to not violate the short-circuiting @@ -821,6 +877,7 @@ func (node *LogicalOROperatorNode) Evaluate( if btype == mlrval.MT_ABSENT { return mlrval.ABSENT } + return bifs.BIF_logical_OR(aout, bout) } @@ -829,7 +886,7 @@ func (node *LogicalOROperatorNode) Evaluate( // current record has no field $foo. type AbsentCoalesceOperatorNode struct{ a, b IEvaluable } -func (root *RootNode) BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode { +func BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode { return &AbsentCoalesceOperatorNode{a: a, b: b} } @@ -852,7 +909,7 @@ func (node *AbsentCoalesceOperatorNode) Evaluate( // when the current record has no field $foo, or when $foo is empty.. type EmptyCoalesceOperatorNode struct{ a, b IEvaluable } -func (root *RootNode) BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode { +func BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode { return &EmptyCoalesceOperatorNode{a: a, b: b} } @@ -874,7 +931,7 @@ func (node *EmptyCoalesceOperatorNode) Evaluate( // ================================================================ type StandardTernaryOperatorNode struct{ a, b, c IEvaluable } -func (root *RootNode) BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode { +func BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode { return &StandardTernaryOperatorNode{a: a, b: b, c: c} } func (node *StandardTernaryOperatorNode) Evaluate( diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go index 382525e52..08b3200a9 100644 --- a/pkg/dsl/cst/leaves.go +++ b/pkg/dsl/cst/leaves.go @@ -375,6 +375,24 @@ func (node *NullLiteralNode) Evaluate( return node.literal } +// ---------------------------------------------------------------- +// Used for testing purposes; not used by the main DSL. 
+ +type MlrvalLiteralNode struct { + literal *mlrval.Mlrval +} + +func BuildMlrvalLiteralNode(literal *mlrval.Mlrval) *MlrvalLiteralNode { + return &MlrvalLiteralNode{ + literal: literal.Copy(), + } +} +func (node *MlrvalLiteralNode) Evaluate( + state *runtime.State, +) *mlrval.Mlrval { + return node.literal +} + // ================================================================ func (root *RootNode) BuildContextVariableNode(astNode *dsl.ASTNode) (IEvaluable, error) { lib.InternalCodingErrorIf(astNode.Token == nil) diff --git a/pkg/terminals/help/entry.go b/pkg/terminals/help/entry.go index 3b25b58b5..a9148c385 100644 --- a/pkg/terminals/help/entry.go +++ b/pkg/terminals/help/entry.go @@ -16,6 +16,7 @@ import ( "github.com/johnkerl/miller/pkg/dsl/cst" "github.com/johnkerl/miller/pkg/lib" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" "github.com/johnkerl/miller/pkg/transformers" ) @@ -114,6 +115,7 @@ func init() { {name: "mlrrc", zaryHandlerFunc: helpMlrrc}, {name: "output-colorization", zaryHandlerFunc: helpOutputColorization}, {name: "type-arithmetic-info", zaryHandlerFunc: helpTypeArithmeticInfo}, + {name: "type-arithmetic-info-extended", zaryHandlerFunc: helpTypeArithmeticInfoExtended}, }, }, { @@ -483,9 +485,18 @@ func helpOutputColorization() { // ---------------------------------------------------------------- func helpTypeArithmeticInfo() { + helpTypeArithmeticInfoAux(false) +} + +func helpTypeArithmeticInfoExtended() { + helpTypeArithmeticInfoAux(true) +} + +func helpTypeArithmeticInfoAux(extended bool) { mlrvals := []*mlrval.Mlrval{ mlrval.FromInt(1), mlrval.FromFloat(2.5), + mlrval.FromBool(true), mlrval.VOID, mlrval.ABSENT, mlrval.FromAnonymousError(), @@ -524,6 +535,70 @@ func helpTypeArithmeticInfo() { fmt.Println() } + if !extended { + return + } + + mlrvals = []*mlrval.Mlrval{ + mlrval.FromBool(true), + mlrval.FromBool(false), + mlrval.FromInt(3), + mlrval.VOID, + mlrval.ABSENT, + mlrval.FromAnonymousError(), + } 
+ + n = len(mlrvals) + + state := runtime.NewEmptyState(cli.DefaultOptions(), false) + + descs := []string{"(&&)", "(||)"} + for k, desc := range descs { + + fmt.Println() + for i := -2; i < n; i++ { + if i == -2 { + fmt.Printf("%-10s |", desc) + } else if i == -1 { + fmt.Printf("%-10s +", "------") + } else if mlrvals[i].IsVoid() { + fmt.Printf("%-10s |", "(empty)") + } else { + fmt.Printf("%-10s |", mlrvals[i].String()) + } + for j := 0; j < n; j++ { + if i == -2 { + if mlrvals[j].IsVoid() { + fmt.Printf("%-10s", "(empty)") + } else { + fmt.Printf(" %-10s", mlrvals[j].String()) + } + } else if i == -1 { + fmt.Printf(" %-10s", "------") + } else { + + inode := cst.BuildMlrvalLiteralNode(mlrvals[i]) + jnode := cst.BuildMlrvalLiteralNode(mlrvals[j]) + + var binary_node cst.IEvaluable + if k == 0 { + binary_node = cst.BuildLogicalANDOperatorNode(inode, jnode) + } else { + binary_node = cst.BuildLogicalOROperatorNode(inode, jnode) + } + + output := binary_node.Evaluate(state) + + if output.IsVoid() { + fmt.Printf(" %-10s", "(empty)") + } else { + fmt.Printf(" %-10s", output.String()) + } + } + } + fmt.Println() + } + } } // ---------------------------------------------------------------- From b1e2438b2820c75b29282f68d6d415ced34e6a43 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 07:56:29 -0500 Subject: [PATCH 085/456] Bump actions/setup-go from 4.1.0 to 5.0.0 (#1436) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4.1.0 to 5.0.0. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/93397bea11091df50f3d7e59dc26a7711a8bcfbe...0c52d547c9bc32b1aa3301fd7a9cb496313a4491) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 2cea8a5b1..3ac41af4d 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 - name: Set up Go - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 with: go-version: 1.18 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 12d80d58c..db3d59651 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 with: go-version: ${{ env.GO_VERSION }} id: go From 8750d0e3c4b0c3980e90b50990b49d8810feca51 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 11 Dec 2023 17:38:13 -0500 Subject: [PATCH 086/456] Update to Go 1.19 (#1441) --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 561454200..d6288da32 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ module github.com/johnkerl/miller // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.18 +go 1.19 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb From f1bc1dace9b6e96e64183f4394d6b1a98c57ddd4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 13 Dec 2023 17:58:07 -0500 Subject: [PATCH 087/456] neaten --- scripts/mcountlines | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/mcountlines b/scripts/mcountlines index 7a9cfd90f..b43d8ebe7 100755 --- 
a/scripts/mcountlines +++ b/scripts/mcountlines @@ -1,14 +1,12 @@ #!/bin/bash wc -l \ - $(find -name \*.go | grep -v pkg/parsing) \ - pkg/parsing/mlr.bnf \ + $(find pkg -name '*.go' | grep -v pkg/parsing) pkg/parsing/mlr.bnf \ | sort -n echo wc -c \ - $(find -name \*.go | grep -v pkg/parsing) \ - pkg/parsing/mlr.bnf \ + $(find pkg -name '*.go' | grep -v pkg/parsing) pkg/parsing/mlr.bnf \ | sort -n \ | tail -n 5 From 9caa24d7f1c175983166dcaad2a06c803b182890 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 13 Dec 2023 18:43:00 -0500 Subject: [PATCH 088/456] miller 6.10.0 (#1442) * neaten * miller 6.10.0 --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/manpage.md | 4 +-- docs/src/manpage.txt | 4 +-- docs/src/reference-verbs.md | 38 +++++++++++++------------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 4 +-- man/mlr.1 | 6 ++--- miller.spec | 5 +++- pkg/version/version.go | 2 +- scripts/mcountlines | 6 ++--- 10 files changed, 60 insertions(+), 59 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index a1cea93f1..de7ce4b6f 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -50,7 +50,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.9.0. + manpage documents mlr 6.10.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3649,5 +3649,5 @@ MILLER(1) MILLER(1) - 2023-12-02 MILLER(1) + 2023-12-13 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 77ca6785f..e7e3d3582 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.9.0. + manpage documents mlr 6.10.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3628,4 +3628,4 @@ MILLER(1) MILLER(1) - 2023-12-02 MILLER(1) + 2023-12-13 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 106ad4bf1..89bbc2b71 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index 77ca6785f..e7e3d3582 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -29,7 +29,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.9.0.
+       manpage documents mlr 6.10.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -3628,4 +3628,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-02                         MILLER(1)
+                                  2023-12-13                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 6bf4d493d..4d5ee4f5c 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-02
+.\"      Date: 2023-12-13
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-02" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the
 array, then Miller's natural data structure is the insertion-ordered hash map.
 This encompasses a variety of data formats, including but not limited to the
 familiar CSV, TSV, and JSON.  (Miller can handle positionally-indexed data as
-a special case.) This manpage documents mlr 6.9.0.
+a special case.) This manpage documents mlr 6.10.0.
 .SH "EXAMPLES"
 .sp
 
diff --git a/miller.spec b/miller.spec
index ba30cd94d..413f6cdce 100644
--- a/miller.spec
+++ b/miller.spec
@@ -1,6 +1,6 @@
 Summary: Name-indexed data processing tool
 Name: miller
-Version: 6.9.0
+Version: 6.10.0
 Release: 1%{?dist}
 License: BSD
 Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz
@@ -36,6 +36,9 @@ make install
 %{_mandir}/man1/mlr.1*
 
 %changelog
+* Wed Dec 13 2023 John Kerl  - 6.10.0-1
+- 6.10.0 release
+
 * Thu Aug 31 2023 John Kerl  - 6.9.0-1
 - 6.9.0 release
 
diff --git a/pkg/version/version.go b/pkg/version/version.go
index 47daeaa6d..1d4cd9cea 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.9.0"
+var STRING string = "6.10.0"
diff --git a/scripts/mcountlines b/scripts/mcountlines
index 7a9cfd90f..b43d8ebe7 100755
--- a/scripts/mcountlines
+++ b/scripts/mcountlines
@@ -1,14 +1,12 @@
 #!/bin/bash
 
 wc -l \
-  $(find -name \*.go | grep -v pkg/parsing) \
-  pkg/parsing/mlr.bnf \
+  $(find pkg -name '*.go' | grep -v pkg/parsing) pkg/parsing/mlr.bnf \
 | sort -n
 
 echo
 wc -c \
-  $(find -name \*.go | grep -v pkg/parsing) \
-  pkg/parsing/mlr.bnf \
+  $(find pkg -name '*.go' | grep -v pkg/parsing) pkg/parsing/mlr.bnf \
 | sort -n \
 | tail -n 5
 

From fbf320d88a9da51fced367569fddef452be59c92 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 13 Dec 2023 18:46:16 -0500
Subject: [PATCH 089/456] update path in create_release_tarball

---
 create-release-tarball | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/create-release-tarball b/create-release-tarball
index bfe41caff..d4e87529d 100755
--- a/create-release-tarball
+++ b/create-release-tarball
@@ -91,7 +91,7 @@ $tar \
   ./go.mod \
   ./go.sum \
   ./cmd \
-  ./internal \
+  ./pkg \
   ./regression_test.go \
   ./man \
   ./test \

From 34abb952a442a6f3d7b0d707f8558233038d162c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 13 Dec 2023 19:00:57 -0500
Subject: [PATCH 090/456] update go 1.18 -> 1.19 in more spots

---
 .github/workflows/go.yml      | 2 +-
 .github/workflows/release.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 3ac41af4d..4bc73d4a4 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -20,7 +20,7 @@ jobs:
     - name: Set up Go
       uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491
       with:
-        go-version: 1.18
+        go-version: 1.19
 
     - name: Build
       run: make build
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index db3d59651..3e69dee50 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -6,7 +6,7 @@ on:
   workflow_dispatch:
 
 env:
-  GO_VERSION: 1.18.10
+  GO_VERSION: 1.19.13
 
 jobs:
   release:

From c680f3316e25cc7517e3f12665b1de990381b79d Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 13 Dec 2023 19:04:48 -0500
Subject: [PATCH 091/456] add doc note re snag found on last commit

---
 docs/src/how-to-release.md    | 6 ++++++
 docs/src/how-to-release.md.in | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md
index d8675cdbc..2833f1417 100644
--- a/docs/src/how-to-release.md
+++ b/docs/src/how-to-release.md
@@ -28,6 +28,12 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
     * The ordering in this makefile rule is important: the first build creates `mlr`; the second runs `mlr` to create `manpage.txt`; the third includes `manpage.txt` into one of its outputs.
     * Commit and push.
 
+* If the Go version is being updated, edit all three of:
+
+  * `go.mod`
+  * `.github/workflows/go.yml`
+  * `.github/workflows/release.yml`
+
 * Create the release tarball:
 
     * `make release_tarball`
diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in
index 2754a2bfd..fac0248c1 100644
--- a/docs/src/how-to-release.md.in
+++ b/docs/src/how-to-release.md.in
@@ -12,6 +12,12 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
     * The ordering in this makefile rule is important: the first build creates `mlr`; the second runs `mlr` to create `manpage.txt`; the third includes `manpage.txt` into one of its outputs.
     * Commit and push.
 
+* If the Go version is being updated, edit all three of:
+
+  * `go.mod`
+  * `.github/workflows/go.yml`
+  * `.github/workflows/release.yml`
+
 * Create the release tarball:
 
     * `make release_tarball`

From 856131f7a27913bcde63108f1791ab648a0f4e5c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 13 Dec 2023 19:31:59 -0500
Subject: [PATCH 092/456] 6.10.0-dev

---
 pkg/version/version.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/version/version.go b/pkg/version/version.go
index 1d4cd9cea..0e6389aae 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.10.0"
+var STRING string = "6.10.0-dev"

From 4e60ef58ae84cae97c930469f18b7fa8d7cfec87 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 13 Dec 2023 20:51:37 -0500
Subject: [PATCH 093/456] release docs including 6.9.0 and 6.10.0

---
 docs/src/release-docs.md    | 2 ++
 docs/src/release-docs.md.in | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md
index 4b9380d72..479be3f38 100644
--- a/docs/src/release-docs.md
+++ b/docs/src/release-docs.md
@@ -24,6 +24,8 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho
 | Release | Docs                                                                | Release notes |
 |---------|---------------------------------------------------------------------|---------------|
 main      | [main branch](https://miller.readthedocs.io/en/main)                | N/A |
+6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
+6.9.0     | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0)              | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) |
 6.8.0     | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0)              | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) |
 6.7.0     | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0)              | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) |
 6.6.0     | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0)              | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) |
diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in
index 41bd54025..07dc91719 100644
--- a/docs/src/release-docs.md.in
+++ b/docs/src/release-docs.md.in
@@ -8,6 +8,8 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho
 | Release | Docs                                                                | Release notes |
 |---------|---------------------------------------------------------------------|---------------|
 main      | [main branch](https://miller.readthedocs.io/en/main)                | N/A |
+6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
+6.9.0     | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0)              | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) |
 6.8.0     | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0)              | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) |
 6.7.0     | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0)              | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) |
 6.6.0     | [Miller 6.6.0](https://miller.readthedocs.io/en/6.6.0)              | [Bugfixes and unspace verb](https://github.com/johnkerl/miller/releases/tag/v6.6.0) |

From b5dbd7a7518671aba4682ba586d6be756a827728 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 15 Dec 2023 09:42:47 -0500
Subject: [PATCH 094/456] Bump actions/upload-artifact from 3.1.3 to 4.0.0
 (#1445)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 4bc73d4a4..b318f6bb9 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
+    - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 1ae670fd4a2bd740324408f675be347844d33b30 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 17 Dec 2023 12:46:28 -0500
Subject: [PATCH 095/456] Rename internal regex functions (#1446)

---
 docs/src/manpage.md                         |  37 +-
 docs/src/manpage.txt                        |  37 +-
 docs/src/reference-dsl-builtin-functions.md |  18 +-
 man/manpage.txt                             |  37 +-
 man/mlr.1                                   |  51 ++-
 pkg/bifs/regex.go                           |   6 +-
 pkg/dsl/cst/leaves.go                       |   2 +-
 pkg/input/record_reader.go                  |   4 +-
 pkg/input/record_reader_xtab.go             |   4 +-
 pkg/lib/regex.go                            | 437 +++++++++++---------
 pkg/lib/regex_test.go                       |   8 +-
 pkg/runtime/state.go                        |   6 +-
 pkg/transformers/merge_fields.go            |   2 +-
 pkg/transformers/rename.go                  |   4 +-
 14 files changed, 395 insertions(+), 258 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index de7ce4b6f..19cb2de07 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -220,18 +220,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2650,6 +2651,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3649,5 +3660,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index e7e3d3582..7f3a122af 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md
index 8c3b49640..d391e8341 100644
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@@ -75,7 +75,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [match](#match), [matchx](#matchx), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1296,6 +1296,22 @@ lstrip (class=string #args=1) Strip leading whitespace from string. +### match +
+match  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+
+ + +### matchx +
+matchx  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+
+ + ### regextract
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/man/manpage.txt b/man/manpage.txt
index e7e3d3582..7f3a122af 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 4d5ee4f5c..4f0644ed7 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-13
+.\"      Date: 2023-12-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -246,18 +246,19 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null
 is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
 kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
 localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-strfntime_local strftime strftime_local string strip strlen strpntime
-strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+stddev strfntime strfntime_local strftime strftime_local string strip strlen
+strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+|| ~
 .fi
 .if n \{\
 .RE
@@ -3938,6 +3939,28 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "match"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
+.SS "matchx"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
 .SS "max"
 .if n \{\
 .RS 0
diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go
index 52cab9ac5..74c0840f6 100644
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@@ -81,7 +81,7 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexSub(input, sregex, replacement)
+	stringOutput := lib.RegexStringSub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -111,7 +111,7 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexGsub(input, sregex, replacement)
+	stringOutput := lib.RegexStringGsub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -129,7 +129,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
 		return mlrval.FromNotStringError("=~", input2), nil
 	}
 
-	boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue())
+	boolOutput, captures := lib.RegexStringMatchWithCaptures(input1string, input2.AcquireStringValue())
 	return mlrval.FromBool(boolOutput), captures
 }
 
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index 08b3200a9..c0b4d8875 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -266,7 +266,7 @@ func (root *RootNode) BuildStringLiteralNode(literal string) IEvaluable {
 	// RegexLiteralNode.  See also https://github.com/johnkerl/miller/issues/297.
 	literal = lib.UnbackslashStringLiteral(literal)
 
-	hasCaptures, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(literal)
+	hasCaptures, replacementCaptureMatrix := lib.ReplacementHasCaptures(literal)
 	if !hasCaptures {
 		return &StringLiteralNode{
 			literal: mlrval.FromString(literal),
diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go
index 280201936..096060e62 100644
--- a/pkg/input/record_reader.go
+++ b/pkg/input/record_reader.go
@@ -158,7 +158,7 @@ type tIPSRegexSplitter struct {
 }
 
 func (s *tIPSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ipsRegex, input, 2)
+	return lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 }
 
 // IFieldSplitter splits a string into pieces, e.g. for IFS.
@@ -193,5 +193,5 @@ type tIFSRegexSplitter struct {
 }
 
 func (s *tIFSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ifsRegex, input, -1)
+	return lib.RegexCompiledSplitString(s.ifsRegex, input, -1)
 }
diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go
index 0cfc74b25..74d8dac41 100644
--- a/pkg/input/record_reader_xtab.go
+++ b/pkg/input/record_reader_xtab.go
@@ -304,7 +304,7 @@ type tXTABIPSSplitter struct {
 // which we need to produce just a pair of items -- a key and a value -- delimited by one or more
 // IPS. For exaemple, with IPS being a space, in 'abc     123' we need to get key 'abc' and value
 // '123'; for 'abc    123 456' we need key 'abc' and value '123 456'.  It's super-elegant to simply
-// regex-split the line like 'kv = lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)' --
+// regex-split the line like 'kv = lib.RegexCompiledSplitString(reader.readerOptions.IPSRegex, line, 2)' --
 // however, that's 3x slower than the current implementation. It turns out regexes are great
 // but we should use them only when we must, since they are expensive.
 func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
@@ -358,7 +358,7 @@ type tXTABIPSRegexSplitter struct {
 }
 
 func (s *tXTABIPSRegexSplitter) Split(input string) (key, value string, err error) {
-	kv := lib.RegexSplitString(s.ipsRegex, input, 2)
+	kv := lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 	if len(kv) == 0 {
 		return "", "", fmt.Errorf("internal coding error in XTAB reader")
 	} else if len(kv) == 1 {
diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go
index 3bab04036..cabbc1510 100644
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@@ -1,5 +1,5 @@
 // ================================================================
-// Support for regexes in Miller.
+// Support for regular expressions in Miller.
 //
 // * By and large we use the Go library.
 //
@@ -13,17 +13,24 @@
 //       $y = "\2:\1";
 //     }
 //   where the '=~' sets the captures and the "\2:\1" uses them.  (Note that
-//   https://github.com/johnkerl/miller/issues/388 has a better suggestion
-//   which would make the captures explicit as variables, rather than implicit
-//   within CST state -- regardless, the current syntax will still be supported
-//   for backward compatibility and so is here to stay.) Here we make use of Go
-//   regexp-library functions to write to, and then later interpolate from, a
-//   captures array which is stored within CST state. (See the `runtime.State`
-//   object.)
+//   https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the
+//   captures explicit as variables, rather than implicit within CST state: this is implemented by
+//   the `match` and `matchx` DSL functions.  Regardless, the `=~` syntax will still be supported
+//   for backward compatibility and so is here to stay.) Here we make use of Go regexp-library
+//   functions to write to, and then later interpolate from, a captures array which is stored within
+//   CST state. (See the `runtime.State` object.)
 //
 // * "\0" is for a full match; "\1" .. "\9" are for submatch cqptures. E.g.
 //   if $x is "foobarbaz" and the regex is "foo(.)(..)baz", then "\0" is
 //   "foobarbaz", "\1" is "b", "\2" is "ar", and "\3".."\9" are "".
+//
+// * Naming:
+//
+//   o "regexp" and "Regexp" are used for the Go library and its data structure, respectively;
+//
+//   o "regex" is used for regular-expression strings following Miller's idiosyncratic syntax and
+//     semantics as described above.
+//
 // ================================================================
 
 package lib
@@ -34,6 +41,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"sync"
 )
 
 // captureDetector is used to see if a string literal interpolates previous
@@ -44,20 +52,54 @@ var captureDetector = regexp.MustCompile(`\\[0-9]`)
 // "\2:\1" so they don't need to be recomputed on every record.
 var captureSplitter = regexp.MustCompile(`(\\[0-9])`)
 
-// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax
-// which predate the port of Miller from C to Go.  Miller regexes use a final
-// 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".
+// See regexpCompileCached
+var regexpCache map[string]*regexp.Regexp
+
+const cacheMaxSize = 1000
+
+var cacheMutex sync.Mutex
+
+// regexpCompileCached keeps a cache of compiled regexes, so that the caller has the flexibility to
+// only pass in strings while getting the benefits of compilation avoidance.
 //
-// (See also mlr.bnf where we specify which things can be backslash-escaped
-// without a syntax error at parse time.)
+// Regarding cache size: in nominal use, regexp strings are within Miller DSL code statements, and
+// there will be a handful. These will all get re-used after their first application, and the cache
+// will remain bounded by the size of the user's DSL code. However, it is possible to have regex
+// strings contained within Miller record-field data.
 //
-// * If the regex_string is of the form a.*b, compiles it case-sensisitively.
-// * If the regex_string is of the form "a.*b", compiles a.*b case-sensisitively.
+// We could solve this by using an LRU cache. However, for simplicity, we limit the number of
+// cached compiles, and for any extras that appear during record processing, we simply recompile
+// each time.
+func regexpCompileCached(s string) (*regexp.Regexp, error) {
+	cacheMutex.Lock() // guards reads as well as writes of regexpCache
+	defer cacheMutex.Unlock()
+	if r, ok := regexpCache[s]; ok { // lookup on a nil map is safe
+		return r, nil
+	}
+	r, err := regexp.Compile(s)
+	if err == nil && len(regexpCache) < cacheMaxSize {
+		if regexpCache == nil {
+			regexpCache = make(map[string]*regexp.Regexp)
+		}
+		regexpCache[s] = r // beyond cacheMaxSize entries we recompile each time
+	}
+	return r, err
+}
+
+// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the
+// port of Miller from C to Go.  Miller regexes use a final 'i' to indicate case-insensitivity; Go
+// regexes use an initial "(?i)".
+//
+// (See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error
+// at parse time.)
+//
+// * If the regex_string is of the form a.*b, compiles it case-sensitively.
+// * If the regex_string is of the form "a.*b", compiles a.*b case-sensitively.
 // * If the regex_string is of the form "a.*b"i, compiles a.*b case-insensitively.
 func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	n := len(regexString)
 	if n < 2 {
-		return regexp.Compile(regexString)
+		return regexpCompileCached(regexString)
 	}
 
 	// TODO: rethink this. This will strip out things people have entered, e.g. "\"...\"".
@@ -68,20 +110,20 @@ func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	// literals) and from verbs (like cut -r or having-fields).
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 
-	return regexp.Compile(regexString)
+	return regexpCompileCached(regexString)
 }
 
 // CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to
@@ -110,7 +152,7 @@ func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp {
 // In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
 // on "," to ["a", "b", "c" and "a" splits to ["a"], both of which are fine --
 // but "" splits to [""] when I wish it were []. This function does the latter.
-func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string {
 	if input == "" {
 		return make([]string, 0)
 	} else {
@@ -118,193 +160,42 @@ func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
 	}
 }
 
-// MakeEmptyRegexCaptures is for initial CST state at the start of executing
-// the DSL expression for the current record.  Even if '$x =~ "(..)_(...)" set
-// "\1" and "\2" on the previous record, at start of processing for the current
-// record we need to start with a clean slate.
-func MakeEmptyRegexCaptures() []string {
-	return nil
-}
-
-// RegexReplacementHasCaptures is used by the CST builder to see if
-// string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it
-// needs to retain the compiled offsets-matrix information.
-func RegexReplacementHasCaptures(
-	replacement string,
-) (
-	hasCaptures bool,
-	matrix [][]int,
-) {
-	if captureDetector.MatchString(replacement) {
-		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
-	} else {
-		return false, nil
-	}
-}
-
-// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
-// state and may be used by a DSL statement after the =~. For example, in
-//
-//	sub($a, "(..)_(...)", "\1:\2")
-//
-// the replacement string is an argument to sub and therefore the captures are
-// confined to the implementation of the sub function.  Similarly for gsub. But
-// for the match operator, people can do
-//
-//	if ($x =~ "(..)_(...)") {
-//	  ... other lines of code ...
-//	  $y = "\2:\1"
-//	}
-//
-// and the =~ callsite doesn't know if captures will be used or not. So,
-// RegexMatches always returns the captures array. It is stored within the CST
-// state.
-func RegexMatches(
-	input string,
-	sregex string,
-) (
-	matches bool,
-	capturesOneUp []string,
-) {
-	regex := CompileMillerRegexOrDie(sregex)
-	return RegexMatchesCompiled(input, regex)
-}
-
-// RegexMatchesCompiled is the implementation for the =~ operator.  Without
-// Miller-style regex captures this would a simple one-line
-// regex.MatchString(input). However, we return the captures array for the
-// benefit of subsequent references to "\0".."\9".
-func RegexMatchesCompiled(
-	input string,
-	regex *regexp.Regexp,
-) (bool, []string) {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
-	if matrix == nil || len(matrix) == 0 {
-		// Set all captures to ""
-		return false, make([]string, 10)
-	}
-
-	// "\0" .. "\9"
-	captures := make([]string, 10)
-
-	// If there are multiple matches -- e.g. input is
-	//
-	//   "...ab_cde...fg_hij..."
-	//
-	// with regex
-	//
-	//   "(..)_(...)"
-	//
-	// -- then we only consider the first match: boolean return value is true
-	// (the input string matched the regex), and the captures array will map
-	// "\1" to "ab" and "\2" to "cde".
-	row := matrix[0]
-	n := len(row)
-
-	// Example return value from FindAllSubmatchIndex with input
-	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
-	//
-	// Matrix is [][]int{
-	//   []int{3, 9, 3, 5, 6, 9},
-	//   []int{12, 18, 12, 14, 15, 18},
-	// }
-	//
-	// As noted above we look at only the first row.
-	//
-	// * 3-9 is for the entire match "ab_cde"
-	// * 3-5 is for the first capture "ab"
-	// * 6-9 is for the second capture "cde"
-
-	di := 0
-	for si := 0; si < n && di <= 9; si += 2 {
-		start := row[si]
-		end := row[si+1]
-		if start >= 0 && end >= 0 {
-			captures[di] = input[start:end]
-		}
-		di += 1
-	}
-
-	return true, captures
-}
-
-// InterpolateCaptures example:
-//   - Input $x is "ab_cde"
-//   - DSL expression
-//     if ($x =~ "(..)_(...)") {
-//     ... other lines of code ...
-//     $y = "\2:\1";
-//     }
-//   - InterpolateCaptures is used on the evaluation of "\2:\1"
-//   - replacementString is "\2:\1"
-//   - replacementMatrix contains precomputed/cached offsets for the "\2" and
-//     "\1" substrings within "\2:\1"
-//   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
-//     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
-func InterpolateCaptures(
-	replacementString string,
-	replacementMatrix [][]int,
-	captures []string,
-) string {
-	if replacementMatrix == nil || captures == nil {
-		return replacementString
-	}
-	var buffer bytes.Buffer
-
-	nonMatchStartIndex := 0
-
-	for _, row := range replacementMatrix {
-		start := row[0]
-		buffer.WriteString(replacementString[nonMatchStartIndex:row[0]])
-
-		// Map "\0".."\9" to integer index 0..9
-		index := replacementString[start+1] - '0'
-		buffer.WriteString(captures[index])
-
-		nonMatchStartIndex = row[1]
-	}
-
-	buffer.WriteString(replacementString[nonMatchStartIndex:])
-
-	return buffer.String()
-}
-
-// RegexSub implements the sub DSL function.
-func RegexSub(
+// RegexStringSub implements the sub DSL function.
+func RegexStringSub(
 	input string,
 	sregex string,
 	replacement string,
 ) string {
 	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return RegexSubCompiled(input, regex, replacement, replacementCaptureMatrix)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return RegexCompiledSub(input, regex, replacement, replacementCaptureMatrix)
 }
 
-// RegexSubCompiled is the same as RegexSub but with compiled regex and
+// RegexCompiledSub is the same as RegexStringSub but with compiled regex and
 // replacement strings.
-func RegexSubCompiled(
+func RegexCompiledSub(
 	input string,
 	regex *regexp.Regexp,
 	replacement string,
 	replacementCaptureMatrix [][]int,
 ) string {
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, true)
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, true)
 }
 
-// RegexGsub implements the gsub DSL function.
-func RegexGsub(
+// RegexStringGsub implements the `gsub` DSL function.
+func RegexStringGsub(
 	input string,
 	sregex string,
 	replacement string,
 ) string {
 	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, false)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, false)
 }
 
-// regexSubGsubCompiled is the implementation for sub/gsub with compilex regex
+// regexCompiledSubOrGsub is the implementation for `sub`/`gsub` with compiled regex
 // and replacement strings.
-func regexSubGsubCompiled(
+func regexCompiledSubOrGsub(
 	input string,
 	regex *regexp.Regexp,
 	replacement string,
@@ -384,3 +275,177 @@ func regexSubGsubCompiled(
 	buffer.WriteString(input[nonMatchStartIndex:])
 	return buffer.String()
 }
+
+// RegexStringMatchSimple is for simple boolean return without any substring captures.
+func RegexStringMatchSimple(
+	input string,
+	sregex string,
+) bool {
+	regex := CompileMillerRegexOrDie(sregex)
+	return RegexCompiledMatchSimple(input, regex)
+}
+
+// RegexCompiledMatchSimple reports whether regex matches input; no captures are computed.
+func RegexCompiledMatchSimple(
+	input string,
+	regex *regexp.Regexp,
+) bool {
+	return regex.MatchString(input) // same result as Match([]byte(input)), without copying input
+}
+
+// RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL
+// state and may be used by a DSL statement after the =~. For example, in
+//
+//	sub($a, "(..)_(...)", "\1:\2")
+//
+// the replacement string is an argument to sub and therefore the captures are
+// confined to the implementation of the sub function.  Similarly for gsub. But
+// for the match operator, people can do
+//
+//	if ($x =~ "(..)_(...)") {
+//	  ... other lines of code ...
+//	  $y = "\2:\1"
+//	}
+//
+// and the =~ callsite doesn't know if captures will be used or not. So,
+// RegexStringMatchWithCaptures always returns the captures array. It is stored within the CST
+// state.
+func RegexStringMatchWithCaptures(
+	input string,
+	sregex string,
+) (
+	matches bool,
+	capturesOneUp []string,
+) {
+	regex := CompileMillerRegexOrDie(sregex)
+	return RegexCompiledMatchWithCaptures(input, regex)
+}
+
+// RegexCompiledMatchWithCaptures is the implementation for the =~ operator.  Without
+// Miller-style regex captures this would be a simple one-line
+// regex.MatchString(input). However, we return the captures array for the
+// benefit of subsequent references to "\0".."\9".
+func RegexCompiledMatchWithCaptures(
+	input string,
+	regex *regexp.Regexp,
+) (bool, []string) {
+	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	if matrix == nil || len(matrix) == 0 {
+		// Set all captures to ""
+		return false, make([]string, 10)
+	}
+
+	// "\0" .. "\9"
+	captures := make([]string, 10)
+
+	// If there are multiple matches -- e.g. input is
+	//
+	//   "...ab_cde...fg_hij..."
+	//
+	// with regex
+	//
+	//   "(..)_(...)"
+	//
+	// -- then we only consider the first match: boolean return value is true
+	// (the input string matched the regex), and the captures array will map
+	// "\1" to "ab" and "\2" to "cde".
+	row := matrix[0]
+	n := len(row)
+
+	// Example return value from FindAllSubmatchIndex with input
+	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
+	//
+	// Matrix is [][]int{
+	//   []int{3, 9, 3, 5, 6, 9},
+	//   []int{12, 18, 12, 14, 15, 18},
+	// }
+	//
+	// As noted above we look at only the first row.
+	//
+	// * 3-9 is for the entire match "ab_cde"
+	// * 3-5 is for the first capture "ab"
+	// * 6-9 is for the second capture "cde"
+
+	di := 0
+	for si := 0; si < n && di <= 9; si += 2 {
+		start := row[si]
+		end := row[si+1]
+		if start >= 0 && end >= 0 {
+			captures[di] = input[start:end]
+		}
+		di += 1
+	}
+
+	return true, captures
+}
+
+// MakeEmptyCaptures is for initial CST state at the start of executing the DSL expression for the
+// current record.  Even if '$x =~ "(..)_(...)" set "\1" and "\2" on the previous record, at start
+// of processing for the current record we need to start with a clean slate. This is in support of
+// CST state, which `=~` semantics requires.
+func MakeEmptyCaptures() []string {
+	return nil
+}
+
+// ReplacementHasCaptures is used by the CST builder to see if string-literal is like "foo bar" or
+// "foo \1 bar" -- in the latter case it needs to retain the compiled offsets-matrix information.
+// This is in support of CST state, which `=~` semantics requires.
+func ReplacementHasCaptures(
+	replacement string,
+) (
+	hasCaptures bool,
+	matrix [][]int,
+) {
+	if captureDetector.MatchString(replacement) {
+		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+	} else {
+		return false, nil
+	}
+}
+
+// InterpolateCaptures example:
+//
+// * Input $x is "ab_cde"
+//
+//   - DSL expression
+//     if ($x =~ "(..)_(...)") {
+//     ... other lines of code ...
+//     $y = "\2:\1";
+//     }
+//
+// * InterpolateCaptures is used on the evaluation of "\2:\1"
+//
+// * replacementString is "\2:\1"
+//
+//   - replacementMatrix contains precomputed/cached offsets for the "\2" and
+//     "\1" substrings within "\2:\1"
+//
+//   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
+//     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
+func InterpolateCaptures(
+	replacementString string,
+	replacementMatrix [][]int,
+	captures []string,
+) string {
+	if replacementMatrix == nil || captures == nil {
+		return replacementString
+	}
+	var buffer bytes.Buffer
+
+	nonMatchStartIndex := 0
+
+	for _, row := range replacementMatrix {
+		start := row[0]
+		buffer.WriteString(replacementString[nonMatchStartIndex:row[0]])
+
+		// Map "\0".."\9" to integer index 0..9
+		index := replacementString[start+1] - '0'
+		buffer.WriteString(captures[index])
+
+		nonMatchStartIndex = row[1]
+	}
+
+	buffer.WriteString(replacementString[nonMatchStartIndex:])
+
+	return buffer.String()
+}
diff --git a/pkg/lib/regex_test.go b/pkg/lib/regex_test.go
index 961d73f8d..d2a8f5f70 100644
--- a/pkg/lib/regex_test.go
+++ b/pkg/lib/regex_test.go
@@ -88,7 +88,7 @@ var dataForMatches = []tDataForMatches{
 
 func TestRegexReplacementHasCaptures(t *testing.T) {
 	for i, entry := range dataForHasCaptures {
-		actualHasCaptures, actualMatrix := RegexReplacementHasCaptures(entry.replacement)
+		actualHasCaptures, actualMatrix := ReplacementHasCaptures(entry.replacement)
 		if actualHasCaptures != entry.expectedHasCaptures {
 			t.Fatalf("case %d replacement \"%s\" expected %v got %v\n",
 				i, entry.replacement, entry.expectedHasCaptures, actualHasCaptures,
@@ -104,7 +104,7 @@ func TestRegexReplacementHasCaptures(t *testing.T) {
 
 func TestRegexSub(t *testing.T) {
 	for i, entry := range dataForSub {
-		actualOutput := RegexSub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringSub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -115,7 +115,7 @@ func TestRegexSub(t *testing.T) {
 
 func TestRegexGsub(t *testing.T) {
 	for i, entry := range dataForGsub {
-		actualOutput := RegexGsub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringGsub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -126,7 +126,7 @@ func TestRegexGsub(t *testing.T) {
 
 func TestRegexMatches(t *testing.T) {
 	for i, entry := range dataForMatches {
-		actualOutput, actualCaptures := RegexMatches(entry.input, entry.sregex)
+		actualOutput, actualCaptures := RegexStringMatchWithCaptures(entry.input, entry.sregex)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" expected %v got %v\n",
 				i, entry.input, entry.sregex, entry.expectedOutput, actualOutput,
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index e94fd4ce5..820f40c3d 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -43,8 +43,8 @@ func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
 
 		// OutputRecordsAndContexts is assigned after construction
 
-		// See lib.MakeEmptyRegexCaptures for context.
-		RegexCaptures: lib.MakeEmptyRegexCaptures(),
+		// See lib.MakeEmptyCaptures for context.
+		RegexCaptures: lib.MakeEmptyCaptures(),
 		Options:       options,
 
 		StrictMode: strictMode,
@@ -57,5 +57,5 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyRegexCaptures()
+	state.RegexCaptures = lib.MakeEmptyCaptures()
 }
diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go
index de1a555c3..7ee2d9fad 100644
--- a/pkg/transformers/merge_fields.go
+++ b/pkg/transformers/merge_fields.go
@@ -479,7 +479,7 @@ func (tr *TransformerMergeFields) transformByCollapsing(
 			matched = valueFieldNameRegex.MatchString(pe.Key)
 			if matched {
 				// TODO: comment re matrix
-				shortName = lib.RegexSubCompiled(valueFieldName, valueFieldNameRegex, "", nil)
+				shortName = lib.RegexCompiledSub(valueFieldName, valueFieldNameRegex, "", nil)
 				break
 			}
 		}
diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go
index e5f0658b8..7880b6ead 100644
--- a/pkg/transformers/rename.go
+++ b/pkg/transformers/rename.go
@@ -169,7 +169,7 @@ func NewTransformerRename(
 			regexString := pe.Key
 			regex := lib.CompileMillerRegexOrDie(regexString)
 			replacement := pe.Value.(string)
-			_, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(replacement)
+			_, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
 			regexAndReplacement := tRegexAndReplacement{
 				regex:                    regex,
 				replacement:              replacement,
@@ -241,7 +241,7 @@ func (tr *TransformerRename) transformWithRegexes(
 						inrec.Rename(oldName, newName)
 					}
 				} else {
-					newName := lib.RegexSubCompiled(oldName, regex, replacement, replacementCaptureMatrix)
+					newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
 					if newName != oldName {
 						inrec.Rename(oldName, newName)
 					}

From 4053d7684c6c1cc630a892c47b57e6075b3cd951 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Mon, 18 Dec 2023 10:21:09 -0500
Subject: [PATCH 096/456] Preserve regex captures across stack frames (#1447)

* privatize state.RegexCaptures

* stack frame for regex captures

* merge

* unit-test case

* docs re stack frames for regex captures

* more
---
 docs/src/data-diving-examples.md              | 46 +++++++--------
 docs/src/reference-dsl-builtin-functions.md   | 18 +-----
 .../src/reference-main-regular-expressions.md | 20 ++++++-
 .../reference-main-regular-expressions.md.in  | 20 ++++++-
 docs/src/reference-verbs.md                   | 38 ++++++-------
 docs/src/two-pass-algorithms.md               |  4 +-
 pkg/dsl/cst/builtin_functions.go              |  2 +-
 pkg/dsl/cst/leaves.go                         |  4 +-
 pkg/dsl/cst/udf.go                            |  2 +
 pkg/dsl/cst/uds.go                            |  2 +
 pkg/lib/util.go                               |  3 +
 pkg/runtime/state.go                          | 57 +++++++++++++++----
 test/cases/dsl-regex-matching/0017/cmd        |  1 +
 test/cases/dsl-regex-matching/0017/experr     |  0
 test/cases/dsl-regex-matching/0017/expout     |  6 ++
 test/cases/dsl-regex-matching/0017/mlr        | 15 +++++
 test/cases/dsl-regex-matching/0018/cmd        |  1 +
 test/cases/dsl-regex-matching/0018/experr     |  0
 test/cases/dsl-regex-matching/0018/expout     |  6 ++
 test/cases/dsl-regex-matching/0018/mlr        | 15 +++++
 20 files changed, 183 insertions(+), 77 deletions(-)
 create mode 100644 test/cases/dsl-regex-matching/0017/cmd
 create mode 100644 test/cases/dsl-regex-matching/0017/experr
 create mode 100644 test/cases/dsl-regex-matching/0017/expout
 create mode 100644 test/cases/dsl-regex-matching/0017/mlr
 create mode 100644 test/cases/dsl-regex-matching/0018/cmd
 create mode 100644 test/cases/dsl-regex-matching/0018/experr
 create mode 100644 test/cases/dsl-regex-matching/0018/expout
 create mode 100644 test/cases/dsl-regex-matching/0018/mlr

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 39738f193..100716ec2 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -160,11 +160,11 @@ CITRUS COUNTY       1332.9                 79974.9                483785.1
   stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
 
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index d391e8341..8c3b49640 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [match](#match), [matchx](#matchx), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1296,22 +1296,6 @@ lstrip (class=string #args=1) Strip leading whitespace from string. -### match -
-match  (class=string #args=2) TODO: WRITE ME
-Example:
-TODO: WRITE ME
-
- - -### matchx -
-matchx  (class=string #args=2) TODO: WRITE ME
-Example:
-TODO: WRITE ME
-
- - ### regextract
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md
index f15b55f59..c221c48de 100644
--- a/docs/src/reference-main-regular-expressions.md
+++ b/docs/src/reference-main-regular-expressions.md
@@ -63,7 +63,7 @@ name=bull,regex=^b[ou]ll$
 
 ## Regex captures
 
-Regex captures of the form `\0` through `\9` are supported as
+Regex captures of the form `\0` through `\9` are supported as follows:
 
 * Captures have in-function context for `sub` and `gsub`. For example, the first `\1,\2` pair belong to the first `sub` and the second `\1,\2` pair belong to the second `sub`:
 
@@ -77,6 +77,24 @@ Regex captures of the form `\0` through `\9` are supported as
 mlr put '$a =~ "(..)_(....); $b = "left_\1"; $c = "right_\2"'
 
+* Each user-defined function has its own frame for captures. For example: + +
+mlr -n put '
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+    }
+}'
+
+ * The captures are not retained across multiple puts. For example, here the `\1,\2` won't be expanded from the regex capture:
diff --git a/docs/src/reference-main-regular-expressions.md.in b/docs/src/reference-main-regular-expressions.md.in
index e81f24552..c2fc7b049 100644
--- a/docs/src/reference-main-regular-expressions.md.in
+++ b/docs/src/reference-main-regular-expressions.md.in
@@ -38,7 +38,7 @@ GENMD-EOF
 
 ## Regex captures
 
-Regex captures of the form `\0` through `\9` are supported as
+Regex captures of the form `\0` through `\9` are supported as follows:
 
 * Captures have in-function context for `sub` and `gsub`. For example, the first `\1,\2` pair belong to the first `sub` and the second `\1,\2` pair belong to the second `sub`:
 
@@ -52,6 +52,24 @@ GENMD-SHOW-COMMAND
 mlr put '$a =~ "(..)_(....); $b = "left_\1"; $c = "right_\2"'
 GENMD-EOF
 
+* Each user-defined function has its own frame for captures. For example:
+
+GENMD-SHOW-COMMAND
+mlr -n put '
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+    }
+}'
+GENMD-EOF
+
 * The captures are not retained across multiple puts. For example, here the `\1,\2` won't be expanded from the regex capture:
 
 GENMD-SHOW-COMMAND
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 89bbc2b71..106ad4bf1 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go
index 397e7869c..ef5a6fb98 100644
--- a/pkg/dsl/cst/builtin_functions.go
+++ b/pkg/dsl/cst/builtin_functions.go
@@ -450,7 +450,7 @@ func (node *RegexCaptureBinaryFunctionCallsiteNode) Evaluate(
 		node.evaluable1.Evaluate(state),
 		node.evaluable2.Evaluate(state),
 	)
-	state.RegexCaptures = captures
+	state.SetRegexCaptures(captures)
 	return output
 }
 
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index c0b4d8875..0e3621d7d 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -293,7 +293,7 @@ func (node *StringLiteralNode) Evaluate(
 //	}
 //
 // the captures can be set (by =~ or !=~) quite far from where they are used.
-// This is why we consult the state.RegexCaptures here, to see if they've been
+// This is why we consult the state's regex captures here, to see if they've been
 // set on some previous invocation of =~ or !=~.
 func (node *RegexCaptureReplacementNode) Evaluate(
 	state *runtime.State,
@@ -302,7 +302,7 @@ func (node *RegexCaptureReplacementNode) Evaluate(
 		lib.InterpolateCaptures(
 			node.replacementString,
 			node.replacementCaptureMatrix,
-			state.RegexCaptures,
+			state.GetRegexCaptures(),
 		),
 	)
 }
diff --git a/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go
index 9be4bf59c..042366afc 100644
--- a/pkg/dsl/cst/udf.go
+++ b/pkg/dsl/cst/udf.go
@@ -223,6 +223,8 @@ func (site *UDFCallsite) EvaluateWithArguments(
 		state.Stack.PushStackFrameSet()
 		defer state.Stack.PopStackFrameSet()
 	}
+	state.PushRegexCapturesFrame()
+	defer state.PopRegexCapturesFrame()
 
 	cacheable := !udf.isFunctionLiteral
 
diff --git a/pkg/dsl/cst/uds.go b/pkg/dsl/cst/uds.go
index 3a72e4c23..2ed14fa56 100644
--- a/pkg/dsl/cst/uds.go
+++ b/pkg/dsl/cst/uds.go
@@ -120,6 +120,8 @@ func (site *UDSCallsite) Execute(state *runtime.State) (*BlockExitPayload, error
 	// Bind the arguments to the parameters
 	state.Stack.PushStackFrameSet()
 	defer state.Stack.PopStackFrameSet()
+	state.PushRegexCapturesFrame()
+	defer state.PopRegexCapturesFrame()
 
 	for i := range arguments {
 		err := state.Stack.DefineTypedAtScope(
diff --git a/pkg/lib/util.go b/pkg/lib/util.go
index 4a8faa86d..d78809d21 100644
--- a/pkg/lib/util.go
+++ b/pkg/lib/util.go
@@ -209,6 +209,9 @@ func WriteTempFileOrDie(contents string) string {
 }
 
 func CopyStringArray(input []string) []string {
+	if input == nil {
+		return nil
+	}
 	output := make([]string, len(input))
 	copy(output, input)
 	return output
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index 820f40c3d..cfd9e11a7 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -25,27 +25,42 @@ type State struct {
 
 	// For holding "\0".."\9" between where they are set via things like
 	// '$x =~ "(..)_(...)"', and interpolated via things like '$y = "\2:\1"'.
-	RegexCaptures []string
-	Options       *cli.TOptions
+	//
+	// Each top-level block and user-defined function has its own captures.
+	//
+	// For example, in function `f()`, one can do `somevar =~ someregex`, then
+	// call some function `g()` which also uses `=~`, and then when `g()` returns,
+	// `f()` will have its "\1", "\2", etc intact.
+	//
+	// This is necessary for the stateful semantics of `=~` and "\1", "\2", etc.
+	// Those are avoided when the user calls `matchx`, which is newer, and
+	// stateless. However, `=~` exists in the Miller DSL and we must support it.
+	regexCapturesByFrame *list.List // list of []string
+
+	Options *cli.TOptions
 
 	// StrictMode allows for runtime handling of absent-reads and untyped assignments.
 	StrictMode bool
 }
 
 func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
+
+	// See lib.MakeEmptyCaptures for context.
+	regexCapturesByFrame := list.New()
+	regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures())
+
 	oosvars := mlrval.NewMlrmap()
 	return &State{
-		Inrec:            nil,
-		Context:          nil,
-		Oosvars:          oosvars,
-		FilterExpression: mlrval.TRUE,
-		Stack:            NewStack(),
+		Inrec:                nil,
+		Context:              nil,
+		Oosvars:              oosvars,
+		FilterExpression:     mlrval.TRUE,
+		Stack:                NewStack(),
+		regexCapturesByFrame: regexCapturesByFrame,
 
 		// OutputRecordsAndContexts is assigned after construction
 
-		// See lib.MakeEmptyCaptures for context.
-		RegexCaptures: lib.MakeEmptyCaptures(),
-		Options:       options,
+		Options: options,
 
 		StrictMode: strictMode,
 	}
@@ -57,5 +72,25 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyCaptures()
+	state.regexCapturesByFrame.Front().Value = lib.MakeEmptyCaptures()
+}
+
+func (state *State) SetRegexCaptures(
+	captures []string,
+) {
+	state.regexCapturesByFrame.Front().Value = lib.CopyStringArray(captures)
+}
+
+func (state *State) GetRegexCaptures() []string {
+	regexCaptures := state.regexCapturesByFrame.Front().Value.([]string)
+	return lib.CopyStringArray(regexCaptures)
+}
+
+func (state *State) PushRegexCapturesFrame() {
+	state.regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures())
+}
+
+func (state *State) PopRegexCapturesFrame() {
+	// There is no PopFront
+	state.regexCapturesByFrame.Remove(state.regexCapturesByFrame.Front())
 }
diff --git a/test/cases/dsl-regex-matching/0017/cmd b/test/cases/dsl-regex-matching/0017/cmd
new file mode 100644
index 000000000..6add080d4
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/cmd
@@ -0,0 +1 @@
+mlr -n put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-regex-matching/0017/experr b/test/cases/dsl-regex-matching/0017/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-regex-matching/0017/expout b/test/cases/dsl-regex-matching/0017/expout
new file mode 100644
index 000000000..860e81046
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/expout
@@ -0,0 +1,6 @@
+OUTER PRE:  123 abc
+OUTER PRE:  123 abc
+INNER: 456 defg
+INNER: 456 defg
+OUTER POST: 123 abc
+OUTER POST: 123 abc
diff --git a/test/cases/dsl-regex-matching/0017/mlr b/test/cases/dsl-regex-matching/0017/mlr
new file mode 100644
index 000000000..bec25114e
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/mlr
@@ -0,0 +1,15 @@
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+        print "OUTER POST: \1 \2";
+    }
+}
diff --git a/test/cases/dsl-regex-matching/0018/cmd b/test/cases/dsl-regex-matching/0018/cmd
new file mode 100644
index 000000000..6add080d4
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/cmd
@@ -0,0 +1 @@
+mlr -n put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-regex-matching/0018/experr b/test/cases/dsl-regex-matching/0018/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-regex-matching/0018/expout b/test/cases/dsl-regex-matching/0018/expout
new file mode 100644
index 000000000..860e81046
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/expout
@@ -0,0 +1,6 @@
+OUTER PRE:  123 abc
+OUTER PRE:  123 abc
+INNER: 456 defg
+INNER: 456 defg
+OUTER POST: 123 abc
+OUTER POST: 123 abc
diff --git a/test/cases/dsl-regex-matching/0018/mlr b/test/cases/dsl-regex-matching/0018/mlr
new file mode 100644
index 000000000..992fa1d0b
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/mlr
@@ -0,0 +1,15 @@
+subr s() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        print "OUTER PRE:  \1 \2";
+        call s();
+        print "OUTER POST: \1 \2";
+        print "OUTER POST: \1 \2";
+    }
+}

From b13adbe6c0be4fb4f709174a6374161838d707ad Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Tue, 19 Dec 2023 09:33:34 -0500
Subject: [PATCH 097/456] mlr --norc cat was erroring (#1450)

---
 docs/src/manpage.md                  | 40 ++++++++-------------
 docs/src/manpage.txt                 | 40 ++++++++-------------
 docs/src/reference-main-flag-list.md |  1 +
 man/manpage.txt                      | 40 ++++++++-------------
 man/mlr.1                            | 54 +++++++++-------------------
 pkg/cli/option_parse.go              |  8 +++++
 pkg/climain/mlrcli_parse.go          |  2 +-
 test/cases/cli-norc/0001/cmd         |  1 +
 test/cases/cli-norc/0001/experr      |  0
 test/cases/cli-norc/0001/expout      |  0
 10 files changed, 72 insertions(+), 114 deletions(-)
 create mode 100644 test/cases/cli-norc/0001/cmd
 create mode 100644 test/cases/cli-norc/0001/experr
 create mode 100644 test/cases/cli-norc/0001/expout

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 19cb2de07..283d6dd97 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -50,7 +50,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.10.0.
+       manpage documents mlr 6.10.0-dev.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -220,19 +220,18 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
-       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
-       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
-       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
-       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
-       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
-       stddev strfntime strfntime_local strftime strftime_local string strip strlen
-       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
-       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
-       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
-       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
-       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
-       || ~
+       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
+       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
+       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
+       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
+       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
+       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
+       strfntime_local strftime strftime_local string strip strlen strpntime
+       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
+       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
+       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
+       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
+       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -569,6 +568,7 @@ MILLER(1)                                                            MILLER(1)
                                 since direct-to-screen output for large files has its
                                 own overhead.
        --no-hash-records        See --hash-records.
+       --norc                   Do not load a .mlrrc file.
        --nr-progress-mod {m}    With m a positive integer: print filename and record
                                 count to os.Stderr every m input records.
        --ofmt {format}          E.g. `%.18f`, `%.0f`, `%9.6e`. Please use
@@ -2651,16 +2651,6 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
-   1mmatch0m
-        (class=string #args=2) TODO: WRITE ME
-       Example:
-       TODO: WRITE ME
-
-   1mmatchx0m
-        (class=string #args=2) TODO: WRITE ME
-       Example:
-       TODO: WRITE ME
-
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3660,5 +3650,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-16                         MILLER(1)
+                                  2023-12-19                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 7f3a122af..b79cc6bca 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.10.0. + manpage documents mlr 6.10.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -199,19 +199,18 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul - mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os - percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad - round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate - sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort - sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub - stddev strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper - truncate typeof unflatten unformat unformatx upntime uptime urand urand32 - urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & - && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | - || ~ + mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub + nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile + percentiles pow qnorm reduce regextract regextract_or_else rightpad round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + strfntime_local strftime strftime_local string strip strlen strpntime + strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 + sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate + typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . + .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -548,6 +547,7 @@ MILLER(1) MILLER(1) since direct-to-screen output for large files has its own overhead. --no-hash-records See --hash-records. + --norc Do not load a .mlrrc file. --nr-progress-mod {m} With m a positive integer: print filename and record count to os.Stderr every m input records. --ofmt {format} E.g. `%.18f`, `%.0f`, `%9.6e`. Please use @@ -2630,16 +2630,6 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. - 1mmatch0m - (class=string #args=2) TODO: WRITE ME - Example: - TODO: WRITE ME - - 1mmatchx0m - (class=string #args=2) TODO: WRITE ME - Example: - TODO: WRITE ME - 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 
@@ -3639,4 +3629,4 @@ MILLER(1) MILLER(1) - 2023-12-16 MILLER(1) + 2023-12-19 MILLER(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index f9ce597ff..a53d1565e 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -278,6 +278,7 @@ These are flags which don't fit into any other category. * `--no-dedupe-field-names`: By default, if an input record has a field named `x` and another also named `x`, the second will be renamed `x_2`, and so on. With this flag provided, the second `x`'s value will replace the first `x`'s value when the record is read. This flag has no effect on JSON input records, where duplicate keys always result in the last one's value being retained. * `--no-fflush`: Let buffered output not be written after every output record. The default is flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. The default is a significant performance optimization for large files. Use this flag to allow less-frequent updates when output is to the terminal. This is unlikely to be a noticeable performance improvement, since direct-to-screen output for large files has its own overhead. * `--no-hash-records`: See --hash-records. +* `--norc`: Do not load a .mlrrc file. * `--nr-progress-mod {m}`: With m a positive integer: print filename and record count to os.Stderr every m input records. * `--ofmt {format}`: E.g. `%.18f`, `%.0f`, `%9.6e`. Please use sprintf-style codes (https://pkg.go.dev/fmt) for floating-point numbers. If not specified, default formatting is used. See also the `fmtnum` function and the `format-values` verb. * `--ofmte {n}`: Use --ofmte 6 as shorthand for --ofmt %.6e, etc. diff --git a/man/manpage.txt b/man/manpage.txt index 7f3a122af..b79cc6bca 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -29,7 +29,7 @@ MILLER(1) MILLER(1) insertion-ordered hash map. 
This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.10.0. + manpage documents mlr 6.10.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -199,19 +199,18 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul - mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os - percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad - round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate - sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort - sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub - stddev strfntime strfntime_local strftime strftime_local string strip strlen - strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 - sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper - truncate typeof unflatten unformat unformatx upntime uptime urand urand32 - urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & - && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | - || ~ + mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub + nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile + percentiles pow qnorm reduce regextract regextract_or_else rightpad round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + strfntime_local strftime strftime_local string strip strlen strpntime + strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 + sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate + typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement + urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . + .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -548,6 +547,7 @@ MILLER(1) MILLER(1) since direct-to-screen output for large files has its own overhead. --no-hash-records See --hash-records. + --norc Do not load a .mlrrc file. --nr-progress-mod {m} With m a positive integer: print filename and record count to os.Stderr every m input records. --ofmt {format} E.g. `%.18f`, `%.0f`, `%9.6e`. Please use @@ -2630,16 +2630,6 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. - 1mmatch0m - (class=string #args=2) TODO: WRITE ME - Example: - TODO: WRITE ME - - 1mmatchx0m - (class=string #args=2) TODO: WRITE ME - Example: - TODO: WRITE ME - 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 
@@ -3639,4 +3629,4 @@ MILLER(1) MILLER(1) - 2023-12-16 MILLER(1) + 2023-12-19 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 4f0644ed7..fd05c9f8d 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-12-16 +.\" Date: 2023-12-19 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-12-19" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.10.0. +a special case.) This manpage documents mlr 6.10.0-dev. 
.SH "EXAMPLES" .sp @@ -246,19 +246,18 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect -mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul -mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os -percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad -round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate -sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort -sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub -stddev strfntime strfntime_local strftime strftime_local string strip strlen -strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 -sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper -truncate typeof unflatten unformat unformatx upntime uptime urand urand32 -urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & -&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | -|| ~ +mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub +nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile +percentiles pow qnorm reduce regextract regextract_or_else rightpad round +roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime +select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita +splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime +strfntime_local strftime strftime_local string strip strlen strpntime +strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 +sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate +typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement +urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . +\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ .fi .if n \{\ .RE @@ -667,6 +666,7 @@ These are flags which don't fit into any other category. since direct-to-screen output for large files has its own overhead. --no-hash-records See --hash-records. +--norc Do not load a .mlrrc file. --nr-progress-mod {m} With m a positive integer: print filename and record count to os.Stderr every m input records. --ofmt {format} E.g. `%.18f`, `%.0f`, `%9.6e`. 
Please use @@ -3939,28 +3939,6 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906" .fi .if n \{\ .RE -.SS "match" -.if n \{\ -.RS 0 -.\} -.nf - (class=string #args=2) TODO: WRITE ME -Example: -TODO: WRITE ME -.fi -.if n \{\ -.RE -.SS "matchx" -.if n \{\ -.RS 0 -.\} -.nf - (class=string #args=2) TODO: WRITE ME -Example: -TODO: WRITE ME -.fi -.if n \{\ -.RE .SS "max" .if n \{\ .RS 0 diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index c9732b025..5b8414f5d 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -3102,5 +3102,13 @@ has its own overhead.`, *pargi += 2 }, }, + + { + name: "--norc", + help: "Do not load a .mlrrc file.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + *pargi += 1 + }, + }, }, } diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index 9e8679eef..586c94d78 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -151,8 +151,8 @@ func parseCommandLinePassOne( os.Exit(0) } else if args[argi] == "--norc" { - flagSequences = append(flagSequences, args[oargi:argi]) argi += 1 + flagSequences = append(flagSequences, args[oargi:argi]) } else if cli.FLAG_TABLE.Parse(args, argc, &argi, options) { flagSequences = append(flagSequences, args[oargi:argi]) diff --git a/test/cases/cli-norc/0001/cmd b/test/cases/cli-norc/0001/cmd new file mode 100644 index 000000000..57174b0ec --- /dev/null +++ b/test/cases/cli-norc/0001/cmd @@ -0,0 +1 @@ +mlr --norc -n cat diff --git a/test/cases/cli-norc/0001/experr b/test/cases/cli-norc/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/cli-norc/0001/expout b/test/cases/cli-norc/0001/expout new file mode 100644 index 000000000..e69de29bb From 4706b4bb785d81e642c53eb945acc84dbf2fc82b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 19 Dec 2023 09:47:59 -0500 Subject: [PATCH 098/456] Document and unit-test regex-capture reset logic (#1451) * mlr --norc cat was erroring * 
Document and unit-test regex-capture reset logic --- .../reference-main-regular-expressions.md.in | 40 +++++++++++++++++++ test/cases/dsl-regex-matching/null-reset/cmd | 1 + .../dsl-regex-matching/null-reset/experr | 0 .../dsl-regex-matching/null-reset/expout | 9 +++++ test/cases/dsl-regex-matching/null-reset/mlr | 11 +++++ 5 files changed, 61 insertions(+) create mode 100644 test/cases/dsl-regex-matching/null-reset/cmd create mode 100644 test/cases/dsl-regex-matching/null-reset/experr create mode 100644 test/cases/dsl-regex-matching/null-reset/expout create mode 100644 test/cases/dsl-regex-matching/null-reset/mlr diff --git a/docs/src/reference-main-regular-expressions.md.in b/docs/src/reference-main-regular-expressions.md.in index c2fc7b049..434225f35 100644 --- a/docs/src/reference-main-regular-expressions.md.in +++ b/docs/src/reference-main-regular-expressions.md.in @@ -78,6 +78,46 @@ GENMD-EOF * Up to nine matches are supported: `\1` through `\9`, while `\0` is the entire match string; `\15` is treated as `\1` followed by an unrelated `5`. +## Resetting captures + +If you use `(...)` in your regular expression, then up to 9 matches are supported for the `=~` +operator, and an arbitrary number of matches are supported for the `match` DSL function. + +* Before any match is done, `"\1"` etc. in a string evaluate to themselves. +* After a successful match is done, `"\1"` etc. in a string evaluate to the matched substring. +* After an unsuccessful match is done, `"\1"` etc. in a string evaluate to the empty string. +* You can match against `null` to reset to the original state. + +GENMD-CARDIFY-HIGHLIGHT-ONE +mlr repl + +[mlr] "\1:\2" +"\1:\2" + +[mlr] "abc" =~ "..." 
+true + +[mlr] "\1:\2" +":" + +[mlr] "abc" =~ "(.).(.)" +true + +[mlr] "\1:\2" +"a:c" + +[mlr] "abc" =~ "(.)x(.)" +false + +[mlr] "\1:\2" +":" + +[mlr] "abc" =~ null + +[mlr] "\1:\2" +"\1:\2" +GENMD-EOF + ## More information Regular expressions are those supported by the [Go regexp package](https://pkg.go.dev/regexp), which in turn are of type [RE2](https://github.com/google/re2/wiki/Syntax) except for `\C`: diff --git a/test/cases/dsl-regex-matching/null-reset/cmd b/test/cases/dsl-regex-matching/null-reset/cmd new file mode 100644 index 000000000..6add080d4 --- /dev/null +++ b/test/cases/dsl-regex-matching/null-reset/cmd @@ -0,0 +1 @@ +mlr -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-regex-matching/null-reset/experr b/test/cases/dsl-regex-matching/null-reset/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-regex-matching/null-reset/expout b/test/cases/dsl-regex-matching/null-reset/expout new file mode 100644 index 000000000..38eba4339 --- /dev/null +++ b/test/cases/dsl-regex-matching/null-reset/expout @@ -0,0 +1,9 @@ +[\1]:[\2] +true +[]:[] +true +[a]:[c] +false +[]:[] +null +[\1]:[\2] diff --git a/test/cases/dsl-regex-matching/null-reset/mlr b/test/cases/dsl-regex-matching/null-reset/mlr new file mode 100644 index 000000000..0caec5ae3 --- /dev/null +++ b/test/cases/dsl-regex-matching/null-reset/mlr @@ -0,0 +1,11 @@ +end { + print("[\1]:[\2]"); + print("abc" =~ "..."); + print("[\1]:[\2]"); + print("abc" =~ "(.).(.)"); + print("[\1]:[\2]"); + print("abc" =~ "(.)x(.)"); + print("[\1]:[\2]"); + print("abc" =~ null); + print("[\1]:[\2]"); +} From 211b15ad4fff43db371e9e22c624a044234c9d9e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 19 Dec 2023 09:52:16 -0500 Subject: [PATCH 099/456] make docs --- docs/src/kubectl-and-helm.md.in | 2 +- docs/src/reference-dsl-time.md.in | 4 +- .../src/reference-main-regular-expressions.md | 42 +++++++++++++++++++ .../reference-main-regular-expressions.md.in | 2 +- 
docs/src/reference-main-strings.md.in | 2 +- docs/src/release-docs.md.in | 2 +- docs/src/shapes-of-data.md.in | 12 +++--- docs/src/statistics-examples.md.in | 4 +- docs/src/why.md.in | 2 +- 9 files changed, 57 insertions(+), 15 deletions(-) diff --git a/docs/src/kubectl-and-helm.md.in b/docs/src/kubectl-and-helm.md.in index 2f7d7d26f..14c0facf4 100644 --- a/docs/src/kubectl-and-helm.md.in +++ b/docs/src/kubectl-and-helm.md.in @@ -136,7 +136,7 @@ $ helm list | mlr --itsv --ojson head -n 1 ] GENMD-EOF -A solution here is Miller's +A solution here is Miller's [clean-whitespace verb](reference-verbs.md#clean-whitespace): GENMD-CARDIFY diff --git a/docs/src/reference-dsl-time.md.in b/docs/src/reference-dsl-time.md.in index e2e02c397..869a58495 100644 --- a/docs/src/reference-dsl-time.md.in +++ b/docs/src/reference-dsl-time.md.in @@ -67,7 +67,7 @@ the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. This was the first (and initially only) human-readable date/time format supported by Miller going all the way back to Miller 1.0.0. -You can get these from epoch-seconds using the +You can get these from epoch-seconds using the [sec2gmt](reference-dsl-builtin-functions.md#sec2gmt) DSL function. (Note that the terms _UTC_ and _GMT_ are used interchangeably in Miller.) We also have [sec2gmtdate](reference-dsl-builtin-functions.md#sec2gmtdate) DSL function. @@ -142,7 +142,7 @@ GENMD-EOF Note that for local times, Miller omits the `T` and the `Z` you see in GMT times. 
-We also have the +We also have the [gmt2localtime](reference-dsl-builtin-functions.md#gmt2localtime) and [localtime2gmt](reference-dsl-builtin-functions.md#localtime2gmt) convenience functions: diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index c221c48de..ba6d955ff 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -103,6 +103,48 @@ Regex captures of the form `\0` through `\9` are supported as follows: * Up to nine matches are supported: `\1` through `\9`, while `\0` is the entire match string; `\15` is treated as `\1` followed by an unrelated `5`. +## Resetting captures + +If you use `(...)` in your regular expression, then up to 9 matches are supported for the `=~` +operator, and an arbitrary number of matches are supported for the `match` DSL function. + +* Before any match is done, `"\1"` etc. in a string evaluate to themselves. +* After a successful match is done, `"\1"` etc. in a string evaluate to the matched substring. +* After an unsuccessful match is done, `"\1"` etc. in a string evaluate to the empty string. +* You can match against `null` to reset to the original state. + +
+mlr repl
+
+
+
+[mlr] "\1:\2"
+"\1:\2"
+
+[mlr] "abc" =~ "..."
+true
+
+[mlr] "\1:\2"
+":"
+
+[mlr] "abc" =~ "(.).(.)"
+true
+
+[mlr] "\1:\2"
+"a:c"
+
+[mlr] "abc" =~ "(.)x(.)"
+false
+
+[mlr] "\1:\2"
+":"
+
+[mlr] "abc" =~ null
+
+[mlr] "\1:\2"
+"\1:\2"
+
+ ## More information Regular expressions are those supported by the [Go regexp package](https://pkg.go.dev/regexp), which in turn are of type [RE2](https://github.com/google/re2/wiki/Syntax) except for `\C`: diff --git a/docs/src/reference-main-regular-expressions.md.in b/docs/src/reference-main-regular-expressions.md.in index 434225f35..d3b091207 100644 --- a/docs/src/reference-main-regular-expressions.md.in +++ b/docs/src/reference-main-regular-expressions.md.in @@ -83,7 +83,7 @@ GENMD-EOF If you use `(...)` in your regular expression, then up to 9 matches are supported for the `=~` operator, and an arbitrary number of matches are supported for the `match` DSL function. -* Before any match is done, `"\1"` etc. in a string evaluate to themselves. +* Before any match is done, `"\1"` etc. in a string evaluate to themselves. * After a successful match is done, `"\1"` etc. in a string evaluate to the matched substring. * After an unsuccessful match is done, `"\1"` etc. in a string evaluate to the empty string. * You can match against `null` to reset to the original state. diff --git a/docs/src/reference-main-strings.md.in b/docs/src/reference-main-strings.md.in index e67560550..7ad9e431d 100644 --- a/docs/src/reference-main-strings.md.in +++ b/docs/src/reference-main-strings.md.in @@ -143,4 +143,4 @@ See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipe These replacements apply only to strings you key in for the DSL expressions for `filter` and `put`: that is, if you type `\t` in a string literal for a `filter`/`put` expression, it will be turned into a tab character. If you want a backslash followed by a `t`, then please type `\\t`. -However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. 
If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. +However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index 07dc91719..e82b42755 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -1,6 +1,6 @@ # Documents for releases -If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- +If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- the page [https://miller.readthedocs.io/en/main](https://miller.readthedocs.io/en/main) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, without the `-dev` suffix, you're likely using a Miller executable from a package manager -- please see below for the documentation for Miller as of the release you're using. 
diff --git a/docs/src/shapes-of-data.md.in b/docs/src/shapes-of-data.md.in index c32b0dad1..3636f406d 100644 --- a/docs/src/shapes-of-data.md.in +++ b/docs/src/shapes-of-data.md.in @@ -17,14 +17,14 @@ Also try `od -xcv` and/or `cat -e` on your file to check for non-printable chara Use the `file` command to see if there are CR/LF terminators (in this case, there are not): GENMD-CARDIFY-HIGHLIGHT-ONE -file data/colours.csv +file data/colours.csv data/colours.csv: Unicode text, UTF-8 text GENMD-EOF Look at the file to find names of fields: GENMD-CARDIFY-HIGHLIGHT-ONE -cat data/colours.csv +cat data/colours.csv KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_1;WeiรŸ;White;Blanco;Valkoinen;Blanc;Bianco;Wit;Biaล‚y;Alb;Beyaz masterdata_colourcode_2;Schwarz;Black;Negro;Musta;Noir;Nero;Zwart;Czarny;Negru;Siyah @@ -33,13 +33,13 @@ GENMD-EOF Extract a few fields: GENMD-CARDIFY-HIGHLIGHT-ONE -mlr --csv cut -f KEY,PL,TO data/colours.csv +mlr --csv cut -f KEY,PL,TO data/colours.csv GENMD-EOF Use XTAB output format to get a sharper picture of where records/fields are being split: GENMD-CARDIFY-HIGHLIGHT-ONE -mlr --icsv --oxtab cat data/colours.csv +mlr --icsv --oxtab cat data/colours.csv KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_1;WeiรŸ;White;Blanco;Valkoinen;Blanc;Bianco;Wit;Biaล‚y;Alb;Beyaz KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_2;Schwarz;Black;Negro;Musta;Noir;Nero;Zwart;Czarny;Negru;Siyah @@ -48,7 +48,7 @@ GENMD-EOF Using XTAB output format makes it clearer that `KEY;DE;...;TR` is being treated as a single field name in the CSV header, and likewise each subsequent line is being treated as a single field value. This is because the default field separator is a comma but we have semicolons here. 
Use XTAB again with different field separator (`--fs semicolon`): GENMD-CARDIFY-HIGHLIGHT-ONE -mlr --icsv --ifs semicolon --oxtab cat data/colours.csv +mlr --icsv --ifs semicolon --oxtab cat data/colours.csv KEY masterdata_colourcode_1 DE WeiรŸ EN White @@ -77,7 +77,7 @@ GENMD-EOF Using the new field-separator, retry the cut: GENMD-CARDIFY-HIGHLIGHT-ONE -mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv +mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv KEY;PL;TO masterdata_colourcode_1;Biaล‚y;Alb masterdata_colourcode_2;Czarny;Negru diff --git a/docs/src/statistics-examples.md.in b/docs/src/statistics-examples.md.in index a98ead194..1da4aa235 100644 --- a/docs/src/statistics-examples.md.in +++ b/docs/src/statistics-examples.md.in @@ -7,7 +7,7 @@ For one or more specified field names, simply compute p25 and p75, then write th GENMD-RUN-COMMAND mlr --oxtab stats1 -f x -a p25,p75 \ then put '$x_iqr = $x_p75 - $x_p25' \ - data/medium + data/medium GENMD-EOF For wildcarded field names, first compute p25 and p75, then loop over field names with `p25` in them: @@ -19,7 +19,7 @@ mlr --oxtab stats1 --fr '[i-z]' -a p25,p75 \ $["\1_iqr"] = $["\1_p75"] - $["\1_p25"] } }' \ - data/medium + data/medium GENMD-EOF ## Computing weighted means diff --git a/docs/src/why.md.in b/docs/src/why.md.in index 3c83c39c4..e33529ba2 100644 --- a/docs/src/why.md.in +++ b/docs/src/why.md.in @@ -32,7 +32,7 @@ Eighth thing: It's an **awful lot of fun to write**. In my experience I didn't f Miller is command-line-only by design. People who want a graphical user interface won't find it here. This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. I consider that price worth paying for the tool-niche which Miller occupies. -Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. 
For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records. So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats. +Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records. So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats. A third tradeoff is doing build-from-scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would just be an `eval` of Python code. And it would run slower, but maybe not enough slower to be a problem for most folks. Later I found out about the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should check out `rows` as well. 
From c6b745537a9e06d859838a714de9e4fb623e8832 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 19 Dec 2023 14:34:54 -0500 Subject: [PATCH 100/456] New `strmatch`/`strmatchx` DSL functions (#1448) * New `match`/`matchx` DSL functions * unit-test cases * match/matchx -> strmatch/strmatchx * help strings for strmatch and strmatchx * update regex doc page re strmatch/strmatchx * unit-test update --- docs/src/manpage.md | 47 +++++++- docs/src/manpage.txt | 47 +++++++- docs/src/reference-dsl-builtin-functions.md | 42 ++++++- .../src/reference-main-regular-expressions.md | 96 ++++++++++++++- .../reference-main-regular-expressions.md.in | 92 ++++++++++++++- man/manpage.txt | 47 +++++++- man/mlr.1 | 59 +++++++++- pkg/bifs/regex.go | 78 ++++++++++++- pkg/dsl/cst/builtin_function_manager.go | 44 +++++++ pkg/lib/regex.go | 79 ++++++++++++- pkg/mlrval/mlrval_new.go | 2 +- test/cases/dsl-match/0001/cmd | 1 + test/cases/dsl-match/0001/experr | 0 test/cases/dsl-match/0001/expout | 11 ++ test/cases/dsl-match/0001/input | 11 ++ test/cases/dsl-match/0001/mlr | 1 + test/cases/dsl-match/0002/cmd | 1 + test/cases/dsl-match/0002/experr | 0 test/cases/dsl-match/0002/expout | 110 ++++++++++++++++++ test/cases/dsl-match/0002/input | 12 ++ test/cases/dsl-match/0002/mlr | 1 + 21 files changed, 747 insertions(+), 34 deletions(-) create mode 100644 test/cases/dsl-match/0001/cmd create mode 100644 test/cases/dsl-match/0001/experr create mode 100644 test/cases/dsl-match/0001/expout create mode 100644 test/cases/dsl-match/0001/input create mode 100644 test/cases/dsl-match/0001/mlr create mode 100644 test/cases/dsl-match/0002/cmd create mode 100644 test/cases/dsl-match/0002/experr create mode 100644 test/cases/dsl-match/0002/expout create mode 100644 test/cases/dsl-match/0002/input create mode 100644 test/cases/dsl-match/0002/mlr diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 283d6dd97..369a7bbf6 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -226,12 +226,13 @@ 
MILLER(1) MILLER(1) roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + strfntime_local strftime strftime_local string strip strlen strmatch strmatchx + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2996,6 +2997,40 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. + 1mstrmatch0m + (class=string #args=2) Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see `strmatch`. + Examples: + strmatch("a", "abc") is false + strmatch("abc", "a") is true + strmatch("abc", "a[a-z]c") is true + strmatch("abc", "(a).(c)") is true + strmatch(12345, "34") is true + + 1mstrmatchx0m + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. 
are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + Examples: + strmatchx("a", "abc") returns: + { + "matched": false + } + strmatchx("abc", "a") returns: + { + "matched": true, + "full_capture": "a", + "full_start": 1, + "full_end": 1 + } + strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns: + { + "matched": true, + "full_capture": "zy:3458", + "full_start": 2, + "full_end": 8, + "captures": ["zy", "3458"], + "starts": [2, 5], + "ends": [3, 8] + } + 1mstrpntime0m (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. Examples: diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index b79cc6bca..a7aec87a7 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -205,12 +205,13 @@ MILLER(1) MILLER(1) roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + strfntime_local strftime strftime_local string strip strlen strmatch strmatchx + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . 
.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2975,6 +2976,40 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. + 1mstrmatch0m + (class=string #args=2) Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see `strmatch`. + Examples: + strmatch("a", "abc") is false + strmatch("abc", "a") is true + strmatch("abc", "a[a-z]c") is true + strmatch("abc", "(a).(c)") is true + strmatch(12345, "34") is true + + 1mstrmatchx0m + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + Examples: + strmatchx("a", "abc") returns: + { + "matched": false + } + strmatchx("abc", "a") returns: + { + "matched": true, + "full_capture": "a", + "full_start": 1, + "full_end": 1 + } + strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns: + { + "matched": true, + "full_capture": "zy:3458", + "full_start": 2, + "full_end": 8, + "captures": ["zy", "3458"], + "starts": [2, 5], + "ends": [3, 8] + } + 1mstrpntime0m (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. Examples: diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 8c3b49640..f3b8efdef 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [strmatch](#strmatch), [strmatchx](#strmatchx), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1350,6 +1350,46 @@ strlen (class=string #args=1) String length. +### strmatch +
+strmatch  (class=string #args=2) Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see `strmatchx`.
+Examples:
+strmatch("a", "abc") is false
+strmatch("abc", "a") is true
+strmatch("abc", "a[a-z]c") is true
+strmatch("abc", "(a).(c)") is true
+strmatch(12345, "34") is true
+
+ + +### strmatchx +
+strmatchx  (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
+Examples:
+strmatchx("a", "abc") returns:
+  {
+    "matched": false
+  }
+strmatchx("abc", "a") returns:
+  {
+    "matched": true,
+    "full_capture": "a",
+    "full_start": 1,
+    "full_end": 1
+  }
+strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns:
+  {
+    "matched": true,
+    "full_capture": "zy:3458",
+    "full_start": 2,
+    "full_end": 8,
+    "captures": ["zy", "3458"],
+    "starts": [2, 5],
+    "ends": [3, 8]
+  }
+
+ + ### sub
 sub  (class=string #args=3) '$name = sub($name, "old", "new")': replace once (first match, if there are multiple matches), with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to sub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io.
diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md
index ba6d955ff..982c60634 100644
--- a/docs/src/reference-main-regular-expressions.md
+++ b/docs/src/reference-main-regular-expressions.md
@@ -61,7 +61,7 @@ name=jane,regex=^j.*e$
 name=bull,regex=^b[ou]ll$
 
-## Regex captures +## Regex captures for the `=~` operator Regex captures of the form `\0` through `\9` are supported as follows: @@ -145,6 +145,100 @@ false "\1:\2" +## The `strmatch` and `strmatchx` DSL functions + +The `=~` and `!=~` operators have been in Miller for a long time, and they will continue to be +supported. They do, however, have some deficiencies. As of Miller 6.11 and beyond, the `strmatch` +and `strmatchx` provide more robust ways to do capturing. + +First, some examples. + +The `strmatch` function only returns a boolean result, and it doesn't set `\0..\9`: + +
+mlr repl
+
+
+
+[mlr] strmatch("abc", "....")
+false
+
+[mlr] strmatch("abc", "...")
+true
+
+[mlr] strmatch("abc", "(.).(.)")
+true
+
+[mlr] strmatch("[ab:3458]", "([a-z]+):([0-9]+)")
+true
+
+ +The `strmatchx` function also doesn't set `\0..\9`, but returns a map-valued result: + +
+mlr repl
+
+
+
+[mlr] strmatchx("abc", "....")
+{
+  "matched": false
+}
+
+[mlr] strmatchx("abc", "...")
+{
+  "matched": true,
+  "full_capture": "abc",
+  "full_start": 1,
+  "full_end": 3
+}
+
+[mlr] strmatchx("abc", "(.).(.)")
+{
+  "matched": true,
+  "full_capture": "abc",
+  "full_start": 1,
+  "full_end": 3,
+  "captures": ["a", "c"],
+  "starts": [1, 3],
+  "ends": [1, 3]
+}
+
+[mlr] "[ab:3458]" =~ "([a-z]+):([0-9]+)"
+true
+
+[mlr] "\1"
+"ab"
+
+[mlr] "\2"
+"3458"
+
+[mlr] strmatchx("[ab:3458]", "([a-z]+):([0-9]+)")
+{
+  "matched": true,
+  "full_capture": "ab:3458",
+  "full_start": 2,
+  "full_end": 8,
+  "captures": ["ab", "3458"],
+  "starts": [2, 5],
+  "ends": [3, 8]
+}
+
+ +Notes: + +* When there is no match, the result from `strmatchx` only has the `"matched":false` key/value pair. +* When there is a match with no captures, the result from `strmatchx` has the `"matched":true` key/value pair, + as well as `full_capture` (taking the place of `\0` set by `=~`), and `full_start` and `full_end` + which `=~` does not offer. +* When there is a match with captures, the result from `strmatchx` also has the `captures` array + whose slots 1, 2, 3, ... are the same as would have been set by `=~` via `\1, \2, \3, ...`. + However, `strmatchx` offers an arbitrary number of captures, not just `\1..\9`. + Additionally, the `starts` and `ends` arrays are indices into the input string. +* Since you hold the return value from `strmatchx`, you can operate on it as you wish --- instead of + relying on the (function-scoped) globals `\0..\9`. +* The price paid is that using `strmatchx` does indeed tend to take more keystrokes than `=~`. + ## More information Regular expressions are those supported by the [Go regexp package](https://pkg.go.dev/regexp), which in turn are of type [RE2](https://github.com/google/re2/wiki/Syntax) except for `\C`: diff --git a/docs/src/reference-main-regular-expressions.md.in b/docs/src/reference-main-regular-expressions.md.in index d3b091207..893378627 100644 --- a/docs/src/reference-main-regular-expressions.md.in +++ b/docs/src/reference-main-regular-expressions.md.in @@ -36,7 +36,7 @@ GENMD-RUN-COMMAND mlr filter '$name =~ $regex' data/regex-in-data.dat GENMD-EOF -## Regex captures +## Regex captures for the `=~` operator Regex captures of the form `\0` through `\9` are supported as follows: @@ -118,6 +118,96 @@ false "\1:\2" GENMD-EOF +## The `strmatch` and `strmatchx` DSL functions + +The `=~` and `!=~` operators have been in Miller for a long time, and they will continue to be +supported. They do, however, have some deficiencies. 
As of Miller 6.11 and beyond, the `strmatch` +and `strmatchx` provide more robust ways to do capturing. + +First, some examples. + +The `strmatch` function only returns a boolean result, and it doesn't set `\0..\9`: + +GENMD-CARDIFY-HIGHLIGHT-ONE +mlr repl + +[mlr] strmatch("abc", "....") +false + +[mlr] strmatch("abc", "...") +true + +[mlr] strmatch("abc", "(.).(.)") +true + +[mlr] strmatch("[ab:3458]", "([a-z]+):([0-9]+)") +true +GENMD-EOF + +The `strmatchx` function also doesn't set `\0..\9`, but returns a map-valued result: + +GENMD-CARDIFY-HIGHLIGHT-ONE +mlr repl + +[mlr] strmatchx("abc", "....") +{ + "matched": false +} + +[mlr] strmatchx("abc", "...") +{ + "matched": true, + "full_capture": "abc", + "full_start": 1, + "full_end": 3 +} + +[mlr] strmatchx("abc", "(.).(.)") +{ + "matched": true, + "full_capture": "abc", + "full_start": 1, + "full_end": 3, + "captures": ["a", "c"], + "starts": [1, 3], + "ends": [1, 3] +} + +[mlr] "[ab:3458]" =~ "([a-z]+):([0-9]+)" +true + +[mlr] "\1" +"ab" + +[mlr] "\2" +"3458" + +[mlr] strmatchx("[ab:3458]", "([a-z]+):([0-9]+)") +{ + "matched": true, + "full_capture": "ab:3458", + "full_start": 2, + "full_end": 8, + "captures": ["ab", "3458"], + "starts": [2, 5], + "ends": [3, 8] +} +GENMD-EOF + +Notes: + +* When there is no match, the result from `strmatchx` only has the `"matched":false` key/value pair. +* When there is a match with no captures, the result from `strmatchx` has the `"matched":true` key/value pair, + as well as `full_capture` (taking the place of `\0` set by `=~`), and `full_start` and `full_end` + which `=~` does not offer. +* When there is a match with captures, the result from `strmatchx` also has the `captures` array + whose slots 1, 2, 3, ... are the same as would have been set by `=~` via `\1, \2, \3, ...`. + However, `strmatchx` offers an arbitrary number of captures, not just `\1..\9`. + Additionally, the `starts` and `ends` arrays are indices into the input string. 
+* Since you hold the return value from `strmatchx`, you can operate on it as you wish --- instead of + relying on the (function-scoped) globals `\0..\9`. +* The price paid is that using `strmatchx` does indeed tend to take more keystrokes than `=~`. + ## More information Regular expressions are those supported by the [Go regexp package](https://pkg.go.dev/regexp), which in turn are of type [RE2](https://github.com/google/re2/wiki/Syntax) except for `\C`: diff --git a/man/manpage.txt b/man/manpage.txt index b79cc6bca..a7aec87a7 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -205,12 +205,13 @@ MILLER(1) MILLER(1) roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + strfntime_local strftime strftime_local string strip strlen strmatch strmatchx + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2975,6 +2976,40 @@ MILLER(1) MILLER(1) 1mstrlen0m (class=string #args=1) String length. 
+ 1mstrmatch0m + (class=string #args=2) Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see `strmatch`. + Examples: + strmatch("a", "abc") is false + strmatch("abc", "a") is true + strmatch("abc", "a[a-z]c") is true + strmatch("abc", "(a).(c)") is true + strmatch(12345, "34") is true + + 1mstrmatchx0m + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + Examples: + strmatchx("a", "abc") returns: + { + "matched": false + } + strmatchx("abc", "a") returns: + { + "matched": true, + "full_capture": "a", + "full_start": 1, + "full_end": 1 + } + strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns: + { + "matched": true, + "full_capture": "zy:3458", + "full_start": 2, + "full_end": 8, + "captures": ["zy", "3458"], + "starts": [2, 5], + "ends": [3, 8] + } + 1mstrpntime0m (class=time #args=2) strpntime: Parses timestamp as integer nanoseconds since the epoch. See also strpntime_local. 
Examples: diff --git a/man/mlr.1 b/man/mlr.1 index fd05c9f8d..92224547a 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -252,12 +252,13 @@ percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime -strfntime_local strftime strftime_local string strip strlen strpntime -strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 -sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate -typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement -urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . -\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ +strfntime_local strftime strftime_local string strip strlen strmatch strmatchx +strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 +sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper +truncate typeof unflatten unformat unformatx upntime uptime urand urand32 +urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & +&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | +|| ~ .fi .if n \{\ .RE @@ -4650,6 +4651,52 @@ strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-0 .fi .if n \{\ .RE +.SS "strmatch" +.if n \{\ +.RS 0 +.\} +.nf + (class=string #args=2) Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see `strmatch`. 
+Examples: +strmatch("a", "abc") is false +strmatch("abc", "a") is true +strmatch("abc", "a[a-z]c") is true +strmatch("abc", "(a).(c)") is true +strmatch(12345, "34") is true +.fi +.if n \{\ +.RE +.SS "strmatchx" +.if n \{\ +.RS 0 +.\} +.nf + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here. +Examples: +strmatchx("a", "abc") returns: + { + "matched": false + } +strmatchx("abc", "a") returns: + { + "matched": true, + "full_capture": "a", + "full_start": 1, + "full_end": 1 + } +strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns: + { + "matched": true, + "full_capture": "zy:3458", + "full_start": 2, + "full_end": 8, + "captures": ["zy", "3458"], + "starts": [2, 5], + "ends": [3, 8] + } +.fi +.if n \{\ +.RE .SS "strpntime" .if n \{\ .RS 0 diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go index 74c0840f6..331b07e76 100644 --- a/pkg/bifs/regex.go +++ b/pkg/bifs/regex.go @@ -52,11 +52,6 @@ func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool, funcname // BIF_sub implements the sub function, with support for regexes and regex captures // of the form "\1" .. "\9". -// -// TODO: make a variant which allows compiling the regexp once and reusing it -// on each record. Likewise for other regex-using functions in this file. But -// first, do a profiling run to see how much time would be saved, and if this -// precomputing+caching would be worthwhile. 
func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if input1.IsErrorOrAbsent() { return input1 @@ -115,6 +110,79 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromString(stringOutput) } +func BIF_strmatch(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + if !input1.IsLegit() { + return mlrval.FromNotStringError("strmatch", input1) + } + if !input2.IsLegit() { + return mlrval.FromNotStringError("strmatch", input2) + } + input1string := input1.String() + if !input2.IsStringOrVoid() { + return mlrval.FromNotStringError("strmatch", input2) + } + + boolOutput := lib.RegexStringMatchSimple(input1string, input2.AcquireStringValue()) + + return mlrval.FromBool(boolOutput) +} + +func BIF_strmatchx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { + if !input1.IsLegit() { + return mlrval.FromNotStringError("strmatchx", input1) + } + if !input2.IsLegit() { + return mlrval.FromNotStringError("strmatchx", input2) + } + input1string := input1.String() + if !input2.IsStringOrVoid() { + return mlrval.FromNotStringError("strmatchx", input2) + } + + boolOutput, captures, starts, ends := lib.RegexStringMatchWithMapResults(input1string, input2.AcquireStringValue()) + + results := mlrval.NewMlrmap() + results.PutReference("matched", mlrval.FromBool(boolOutput)) + + captures_array := make([]*mlrval.Mlrval, len(captures)) + + if len(captures) > 0 { + for i, _ := range captures { + if i == 0 { + results.PutReference("full_capture", mlrval.FromString(captures[i])) + } else { + captures_array[i] = mlrval.FromString(captures[i]) + } + } + + starts_array := make([]*mlrval.Mlrval, len(starts)) + for i, _ := range starts { + if i == 0 { + results.PutReference("full_start", mlrval.FromInt(int64(starts[i]))) + } else { + starts_array[i] = mlrval.FromInt(int64(starts[i])) + } + } + + ends_array := make([]*mlrval.Mlrval, len(ends)) + for i, _ := range ends { + if i == 0 { + results.PutReference("full_end", mlrval.FromInt(int64(ends[i]))) + } 
else { + ends_array[i] = mlrval.FromInt(int64(ends[i])) + } + } + + if len(captures) > 1 { + results.PutReference("captures", mlrval.FromArray(captures_array[1:])) + results.PutReference("starts", mlrval.FromArray(starts_array[1:])) + results.PutReference("ends", mlrval.FromArray(ends_array[1:])) + } + } + + return mlrval.FromMap(results) +} + // BIF_string_matches_regexp implements the =~ operator, with support for // setting regex-captures for later expressions to access using "\1" .. "\9". func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Mlrval, captures []string) { diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index 7fbf60f3d..c55f9edd9 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -338,6 +338,50 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib regexCaptureBinaryFunc: bifs.BIF_string_does_not_match_regexp, }, + { + name: "strmatch", + class: FUNC_CLASS_STRING, + help: `Boolean yes/no for whether the stringable first argument matches the regular-expression second argument. No regex captures are provided; please see ` + "`strmatch`.", + examples: []string{ + `strmatch("a", "abc") is false`, + `strmatch("abc", "a") is true`, + `strmatch("abc", "a[a-z]c") is true`, + `strmatch("abc", "(a).(c)") is true`, + `strmatch(12345, "34") is true`, + }, + binaryFunc: bifs.BIF_strmatch, + }, + + { + name: "strmatchx", + class: FUNC_CLASS_STRING, + help: `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the ` + "`=~` operator. 
As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.", + examples: []string{ + `strmatchx("a", "abc") returns:`, + ` {`, + ` "matched": false`, + ` }`, + `strmatchx("abc", "a") returns:`, + ` {`, + ` "matched": true,`, + ` "full_capture": "a",`, + ` "full_start": 1,`, + ` "full_end": 1`, + ` }`, + `strmatchx("[zy:3458]", "([a-z]+):([0-9]+)") returns:`, + ` {`, + ` "matched": true,`, + ` "full_capture": "zy:3458",`, + ` "full_start": 2,`, + ` "full_end": 8,`, + ` "captures": ["zy", "3458"],`, + ` "starts": [2, 5],`, + ` "ends": [3, 8]`, + ` }`, + }, + binaryFunc: bifs.BIF_strmatchx, + }, + { name: "&&", class: FUNC_CLASS_BOOLEAN, diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go index cabbc1510..af0a18805 100644 --- a/pkg/lib/regex.go +++ b/pkg/lib/regex.go @@ -15,7 +15,7 @@ // where the '=~' sets the captures and the "\2:\1" uses them. (Note that // https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the // captures explicit as variables, rather than implicit within CST state: this is implemented by -// the `match` and `matchx` DSL functions. Regardless, the `=~` syntax will still be supported +// the `strmatch` and `strmatchx` DSL functions. Regardless, the `=~` syntax will still be supported // for backward compatibility and so is here to stay.) Here we make use of Go regexp-library // functions to write to, and then later interpolate from, a captures array which is stored within // CST state. (See the `runtime.State` object.) @@ -293,6 +293,83 @@ func RegexCompiledMatchSimple( return regex.Match([]byte(input)) } +// RegexStringMatchWithMapResults implements much of the `strmatchx` DSL function. This returns +// captures via return values. This is distinct from RegexStringMatchWithCaptures which is for the +// `=~` DSL operator. 
+func RegexStringMatchWithMapResults( + input string, + sregex string, +) ( + matches bool, + captures []string, + starts []int, + ends []int, +) { + regex := CompileMillerRegexOrDie(sregex) + return RegexCompiledMatchWithMapResults(input, regex) +} + +// RegexCompiledMatchWithMapResults does the work for RegexStringMatchWithMapResults once +// a compiled regexp is available. Array slot 0 is for the full match; slots 1 and up +// are for the capture-matches such as "\([0-9]+\):\([a-z]+\)". +func RegexCompiledMatchWithMapResults( + input string, + regex *regexp.Regexp, +) (bool, []string, []int, []int) { + captures := make([]string, 0, 10) + starts := make([]int, 0, 10) + ends := make([]int, 0, 10) + + matrix := regex.FindAllSubmatchIndex([]byte(input), -1) + if matrix == nil || len(matrix) == 0 { + return false, captures, starts, ends + } + + // If there are multiple matches -- e.g. input is + // + // "...ab_cde...fg_hij..." + // + // with regex + // + // "(..)_(...)" + // + // -- then we only consider the first match: boolean return value is true + // (the input string matched the regex), and the captures array will map + // slot 1 to "ab" and slot 2 to "cde". + row := matrix[0] + n := len(row) + + // Example return value from FindAllSubmatchIndex with input + // "...ab_cde...fg_hij..." and regex "(..)_(...)": + // + // Matrix is [][]int{ + // []int{3, 9, 3, 5, 6, 9}, + // []int{12, 18, 12, 14, 15, 18}, + // } + // + // As noted above we look at only the first row. 
+ // + // * 3-9 is for the entire match "ab_cde" + // * 3-5 is for the first capture "ab" + // * 6-9 is for the second capture "cde" + + for si := 0; si < n; si += 2 { + start := row[si] + end := row[si+1] + if start >= 0 && end >= 0 { + captures = append(captures, input[start:end]) + starts = append(starts, start+1) + ends = append(ends, end) + } else { + captures = append(captures, "") + starts = append(starts, -1) + ends = append(ends, -1) + } + } + + return true, captures, starts, ends +} + // RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL // state and may be used by a DSL statement after the =~. For example, in // diff --git a/pkg/mlrval/mlrval_new.go b/pkg/mlrval/mlrval_new.go index eafea9afd..cb548c3d0 100644 --- a/pkg/mlrval/mlrval_new.go +++ b/pkg/mlrval/mlrval_new.go @@ -132,7 +132,7 @@ func FromNotFunctionError(funcname string, v *Mlrval) *Mlrval { func FromNotNamedTypeError(funcname string, v *Mlrval, expected_type_name string) *Mlrval { return FromError( fmt.Errorf( - "%s: unacceptable non-array value %s with type %s; needed type %s", + "%s: unacceptable value %s with type %s; needed type %s", funcname, v.StringMaybeQuoted(), v.GetTypeName(), diff --git a/test/cases/dsl-match/0001/cmd b/test/cases/dsl-match/0001/cmd new file mode 100644 index 000000000..0e3ce7786 --- /dev/null +++ b/test/cases/dsl-match/0001/cmd @@ -0,0 +1 @@ +mlr --ojsonl --from ${CASEDIR}/input put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-match/0001/experr b/test/cases/dsl-match/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-match/0001/expout b/test/cases/dsl-match/0001/expout new file mode 100644 index 000000000..e025c7754 --- /dev/null +++ b/test/cases/dsl-match/0001/expout @@ -0,0 +1,11 @@ +{"x": "a", "y": "b", "z": false} +{"x": "abc", "y": "ab", "z": true} +{"x": " 345 78 ", "y": "([0-9]+)", "z": true} +{"x": " 345 78 ", "y": "([0-9]+) ([0-9]+)", "z": true} +{"x": " 345 78 ", "y": 
"([0-9]+)(.)([0-9]+)", "z": true} +{"x": "", "y": "", "z": true} +{"x": "", "y": "b", "z": false} +{"x": "a", "y": "", "z": true} +{"x": "a", "z": (error)} +{"y": "b", "z": (error)} +{"foo": "bar", "z": (error)} diff --git a/test/cases/dsl-match/0001/input b/test/cases/dsl-match/0001/input new file mode 100644 index 000000000..5facdc4fb --- /dev/null +++ b/test/cases/dsl-match/0001/input @@ -0,0 +1,11 @@ +x=a,y=b +x=abc,y=ab +x= 345 78 ,y=([0-9]+) +x= 345 78 ,y=([0-9]+) ([0-9]+) +x= 345 78 ,y=([0-9]+)(.)([0-9]+) +x=,y= +x=,y=b +x=a,y= +x=a +y=b +foo=bar diff --git a/test/cases/dsl-match/0001/mlr b/test/cases/dsl-match/0001/mlr new file mode 100644 index 000000000..9b015fdb7 --- /dev/null +++ b/test/cases/dsl-match/0001/mlr @@ -0,0 +1 @@ +$z = strmatch($x, $y) diff --git a/test/cases/dsl-match/0002/cmd b/test/cases/dsl-match/0002/cmd new file mode 100644 index 000000000..1fc3ab4d5 --- /dev/null +++ b/test/cases/dsl-match/0002/cmd @@ -0,0 +1 @@ +mlr --ojson --from ${CASEDIR}/input put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-match/0002/experr b/test/cases/dsl-match/0002/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-match/0002/expout b/test/cases/dsl-match/0002/expout new file mode 100644 index 000000000..1c44eb6c3 --- /dev/null +++ b/test/cases/dsl-match/0002/expout @@ -0,0 +1,110 @@ +[ +{ + "x": "a", + "y": "b", + "z": { + "matched": false + } +}, +{ + "x": "abc", + "y": "ab", + "z": { + "matched": true, + "full_capture": "ab", + "full_start": 1, + "full_end": 2 + } +}, +{ + "x": " 345 78 ", + "y": "([0-9]+)", + "z": { + "matched": true, + "full_capture": "345", + "full_start": 3, + "full_end": 5, + "captures": ["345"], + "starts": [3], + "ends": [5] + } +}, +{ + "x": " 345 78 ", + "y": "([0-9]+) ([0-9]+)", + "z": { + "matched": true, + "full_capture": "345 78", + "full_start": 3, + "full_end": 8, + "captures": ["345", "78"], + "starts": [3, 7], + "ends": [5, 8] + } +}, +{ + "x": " 345 78 ", + "y": "([0-9]+)(.)([0-9]+)", + 
"z": { + "matched": true, + "full_capture": "345 78", + "full_start": 3, + "full_end": 8, + "captures": ["345", " ", "78"], + "starts": [3, 6, 7], + "ends": [5, 6, 8] + } +}, +{ + "x": "", + "y": "", + "z": { + "matched": true, + "full_capture": "", + "full_start": 1, + "full_end": 0 + } +}, +{ + "x": "", + "y": "b", + "z": { + "matched": false + } +}, +{ + "x": "a", + "y": "", + "z": { + "matched": true, + "full_capture": "", + "full_start": 1, + "full_end": 0 + } +}, +{ + "x": "a", + "z": (error) +}, +{ + "y": "b", + "z": (error) +}, +{ + "foo": "bar", + "z": (error) +}, +{ + "x": "1234567890abcdefghij", + "y": "(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)", + "z": { + "matched": true, + "full_capture": "1234567890abcdefghij", + "full_start": 1, + "full_end": 20, + "captures": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + "starts": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], + "ends": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + } +} +] diff --git a/test/cases/dsl-match/0002/input b/test/cases/dsl-match/0002/input new file mode 100644 index 000000000..10308f01d --- /dev/null +++ b/test/cases/dsl-match/0002/input @@ -0,0 +1,12 @@ +x=a,y=b +x=abc,y=ab +x= 345 78 ,y=([0-9]+) +x= 345 78 ,y=([0-9]+) ([0-9]+) +x= 345 78 ,y=([0-9]+)(.)([0-9]+) +x=,y= +x=,y=b +x=a,y= +x=a +y=b +foo=bar +x=1234567890abcdefghij,y=(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) diff --git a/test/cases/dsl-match/0002/mlr b/test/cases/dsl-match/0002/mlr new file mode 100644 index 000000000..184b3e286 --- /dev/null +++ b/test/cases/dsl-match/0002/mlr @@ -0,0 +1 @@ +$z = strmatchx($x, $y) From f4cf166358e25037fe13e3b4dc59b41b091e1131 Mon Sep 17 00:00:00 2001 From: Eng Zer Jun Date: Wed, 20 Dec 2023 22:44:02 +0800 Subject: [PATCH 101/456] Replace deprecated `io/ioutil` functions (#1452) The io/ioutil package has been deprecated as of Go 1.16 [1]. 
This commit replaces the existing io/ioutil functions with their new definitions in io and os packages. [1]: https://golang.org/doc/go1.16#ioutil Signed-off-by: Eng Zer Jun --- pkg/climain/mlrcli_shebang.go | 6 +-- pkg/entrypoint/entrypoint.go | 3 +- pkg/lib/readfiles.go | 17 +++++--- pkg/lib/util.go | 5 +-- pkg/terminals/regtest/regtester.go | 69 ++++++++++++++---------------- 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/pkg/climain/mlrcli_shebang.go b/pkg/climain/mlrcli_shebang.go index b05643bce..e0e2f91c8 100644 --- a/pkg/climain/mlrcli_shebang.go +++ b/pkg/climain/mlrcli_shebang.go @@ -2,7 +2,7 @@ package climain import ( "fmt" - "io/ioutil" + "os" "regexp" "strings" @@ -49,7 +49,7 @@ func maybeInterpolateDashS(args []string) ([]string, error) { remainingArgs := args[3:] // Read the bytes in the filename given after -s. - byteContents, rerr := ioutil.ReadFile(filename) + byteContents, rerr := os.ReadFile(filename) if rerr != nil { return nil, fmt.Errorf("mlr: cannot read %s: %v", filename, rerr) } @@ -68,7 +68,7 @@ func maybeInterpolateDashS(args []string) ([]string, error) { if stripComments { re := regexp.MustCompile(`#.*`) - for i, _ := range lines { + for i := range lines { lines[i] = re.ReplaceAllString(lines[i], "") } } diff --git a/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go index 7f7fab711..962fc59fc 100644 --- a/pkg/entrypoint/entrypoint.go +++ b/pkg/entrypoint/entrypoint.go @@ -7,7 +7,6 @@ package entrypoint import ( "fmt" - "io/ioutil" "os" "path" @@ -135,7 +134,7 @@ func processInPlace( containingDirectory := path.Dir(fileName) // Names like ./mlr-in-place-2148227797 and ./mlr-in-place-1792078347, // as revealed by printing handle.Name(). 
- handle, err := ioutil.TempFile(containingDirectory, "mlr-in-place-") + handle, err := os.CreateTemp(containingDirectory, "mlr-in-place-") if err != nil { return err } diff --git a/pkg/lib/readfiles.go b/pkg/lib/readfiles.go index 53ce49cc2..305f8a2b5 100644 --- a/pkg/lib/readfiles.go +++ b/pkg/lib/readfiles.go @@ -6,7 +6,6 @@ package lib import ( - "io/ioutil" "os" "strings" @@ -34,10 +33,10 @@ func LoadStringsFromFileOrDir(path string, extension string) ([]string, error) { } } -// LoadStringFromFile is just a wrapper around ioutil.ReadFile, +// LoadStringFromFile is just a wrapper around os.ReadFile, // with a cast from []byte to string. func LoadStringFromFile(filename string) (string, error) { - data, err := ioutil.ReadFile(filename) + data, err := os.ReadFile(filename) if err != nil { return "", err } @@ -51,14 +50,18 @@ func LoadStringFromFile(filename string) (string, error) { func LoadStringsFromDir(dirname string, extension string) ([]string, error) { dslStrings := make([]string, 0) - entries, err := ioutil.ReadDir(dirname) + f, err := os.Open(dirname) + if err != nil { + return nil, err + } + defer f.Close() + + names, err := f.Readdirnames(-1) if err != nil { return nil, err } - for i := range entries { - entry := &entries[i] - name := (*entry).Name() + for _, name := range names { if !strings.HasSuffix(name, extension) { continue } diff --git a/pkg/lib/util.go b/pkg/lib/util.go index d78809d21..37e90ce46 100644 --- a/pkg/lib/util.go +++ b/pkg/lib/util.go @@ -2,7 +2,6 @@ package lib import ( "fmt" - "io/ioutil" "os" "sort" "strconv" @@ -186,9 +185,9 @@ func GetArrayKeysSorted(input map[string]string) []string { // WriteTempFile places the contents string into a temp file, which the caller // must remove. func WriteTempFileOrDie(contents string) string { - // Use "" as first argument to ioutil.TempFile to use default directory. + // Use "" as first argument to os.CreateTemp to use default directory. 
// Nominally "/tmp" or somesuch on all unix-like systems, but not for Windows. - handle, err := ioutil.TempFile("", "mlr-temp") + handle, err := os.CreateTemp("", "mlr-temp") if err != nil { fmt.Printf("mlr: could not create temp file.\n") os.Exit(1) diff --git a/pkg/terminals/regtest/regtester.go b/pkg/terminals/regtest/regtester.go index ec58bdf73..8b5231c8e 100644 --- a/pkg/terminals/regtest/regtester.go +++ b/pkg/terminals/regtest/regtester.go @@ -58,7 +58,6 @@ package regtest import ( "container/list" "fmt" - "io/ioutil" "os" "path/filepath" "runtime" @@ -153,7 +152,6 @@ func (regtester *RegTester) resetCounts() { func (regtester *RegTester) Execute( casePaths []string, ) bool { - // Don't let the current user's settings affect expected results for _, name := range envVarsToUnset { os.Unsetenv(name) @@ -279,7 +277,7 @@ func (regtester *RegTester) executeSingleDirectory( ) (bool, bool) { passed := true // TODO: comment - hasCaseSubdirectories := regtester.hasCaseSubdirectories(dirName) + fileNames, hasCaseSubdirectories := regtester.hasCaseSubdirectories(dirName) if !regtester.plainMode { if hasCaseSubdirectories && regtester.verbosityLevel >= 2 { @@ -287,34 +285,26 @@ func (regtester *RegTester) executeSingleDirectory( } } - entries, err := ioutil.ReadDir(dirName) - if err != nil { - fmt.Printf("%s: %v\n", dirName, err) - passed = false - } else { + for _, name := range fileNames { + path := dirName + "/" + name - for i := range entries { - entry := &entries[i] - path := dirName + "/" + (*entry).Name() - - ok := regtester.executeSinglePath(path) - if !ok { - passed = false - } + ok := regtester.executeSinglePath(path) + if !ok { + passed = false } + } - // Only print if there are .cmd files directly in this directory. - // Otherwise it's just a directory-of-directories and we don't need to - // multiply announce. 
- if hasCaseSubdirectories { - if passed { - if !regtester.plainMode { - fmt.Printf("%s %s\n", colorizer.MaybeColorizePass("PASS", true), dirName) - } - } else { - if !regtester.plainMode { - fmt.Printf("%s %s\n", colorizer.MaybeColorizeFail("FAIL", true), dirName) - } + // Only print if there are .cmd files directly in this directory. + // Otherwise it's just a directory-of-directories and we don't need to + // multiply announce. + if hasCaseSubdirectories { + if passed { + if !regtester.plainMode { + fmt.Printf("%s %s\n", colorizer.MaybeColorizePass("PASS", true), dirName) + } + } else { + if !regtester.plainMode { + fmt.Printf("%s %s\n", colorizer.MaybeColorizeFail("FAIL", true), dirName) } } } @@ -340,22 +330,27 @@ func (regtester *RegTester) executeSingleDirectory( func (regtester *RegTester) hasCaseSubdirectories( dirName string, -) bool { +) ([]string, bool) { + f, err := os.Open(dirName) + if err != nil { + fmt.Printf("%s: %v\n", dirName, err) + os.Exit(1) + } + defer f.Close() - entries, err := ioutil.ReadDir(dirName) + names, err := f.Readdirnames(-1) if err != nil { fmt.Printf("%s: %v\n", dirName, err) os.Exit(1) } - for i := range entries { - entry := &entries[i] - path := dirName + string(filepath.Separator) + (*entry).Name() + for _, name := range names { + path := dirName + string(filepath.Separator) + name if regtester.isCaseDirectory(path) { - return true + return names, true } } - return false + return names, false } func (regtester *RegTester) isCaseDirectory( @@ -774,7 +769,7 @@ func (regtester *RegTester) loadFile( fileName string, caseDir string, ) (string, error) { - byteContents, err := ioutil.ReadFile(fileName) + byteContents, err := os.ReadFile(fileName) if err != nil { return "", err } @@ -789,7 +784,7 @@ func (regtester *RegTester) storeFile( fileName string, contents string, ) error { - err := ioutil.WriteFile(fileName, []byte(contents), 0666) + err := os.WriteFile(fileName, []byte(contents), 0o666) if err != nil { return err } From 
0e3a54ed68d7d77376c717f63e74a3c5bede085f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 23 Dec 2023 16:20:11 -0500 Subject: [PATCH 102/456] Implement `mlr uniq -x` (#1457) * mlr uniq -x * unit-test cases * make dev --- docs/src/kubectl-and-helm.md | 2 +- docs/src/manpage.md | 4 +- docs/src/manpage.txt | 4 +- docs/src/reference-dsl-time.md | 4 +- .../src/reference-main-regular-expressions.md | 2 +- docs/src/reference-main-strings.md | 2 +- docs/src/reference-verbs.md | 2 + docs/src/release-docs.md | 2 +- docs/src/shapes-of-data.md | 12 ++-- docs/src/statistics-examples.md | 4 +- docs/src/why.md | 2 +- man/manpage.txt | 4 +- man/mlr.1 | 6 +- pkg/lib/ordered_map.go | 23 +++++++ pkg/mlrval/mlrmap_accessors.go | 13 ++++ pkg/mlrval/mlrmap_accessors_test.go | 19 ++++++ pkg/transformers/uniq.go | 66 ++++++++++++++----- test/cases/cli-help/0001/expout | 2 + test/cases/verb-uniq/uniq-c-x-change/cmd | 1 + test/cases/verb-uniq/uniq-c-x-change/experr | 0 test/cases/verb-uniq/uniq-c-x-change/expout | 7 ++ test/cases/verb-uniq/uniq-c-x-het/cmd | 1 + test/cases/verb-uniq/uniq-c-x-het/experr | 0 test/cases/verb-uniq/uniq-c-x-het/expout | 6 ++ test/cases/verb-uniq/uniq-c-x-long/cmd | 1 + test/cases/verb-uniq/uniq-c-x-long/experr | 0 test/cases/verb-uniq/uniq-c-x-long/expout | 7 ++ test/cases/verb-uniq/uniq-c-x-short/cmd | 1 + test/cases/verb-uniq/uniq-c-x-short/experr | 0 test/cases/verb-uniq/uniq-c-x-short/expout | 6 ++ test/cases/verb-uniq/uniq-x-change/cmd | 1 + test/cases/verb-uniq/uniq-x-change/experr | 0 test/cases/verb-uniq/uniq-x-change/expout | 7 ++ test/cases/verb-uniq/uniq-x-het/cmd | 1 + test/cases/verb-uniq/uniq-x-het/experr | 0 test/cases/verb-uniq/uniq-x-het/expout | 6 ++ test/cases/verb-uniq/uniq-x-long/cmd | 1 + test/cases/verb-uniq/uniq-x-long/experr | 0 test/cases/verb-uniq/uniq-x-long/expout | 7 ++ test/cases/verb-uniq/uniq-x-short/cmd | 1 + test/cases/verb-uniq/uniq-x-short/experr | 0 test/cases/verb-uniq/uniq-x-short/expout | 6 ++ 
test/input/example-with-changed-keys.dkvp | 10 +++ test/input/example-with-extra-keys.dkvp | 10 +++ test/input/example-with-missing-keys.dkvp | 10 +++ test/input/example.dkvp | 10 +++ 46 files changed, 238 insertions(+), 35 deletions(-) create mode 100644 test/cases/verb-uniq/uniq-c-x-change/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-change/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-change/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-het/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-het/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-het/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-long/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-long/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-long/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-short/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-short/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-short/expout create mode 100644 test/cases/verb-uniq/uniq-x-change/cmd create mode 100644 test/cases/verb-uniq/uniq-x-change/experr create mode 100644 test/cases/verb-uniq/uniq-x-change/expout create mode 100644 test/cases/verb-uniq/uniq-x-het/cmd create mode 100644 test/cases/verb-uniq/uniq-x-het/experr create mode 100644 test/cases/verb-uniq/uniq-x-het/expout create mode 100644 test/cases/verb-uniq/uniq-x-long/cmd create mode 100644 test/cases/verb-uniq/uniq-x-long/experr create mode 100644 test/cases/verb-uniq/uniq-x-long/expout create mode 100644 test/cases/verb-uniq/uniq-x-short/cmd create mode 100644 test/cases/verb-uniq/uniq-x-short/experr create mode 100644 test/cases/verb-uniq/uniq-x-short/expout create mode 100644 test/input/example-with-changed-keys.dkvp create mode 100644 test/input/example-with-extra-keys.dkvp create mode 100644 test/input/example-with-missing-keys.dkvp create mode 100644 test/input/example.dkvp diff --git a/docs/src/kubectl-and-helm.md b/docs/src/kubectl-and-helm.md index 38bd31abf..5f53001be 100644 --- 
a/docs/src/kubectl-and-helm.md +++ b/docs/src/kubectl-and-helm.md @@ -152,7 +152,7 @@ $ helm list | mlr --itsv --ojson head -n 1 ] -A solution here is Miller's +A solution here is Miller's [clean-whitespace verb](reference-verbs.md#clean-whitespace):
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 369a7bbf6..28182f146 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -988,6 +988,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -f {a,b,c}    Field names for distinct count.
+       -x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
        -n            Show only the number of distinct values. Not compatible with -u.
        -o {name}     Field name for output count. Default "count".
                      Ignored with -u.
@@ -2154,6 +2155,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -g {d,e,f}    Group-by-field names for uniq counts.
+       -x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
        -c            Show repeat counts in addition to unique values.
        -n            Show only the number of distinct values.
        -o {name}     Field name for output count. Default "count".
@@ -3685,5 +3687,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-19                         MILLER(1)
+                                  2023-12-23                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a7aec87a7..4262cc6c7 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -967,6 +967,7 @@ MILLER(1) MILLER(1) Options: -f {a,b,c} Field names for distinct count. + -x {a,b,c} Field names to exclude for distinct count: use each record's others instead. -n Show only the number of distinct values. Not compatible with -u. -o {name} Field name for output count. Default "count". Ignored with -u. @@ -2133,6 +2134,7 @@ MILLER(1) MILLER(1) Options: -g {d,e,f} Group-by-field names for uniq counts. + -x {a,b,c} Field names to exclude for uniq: use each record's others instead. -c Show repeat counts in addition to unique values. -n Show only the number of distinct values. -o {name} Field name for output count. Default "count". @@ -3664,4 +3666,4 @@ MILLER(1) MILLER(1) - 2023-12-19 MILLER(1) + 2023-12-23 MILLER(1) diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index 867bc8dc1..0a3aa721e 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -89,7 +89,7 @@ the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. This was the first (and initially only) human-readable date/time format supported by Miller going all the way back to Miller 1.0.0. -You can get these from epoch-seconds using the +You can get these from epoch-seconds using the [sec2gmt](reference-dsl-builtin-functions.md#sec2gmt) DSL function. (Note that the terms _UTC_ and _GMT_ are used interchangeably in Miller.) We also have [sec2gmtdate](reference-dsl-builtin-functions.md#sec2gmtdate) DSL function. @@ -200,7 +200,7 @@ mlr: TZ environment variable appears malformed: "This/Is/A/Typo" Note that for local times, Miller omits the `T` and the `Z` you see in GMT times. 
-We also have the +We also have the [gmt2localtime](reference-dsl-builtin-functions.md#gmt2localtime) and [localtime2gmt](reference-dsl-builtin-functions.md#localtime2gmt) convenience functions: diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index 982c60634..f679669b8 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -108,7 +108,7 @@ Regex captures of the form `\0` through `\9` are supported as follows: If you use `(...)` in your regular expression, then up to 9 matches are supported for the `=~` operator, and an arbitrary number of matches are supported for the `match` DSL function. -* Before any match is done, `"\1"` etc. in a string evaluate to themselves. +* Before any match is done, `"\1"` etc. in a string evaluate to themselves. * After a successful match is done, `"\1"` etc. in a string evaluate to the matched substring. * After an unsuccessful match is done, `"\1"` etc. in a string evaluate to the empty string. * You can match against `null` to reset to the original state. diff --git a/docs/src/reference-main-strings.md b/docs/src/reference-main-strings.md index df35284f4..b16b03483 100644 --- a/docs/src/reference-main-strings.md +++ b/docs/src/reference-main-strings.md @@ -197,4 +197,4 @@ See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipe These replacements apply only to strings you key in for the DSL expressions for `filter` and `put`: that is, if you type `\t` in a string literal for a `filter`/`put` expression, it will be turned into a tab character. If you want a backslash followed by a `t`, then please type `\\t`. -However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. 
If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. +However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 106ad4bf1..9a50a1dbb 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -596,6 +596,7 @@ Same as uniq -c. Options: -f {a,b,c} Field names for distinct count. +-x {a,b,c} Field names to exclude for distinct count: use each record's others instead. -n Show only the number of distinct values. Not compatible with -u. -o {name} Field name for output count. Default "count". Ignored with -u. @@ -4066,6 +4067,7 @@ count-distinct. For uniq, -f is a synonym for -g. Options: -g {d,e,f} Group-by-field names for uniq counts. +-x {a,b,c} Field names to exclude for uniq: use each record's others instead. -c Show repeat counts in addition to unique values. -n Show only the number of distinct values. -o {name} Field name for output count. Default "count". 
diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index 479be3f38..85a13d804 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -16,7 +16,7 @@ Quick links: # Documents for releases -If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- +If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- the page [https://miller.readthedocs.io/en/main](https://miller.readthedocs.io/en/main) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, without the `-dev` suffix, you're likely using a Miller executable from a package manager -- please see below for the documentation for Miller as of the release you're using. diff --git a/docs/src/shapes-of-data.md b/docs/src/shapes-of-data.md index bab58b7f0..f97040543 100644 --- a/docs/src/shapes-of-data.md +++ b/docs/src/shapes-of-data.md @@ -33,7 +33,7 @@ Also try `od -xcv` and/or `cat -e` on your file to check for non-printable chara Use the `file` command to see if there are CR/LF terminators (in this case, there are not):
-file data/colours.csv 
+file data/colours.csv
 
 data/colours.csv: Unicode text, UTF-8 text
@@ -42,7 +42,7 @@ data/colours.csv: Unicode text, UTF-8 text
 Look at the file to find names of fields:
 
 
-cat data/colours.csv 
+cat data/colours.csv
 
 KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR
@@ -53,13 +53,13 @@ masterdata_colourcode_2;Schwarz;Black;Negro;Musta;Noir;Nero;Zwart;Czarny;Negru;S
 Extract a few fields:
 
 
-mlr --csv cut -f KEY,PL,TO data/colours.csv 
+mlr --csv cut -f KEY,PL,TO data/colours.csv
 
Use XTAB output format to get a sharper picture of where records/fields are being split:
-mlr --icsv --oxtab cat data/colours.csv 
+mlr --icsv --oxtab cat data/colours.csv
 
 KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_1;WeiรŸ;White;Blanco;Valkoinen;Blanc;Bianco;Wit;Biaล‚y;Alb;Beyaz
@@ -70,7 +70,7 @@ KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_2;Schwarz;Black;Negro;Mu
 Using XTAB output format makes it clearer that `KEY;DE;...;TR` is being treated as a single field name in the CSV header, and likewise each subsequent line is being treated as a single field value. This is because the default field separator is a comma but we have semicolons here.  Use XTAB again with different field separator (`--fs semicolon`):
 
 
-mlr --icsv --ifs semicolon --oxtab cat data/colours.csv 
+mlr --icsv --ifs semicolon --oxtab cat data/colours.csv
 
 KEY masterdata_colourcode_1
@@ -101,7 +101,7 @@ TR  Siyah
 Using the new field-separator, retry the cut:
 
 
-mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv 
+mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv
 
 KEY;PL;TO
diff --git a/docs/src/statistics-examples.md b/docs/src/statistics-examples.md
index b1b7ea7b3..2e80e8a39 100644
--- a/docs/src/statistics-examples.md
+++ b/docs/src/statistics-examples.md
@@ -23,7 +23,7 @@ For one or more specified field names, simply compute p25 and p75, then write th
 
 mlr --oxtab stats1 -f x -a p25,p75 \
     then put '$x_iqr = $x_p75 - $x_p25' \
-    data/medium 
+    data/medium
 
 x_p25 0.24667037823231752
@@ -40,7 +40,7 @@ For wildcarded field names, first compute p25 and p75, then loop over field name
         $["\1_iqr"] = $["\1_p75"] - $["\1_p25"]
       }
     }' \
-    data/medium 
+    data/medium
 
 i_p25 2501
diff --git a/docs/src/why.md b/docs/src/why.md
index a8b2ed585..937bd0386 100644
--- a/docs/src/why.md
+++ b/docs/src/why.md
@@ -48,7 +48,7 @@ Eighth thing: It's an **awful lot of fun to write**. In my experience I didn't f
 
 Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. I consider that price worth paying for the tool-niche which Miller occupies.
 
-Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats. 
+Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats.
 
 A third tradeoff is doing build-from-scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would just be an `eval` of Python code. And it would run slower, but maybe not enough slower to be a problem for most folks. Later I found out about the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should check out `rows` as well.
 
diff --git a/man/manpage.txt b/man/manpage.txt
index a7aec87a7..4262cc6c7 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -967,6 +967,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -f {a,b,c}    Field names for distinct count.
+       -x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
        -n            Show only the number of distinct values. Not compatible with -u.
        -o {name}     Field name for output count. Default "count".
                      Ignored with -u.
@@ -2133,6 +2134,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -g {d,e,f}    Group-by-field names for uniq counts.
+       -x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
        -c            Show repeat counts in addition to unique values.
        -n            Show only the number of distinct values.
        -o {name}     Field name for output count. Default "count".
@@ -3664,4 +3666,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-19                         MILLER(1)
+                                  2023-12-23                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 92224547a..04c2151e3 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-19
+.\"      Date: 2023-12-23
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-19" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-23" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1186,6 +1186,7 @@ Same as uniq -c.
 
 Options:
 -f {a,b,c}    Field names for distinct count.
+-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
 -n            Show only the number of distinct values. Not compatible with -u.
 -o {name}     Field name for output count. Default "count".
               Ignored with -u.
@@ -2700,6 +2701,7 @@ count-distinct. For uniq, -f is a synonym for -g.
 
 Options:
 -g {d,e,f}    Group-by-field names for uniq counts.
+-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
 -c            Show repeat counts in addition to unique values.
 -n            Show only the number of distinct values.
 -o {name}     Field name for output count. Default "count".
diff --git a/pkg/lib/ordered_map.go b/pkg/lib/ordered_map.go
index 093c1ca84..a3d54bd50 100644
--- a/pkg/lib/ordered_map.go
+++ b/pkg/lib/ordered_map.go
@@ -111,6 +111,29 @@ func (omap *OrderedMap) GetWithCheck(key string) (interface{}, bool) {
 	}
 }
 
+func (omap *OrderedMap) GetKeys() []string {
+	keys := make([]string, omap.FieldCount)
+	i := 0
+	for pe := omap.Head; pe != nil; pe = pe.Next {
+		keys[i] = pe.Key
+		i++
+	}
+	return keys
+}
+
+// GetKeysExcept returns the map's keys, in map order, omitting every key that
+// is present in exceptions. The exceptions map is used as a set (Go has no
+// built-in hash-set): only key presence is checked, not the bool value.
+func (omap *OrderedMap) GetKeysExcept(exceptions map[string]bool) []string {
+	keys := make([]string, 0)
+	for pe := omap.Head; pe != nil; pe = pe.Next {
+		if _, present := exceptions[pe.Key]; !present {
+			keys = append(keys, pe.Key)
+		}
+	}
+	return keys
+}
+
 // ----------------------------------------------------------------
 func (omap *OrderedMap) Clear() {
 	omap.FieldCount = 0
diff --git a/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go
index befb5f726..8540ac5dc 100644
--- a/pkg/mlrval/mlrmap_accessors.go
+++ b/pkg/mlrval/mlrmap_accessors.go
@@ -281,6 +281,19 @@ func (mlrmap *Mlrmap) GetKeys() []string {
 	return keys
 }
 
+// GetKeysExcept returns the record's keys, in record order, omitting every key
+// that is present in exceptions. The exceptions map is used as a set (Go has
+// no built-in hash-set): only key presence is checked, not the bool value.
+func (mlrmap *Mlrmap) GetKeysExcept(exceptions map[string]bool) []string {
+	keys := make([]string, 0)
+	for pe := mlrmap.Head; pe != nil; pe = pe.Next {
+		if _, present := exceptions[pe.Key]; !present {
+			keys = append(keys, pe.Key)
+		}
+	}
+	return keys
+}
+
 // ----------------------------------------------------------------
 // TODO: put error-return into this API
 func (mlrmap *Mlrmap) PutNameWithPositionalIndex(position int64, name *Mlrval) {
diff --git a/pkg/mlrval/mlrmap_accessors_test.go b/pkg/mlrval/mlrmap_accessors_test.go
index 890ac6a9e..6bae83f89 100644
--- a/pkg/mlrval/mlrmap_accessors_test.go
+++ b/pkg/mlrval/mlrmap_accessors_test.go
@@ -47,3 +47,22 @@ func TestPutReference(t *testing.T) {
 }
 
 // TODO: TestPrependReference
+
+func TestGetKeysExcept(t *testing.T) {
+	mlrmap := NewMlrmap()
+	mlrmap.PutReference("a", FromInt(1))
+	mlrmap.PutReference("b", FromInt(2))
+
+	exceptions := make(map[string]bool)
+	exceptions["x"] = true
+	exceptions["y"] = true
+
+	assert.Equal(t, mlrmap.GetKeys(), []string{"a", "b"})
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{"a", "b"})
+
+	exceptions["a"] = true
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{"b"})
+
+	exceptions["b"] = true
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{})
+}
diff --git a/pkg/transformers/uniq.go b/pkg/transformers/uniq.go
index f28e6c854..5893b689a 100644
--- a/pkg/transformers/uniq.go
+++ b/pkg/transformers/uniq.go
@@ -43,6 +43,7 @@ func transformerCountDistinctUsage(
 	fmt.Fprintf(o, "\n")
 	fmt.Fprintf(o, "Options:\n")
 	fmt.Fprintf(o, "-f {a,b,c}    Field names for distinct count.\n")
+	fmt.Fprintf(o, "-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.\n")
 	fmt.Fprintf(o, "-n            Show only the number of distinct values. Not compatible with -u.\n")
 	fmt.Fprintf(o, "-o {name}     Field name for output count. Default \"%s\".\n", uniqDefaultOutputFieldName)
 	fmt.Fprintf(o, "              Ignored with -u.\n")
@@ -68,6 +69,7 @@ func transformerCountDistinctParseCLI(
 
 	// Parse local flags
 	var fieldNames []string = nil
+	invertFieldNames := false
 	showNumDistinctOnly := false
 	outputFieldName := uniqDefaultOutputFieldName
 	doLashed := true
@@ -89,6 +91,10 @@ func transformerCountDistinctParseCLI(
 		} else if opt == "-g" || opt == "-f" {
 			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
 
+		} else if opt == "-x" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			invertFieldNames = true
+
 		} else if opt == "-n" {
 			showNumDistinctOnly = true
 
@@ -123,6 +129,7 @@ func transformerCountDistinctParseCLI(
 
 	transformer, err := NewTransformerUniq(
 		fieldNames,
+		invertFieldNames,
 		showCounts,
 		showNumDistinctOnly,
 		outputFieldName,
@@ -149,6 +156,7 @@ func transformerUniqUsage(
 	fmt.Fprintf(o, "\n")
 	fmt.Fprintf(o, "Options:\n")
 	fmt.Fprintf(o, "-g {d,e,f}    Group-by-field names for uniq counts.\n")
+	fmt.Fprintf(o, "-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.\n")
 	fmt.Fprintf(o, "-c            Show repeat counts in addition to unique values.\n")
 	fmt.Fprintf(o, "-n            Show only the number of distinct values.\n")
 	fmt.Fprintf(o, "-o {name}     Field name for output count. Default \"%s\".\n", uniqDefaultOutputFieldName)
@@ -173,6 +181,7 @@ func transformerUniqParseCLI(
 
 	// Parse local flags
 	var fieldNames []string = nil
+	invertFieldNames := false
 	showCounts := false
 	showNumDistinctOnly := false
 	outputFieldName := uniqDefaultOutputFieldName
@@ -195,6 +204,10 @@ func transformerUniqParseCLI(
 		} else if opt == "-g" || opt == "-f" {
 			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
 
+		} else if opt == "-x" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			invertFieldNames = true
+
 		} else if opt == "-c" {
 			showCounts = true
 
@@ -238,6 +251,7 @@ func transformerUniqParseCLI(
 
 	transformer, _ := NewTransformerUniq(
 		fieldNames,
+		invertFieldNames,
 		showCounts,
 		showNumDistinctOnly,
 		outputFieldName,
@@ -250,9 +264,11 @@ func transformerUniqParseCLI(
 
 // ----------------------------------------------------------------
 type TransformerUniq struct {
-	fieldNames      []string
-	showCounts      bool
-	outputFieldName string
+	fieldNames       []string
+	fieldNamesSet    map[string]bool
+	invertFieldNames bool
+	showCounts       bool
+	outputFieldName  string
 
 	// Example:
 	// Input is:
@@ -280,6 +296,7 @@ type TransformerUniq struct {
 	//   "a" => "4" => 4
 	uniqifiedRecordCounts *lib.OrderedMap // record-as-string -> counts
 	uniqifiedRecords      *lib.OrderedMap // record-as-string -> records
+	keysByGroup           *lib.OrderedMap // grouping key -> array of field names
 	countsByGroup         *lib.OrderedMap // grouping key -> count
 	valuesByGroup         *lib.OrderedMap // grouping key -> array of values
 	unlashedCounts        *lib.OrderedMap // field name -> string field value -> count
@@ -291,6 +308,7 @@ type TransformerUniq struct {
 // ----------------------------------------------------------------
 func NewTransformerUniq(
 	fieldNames []string,
+	invertFieldNames bool,
 	showCounts bool,
 	showNumDistinctOnly bool,
 	outputFieldName string,
@@ -299,12 +317,15 @@ func NewTransformerUniq(
 ) (*TransformerUniq, error) {
 
 	tr := &TransformerUniq{
-		fieldNames:      fieldNames,
-		showCounts:      showCounts,
-		outputFieldName: outputFieldName,
+		fieldNames:       fieldNames,
+		fieldNamesSet:    lib.StringListToSet(fieldNames),
+		invertFieldNames: invertFieldNames,
+		showCounts:       showCounts,
+		outputFieldName:  outputFieldName,
 
 		uniqifiedRecordCounts: lib.NewOrderedMap(),
 		uniqifiedRecords:      lib.NewOrderedMap(),
+		keysByGroup:           lib.NewOrderedMap(),
 		countsByGroup:         lib.NewOrderedMap(),
 		valuesByGroup:         lib.NewOrderedMap(),
 		unlashedCounts:        lib.NewOrderedMap(),
@@ -334,6 +355,16 @@ func NewTransformerUniq(
 
 // ----------------------------------------------------------------
 
+func (tr *TransformerUniq) getFieldNamesForGrouping(
+	inrec *mlrval.Mlrmap,
+) []string {
+	if tr.invertFieldNames {
+		return inrec.GetKeysExcept(tr.fieldNamesSet)
+	} else {
+		return tr.fieldNames
+	}
+}
+
 func (tr *TransformerUniq) Transform(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
@@ -441,7 +472,7 @@ func (tr *TransformerUniq) transformUnlashed(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		for _, fieldName := range tr.fieldNames {
+		for _, fieldName := range tr.getFieldNamesForGrouping(inrec) {
 			var countsForFieldName *lib.OrderedMap = nil
 			iCountsForFieldName, present := tr.unlashedCounts.GetWithCheck(fieldName)
 			if !present {
@@ -496,7 +527,7 @@ func (tr *TransformerUniq) transformNumDistinctOnly(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, ok := inrec.GetSelectedValuesJoined(tr.fieldNames)
+		groupingKey, ok := inrec.GetSelectedValuesJoined(tr.getFieldNamesForGrouping(inrec))
 		if ok {
 			iCount, present := tr.countsByGroup.GetWithCheck(groupingKey)
 			if !present {
@@ -528,28 +559,33 @@ func (tr *TransformerUniq) transformWithCounts(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.fieldNames)
+		fieldNamesForGrouping := tr.getFieldNamesForGrouping(inrec)
+
+		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(fieldNamesForGrouping)
 		if ok {
 			iCount, present := tr.countsByGroup.GetWithCheck(groupingKey)
 			if !present {
 				tr.countsByGroup.Put(groupingKey, int64(1))
 				tr.valuesByGroup.Put(groupingKey, selectedValues)
+				tr.keysByGroup.Put(groupingKey, fieldNamesForGrouping)
 			} else {
 				tr.countsByGroup.Put(groupingKey, iCount.(int64)+1)
 			}
 		}
 
 	} else { // end of record stream
-
 		for pa := tr.countsByGroup.Head; pa != nil; pa = pa.Next {
 			outrec := mlrval.NewMlrmapAsRecord()
 			valuesForGroup := tr.valuesByGroup.Get(pa.Key).([]*mlrval.Mlrval)
-			for i, fieldName := range tr.fieldNames {
+			keysForGroup := tr.keysByGroup.Get(pa.Key).([]string)
+
+			for i, fieldNameForGrouping := range keysForGroup {
 				outrec.PutCopy(
-					fieldName,
+					fieldNameForGrouping,
 					valuesForGroup[i],
 				)
 			}
+
 			if tr.showCounts {
 				outrec.PutReference(
 					tr.outputFieldName,
@@ -573,7 +609,7 @@ func (tr *TransformerUniq) transformWithoutCounts(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.fieldNames)
+		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.getFieldNamesForGrouping(inrec))
 		if !ok {
 			return
 		}
@@ -584,9 +620,9 @@ func (tr *TransformerUniq) transformWithoutCounts(
 			tr.valuesByGroup.Put(groupingKey, selectedValues)
 			outrec := mlrval.NewMlrmapAsRecord()
 
-			for i, fieldName := range tr.fieldNames {
+			for i, fieldNameForGrouping := range tr.getFieldNamesForGrouping(inrec) {
 				outrec.PutCopy(
-					fieldName,
+					fieldNameForGrouping,
 					selectedValues[i],
 				)
 			}
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index bdb23ad6c..6dfaf2b80 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -96,6 +96,7 @@ Same as uniq -c.
 
 Options:
 -f {a,b,c}    Field names for distinct count.
+-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
 -n            Show only the number of distinct values. Not compatible with -u.
 -o {name}     Field name for output count. Default "count".
               Ignored with -u.
@@ -1320,6 +1321,7 @@ count-distinct. For uniq, -f is a synonym for -g.
 
 Options:
 -g {d,e,f}    Group-by-field names for uniq counts.
+-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
 -c            Show repeat counts in addition to unique values.
 -n            Show only the number of distinct values.
 -o {name}     Field name for output count. Default "count".
diff --git a/test/cases/verb-uniq/uniq-c-x-change/cmd b/test/cases/verb-uniq/uniq-c-x-change/cmd
new file mode 100644
index 000000000..2f3418461
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-change/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-changed-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-change/experr b/test/cases/verb-uniq/uniq-c-x-change/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-change/expout b/test/cases/verb-uniq/uniq-c-x-change/expout
new file mode 100644
index 000000000..a61ce84e8
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-change/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=2
+weird=red,shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=red,shape=square,odd=77.19910000,count=1
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-het/cmd b/test/cases/verb-uniq/uniq-c-x-het/cmd
new file mode 100644
index 000000000..051906fe1
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-het/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-het/experr b/test/cases/verb-uniq/uniq-c-x-het/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-het/expout b/test/cases/verb-uniq/uniq-c-x-het/expout
new file mode 100644
index 000000000..5392f140e
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-het/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+color=red,shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-long/cmd b/test/cases/verb-uniq/uniq-c-x-long/cmd
new file mode 100644
index 000000000..38fe9e5c3
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-long/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-extra-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-long/experr b/test/cases/verb-uniq/uniq-c-x-long/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-long/expout b/test/cases/verb-uniq/uniq-c-x-long/expout
new file mode 100644
index 000000000..d77e08b27
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-long/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+color=red,shape=circle,count=1
+color=purple,shape=triangle,extra=here,count=1
+color=purple,shape=triangle,count=1
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-short/cmd b/test/cases/verb-uniq/uniq-c-x-short/cmd
new file mode 100644
index 000000000..9561cc361
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-short/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-missing-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-short/experr b/test/cases/verb-uniq/uniq-c-x-short/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-short/expout b/test/cases/verb-uniq/uniq-c-x-short/expout
new file mode 100644
index 000000000..7e7269533
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-short/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-x-change/cmd b/test/cases/verb-uniq/uniq-x-change/cmd
new file mode 100644
index 000000000..43006f390
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-change/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-changed-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-change/experr b/test/cases/verb-uniq/uniq-x-change/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-change/expout b/test/cases/verb-uniq/uniq-x-change/expout
new file mode 100644
index 000000000..67f9598af
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-change/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle
+color=red,shape=square
+weird=red,shape=circle
+color=purple,shape=triangle
+color=red,shape=square,odd=77.19910000
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-het/cmd b/test/cases/verb-uniq/uniq-x-het/cmd
new file mode 100644
index 000000000..326412e62
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-het/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-het/experr b/test/cases/verb-uniq/uniq-x-het/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-het/expout b/test/cases/verb-uniq/uniq-x-het/expout
new file mode 100644
index 000000000..ddc9002b1
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-het/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle
+color=red,shape=square
+color=red,shape=circle
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-long/cmd b/test/cases/verb-uniq/uniq-x-long/cmd
new file mode 100644
index 000000000..bcdfe98e0
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-long/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-extra-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-long/experr b/test/cases/verb-uniq/uniq-x-long/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-long/expout b/test/cases/verb-uniq/uniq-x-long/expout
new file mode 100644
index 000000000..d5b3f26eb
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-long/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle
+color=red,shape=square
+color=red,shape=circle
+color=purple,shape=triangle,extra=here
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-short/cmd b/test/cases/verb-uniq/uniq-x-short/cmd
new file mode 100644
index 000000000..5c2f73021
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-short/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-missing-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-short/experr b/test/cases/verb-uniq/uniq-x-short/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-short/expout b/test/cases/verb-uniq/uniq-x-short/expout
new file mode 100644
index 000000000..b566d5536
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-short/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle
+color=red,shape=square
+shape=circle
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/input/example-with-changed-keys.dkvp b/test/input/example-with-changed-keys.dkvp
new file mode 100644
index 000000000..4ec2ac863
--- /dev/null
+++ b/test/input/example-with-changed-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+weird=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,odd=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example-with-extra-keys.dkvp b/test/input/example-with-extra-keys.dkvp
new file mode 100644
index 000000000..54ecf74e2
--- /dev/null
+++ b/test/input/example-with-extra-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+color=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910,extra=here
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example-with-missing-keys.dkvp b/test/input/example-with-missing-keys.dkvp
new file mode 100644
index 000000000..ae8632ec0
--- /dev/null
+++ b/test/input/example-with-missing-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example.dkvp b/test/input/example.dkvp
new file mode 100644
index 000000000..73bc10242
--- /dev/null
+++ b/test/input/example.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+color=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430

From e3b98cd62188cd67e93dcaa0925c0876f461daa9 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 24 Dec 2023 12:43:26 -0500
Subject: [PATCH 103/456] On-line help info for `mlr join --lk ""` (#1458)

* Doc info for `mlr join --lk ""`

* make dev output
---
 docs/src/manpage.md             | 2 ++
 docs/src/manpage.txt            | 2 ++
 docs/src/reference-verbs.md     | 2 ++
 man/manpage.txt                 | 2 ++
 man/mlr.1                       | 2 ++
 pkg/transformers/join.go        | 2 ++
 test/cases/cli-help/0001/expout | 2 ++
 7 files changed, 14 insertions(+)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 28182f146..47587dacb 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -1338,6 +1338,8 @@ MILLER(1)                                                            MILLER(1)
          --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                       names from the left file. Automatically includes the join-field name(s). Helpful
                       for when you only want a limited subset of information from the left file.
+                      Tip: you can use --lk "": this means the left file becomes solely a row-selector
+                      for the input files.
          --lp {text}  Additional prefix for non-join output field names from
                       the left file
          --rp {text}  Additional prefix for non-join output field names from
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 4262cc6c7..8f6810bf6 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -1317,6 +1317,8 @@ MILLER(1)                                                            MILLER(1)
          --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                       names from the left file. Automatically includes the join-field name(s). Helpful
                       for when you only want a limited subset of information from the left file.
+                      Tip: you can use --lk "": this means the left file becomes solely a row-selector
+                      for the input files.
          --lp {text}  Additional prefix for non-join output field names from
                       the left file
          --rp {text}  Additional prefix for non-join output field names from
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 9a50a1dbb..9e5fff6df 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -1671,6 +1671,8 @@ Options:
   --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                names from the left file. Automatically includes the join-field name(s). Helpful
                for when you only want a limited subset of information from the left file.
+               Tip: you can use --lk "": this means the left file becomes solely a row-selector
+               for the input files.
   --lp {text}  Additional prefix for non-join output field names from
                the left file
   --rp {text}  Additional prefix for non-join output field names from
diff --git a/man/manpage.txt b/man/manpage.txt
index 4262cc6c7..8f6810bf6 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1317,6 +1317,8 @@ MILLER(1)                                                            MILLER(1)
          --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                       names from the left file. Automatically includes the join-field name(s). Helpful
                       for when you only want a limited subset of information from the left file.
+                      Tip: you can use --lk "": this means the left file becomes solely a row-selector
+                      for the input files.
          --lp {text}  Additional prefix for non-join output field names from
                       the left file
          --rp {text}  Additional prefix for non-join output field names from
diff --git a/man/mlr.1 b/man/mlr.1
index 04c2151e3..ae47df883 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -1662,6 +1662,8 @@ Options:
   --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                names from the left file. Automatically includes the join-field name(s). Helpful
                for when you only want a limited subset of information from the left file.
+               Tip: you can use --lk "": this means the left file becomes solely a row-selector
+               for the input files.
   --lp {text}  Additional prefix for non-join output field names from
                the left file
   --rp {text}  Additional prefix for non-join output field names from
diff --git a/pkg/transformers/join.go b/pkg/transformers/join.go
index 3d8547a12..deeea1623 100644
--- a/pkg/transformers/join.go
+++ b/pkg/transformers/join.go
@@ -92,6 +92,8 @@ func transformerJoinUsage(
 	fmt.Fprintf(o, "  --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field\n")
 	fmt.Fprintf(o, "               names from the left file. Automatically includes the join-field name(s). Helpful\n")
 	fmt.Fprintf(o, "               for when you only want a limited subset of information from the left file.\n")
+	fmt.Fprintf(o, "               Tip: you can use --lk \"\": this means the left file becomes solely a row-selector\n")
+	fmt.Fprintf(o, "               for the input files.\n")
 	fmt.Fprintf(o, "  --lp {text}  Additional prefix for non-join output field names from\n")
 	fmt.Fprintf(o, "               the left file\n")
 	fmt.Fprintf(o, "  --rp {text}  Additional prefix for non-join output field names from\n")
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 6dfaf2b80..b25e4a56d 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -467,6 +467,8 @@ Options:
   --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
                names from the left file. Automatically includes the join-field name(s). Helpful
                for when you only want a limited subset of information from the left file.
+               Tip: you can use --lk "": this means the left file becomes solely a row-selector
+               for the input files.
   --lp {text}  Additional prefix for non-join output field names from
                the left file
   --rp {text}  Additional prefix for non-join output field names from

From 2f42c6f508b770e99b8048c40b20470c40dbf845 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Mon, 1 Jan 2024 15:50:56 -0700
Subject: [PATCH 104/456] Fix #1462: remove limit of 1000 on dedupe field names
 (#1463)

* Fix #1462: remove limit of 1000 on dedupe field names

* make dev output
---
 docs/src/manpage.md            | 2 +-
 docs/src/manpage.txt           | 2 +-
 man/manpage.txt                | 2 +-
 man/mlr.1                      | 4 ++--
 pkg/mlrval/mlrmap_accessors.go | 3 +--
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 47587dacb..c4a65ea40 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -3689,5 +3689,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-23                         MILLER(1)
+                                  2024-01-01                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 8f6810bf6..fe77c5672 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3668,4 +3668,4 @@ MILLER(1) MILLER(1) - 2023-12-23 MILLER(1) + 2024-01-01 MILLER(1) diff --git a/man/manpage.txt b/man/manpage.txt index 8f6810bf6..fe77c5672 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3668,4 +3668,4 @@ MILLER(1) MILLER(1) - 2023-12-23 MILLER(1) + 2024-01-01 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index ae47df883..c6c5c540f 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-12-23 +.\" Date: 2024-01-01 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-12-23" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-01-01" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go index 8540ac5dc..0ba61fda5 100644 --- a/pkg/mlrval/mlrmap_accessors.go +++ b/pkg/mlrval/mlrmap_accessors.go @@ -74,7 +74,7 @@ func (mlrmap *Mlrmap) PutReferenceMaybeDedupe(key string, value *Mlrval, dedupe return key, nil } - for i := 2; i < 1000; i++ { + for i := 2; ; i++ { newKey := key + "_" + strconv.Itoa(i) pe := mlrmap.findEntry(newKey) if pe == nil { @@ -82,7 +82,6 @@ func (mlrmap *Mlrmap) PutReferenceMaybeDedupe(key string, value *Mlrval, dedupe return newKey, nil } } - return key, fmt.Errorf("record has too many input fields named \"%s\"", key) } // PutCopy copies the key and value (deep-copying in case the value is array/map). 
From d2559b8387725a7429650bf543ef090e14da7d88 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 16:39:27 -0700 Subject: [PATCH 105/456] Have `clean_whitespace` re-run type inference (#1464) * Have `clean_whitespace` re-infer types * make dev output * unit-test files * drive-by typofix * make dev output --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/reference-dsl-builtin-functions.md | 4 ++-- man/manpage.txt | 4 ++-- man/mlr.1 | 4 ++-- pkg/bifs/strings.go | 3 ++- pkg/dsl/cst/builtin_function_manager.go | 4 ++-- test/cases/dsl-clean-whitespace/0010/cmd | 1 + test/cases/dsl-clean-whitespace/0010/experr | 0 test/cases/dsl-clean-whitespace/0010/expout | 18 ++++++++++++++++++ test/cases/dsl-clean-whitespace/0010/input.csv | 3 +++ test/cases/dsl-clean-whitespace/0010/mlr | 2 ++ 12 files changed, 38 insertions(+), 13 deletions(-) create mode 100644 test/cases/dsl-clean-whitespace/0010/cmd create mode 100644 test/cases/dsl-clean-whitespace/0010/experr create mode 100644 test/cases/dsl-clean-whitespace/0010/expout create mode 100644 test/cases/dsl-clean-whitespace/0010/input.csv create mode 100644 test/cases/dsl-clean-whitespace/0010/mlr diff --git a/docs/src/manpage.md b/docs/src/manpage.md index c4a65ea40..8d2532969 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2312,7 +2312,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Ceiling: nearest integer at or above. 1mclean_whitespace0m - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. @@ -3011,7 +3011,7 @@ MILLER(1) MILLER(1) strmatch(12345, "34") is true 1mstrmatchx0m - (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. 
are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. Examples: strmatchx("a", "abc") returns: { diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index fe77c5672..915a1b727 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2291,7 +2291,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Ceiling: nearest integer at or above. 1mclean_whitespace0m - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. @@ -2990,7 +2990,7 @@ MILLER(1) MILLER(1) strmatch(12345, "34") is true 1mstrmatchx0m - (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. 
Examples: strmatchx("a", "abc") returns: { diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index f3b8efdef..3a55821f3 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1209,7 +1209,7 @@ capitalize (class=string #args=1) Convert string's first character to uppercase ### clean_whitespace
-clean_whitespace  (class=string #args=1) Same as collapse_whitespace and strip.
+clean_whitespace  (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
 
@@ -1364,7 +1364,7 @@ strmatch(12345, "34") is true ### strmatchx
-strmatchx  (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
+strmatchx  (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
 Examples:
 strmatchx("a", "abc") returns:
   {
diff --git a/man/manpage.txt b/man/manpage.txt
index fe77c5672..915a1b727 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -2291,7 +2291,7 @@ MILLER(1)                                                            MILLER(1)
         (class=math #args=1) Ceiling: nearest integer at or above.
 
    1mclean_whitespace0m
-        (class=string #args=1) Same as collapse_whitespace and strip.
+        (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
 
    1mcollapse_whitespace0m
         (class=string #args=1) Strip repeated whitespace from string.
@@ -2990,7 +2990,7 @@ MILLER(1)                                                            MILLER(1)
        strmatch(12345, "34") is true
 
    1mstrmatchx0m
-        (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
+        (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
        Examples:
        strmatchx("a", "abc") returns:
          {
diff --git a/man/mlr.1 b/man/mlr.1
index c6c5c540f..28940393c 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -3100,7 +3100,7 @@ Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2}
 .RS 0
 .\}
 .nf
- (class=string #args=1) Same as collapse_whitespace and strip.
+ (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
 .fi
 .if n \{\
 .RE
@@ -4675,7 +4675,7 @@ strmatch(12345, "34") is true
 .RS 0
 .\}
 .nf
- (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
+ (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
 Examples:
 strmatchx("a", "abc") returns:
   {
diff --git a/pkg/bifs/strings.go b/pkg/bifs/strings.go
index cd68ee480..e77de7c68 100644
--- a/pkg/bifs/strings.go
+++ b/pkg/bifs/strings.go
@@ -344,11 +344,12 @@ func BIF_capitalize(input1 *mlrval.Mlrval) *mlrval.Mlrval {
 
 // ----------------------------------------------------------------
 func BIF_clean_whitespace(input1 *mlrval.Mlrval) *mlrval.Mlrval {
-	return BIF_strip(
+	mv := BIF_strip(
 		BIF_collapse_whitespace_regexp(
 			input1, _whitespace_regexp,
 		),
 	)
+	return mlrval.FromInferredType(mv.String())
 }
 
 // ================================================================
diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go
index c55f9edd9..965c9529b 100644
--- a/pkg/dsl/cst/builtin_function_manager.go
+++ b/pkg/dsl/cst/builtin_function_manager.go
@@ -355,7 +355,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib
 		{
 			name:  "strmatchx",
 			class: FUNC_CLASS_STRING,
-			help:  `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.",
+			help:  `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.",
 			examples: []string{
 				`strmatchx("a", "abc") returns:`,
 				`  {`,
@@ -444,7 +444,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib
 		{
 			name:      "clean_whitespace",
 			class:     FUNC_CLASS_STRING,
-			help:      "Same as collapse_whitespace and strip.",
+			help:      "Same as collapse_whitespace and strip, followed by type inference.",
 			unaryFunc: bifs.BIF_clean_whitespace,
 		},
 
diff --git a/test/cases/dsl-clean-whitespace/0010/cmd b/test/cases/dsl-clean-whitespace/0010/cmd
new file mode 100644
index 000000000..2fd915d02
--- /dev/null
+++ b/test/cases/dsl-clean-whitespace/0010/cmd
@@ -0,0 +1 @@
+mlr --icsv --ojson clean-whitespace then put -f ${CASEDIR}/mlr ${CASEDIR}/input.csv
diff --git a/test/cases/dsl-clean-whitespace/0010/experr b/test/cases/dsl-clean-whitespace/0010/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-clean-whitespace/0010/expout b/test/cases/dsl-clean-whitespace/0010/expout
new file mode 100644
index 000000000..db3fe878d
--- /dev/null
+++ b/test/cases/dsl-clean-whitespace/0010/expout
@@ -0,0 +1,18 @@
+[
+{
+  "a": 1,
+  "b": 2,
+  "c": 3,
+  "d": 4,
+  "e": 9,
+  "t": "int"
+},
+{
+  "a": 5,
+  "b": 6,
+  "c": 7,
+  "d": 8,
+  "e": 13,
+  "t": "int"
+}
+]
diff --git a/test/cases/dsl-clean-whitespace/0010/input.csv b/test/cases/dsl-clean-whitespace/0010/input.csv
new file mode 100644
index 000000000..432037239
--- /dev/null
+++ b/test/cases/dsl-clean-whitespace/0010/input.csv
@@ -0,0 +1,3 @@
+a, b, c, d
+1, 2, 3, 4
+5, 6, 7, 8
diff --git a/test/cases/dsl-clean-whitespace/0010/mlr b/test/cases/dsl-clean-whitespace/0010/mlr
new file mode 100644
index 000000000..e51c30c8b
--- /dev/null
+++ b/test/cases/dsl-clean-whitespace/0010/mlr
@@ -0,0 +1,2 @@
+$e = $d + 5;
+$t = typeof($d)

From 664a84fadb342fe4b1615edef50806ba719b6e7a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 5 Jan 2024 07:41:39 -0500
Subject: [PATCH 106/456] Bump golang.org/x/term from 0.15.0 to 0.16.0 (#1466)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.15.0 to 0.16.0.
- [Commits](https://github.com/golang/term/compare/v0.15.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 4 ++--
 go.sum | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/go.mod b/go.mod
index d6288da32..c5a7600fd 100644
--- a/go.mod
+++ b/go.mod
@@ -26,8 +26,8 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/sys v0.15.0
-	golang.org/x/term v0.15.0
+	golang.org/x/sys v0.16.0
+	golang.org/x/term v0.16.0
 	golang.org/x/text v0.14.0
 )
 
diff --git a/go.sum b/go.sum
index 25fed86dd..82013f687 100644
--- a/go.sum
+++ b/go.sum
@@ -40,10 +40,10 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
-golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
-golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
+golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
+golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
+golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
 golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From f2be82b7bb01f1851bac2573ba29f8fe8cbe38a4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:42:38 -0500
Subject: [PATCH 107/456] Bump actions/cache from 3.3.2 to 3.3.3 (#1468)

Bumps [actions/cache](https://github.com/actions/cache) from 3.3.2 to 3.3.3.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/704facf57e6136b1bc63b828d79edcd491f0ee84...e12d46a63a90f2fae62d114769bbf2a179198b5c)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 3e69dee50..bb407b8fb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84
+        uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c
         with:
           path: |
             ~/.cache/go-build

From 4c0bd62b64cf7b82c10ae551f6e1195ea545040b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Jan 2024 15:24:47 -0500
Subject: [PATCH 108/456] Bump actions/upload-artifact from 4.0.0 to 4.1.0
 (#1469)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/c7d193f32edcb7bfad88892161225aeda64e9392...1eb3cb2b3e0f29609092a73eb033bb759a334595)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index b318f6bb9..df5532c4e 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392
+    - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From ee30154c6f44f687a290ce076297e656adfb1524 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jan 2024 09:30:49 -0500
Subject: [PATCH 109/456] Bump actions/cache from 3.3.3 to 4.0.0 (#1470)

Bumps [actions/cache](https://github.com/actions/cache) from 3.3.3 to 4.0.0.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/e12d46a63a90f2fae62d114769bbf2a179198b5c...13aacd865c20de90d75de3b17ebe84f7a17d57d2)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bb407b8fb..aa4c73aba 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c
+        uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2
         with:
           path: |
             ~/.cache/go-build

From 76408f33588cea735981309ac638bbcc21614b72 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 19 Jan 2024 09:17:18 -0500
Subject: [PATCH 110/456] Bump actions/upload-artifact from 4.1.0 to 4.2.0
 (#1471)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.1.0 to 4.2.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/1eb3cb2b3e0f29609092a73eb033bb759a334595...694cdabd8bdb0f10b2cea11669e1bf5453eed0a6)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index df5532c4e..733c6cb48 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
+    - uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 794a754c367507e5c1ebd07502d1c591f592207a Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 12:59:12 -0500
Subject: [PATCH 111/456] Support PPRINT barred input (#1472)

* Support PPRINT barred input

* regression-test files

* output from `make dev`

* doc updates
---
 docs/src/file-formats.md                      |  33 +-
 docs/src/file-formats.md.in                   |   8 +-
 docs/src/manpage.md                           |  16 +-
 docs/src/manpage.txt                          |  16 +-
 docs/src/reference-main-flag-list.md          |   3 +-
 man/manpage.txt                               |  16 +-
 man/mkman.rb                                  |   2 +-
 man/mlr.1                                     |  10 +-
 pkg/cli/option_parse.go                       |  15 +-
 pkg/cli/option_types.go                       |   1 +
 pkg/input/record_reader_csvlite.go            |  20 -
 pkg/input/record_reader_pprint.go             | 462 ++++++++++++++++++
 .../barred-input-headerless/cmd               |   1 +
 .../barred-input-headerless/experr            |   2 +
 .../barred-input-headerless/expout            |   0
 .../barred-input-headerless/should-fail       |   0
 test/cases/io-barred-pprint/barred-input/cmd  |   1 +
 .../io-barred-pprint/barred-input/experr      |   2 +
 .../io-barred-pprint/barred-input/expout      |   0
 .../io-barred-pprint/barred-input/should-fail |   0
 test/input/abixy.tbl                          |  14 +
 21 files changed, 565 insertions(+), 57 deletions(-)
 create mode 100644 pkg/input/record_reader_pprint.go
 create mode 100644 test/cases/io-barred-pprint/barred-input-headerless/cmd
 create mode 100644 test/cases/io-barred-pprint/barred-input-headerless/experr
 create mode 100644 test/cases/io-barred-pprint/barred-input-headerless/expout
 create mode 100644 test/cases/io-barred-pprint/barred-input-headerless/should-fail
 create mode 100644 test/cases/io-barred-pprint/barred-input/cmd
 create mode 100644 test/cases/io-barred-pprint/barred-input/experr
 create mode 100644 test/cases/io-barred-pprint/barred-input/expout
 create mode 100644 test/cases/io-barred-pprint/barred-input/should-fail
 create mode 100644 test/input/abixy.tbl

diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md
index 8611a7a22..2df1004a7 100644
--- a/docs/src/file-formats.md
+++ b/docs/src/file-formats.md
@@ -366,7 +366,7 @@ Note that while Miller is a line-at-a-time processor and retains input lines in
 
 See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream.
 
-For output only (this isn't supported in the input-scanner as of 5.0.0) you can use `--barred` with pprint output format:
+Since Miller 5.0.0, you can use `--barred` with pprint output format; since Miller 6.11.0, `--barred-output` is accepted as a synonym:
 
 
 mlr --opprint --barred cat data/small
@@ -383,6 +383,37 @@ For output only (this isn't supported in the input-scanner as of 5.0.0) you can
 +-----+-----+---+----------+----------+
 
+Since Miller 6.11.0, you can use `--barred-input` with pprint input format: + +
+mlr -o pprint --barred cat data/small | mlr -i pprint --barred-input -o json filter '$b == "pan"'
+
+
+[
+{
+  "a": "pan",
+  "b": "pan",
+  "i": 1,
+  "x": 0.346791,
+  "y": 0.726802
+},
+{
+  "a": "eks",
+  "b": "pan",
+  "i": 2,
+  "x": 0.758679,
+  "y": 0.522151
+},
+{
+  "a": "wye",
+  "b": "pan",
+  "i": 5,
+  "x": 0.573288,
+  "y": 0.863624
+}
+]
+
+ ## Markdown tabular Markdown format looks like this: diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in index fd624a80e..c64609a00 100644 --- a/docs/src/file-formats.md.in +++ b/docs/src/file-formats.md.in @@ -153,12 +153,18 @@ Note that while Miller is a line-at-a-time processor and retains input lines in See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream. -For output only (this isn't supported in the input-scanner as of 5.0.0) you can use `--barred` with pprint output format: +Since Miller 5.0.0, you can use `--barred` or `--barred-output` with pprint output format: GENMD-RUN-COMMAND mlr --opprint --barred cat data/small GENMD-EOF +Since Miller 6.11.0, you can use `--barred-input` with pprint output format: + +GENMD-RUN-COMMAND +mlr -o pprint --barred cat data/small | mlr -i pprint --barred-input -o json filter '$b == "pan"' +GENMD-EOF + ## Markdown tabular Markdown format looks like this: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 8d2532969..2381322af 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,9 +19,7 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -697,8 +695,10 @@ MILLER(1)                                                            MILLER(1)
 1mPPRINT-ONLY FLAGS0m
        These are flags which are applicable to PPRINT format.
 
-       --barred                 Prints a border around PPRINT output (not available
-                                for input).
+       --barred or --barred-output
+                                Prints a border around PPRINT output.
+       --barred-input           When used in conjunction with --pprint, accepts
+                                barred input.
        --right                  Right-justifies all fields for PPRINT output.
 
 1mPROFILING FLAGS0m
@@ -807,7 +807,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -3687,7 +3687,5 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-01-01                         MILLER(1)
+                                  2024-01-20                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 915a1b727..4edeb4b37 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,6 +1,4 @@ -MILLER(1) MILLER(1) - - +4mMILLER24m(1) 4mMILLER24m(1) 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -676,8 +674,10 @@ MILLER(1) MILLER(1) 1mPPRINT-ONLY FLAGS0m These are flags which are applicable to PPRINT format. - --barred Prints a border around PPRINT output (not available - for input). + --barred or --barred-output + Prints a border around PPRINT output. + --barred-input When used in conjunction with --pprint, accepts + barred input. --right Right-justifies all fields for PPRINT output. 1mPROFILING FLAGS0m @@ -786,7 +786,7 @@ MILLER(1) MILLER(1) markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -3666,6 +3666,4 @@ MILLER(1) MILLER(1) MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - - - 2024-01-01 MILLER(1) + 2024-01-20 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index a53d1565e..758f10a37 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -373,7 +373,8 @@ These are flags which are applicable to PPRINT format. **Flags:** -* `--barred`: Prints a border around PPRINT output (not available for input). +* `--barred or --barred-output`: Prints a border around PPRINT output. +* `--barred-input`: When used in conjunction with --pprint, accepts barred input. * `--right`: Right-justifies all fields for PPRINT output. 
## Profiling flags diff --git a/man/manpage.txt b/man/manpage.txt index 915a1b727..4edeb4b37 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1,6 +1,4 @@ -MILLER(1) MILLER(1) - - +4mMILLER24m(1) 4mMILLER24m(1) 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -676,8 +674,10 @@ MILLER(1) MILLER(1) 1mPPRINT-ONLY FLAGS0m These are flags which are applicable to PPRINT format. - --barred Prints a border around PPRINT output (not available - for input). + --barred or --barred-output + Prints a border around PPRINT output. + --barred-input When used in conjunction with --pprint, accepts + barred input. --right Right-justifies all fields for PPRINT output. 1mPROFILING FLAGS0m @@ -786,7 +786,7 @@ MILLER(1) MILLER(1) markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -3666,6 +3666,4 @@ MILLER(1) MILLER(1) MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - - - 2024-01-01 MILLER(1) + 2024-01-20 4mMILLER24m(1) diff --git a/man/mkman.rb b/man/mkman.rb index 6b13f5462..325515fe5 100755 --- a/man/mkman.rb +++ b/man/mkman.rb @@ -19,7 +19,7 @@ def main # Live code-generation needs to be using mlr from *this* tree, not from # somewhere else in the PATH. unless File.executable?('../mlr') - $stderr.puts "#{$0}: Need ../../mlr to exist: please check 'make build' in ../.." + $stderr.puts "#{$0}: Need ../mlr to exist: please check 'make build' in ../.." 
exit 1 end `../mlr --version` diff --git a/man/mlr.1 b/man/mlr.1 index 28940393c..a9367fead 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-01-01 +.\" Date: 2024-01-20 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-01-01" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-01-20" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -811,8 +811,10 @@ those can be joined with a "-", like "red-bold", "bold-170", "bold-underline", e .nf These are flags which are applicable to PPRINT format. ---barred Prints a border around PPRINT output (not available - for input). +--barred or --barred-output + Prints a border around PPRINT output. +--barred-input When used in conjunction with --pprint, accepts + barred input. --right Right-justifies all fields for PPRINT output. 
.fi .if n \{\ diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 5b8414f5d..00e2c94d6 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -494,13 +494,24 @@ var PPRINTOnlyFlagSection = FlagSection{ }, { - name: "--barred", - help: "Prints a border around PPRINT output (not available for input).", + name: "--barred", + altNames: []string{"--barred-output"}, + help: "Prints a border around PPRINT output.", parser: func(args []string, argc int, pargi *int, options *TOptions) { options.WriterOptions.BarredPprintOutput = true *pargi += 1 }, }, + + { + name: "--barred-input", + help: "When used in conjunction with --pprint, accepts barred input.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + options.ReaderOptions.BarredPprintInput = true + options.ReaderOptions.IFS = "|" + *pargi += 1 + }, + }, }, } diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go index d959e0c52..859d67a8d 100644 --- a/pkg/cli/option_types.go +++ b/pkg/cli/option_types.go @@ -57,6 +57,7 @@ type TReaderOptions struct { AllowRaggedCSVInput bool CSVLazyQuotes bool CSVTrimLeadingSpace bool + BarredPprintInput bool CommentHandling TCommentHandling CommentString string diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go index 222064358..67d7ebced 100644 --- a/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go @@ -78,26 +78,6 @@ func NewRecordReaderCSVLite( return reader, nil } -func NewRecordReaderPPRINT( - readerOptions *cli.TReaderOptions, - recordsPerBatch int64, -) (*RecordReaderCSVLite, error) { - reader := &RecordReaderCSVLite{ - readerOptions: readerOptions, - recordsPerBatch: recordsPerBatch, - fieldSplitter: newFieldSplitter(readerOptions), - - useVoidRep: true, - voidRep: "-", - } - if reader.readerOptions.UseImplicitCSVHeader { - reader.recordBatchGetter = getRecordBatchImplicitCSVHeader - } else { - reader.recordBatchGetter = getRecordBatchExplicitCSVHeader - } 
- return reader, nil -} - func (reader *RecordReaderCSVLite) Read( filenames []string, context types.Context, diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go new file mode 100644 index 000000000..9a2f279f3 --- /dev/null +++ b/pkg/input/record_reader_pprint.go @@ -0,0 +1,462 @@ +package input + +// Multi-file cases: +// +// a,a a,b c d +// -- FILE1: -- FILE1: -- FILE1: -- FILE1: +// a,b,c a,b,c a,b,c a,b,c +// 1,2,3 1,2,3 1,2,3 1,2,3 +// 4,5,6 4,5,6 4,5,6 4,5,6 +// -- FILE2: -- FILE2: +// a,b,c d,e,f,g a,b,c d,e,f +// 7,8,9 3,4,5,6 7,8,9 3,4,5 +// --OUTPUT: --OUTPUT: --OUTPUT: --OUTPUT: +// a,b,c a,b,c a,b,c a,b,c +// 1,2,3 1,2,3 1,2,3 1,2,3 +// 4,5,6 4,5,6 4,5,6 4,5,6 +// 7,8,9 7,8,9 +// d,e,f,g d,e,f +// 3,4,5,6 3,4,5 + +import ( + "container/list" + "fmt" + "io" + "regexp" + "strconv" + "strings" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" +) + +func NewRecordReaderPPRINT( + readerOptions *cli.TReaderOptions, + recordsPerBatch int64, +) (IRecordReader, error) { + if readerOptions.BarredPprintInput { + // Implemented in this file + + // XXX TEMP + readerOptions.IFS = "|" + readerOptions.AllowRepeatIFS = false + + reader := &RecordReaderPprintBarred{ + readerOptions: readerOptions, + recordsPerBatch: recordsPerBatch, + separatorMatcher: regexp.MustCompile(`^\+[-+]*\+`), + fieldSplitter: newFieldSplitter(readerOptions), + } + if reader.readerOptions.UseImplicitCSVHeader { + reader.recordBatchGetter = getRecordBatchImplicitPprintHeader + } else { + reader.recordBatchGetter = getRecordBatchExplicitPprintHeader + } + return reader, nil + + } else { + // Use the CSVLite record-reader, which is implemented in another file, + // with multiple spaces instead of commas + reader := &RecordReaderCSVLite{ + readerOptions: readerOptions, + recordsPerBatch: recordsPerBatch, + fieldSplitter: newFieldSplitter(readerOptions), + + 
useVoidRep: true, + voidRep: "-", + } + // XXX RENAME THERE + if reader.readerOptions.UseImplicitCSVHeader { + reader.recordBatchGetter = getRecordBatchImplicitCSVHeader + } else { + reader.recordBatchGetter = getRecordBatchExplicitCSVHeader + } + return reader, nil + } +} + +type RecordReaderPprintBarred struct { + readerOptions *cli.TReaderOptions + recordsPerBatch int64 // distinct from readerOptions.RecordsPerBatch for join/repl + + separatorMatcher *regexp.Regexp + fieldSplitter iFieldSplitter + recordBatchGetter recordBatchGetterPprint + + inputLineNumber int64 + headerStrings []string +} + +// recordBatchGetterPprint points to either an explicit-PPRINT-header or +// implicit-PPRINT-header record-batch getter. +type recordBatchGetterPprint func( + reader *RecordReaderPprintBarred, + linesChannel <-chan *list.List, + filename string, + context *types.Context, + errorChannel chan error, +) ( + recordsAndContexts *list.List, + eof bool, +) + +func (reader *RecordReaderPprintBarred) Read( + filenames []string, + context types.Context, + readerChannel chan<- *list.List, // list of *types.RecordAndContext + errorChannel chan error, + downstreamDoneChannel <-chan bool, // for mlr head +) { + if filenames != nil { // nil for mlr -n + if len(filenames) == 0 { // read from stdin + handle, err := lib.OpenStdin( + reader.readerOptions.Prepipe, + reader.readerOptions.PrepipeIsRaw, + reader.readerOptions.FileInputEncoding, + ) + if err != nil { + errorChannel <- err + return + } + reader.processHandle( + handle, + "(stdin)", + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) + } else { + for _, filename := range filenames { + handle, err := lib.OpenFileForRead( + filename, + reader.readerOptions.Prepipe, + reader.readerOptions.PrepipeIsRaw, + reader.readerOptions.FileInputEncoding, + ) + if err != nil { + errorChannel <- err + return + } + reader.processHandle( + handle, + filename, + &context, + readerChannel, + errorChannel, + 
downstreamDoneChannel, + ) + handle.Close() + } + } + } + readerChannel <- types.NewEndOfStreamMarkerList(&context) +} + +func (reader *RecordReaderPprintBarred) processHandle( + handle io.Reader, + filename string, + context *types.Context, + readerChannel chan<- *list.List, // list of *types.RecordAndContext + errorChannel chan error, + downstreamDoneChannel <-chan bool, // for mlr head +) { + context.UpdateForStartOfFile(filename) + reader.inputLineNumber = 0 + reader.headerStrings = nil + + recordsPerBatch := reader.recordsPerBatch + lineScanner := NewLineScanner(handle, reader.readerOptions.IRS) + linesChannel := make(chan *list.List, recordsPerBatch) + go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) + + for { + recordsAndContexts, eof := reader.recordBatchGetter(reader, linesChannel, filename, context, errorChannel) + if recordsAndContexts.Len() > 0 { + readerChannel <- recordsAndContexts + } + if eof { + break + } + } +} + +func getRecordBatchExplicitPprintHeader( + reader *RecordReaderPprintBarred, + linesChannel <-chan *list.List, + filename string, + context *types.Context, + errorChannel chan error, +) ( + recordsAndContexts *list.List, + eof bool, +) { + recordsAndContexts = list.New() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames + + lines, more := <-linesChannel + if !more { + return recordsAndContexts, true + } + + for e := lines.Front(); e != nil; e = e.Next() { + line := e.Value.(string) + + reader.inputLineNumber++ + + // Check for comments-in-data feature + // TODO: function-pointer this away + if reader.readerOptions.CommentHandling != cli.CommentsAreData { + if strings.HasPrefix(line, reader.readerOptions.CommentString) { + if reader.readerOptions.CommentHandling == cli.PassComments { + recordsAndContexts.PushBack(types.NewOutputString(line+"\n", context)) + continue + } else if reader.readerOptions.CommentHandling == cli.SkipComments { + continue + } + // else comments are data + } + } 
+ + if line == "" { + // Reset to new schema + reader.headerStrings = nil + continue + } + + // Example input: + // +-----+-----+----+---------------------+---------------------+ + // | a | b | i | x | y | + // +-----+-----+----+---------------------+---------------------+ + // | pan | pan | 1 | 0.3467901443380824 | 0.7268028627434533 | + // | eks | pan | 2 | 0.7586799647899636 | 0.5221511083334797 | + // +-----+-----+----+---------------------+---------------------+ + + // Skip lines like + // +-----+-----+----+---------------------+---------------------+ + if reader.separatorMatcher.MatchString(line) { + continue + } + + // Skip the leading and trailing pipes + paddedFields := reader.fieldSplitter.Split(line) + npad := len(paddedFields) + fields := make([]string, npad-2) + for i, _ := range paddedFields { + if i == 0 || i == npad-1 { + continue + } + fields[i-1] = strings.TrimSpace(paddedFields[i]) + } + + if reader.headerStrings == nil { + reader.headerStrings = fields + // Get data lines on subsequent loop iterations + } else { + if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { + err := fmt.Errorf( + "mlr: PPRINT-barred header/data length mismatch %d != %d "+ + "at filename %s line %d.\n", + len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, + ) + errorChannel <- err + return + } + + record := mlrval.NewMlrmapAsRecord() + if !reader.readerOptions.AllowRaggedCSVInput { + for i, field := range fields { + value := mlrval.FromDeferredType(field) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } + } + } else { + nh := int64(len(reader.headerStrings)) + nd := int64(len(fields)) + n := lib.IntMin2(nh, nd) + var i int64 + for i = 0; i < n; i++ { + field := fields[i] + value := mlrval.FromDeferredType(field) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != 
nil { + errorChannel <- err + return + } + } + if nh < nd { + // if header shorter than data: use 1-up itoa keys + for i = nh; i < nd; i++ { + key := strconv.FormatInt(i+1, 10) + value := mlrval.FromDeferredType(fields[i]) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } + } + } + if nh > nd { + // if header longer than data: use "" values + for i = nd; i < nh; i++ { + record.PutCopy(reader.headerStrings[i], mlrval.VOID) + } + } + } + + context.UpdateForInputRecord() + recordsAndContexts.PushBack(types.NewRecordAndContext(record, context)) + + } + } + + return recordsAndContexts, false +} + +func getRecordBatchImplicitPprintHeader( + reader *RecordReaderPprintBarred, + linesChannel <-chan *list.List, + filename string, + context *types.Context, + errorChannel chan error, +) ( + recordsAndContexts *list.List, + eof bool, +) { + recordsAndContexts = list.New() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames + + lines, more := <-linesChannel + if !more { + return recordsAndContexts, true + } + + for e := lines.Front(); e != nil; e = e.Next() { + line := e.Value.(string) + + reader.inputLineNumber++ + + // Check for comments-in-data feature + // TODO: function-pointer this away + if reader.readerOptions.CommentHandling != cli.CommentsAreData { + if strings.HasPrefix(line, reader.readerOptions.CommentString) { + if reader.readerOptions.CommentHandling == cli.PassComments { + recordsAndContexts.PushBack(types.NewOutputString(line+"\n", context)) + continue + } else if reader.readerOptions.CommentHandling == cli.SkipComments { + continue + } + // else comments are data + } + } + + if line == "" { + // Reset to new schema + reader.headerStrings = nil + continue + } + + // Example input: + // +-----+-----+----+---------------------+---------------------+ + // | a | b | i | x | y | + // +-----+-----+----+---------------------+---------------------+ + // | pan | pan | 1 | 
0.3467901443380824 | 0.7268028627434533 | + // | eks | pan | 2 | 0.7586799647899636 | 0.5221511083334797 | + // +-----+-----+----+---------------------+---------------------+ + + // Skip lines like + // +-----+-----+----+---------------------+---------------------+ + if reader.separatorMatcher.MatchString(line) { + continue + } + + // Skip the leading and trailing pipes + paddedFields := reader.fieldSplitter.Split(line) + npad := len(paddedFields) + fields := make([]string, npad-2) + for i, _ := range paddedFields { + if i == 0 || i == npad-1 { + continue + } + fields[i-1] = strings.TrimSpace(paddedFields[i]) + } + + if reader.headerStrings == nil { + n := len(fields) + reader.headerStrings = make([]string, n) + for i := 0; i < n; i++ { + reader.headerStrings[i] = strconv.Itoa(i + 1) + } + } else { + if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { + err := fmt.Errorf( + "mlr: CSV header/data length mismatch %d != %d "+ + "at filename %s line %d.\n", + len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, + ) + errorChannel <- err + return + } + } + + record := mlrval.NewMlrmapAsRecord() + if !reader.readerOptions.AllowRaggedCSVInput { + for i, field := range fields { + value := mlrval.FromDeferredType(field) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } + } + } else { + nh := int64(len(reader.headerStrings)) + nd := int64(len(fields)) + n := lib.IntMin2(nh, nd) + var i int64 + for i = 0; i < n; i++ { + field := fields[i] + value := mlrval.FromDeferredType(field) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } + } + if nh < nd { + // if header shorter than data: use 1-up itoa keys + key := strconv.FormatInt(i+1, 10) + value := mlrval.FromDeferredType(fields[i]) + _, err := record.PutReferenceMaybeDedupe(key, 
value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } + } + if nh > nd { + // if header longer than data: use "" values + for i = nd; i < nh; i++ { + _, err := record.PutReferenceMaybeDedupe( + reader.headerStrings[i], + mlrval.VOID.Copy(), + dedupeFieldNames, + ) + if err != nil { + errorChannel <- err + return + } + } + } + } + + context.UpdateForInputRecord() + recordsAndContexts.PushBack(types.NewRecordAndContext(record, context)) + } + + return recordsAndContexts, false +} diff --git a/test/cases/io-barred-pprint/barred-input-headerless/cmd b/test/cases/io-barred-pprint/barred-input-headerless/cmd new file mode 100644 index 000000000..41ebbe671 --- /dev/null +++ b/test/cases/io-barred-pprint/barred-input-headerless/cmd @@ -0,0 +1 @@ +mlr --hi --i pprint --barred-input -o json test/input/abixy.tbl diff --git a/test/cases/io-barred-pprint/barred-input-headerless/experr b/test/cases/io-barred-pprint/barred-input-headerless/experr new file mode 100644 index 000000000..87bf02e26 --- /dev/null +++ b/test/cases/io-barred-pprint/barred-input-headerless/experr @@ -0,0 +1,2 @@ +mlr: option "--i" not recognized. +Please run "mlr --help" for usage information. 
diff --git a/test/cases/io-barred-pprint/barred-input-headerless/expout b/test/cases/io-barred-pprint/barred-input-headerless/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-barred-pprint/barred-input-headerless/should-fail b/test/cases/io-barred-pprint/barred-input-headerless/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-barred-pprint/barred-input/cmd b/test/cases/io-barred-pprint/barred-input/cmd new file mode 100644 index 000000000..f84126571 --- /dev/null +++ b/test/cases/io-barred-pprint/barred-input/cmd @@ -0,0 +1 @@ +mlr --i pprint --barred-input -o json test/input/abixy.tbl diff --git a/test/cases/io-barred-pprint/barred-input/experr b/test/cases/io-barred-pprint/barred-input/experr new file mode 100644 index 000000000..87bf02e26 --- /dev/null +++ b/test/cases/io-barred-pprint/barred-input/experr @@ -0,0 +1,2 @@ +mlr: option "--i" not recognized. +Please run "mlr --help" for usage information. diff --git a/test/cases/io-barred-pprint/barred-input/expout b/test/cases/io-barred-pprint/barred-input/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-barred-pprint/barred-input/should-fail b/test/cases/io-barred-pprint/barred-input/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/input/abixy.tbl b/test/input/abixy.tbl new file mode 100644 index 000000000..448a68bf0 --- /dev/null +++ b/test/input/abixy.tbl @@ -0,0 +1,14 @@ ++-----+-----+----+---------------------+---------------------+ +| a | b | i | x | y | ++-----+-----+----+---------------------+---------------------+ +| pan | pan | 1 | 0.3467901443380824 | 0.7268028627434533 | +| eks | pan | 2 | 0.7586799647899636 | 0.5221511083334797 | +| wye | wye | 3 | 0.20460330576630303 | 0.33831852551664776 | +| eks | wye | 4 | 0.38139939387114097 | 0.13418874328430463 | +| wye | pan | 5 | 0.5732889198020006 | 0.8636244699032729 | +| zee | pan | 6 | 0.5271261600918548 | 
0.49322128674835697 | +| eks | zee | 7 | 0.6117840605678454 | 0.1878849191181694 | +| zee | wye | 8 | 0.5985540091064224 | 0.976181385699006 | +| hat | wye | 9 | 0.03144187646093577 | 0.7495507603507059 | +| pan | wye | 10 | 0.5026260055412137 | 0.9526183602969864 | ++-----+-----+----+---------------------+---------------------+ From aff07efe3aa0d9ee80e0cd6fcf6fa95c401f5bd3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 20 Jan 2024 13:01:37 -0500 Subject: [PATCH 112/456] typofix --- docs/src/file-formats.md | 2 +- docs/src/file-formats.md.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 2df1004a7..9b0cb6362 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -383,7 +383,7 @@ Since Miller 5.0.0, you can use `--barred` or `--barred-output` with pprint outp +-----+-----+---+----------+----------+
-Since Miller 6.11.0, you can use `--barred-input` with pprint output format: +Since Miller 6.11.0, you can use `--barred-input` with pprint input format:
 mlr -o pprint --barred cat data/small | mlr -i pprint --barred-input -o json filter '$b == "pan"'
diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in
index c64609a00..8da809fad 100644
--- a/docs/src/file-formats.md.in
+++ b/docs/src/file-formats.md.in
@@ -159,7 +159,7 @@ GENMD-RUN-COMMAND
 mlr --opprint --barred cat data/small
 GENMD-EOF
 
-Since Miller 6.11.0, you can use `--barred-input` with pprint output format:
+Since Miller 6.11.0, you can use `--barred-input` with pprint input format:
 
 GENMD-RUN-COMMAND
 mlr -o pprint --barred cat data/small | mlr -i pprint --barred-input -o json filter '$b == "pan"'

From bfc829a3816fa5297852892b4af068bfa872cb44 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 13:36:28 -0500
Subject: [PATCH 113/456] Internal name-neatens (#1475)

---
 pkg/cli/option_parse.go             | 10 +++++-----
 pkg/cli/option_types.go             | 14 +++++++-------
 pkg/input/record_reader_csv.go      |  2 +-
 pkg/input/record_reader_csvlite.go  |  2 +-
 pkg/input/record_reader_pprint.go   |  5 ++---
 pkg/input/record_reader_tsv.go      |  2 +-
 pkg/output/record_writer_csv.go     |  2 +-
 pkg/output/record_writer_csvlite.go |  2 +-
 pkg/output/record_writer_pprint.go  |  4 ++--
 pkg/output/record_writer_tsv.go     |  2 +-
 10 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index 00e2c94d6..aa01c17ce 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -2159,7 +2159,7 @@ var CSVTSVOnlyFlagSection = FlagSection{
 			altNames: []string{"--no-implicit-tsv-header"},
 			help:     "Opposite of `--implicit-csv-header`. This is the default anyway -- the main use is for the flags to `mlr join` if you have main file(s) which are headerless but you want to join in on a file which does have a CSV/TSV header. Then you could use `mlr --csv --implicit-csv-header join --no-implicit-csv-header -l your-join-in-with-header.csv ... your-headerless.csv`.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
-				options.ReaderOptions.UseImplicitCSVHeader = false
+				options.ReaderOptions.UseImplicitHeader = false
 				*pargi += 1
 			},
 		},
@@ -2179,7 +2179,7 @@ var CSVTSVOnlyFlagSection = FlagSection{
 			altNames: []string{"--headerless-csv-input", "--hi", "--implicit-tsv-header"},
 			help:     "Use 1,2,3,... as field labels, rather than from line 1 of input files. Tip: combine with `label` to recreate missing headers.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
-				options.ReaderOptions.UseImplicitCSVHeader = true
+				options.ReaderOptions.UseImplicitHeader = true
 				*pargi += 1
 			},
 		},
@@ -2189,7 +2189,7 @@ var CSVTSVOnlyFlagSection = FlagSection{
 			altNames: []string{"--ho", "--headerless-tsv-output"},
 			help:     "Print only CSV/TSV data lines; do not print CSV/TSV header lines.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
-				options.WriterOptions.HeaderlessCSVOutput = true
+				options.WriterOptions.HeaderlessOutput = true
 				*pargi += 1
 			},
 		},
@@ -2198,8 +2198,8 @@ var CSVTSVOnlyFlagSection = FlagSection{
 			name: "-N",
 			help: "Keystroke-saver for `--implicit-csv-header --headerless-csv-output`.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
-				options.ReaderOptions.UseImplicitCSVHeader = true
-				options.WriterOptions.HeaderlessCSVOutput = true
+				options.ReaderOptions.UseImplicitHeader = true
+				options.WriterOptions.HeaderlessOutput = true
 				*pargi += 1
 			},
 		},
diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go
index 859d67a8d..22f32658f 100644
--- a/pkg/cli/option_types.go
+++ b/pkg/cli/option_types.go
@@ -53,11 +53,11 @@ type TReaderOptions struct {
 	irsWasSpecified            bool
 	allowRepeatIFSWasSpecified bool
 
-	UseImplicitCSVHeader bool
-	AllowRaggedCSVInput  bool
-	CSVLazyQuotes        bool
-	CSVTrimLeadingSpace  bool
-	BarredPprintInput    bool
+	UseImplicitHeader   bool
+	AllowRaggedCSVInput bool
+	CSVLazyQuotes       bool
+	CSVTrimLeadingSpace bool
+	BarredPprintInput   bool
 
 	CommentHandling TCommentHandling
 	CommentString   string
@@ -96,7 +96,7 @@ type TWriterOptions struct {
 	opsWasSpecified bool
 	orsWasSpecified bool
 
-	HeaderlessCSVOutput      bool
+	HeaderlessOutput         bool
 	BarredPprintOutput       bool
 	RightAlignedPPRINTOutput bool
 	RightAlignedXTABOutput   bool
@@ -214,7 +214,7 @@ func DefaultWriterOptions() TWriterOptions {
 		FLATSEP:            ".",
 		FlushOnEveryRecord: true,
 
-		HeaderlessCSVOutput: false,
+		HeaderlessOutput: false,
 
 		WrapJSONOutputInOuterList: true,
 		JSONOutputMultiline:       true,
diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go
index e7135e2fc..ab7c1d761 100644
--- a/pkg/input/record_reader_csv.go
+++ b/pkg/input/record_reader_csv.go
@@ -101,7 +101,7 @@ func (reader *RecordReaderCSV) processHandle(
 	// Reset state for start of next input file
 	reader.filename = filename
 	reader.rowNumber = 0
-	reader.needHeader = !reader.readerOptions.UseImplicitCSVHeader
+	reader.needHeader = !reader.readerOptions.UseImplicitHeader
 	reader.header = nil
 
 	csvReader := csv.NewReader(NewBOMStrippingReader(handle))
diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go
index 67d7ebced..3664d6ea3 100644
--- a/pkg/input/record_reader_csvlite.go
+++ b/pkg/input/record_reader_csvlite.go
@@ -70,7 +70,7 @@ func NewRecordReaderCSVLite(
 		useVoidRep: false,
 		voidRep:    "",
 	}
-	if reader.readerOptions.UseImplicitCSVHeader {
+	if reader.readerOptions.UseImplicitHeader {
 		reader.recordBatchGetter = getRecordBatchImplicitCSVHeader
 	} else {
 		reader.recordBatchGetter = getRecordBatchExplicitCSVHeader
diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go
index 9a2f279f3..8adad8756 100644
--- a/pkg/input/record_reader_pprint.go
+++ b/pkg/input/record_reader_pprint.go
@@ -49,7 +49,7 @@ func NewRecordReaderPPRINT(
 			separatorMatcher: regexp.MustCompile(`^\+[-+]*\+`),
 			fieldSplitter:    newFieldSplitter(readerOptions),
 		}
-		if reader.readerOptions.UseImplicitCSVHeader {
+		if reader.readerOptions.UseImplicitHeader {
 			reader.recordBatchGetter = getRecordBatchImplicitPprintHeader
 		} else {
 			reader.recordBatchGetter = getRecordBatchExplicitPprintHeader
@@ -67,8 +67,7 @@ func NewRecordReaderPPRINT(
 			useVoidRep: true,
 			voidRep:    "-",
 		}
-		// XXX RENAME THERE
-		if reader.readerOptions.UseImplicitCSVHeader {
+		if reader.readerOptions.UseImplicitHeader {
 			reader.recordBatchGetter = getRecordBatchImplicitCSVHeader
 		} else {
 			reader.recordBatchGetter = getRecordBatchExplicitCSVHeader
diff --git a/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go
index d3b9d75a3..a0d77aec4 100644
--- a/pkg/input/record_reader_tsv.go
+++ b/pkg/input/record_reader_tsv.go
@@ -52,7 +52,7 @@ func NewRecordReaderTSV(
 		recordsPerBatch: recordsPerBatch,
 		fieldSplitter:   newFieldSplitter(readerOptions),
 	}
-	if reader.readerOptions.UseImplicitCSVHeader {
+	if reader.readerOptions.UseImplicitHeader {
 		reader.recordBatchGetter = getRecordBatchImplicitTSVHeader
 	} else {
 		reader.recordBatchGetter = getRecordBatchExplicitTSVHeader
diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go
index fd4801d29..947400275 100644
--- a/pkg/output/record_writer_csv.go
+++ b/pkg/output/record_writer_csv.go
@@ -77,7 +77,7 @@ func (writer *RecordWriterCSV) Write(
 		needToPrintHeader = true
 	}
 
-	if needToPrintHeader && !writer.writerOptions.HeaderlessCSVOutput {
+	if needToPrintHeader && !writer.writerOptions.HeaderlessOutput {
 		fields := make([]string, outrec.FieldCount)
 		i := 0
 		for pe := outrec.Head; pe != nil; pe = pe.Next {
diff --git a/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go
index 251cf9580..ced670c13 100644
--- a/pkg/output/record_writer_csvlite.go
+++ b/pkg/output/record_writer_csvlite.go
@@ -58,7 +58,7 @@ func (writer *RecordWriterCSVLite) Write(
 		needToPrintHeader = true
 	}
 
-	if needToPrintHeader && !writer.writerOptions.HeaderlessCSVOutput {
+	if needToPrintHeader && !writer.writerOptions.HeaderlessOutput {
 		for pe := outrec.Head; pe != nil; pe = pe.Next {
 			bufferedOutputStream.WriteString(colorizer.MaybeColorizeKey(pe.Key, outputIsStdout))
 
diff --git a/pkg/output/record_writer_pprint.go b/pkg/output/record_writer_pprint.go
index 79d49b316..2fd4aaa70 100644
--- a/pkg/output/record_writer_pprint.go
+++ b/pkg/output/record_writer_pprint.go
@@ -155,7 +155,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListNonBarred(
 		outrec := e.Value.(*mlrval.Mlrmap)
 
 		// Print header line
-		if onFirst && !writer.writerOptions.HeaderlessCSVOutput {
+		if onFirst && !writer.writerOptions.HeaderlessOutput {
 			for pe := outrec.Head; pe != nil; pe = pe.Next {
 				if !writer.writerOptions.RightAlignedPPRINTOutput { // left-align
 					if pe.Next != nil {
@@ -257,7 +257,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
 		outrec := e.Value.(*mlrval.Mlrmap)
 
 		// Print header line
-		if onFirst && !writer.writerOptions.HeaderlessCSVOutput {
+		if onFirst && !writer.writerOptions.HeaderlessOutput {
 			bufferedOutputStream.WriteString(horizontalStart)
 			for pe := outrec.Head; pe != nil; pe = pe.Next {
 				bufferedOutputStream.WriteString(horizontalBars[pe.Key])
diff --git a/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go
index 149ac9530..48db403d8 100644
--- a/pkg/output/record_writer_tsv.go
+++ b/pkg/output/record_writer_tsv.go
@@ -66,7 +66,7 @@ func (writer *RecordWriterTSV) Write(
 		needToPrintHeader = true
 	}
 
-	if needToPrintHeader && !writer.writerOptions.HeaderlessCSVOutput {
+	if needToPrintHeader && !writer.writerOptions.HeaderlessOutput {
 		for pe := outrec.Head; pe != nil; pe = pe.Next {
 			bufferedOutputStream.WriteString(
 				colorizer.MaybeColorizeKey(

From 36b4654445fb2ba972d92d6b7643db91f73ec400 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 14:07:27 -0500
Subject: [PATCH 114/456] Fix typos in tests for PPRINT barred input (#1476)

---
 .../barred-input-headerless/cmd               |  2 +-
 .../barred-input-headerless/experr            |  2 -
 .../barred-input-headerless/expout            | 79 +++++++++++++++++++
 .../barred-input-headerless/should-fail       |  0
 test/cases/io-barred-pprint/barred-input/cmd  |  2 +-
 .../io-barred-pprint/barred-input/experr      |  2 -
 .../io-barred-pprint/barred-input/expout      | 72 +++++++++++++++++
 .../io-barred-pprint/barred-input/should-fail |  0
 8 files changed, 153 insertions(+), 6 deletions(-)
 delete mode 100644 test/cases/io-barred-pprint/barred-input-headerless/should-fail
 delete mode 100644 test/cases/io-barred-pprint/barred-input/should-fail

diff --git a/test/cases/io-barred-pprint/barred-input-headerless/cmd b/test/cases/io-barred-pprint/barred-input-headerless/cmd
index 41ebbe671..de4ebd0d8 100644
--- a/test/cases/io-barred-pprint/barred-input-headerless/cmd
+++ b/test/cases/io-barred-pprint/barred-input-headerless/cmd
@@ -1 +1 @@
-mlr --hi --i pprint --barred-input -o json test/input/abixy.tbl
+mlr --hi -i pprint --barred-input -o json cat test/input/abixy.tbl
diff --git a/test/cases/io-barred-pprint/barred-input-headerless/experr b/test/cases/io-barred-pprint/barred-input-headerless/experr
index 87bf02e26..e69de29bb 100644
--- a/test/cases/io-barred-pprint/barred-input-headerless/experr
+++ b/test/cases/io-barred-pprint/barred-input-headerless/experr
@@ -1,2 +0,0 @@
-mlr: option "--i" not recognized.
-Please run "mlr --help" for usage information.
diff --git a/test/cases/io-barred-pprint/barred-input-headerless/expout b/test/cases/io-barred-pprint/barred-input-headerless/expout
index e69de29bb..e08b56a90 100644
--- a/test/cases/io-barred-pprint/barred-input-headerless/expout
+++ b/test/cases/io-barred-pprint/barred-input-headerless/expout
@@ -0,0 +1,79 @@
+[
+{
+  "1": "a",
+  "2": "b",
+  "3": "i",
+  "4": "x",
+  "5": "y"
+},
+{
+  "1": "pan",
+  "2": "pan",
+  "3": 1,
+  "4": 0.34679014,
+  "5": 0.72680286
+},
+{
+  "1": "eks",
+  "2": "pan",
+  "3": 2,
+  "4": 0.75867996,
+  "5": 0.52215111
+},
+{
+  "1": "wye",
+  "2": "wye",
+  "3": 3,
+  "4": 0.20460331,
+  "5": 0.33831853
+},
+{
+  "1": "eks",
+  "2": "wye",
+  "3": 4,
+  "4": 0.38139939,
+  "5": 0.13418874
+},
+{
+  "1": "wye",
+  "2": "pan",
+  "3": 5,
+  "4": 0.57328892,
+  "5": 0.86362447
+},
+{
+  "1": "zee",
+  "2": "pan",
+  "3": 6,
+  "4": 0.52712616,
+  "5": 0.49322129
+},
+{
+  "1": "eks",
+  "2": "zee",
+  "3": 7,
+  "4": 0.61178406,
+  "5": 0.18788492
+},
+{
+  "1": "zee",
+  "2": "wye",
+  "3": 8,
+  "4": 0.59855401,
+  "5": 0.97618139
+},
+{
+  "1": "hat",
+  "2": "wye",
+  "3": 9,
+  "4": 0.03144188,
+  "5": 0.74955076
+},
+{
+  "1": "pan",
+  "2": "wye",
+  "3": 10,
+  "4": 0.50262601,
+  "5": 0.95261836
+}
+]
diff --git a/test/cases/io-barred-pprint/barred-input-headerless/should-fail b/test/cases/io-barred-pprint/barred-input-headerless/should-fail
deleted file mode 100644
index e69de29bb..000000000
diff --git a/test/cases/io-barred-pprint/barred-input/cmd b/test/cases/io-barred-pprint/barred-input/cmd
index f84126571..4c6742df6 100644
--- a/test/cases/io-barred-pprint/barred-input/cmd
+++ b/test/cases/io-barred-pprint/barred-input/cmd
@@ -1 +1 @@
-mlr --i pprint --barred-input -o json test/input/abixy.tbl
+mlr -i pprint --barred-input -o json cat test/input/abixy.tbl
diff --git a/test/cases/io-barred-pprint/barred-input/experr b/test/cases/io-barred-pprint/barred-input/experr
index 87bf02e26..e69de29bb 100644
--- a/test/cases/io-barred-pprint/barred-input/experr
+++ b/test/cases/io-barred-pprint/barred-input/experr
@@ -1,2 +0,0 @@
-mlr: option "--i" not recognized.
-Please run "mlr --help" for usage information.
diff --git a/test/cases/io-barred-pprint/barred-input/expout b/test/cases/io-barred-pprint/barred-input/expout
index e69de29bb..48da25210 100644
--- a/test/cases/io-barred-pprint/barred-input/expout
+++ b/test/cases/io-barred-pprint/barred-input/expout
@@ -0,0 +1,72 @@
+[
+{
+  "a": "pan",
+  "b": "pan",
+  "i": 1,
+  "x": 0.34679014,
+  "y": 0.72680286
+},
+{
+  "a": "eks",
+  "b": "pan",
+  "i": 2,
+  "x": 0.75867996,
+  "y": 0.52215111
+},
+{
+  "a": "wye",
+  "b": "wye",
+  "i": 3,
+  "x": 0.20460331,
+  "y": 0.33831853
+},
+{
+  "a": "eks",
+  "b": "wye",
+  "i": 4,
+  "x": 0.38139939,
+  "y": 0.13418874
+},
+{
+  "a": "wye",
+  "b": "pan",
+  "i": 5,
+  "x": 0.57328892,
+  "y": 0.86362447
+},
+{
+  "a": "zee",
+  "b": "pan",
+  "i": 6,
+  "x": 0.52712616,
+  "y": 0.49322129
+},
+{
+  "a": "eks",
+  "b": "zee",
+  "i": 7,
+  "x": 0.61178406,
+  "y": 0.18788492
+},
+{
+  "a": "zee",
+  "b": "wye",
+  "i": 8,
+  "x": 0.59855401,
+  "y": 0.97618139
+},
+{
+  "a": "hat",
+  "b": "wye",
+  "i": 9,
+  "x": 0.03144188,
+  "y": 0.74955076
+},
+{
+  "a": "pan",
+  "b": "wye",
+  "i": 10,
+  "x": 0.50262601,
+  "y": 0.95261836
+}
+]
diff --git a/test/cases/io-barred-pprint/barred-input/should-fail b/test/cases/io-barred-pprint/barred-input/should-fail
deleted file mode 100644
index e69de29bb..000000000

From 2abb9b47290f80b9b9ff7911358ec599e10a9d32 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 14:24:12 -0500
Subject: [PATCH 115/456] Don't run regression tests twice in GitHub CI (#1477)

---
 .github/workflows/go.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 733c6cb48..9ff2f41a6 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -25,8 +25,8 @@ jobs:
     - name: Build
       run: make build
 
-    - name: Test
-      run: make check
+    - name: Unit tests
+      run: make unit-test
 
     - name: Regression tests
       # We run these with a convoluted path to ensure the tests don't

From af021f28d71885960a35fb826f0420cefab33ce1 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 16:51:15 -0500
Subject: [PATCH 116/456] Support markdown format on input (#1478)

* Support markdown on input

* unit-test files

* doc mods

* Unit-test cases for I/O-format keystroke-savers

* -i/-o md as well as -i/-o markdown
---
 docs/src/file-formats.md                      |  29 +--
 docs/src/file-formats.md.in                   |   3 +-
 docs/src/manpage.md                           |  29 +--
 docs/src/manpage.txt                          |  29 +--
 docs/src/reference-main-flag-list.md          |   3 +-
 man/manpage.txt                               |  29 +--
 man/mlr.1                                     |  29 +--
 pkg/cli/option_parse.go                       | 238 ++++++++++++++++--
 pkg/input/record_reader_factory.go            |   4 +
 pkg/input/record_reader_markdown.go           |  30 +++
 pkg/input/record_reader_pprint.go             |  38 +--
 pkg/output/record_writer_factory.go           |   2 +
 pkg/terminals/help/entry.go                   |   2 +-
 .../{0049 => c}/cmd                           |   0
 .../{0001 => c}/experr                        |   0
 .../{0014 => c}/expout                        |   0
 .../{0008 => c2d}/cmd                         |   0
 .../{0002 => c2d}/experr                      |   0
 .../{0008 => c2d}/expout                      |   0
 .../{0010 => c2j}/cmd                         |   0
 .../{0003 => c2j}/experr                      |   0
 .../{0003 => c2j}/expout                      |   0
 .../{0011 => c2m/c2p}/cmd                     |   0
 .../{0004 => c2m/c2p}/experr                  |   0
 .../{0004 => c2m/c2p}/expout                  |   0
 .../{0013 => c2m}/cmd                         |   0
 .../{0005 => c2m}/experr                      |   0
 .../{0006 => c2m}/expout                      |   0
 .../{0009 => c2n}/cmd                         |   0
 .../{0006 => c2n}/experr                      |   0
 .../{0002 => c2n}/expout                      |   0
 .../c2p/cmd                                   |   1 +
 .../{0007 => c2p}/experr                      |   0
 .../{0011 => c2p}/expout                      |   0
 .../{0007 => c2t}/cmd                         |   0
 .../{0008 => c2t}/experr                      |   0
 .../{0001 => c2t}/expout                      |   0
 .../{0012 => c2x}/cmd                         |   0
 .../{0009 => c2x}/experr                      |   0
 .../{0005 => c2x}/expout                      |   0
 .../{0003 => d2j}/cmd                         |   0
 .../{0010 => d2j}/experr                      |   0
 .../{0010 => d2j}/expout                      |   0
 .../{0006 => d2m}/cmd                         |   0
 .../{0004 => d2m/d2p}/cmd                     |   0
 .../{0011 => d2m/d2p}/experr                  |   0
 .../{0018 => d2m/d2p}/expout                  |   0
 .../{0012 => d2m}/experr                      |   0
 .../{0013 => d2m}/expout                      |   0
 .../{0002 => d2n}/cmd                         |   0
 .../{0013 => d2n}/experr                      |   0
 .../{0009 => d2n}/expout                      |   0
 .../d2p/cmd                                   |   1 +
 .../{0014 => d2p}/experr                      |   0
 .../{0032 => d2p}/expout                      |   0
 .../{0001 => d2t}/cmd                         |   0
 .../{0015 => d2t}/experr                      |   0
 .../{0007 => d2t}/expout                      |   0
 .../{0005 => d2x}/cmd                         |   0
 .../{0016 => d2x}/experr                      |   0
 .../{0012 => d2x}/expout                      |   0
 .../{0053 => itsv-odkvp}/cmd                  |   0
 .../{0017 => itsv-odkvp}/experr               |   0
 .../{0053 => itsv-odkvp}/expout               |   0
 .../{0052 => itsv-odkvp}/input.tsv            |   0
 .../{0052 => itsvlite-odkvp}/cmd              |   0
 .../{0018 => itsvlite-odkvp}/experr           |   0
 .../{0052 => itsvlite-odkvp}/expout           |   0
 .../{0053 => itsvlite-odkvp}/input.tsv        |   0
 .../{0051 => j}/cmd                           |   0
 .../{0019 => j}/experr                        |   0
 .../{0017 => j}/expout                        |   0
 .../{0028 => j2c}/cmd                         |   0
 .../{0020 => j2c}/experr                      |   0
 .../{0028 => j2c}/expout                      |   0
 .../{0030 => j2d}/cmd                         |   0
 .../{0021 => j2d}/experr                      |   0
 .../{0015 => j2d}/expout                      |   0
 .../{0034 => j2m}/cmd                         |   0
 .../{0022 => j2m}/experr                      |   0
 .../{0020 => j2m}/expout                      |   0
 .../{0032 => j2m/j2p}/cmd                     |   0
 .../{0023 => j2m/j2p}/experr                  |   0
 .../{0047 => j2m/j2p}/expout                  |   0
 .../{0031 => j2n}/cmd                         |   0
 .../{0024 => j2n}/experr                      |   0
 .../{0016 => j2n}/expout                      |   0
 .../j2p/cmd                                   |   1 +
 .../{0025 => j2p}/experr                      |   0
 .../j2p/expout                                |  11 +
 .../{0029 => j2t}/cmd                         |   0
 .../{0026 => j2t}/experr                      |   0
 .../{0029 => j2t}/expout                      |   0
 .../{0033 => j2x}/cmd                         |   0
 .../{0027 => j2x}/experr                      |   0
 .../{0019 => j2x}/expout                      |   0
 .../l2m/cmd                                   |   1 +
 .../{0028 => l2m}/experr                      |   0
 .../{0034 => l2m}/expout                      |   0
 .../m2c/cmd                                   |   1 +
 .../{0029 => m2c}/experr                      |   0
 .../{0035 => m2c}/expout                      |   0
 .../m2d/cmd                                   |   1 +
 .../{0030 => m2d}/experr                      |   0
 .../{0030 => m2d}/expout                      |   0
 .../m2j/cmd                                   |   1 +
 .../{0031 => m2j}/experr                      |   0
 .../{0039 => m2j}/expout                      |   0
 .../m2l/cmd                                   |   1 +
 .../{0032 => m2l}/experr                      |   0
 .../m2l/expout                                |  10 +
 .../m2n/cmd                                   |   1 +
 .../{0033 => m2n}/experr                      |   0
 .../{0031 => m2n}/expout                      |   0
 .../m2p/cmd                                   |   1 +
 .../{0034 => m2p}/experr                      |   0
 .../m2p/expout                                |  11 +
 .../m2t/cmd                                   |   1 +
 .../{0035 => m2t}/experr                      |   0
 .../{0036 => m2t}/expout                      |   0
 .../m2x/cmd                                   |   1 +
 .../{0036 => m2x}/experr                      |   0
 .../{0033 => m2x}/expout                      |   0
 .../{0021 => n2c}/cmd                         |   0
 .../{0037 => n2c}/experr                      |   0
 .../{0021 => n2c}/expout                      |   0
 .../{0023 => n2d}/cmd                         |   0
 .../{0038 => n2d}/experr                      |   0
 .../{0023 => n2d}/expout                      |   0
 .../{0024 => n2j}/cmd                         |   0
 .../{0039 => n2j}/experr                      |   0
 .../{0024 => n2j}/expout                      |   0
 .../{0027 => n2m}/cmd                         |   0
 .../{0040 => n2m}/experr                      |   0
 .../{0027 => n2m}/expout                      |   0
 .../{0025 => n2m/n2p}/cmd                     |   0
 .../{0041 => n2m/n2p}/experr                  |   0
 .../{0025 => n2m/n2p}/expout                  |   0
 .../n2p/cmd                                   |   1 +
 .../{0042 => n2p}/experr                      |   0
 .../n2p/expout                                |  11 +
 .../{0022 => n2t}/cmd                         |   0
 .../{0043 => n2t}/experr                      |   0
 .../{0022 => n2t}/expout                      |   0
 .../{0026 => n2x}/cmd                         |   0
 .../{0044 => n2x}/experr                      |   0
 .../{0026 => n2x}/expout                      |   0
 .../{0035 => p2c}/cmd                         |   0
 .../{0045 => p2c}/experr                      |   0
 .../{0042 => p2c}/expout                      |   0
 .../{0037 => p2d}/cmd                         |   0
 .../{0046 => p2d}/experr                      |   0
 .../{0037 => p2d}/expout                      |   0
 .../{0039 => p2j}/cmd                         |   0
 .../{0047 => p2j}/experr                      |   0
 .../{0046 => p2j}/expout                      |   0
 .../{0041 => p2m}/cmd                         |   0
 .../{0048 => p2m}/experr                      |   0
 .../{0041 => p2m}/expout                      |   0
 .../{0047 => p2m/x2p}/cmd                     |   0
 .../{0049 => p2m/x2p}/experr                  |   0
 .../p2m/x2p/expout                            |  11 +
 .../{0038 => p2n}/cmd                         |   0
 .../{0050 => p2n}/experr                      |   0
 .../{0038 => p2n}/expout                      |   0
 .../{0036 => p2t}/cmd                         |   0
 .../{0051 => p2t}/experr                      |   0
 .../{0043 => p2t}/expout                      |   0
 .../{0040 => p2x}/cmd                         |   0
 .../{0052 => p2x}/experr                      |   0
 .../{0040 => p2x}/expout                      |   0
 .../{0050 => t}/cmd                           |   0
 .../{0053 => t}/experr                        |   0
 .../{0050 => t}/expout                        |   0
 .../{0014 => t2c}/cmd                         |   0
 .../{0054 => t2c}/experr                      |   0
 .../{0049 => t2c}/expout                      |   0
 .../{0015 => t2d}/cmd                         |   0
 .../{0055 => t2d}/experr                      |   0
 .../{0044 => t2d}/expout                      |   0
 .../{0017 => t2j}/cmd                         |   0
 .../t2j}/experr                               |   0
 .../{0051 => t2j}/expout                      |   0
 .../{0020 => t2m}/cmd                         |   0
 .../t2m}/experr                               |   0
 .../{0048 => t2m}/expout                      |   0
 .../{0018 => t2m/t2p}/cmd                     |   0
 .../t2m/t2p/experr                            |   0
 .../t2m/t2p/expout                            |  11 +
 .../{0016 => t2n}/cmd                         |   0
 .../t2n/experr                                |   0
 .../{0045 => t2n}/expout                      |   0
 .../t2p/cmd                                   |   1 +
 .../t2p/experr                                |   0
 .../t2p/expout                                |  11 +
 .../{0019 => t2x}/cmd                         |   0
 .../t2x/experr                                |   0
 .../t2x/expout                                |  59 +++++
 .../{0055 => tsv}/cmd                         |   0
 .../tsv/experr                                |   0
 .../{0055 => tsv}/expout                      |   0
 .../{0054 => tsv}/input.tsv                   |   0
 .../{0054 => tsvlite}/cmd                     |   0
 .../tsvlite/experr                            |   0
 .../{0054 => tsvlite}/expout                  |   0
 .../{0055 => tsvlite}/input.tsv               |   0
 .../{0042 => x2c}/cmd                         |   0
 .../x2c/experr                                |   0
 .../x2c/expout                                |  11 +
 .../{0044 => x2d}/cmd                         |   0
 .../x2d/experr                                |   0
 .../x2d/expout                                |  10 +
 .../{0046 => x2j}/cmd                         |   0
 .../x2j/experr                                |   0
 .../x2j/expout                                |  72 ++++++
 .../{0048 => x2m}/cmd                         |   0
 .../x2m/experr                                |   0
 .../x2m/expout                                |  12 +
 .../x2m/x2p/cmd                               |   1 +
 .../x2m/x2p/experr                            |   0
 .../x2m/x2p/expout                            |  11 +
 .../{0045 => x2n}/cmd                         |   0
 .../x2n/experr                                |   0
 .../x2n/expout                                |  10 +
 .../x2p/cmd                                   |   1 +
 .../x2p/experr                                |   0
 .../x2p/expout                                |  11 +
 .../{0043 => x2t}/cmd                         |   0
 .../x2t/experr                                |   0
 .../x2t/expout                                |  11 +
 .../0001/cmd                                  |   0
 test/cases/io-markdown/0001/experr            |   0
 .../0001/expout                               |   0
 .../0002/cmd                                  |   0
 test/cases/io-markdown/0002/experr            |   0
 .../0002/expout                               |   0
 .../0002/input                                |   0
 .../io-markdown/markdown-input-headerless/cmd |   1 +
 .../markdown-input-headerless/experr          |   0
 .../markdown-input-headerless/expout          |  79 ++++++
 test/cases/io-markdown/markdown-input/cmd     |   1 +
 test/cases/io-markdown/markdown-input/experr  |   0
 test/cases/io-markdown/markdown-input/expout  |  72 ++++++
 243 files changed, 803 insertions(+), 114 deletions(-)
 create mode 100644 pkg/input/record_reader_markdown.go
 rename test/cases/io-format-conversion-keystroke-savers/{0049 => c}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0001 => c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0014 => c}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0008 => c2d}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0002 => c2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0008 => c2d}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0010 => c2j}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0003 => c2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0003 => c2j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0011 => c2m/c2p}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0004 => c2m/c2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0004 => c2m/c2p}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0013 => c2m}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0005 => c2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0006 => c2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0009 => c2n}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0006 => c2n}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0002 => c2n}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/c2p/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0007 => c2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0011 => c2p}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0007 => c2t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0008 => c2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0001 => c2t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0012 => c2x}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0009 => c2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0005 => c2x}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0003 => d2j}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0010 => d2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0010 => d2j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0006 => d2m}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0004 => d2m/d2p}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0011 => d2m/d2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0018 => d2m/d2p}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0012 => d2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0013 => d2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0002 => d2n}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0013 => d2n}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0009 => d2n}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/d2p/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0014 => d2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0032 => d2p}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0001 => d2t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0015 => d2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0007 => d2t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0005 => d2x}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0016 => d2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0012 => d2x}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0053 => itsv-odkvp}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0017 => itsv-odkvp}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0053 => itsv-odkvp}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0052 => itsv-odkvp}/input.tsv (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0052 => itsvlite-odkvp}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0018 => itsvlite-odkvp}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0052 => itsvlite-odkvp}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0053 => itsvlite-odkvp}/input.tsv (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0051 => j}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0019 => j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0017 => j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0028 => j2c}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0020 => j2c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0028 => j2c}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0030 => j2d}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0021 => j2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0015 => j2d}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0034 => j2m}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0022 => j2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0020 => j2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0032 => j2m/j2p}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0023 => j2m/j2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0047 => j2m/j2p}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0031 => j2n}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0024 => j2n}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0016 => j2n}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/j2p/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0025 => j2p}/experr (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/j2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0029 => j2t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0026 => j2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0029 => j2t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0033 => j2x}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0027 => j2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0019 => j2x}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/l2m/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0028 => l2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0034 => l2m}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2c/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0029 => m2c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0035 => m2c}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2d/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0030 => m2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0030 => m2d}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2j/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0031 => m2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0039 => m2j}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2l/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0032 => m2l}/experr (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2l/expout
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2n/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0033 => m2n}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0031 => m2n}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2p/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0034 => m2p}/experr (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2p/expout
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2t/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0035 => m2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0036 => m2t}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/m2x/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0036 => m2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0033 => m2x}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0021 => n2c}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0037 => n2c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0021 => n2c}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0023 => n2d}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0038 => n2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0023 => n2d}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0024 => n2j}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0039 => n2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0024 => n2j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0027 => n2m}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0040 => n2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0027 => n2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0025 => n2m/n2p}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0041 => n2m/n2p}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0025 => n2m/n2p}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/n2p/cmd
 rename test/cases/io-format-conversion-keystroke-savers/{0042 => n2p}/experr (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/n2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0022 => n2t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0043 => n2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0022 => n2t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0026 => n2x}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0044 => n2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0026 => n2x}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0035 => p2c}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0045 => p2c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0042 => p2c}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0037 => p2d}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0046 => p2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0037 => p2d}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0039 => p2j}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0047 => p2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0046 => p2j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0041 => p2m}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0048 => p2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0041 => p2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0047 => p2m/x2p}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0049 => p2m/x2p}/experr (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/p2m/x2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0038 => p2n}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0050 => p2n}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0038 => p2n}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0036 => p2t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0051 => p2t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0043 => p2t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0040 => p2x}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0052 => p2x}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0040 => p2x}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0050 => t}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0053 => t}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0050 => t}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0014 => t2c}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0054 => t2c}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0049 => t2c}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0015 => t2d}/cmd (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0055 => t2d}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0044 => t2d}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0017 => t2j}/cmd (100%)
 rename test/cases/{io-markdown-output/0001 => io-format-conversion-keystroke-savers/t2j}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0051 => t2j}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0020 => t2m}/cmd (100%)
 rename test/cases/{io-markdown-output/0002 => io-format-conversion-keystroke-savers/t2m}/experr (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0048 => t2m}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0018 => t2m/t2p}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2m/t2p/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2m/t2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0016 => t2n}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2n/experr
 rename test/cases/io-format-conversion-keystroke-savers/{0045 => t2n}/expout (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2p/cmd
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2p/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0019 => t2x}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2x/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/t2x/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0055 => tsv}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/tsv/experr
 rename test/cases/io-format-conversion-keystroke-savers/{0055 => tsv}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0054 => tsv}/input.tsv (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0054 => tsvlite}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/tsvlite/experr
 rename test/cases/io-format-conversion-keystroke-savers/{0054 => tsvlite}/expout (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0055 => tsvlite}/input.tsv (100%)
 rename test/cases/io-format-conversion-keystroke-savers/{0042 => x2c}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2c/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2c/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0044 => x2d}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2d/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2d/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0046 => x2j}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2j/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2j/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0048 => x2m}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2m/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2m/expout
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2m/x2p/cmd
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2m/x2p/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2m/x2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0045 => x2n}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2n/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2n/expout
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2p/cmd
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2p/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2p/expout
 rename test/cases/io-format-conversion-keystroke-savers/{0043 => x2t}/cmd (100%)
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2t/experr
 create mode 100644 test/cases/io-format-conversion-keystroke-savers/x2t/expout
 rename test/cases/{io-markdown-output => io-markdown}/0001/cmd (100%)
 create mode 100644 test/cases/io-markdown/0001/experr
 rename test/cases/{io-markdown-output => io-markdown}/0001/expout (100%)
 rename test/cases/{io-markdown-output => io-markdown}/0002/cmd (100%)
 create mode 100644 test/cases/io-markdown/0002/experr
 rename test/cases/{io-markdown-output => io-markdown}/0002/expout (100%)
 rename test/cases/{io-markdown-output => io-markdown}/0002/input (100%)
 create mode 100644 test/cases/io-markdown/markdown-input-headerless/cmd
 create mode 100644 test/cases/io-markdown/markdown-input-headerless/experr
 create mode 100644 test/cases/io-markdown/markdown-input-headerless/expout
 create mode 100644 test/cases/io-markdown/markdown-input/cmd
 create mode 100644 test/cases/io-markdown/markdown-input/experr
 create mode 100644 test/cases/io-markdown/markdown-input/expout

diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md
index 9b0cb6362..3af248ce5 100644
--- a/docs/src/file-formats.md
+++ b/docs/src/file-formats.md
@@ -69,7 +69,7 @@ PPRINT: pretty-printed tabular
 | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
 +---------------------+
 
-Markdown tabular (supported for output only):
+Markdown tabular:
 +-----------------------+
 | | apple | bat | cog | |
 | | ---   | --- | --- | |
@@ -435,7 +435,8 @@ which renders like this when dropped into various web tools (e.g. github comment
 
 ![pix/omd.png](pix/omd.png)
 
-As of Miller 4.3.0, markdown format is supported only for output, not input.
+As of Miller 4.3.0, markdown format was supported only for output, not input; as of Miller 6.11.0, markdown format
+is supported for input as well.
 
 ## XTAB: Vertical tabular
 
@@ -635,19 +636,19 @@ While you can do format conversion using `mlr --icsv --ojson cat myfile.csv`, th
 FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS
 As keystroke-savers for format-conversion you may use the following.
 The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-format is available for output only.
+JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-| In\out | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-+--------+-------+-------+--------+--------+--------+--------+--------+----------+
-| CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
++----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
+| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
 -p                       Keystroke-saver for `--nidx --fs space --repifs`.
 -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in
index 8da809fad..7e3d50308 100644
--- a/docs/src/file-formats.md.in
+++ b/docs/src/file-formats.md.in
@@ -177,7 +177,8 @@ which renders like this when dropped into various web tools (e.g. github comment
 
 ![pix/omd.png](pix/omd.png)
 
-As of Miller 4.3.0, markdown format is supported only for output, not input.
+As of Miller 4.3.0, markdown format was supported only for output, not input; as of Miller 6.11.0, markdown format
+is supported for input as well.
 
 ## XTAB: Vertical tabular
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 2381322af..e2123e752 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -101,7 +101,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
        +---------------------+
 
-       Markdown tabular (supported for output only):
+       Markdown tabular:
        +-----------------------+
        | | apple | bat | cog | |
        | | ---   | --- | --- | |
@@ -381,6 +381,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
                                 seqgen verb, which is more useful/intuitive.
        --ijson                  Use JSON format for input data.
        --ijsonl                 Use JSON Lines format for input data.
+       --imd or --imarkdown     Use markdown-tabular format for input data.
        --inidx                  Use NIDX format for input data.
        --io {format name}       Use format name for input and output data. For
                                 example: `--io csv` is the same as `--csv`.
@@ -398,7 +399,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        --odkvp                  Use DKVP format for output data.
        --ojson                  Use JSON format for output data.
        --ojsonl                 Use JSON Lines format for output data.
-       --omd                    Use markdown-tabular format for output data.
+       --omd or --omarkdown     Use markdown-tabular format for output data.
        --onidx                  Use NIDX format for output data.
        --opprint                Use PPRINT format for output data.
        --otsv                   Use TSV format for output data.
@@ -440,19 +441,19 @@ This is simply a copy of what you should see on running `man mlr` at a command p
 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m
        As keystroke-savers for format-conversion you may use the following.
        The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-       format is available for output only.
+       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-       | In\out | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-       +--------+-------+-------+--------+--------+--------+--------+--------+----------+
-       | CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
+       +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
+       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
        -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 4edeb4b37..ce0a53994 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -80,7 +80,7 @@
        | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
        +---------------------+
 
-       Markdown tabular (supported for output only):
+       Markdown tabular:
        +-----------------------+
        | | apple | bat | cog | |
        | | ---   | --- | --- | |
@@ -360,6 +360,7 @@
                                 seqgen verb, which is more useful/intuitive.
        --ijson                  Use JSON format for input data.
        --ijsonl                 Use JSON Lines format for input data.
+       --imd or --imarkdown     Use markdown-tabular format for input data.
        --inidx                  Use NIDX format for input data.
        --io {format name}       Use format name for input and output data. For
                                 example: `--io csv` is the same as `--csv`.
@@ -377,7 +378,7 @@
        --odkvp                  Use DKVP format for output data.
        --ojson                  Use JSON format for output data.
        --ojsonl                 Use JSON Lines format for output data.
-       --omd                    Use markdown-tabular format for output data.
+       --omd or --omarkdown     Use markdown-tabular format for output data.
        --onidx                  Use NIDX format for output data.
        --opprint                Use PPRINT format for output data.
        --otsv                   Use TSV format for output data.
@@ -419,19 +420,19 @@
 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m
        As keystroke-savers for format-conversion you may use the following.
        The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-       format is available for output only.
+       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-       | In\out | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-       +--------+-------+-------+--------+--------+--------+--------+--------+----------+
-       | CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
+       +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------+
+       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
        -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index 758f10a37..fde4d9496 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -157,6 +157,7 @@ are overridden in all cases by setting output format to `format2`.
 * `--igen`: Ignore input files and instead generate sequential numeric input using --gen-field-name, --gen-start, --gen-step, and --gen-stop values. See also the seqgen verb, which is more useful/intuitive.
 * `--ijson`: Use JSON format for input data.
 * `--ijsonl`: Use JSON Lines format for input data.
+* `--imd or --imarkdown`: Use markdown-tabular format for input data.
 * `--inidx`: Use NIDX format for input data.
 * `--io {format name}`: Use format name for input and output data. For example: `--io csv` is the same as `--csv`.
 * `--ipprint`: Use PPRINT format for input data.
@@ -173,7 +174,7 @@ are overridden in all cases by setting output format to `format2`.
 * `--odkvp`: Use DKVP format for output data.
 * `--ojson`: Use JSON format for output data.
 * `--ojsonl`: Use JSON Lines format for output data.
-* `--omd`: Use markdown-tabular format for output data.
+* `--omd or --omarkdown`: Use markdown-tabular format for output data.
 * `--onidx`: Use NIDX format for output data.
 * `--opprint`: Use PPRINT format for output data.
 * `--otsv`: Use TSV format for output data.
diff --git a/man/manpage.txt b/man/manpage.txt
index 4edeb4b37..ce0a53994 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -80,7 +80,7 @@
        | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
        +---------------------+
 
-       Markdown tabular (supported for output only):
+       Markdown tabular:
        +-----------------------+
        | | apple | bat | cog | |
        | | ---   | --- | --- | |
@@ -360,6 +360,7 @@
                                 seqgen verb, which is more useful/intuitive.
        --ijson                  Use JSON format for input data.
        --ijsonl                 Use JSON Lines format for input data.
+       --imd or --imarkdown     Use markdown-tabular format for input data.
        --inidx                  Use NIDX format for input data.
        --io {format name}       Use format name for input and output data. For
                                 example: `--io csv` is the same as `--csv`.
@@ -377,7 +378,7 @@
        --odkvp                  Use DKVP format for output data.
        --ojson                  Use JSON format for output data.
        --ojsonl                 Use JSON Lines format for output data.
-       --omd                    Use markdown-tabular format for output data.
+       --omd or --omarkdown     Use markdown-tabular format for output data.
        --onidx                  Use NIDX format for output data.
        --opprint                Use PPRINT format for output data.
        --otsv                   Use TSV format for output data.
@@ -419,19 +420,19 @@
 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m
        As keystroke-savers for format-conversion you may use the following.
        The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-       format is available for output only.
+       JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-       | In\out | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-       +--------+-------+-------+--------+--------+--------+--------+--------+----------+
-       | CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
+       +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------+
+       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
        -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/man/mlr.1 b/man/mlr.1
index a9367fead..f7dde70fe 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -111,7 +111,7 @@ PPRINT: pretty-printed tabular
 | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
 +---------------------+
 
-Markdown tabular (supported for output only):
+Markdown tabular:
 +-----------------------+
 | | apple | bat | cog | |
 | | ---   | --- | --- | |
@@ -441,6 +441,7 @@ are overridden in all cases by setting output format to `format2`.
                          seqgen verb, which is more useful/intuitive.
 --ijson                  Use JSON format for input data.
 --ijsonl                 Use JSON Lines format for input data.
+--imd or --imarkdown     Use markdown-tabular format for input data.
 --inidx                  Use NIDX format for input data.
 --io {format name}       Use format name for input and output data. For
                          example: `--io csv` is the same as `--csv`.
@@ -458,7 +459,7 @@ are overridden in all cases by setting output format to `format2`.
 --odkvp                  Use DKVP format for output data.
 --ojson                  Use JSON format for output data.
 --ojsonl                 Use JSON Lines format for output data.
---omd                    Use markdown-tabular format for output data.
+--omd or --omarkdown     Use markdown-tabular format for output data.
 --onidx                  Use NIDX format for output data.
 --opprint                Use PPRINT format for output data.
 --otsv                   Use TSV format for output data.
@@ -516,19 +517,19 @@ See the Flatten/unflatten doc page for more information.
 .nf
 As keystroke-savers for format-conversion you may use the following.
 The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-format is available for output only.
+JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-| In\eout | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-+--------+-------+-------+--------+--------+--------+--------+--------+----------+
-| CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| In\eout   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
++----------+-------+-------+--------+--------+--------+--------+--------+--------+----------+
+| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
 -p                       Keystroke-saver for `--nidx --fs space --repifs`.
 -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index aa01c17ce..3cec34dc6 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -762,6 +762,9 @@ var FileFormatFlagSection = FlagSection{
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				CheckArgCount(args, *pargi, argc, 2)
 				options.ReaderOptions.InputFileFormat = args[*pargi+1]
+				if options.ReaderOptions.InputFileFormat == "md" {
+					options.ReaderOptions.InputFileFormat = "markdown" // alias
+				}
 				*pargi += 2
 			},
 		},
@@ -823,6 +826,9 @@ var FileFormatFlagSection = FlagSection{
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				CheckArgCount(args, *pargi, argc, 2)
 				options.WriterOptions.OutputFileFormat = args[*pargi+1]
+				if options.WriterOptions.OutputFileFormat == "md" {
+					options.WriterOptions.OutputFileFormat = "markdown" // alias
+				}
 				*pargi += 2
 			},
 		},
@@ -896,8 +902,19 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--omd",
-			help: "Use markdown-tabular format for output data.",
+			name:     "--imd",
+			altNames: []string{"--imarkdown"},
+			help:     "Use markdown-tabular format for input data.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
+
+		{
+			name:     "--omd",
+			altNames: []string{"--omarkdown"},
+			help:     "Use markdown-tabular format for output data.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.WriterOptions.OutputFileFormat = "markdown"
 				*pargi += 1
@@ -1151,19 +1168,19 @@ var FileFormatFlagSection = FlagSection{
 func FormatConversionKeystrokeSaverPrintInfo() {
 	fmt.Println(`As keystroke-savers for format-conversion you may use the following.
 The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
-JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown
-format is available for output only.
+JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-| In\out | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-+--------+-------+-------+--------+--------+--------+--------+--------+----------+
-| CSV    |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV    | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON   | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL  | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP   | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX   | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB   | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |`)
+| In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
++----------+-------+-------+--------+--------+--------+--------+--------+--------+----------+
+| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
+| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
+| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
+| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |`)
 }
 
 func init() { FormatConversionKeystrokeSaverFlagSection.Sort() }
@@ -1288,6 +1305,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--c2m",
+			help: "Use CSV for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "csv"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--c2b",
 			help: "Use CSV for input, PPRINT with `--barred` for output.",
@@ -1408,6 +1437,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--t2m",
+			help: "Use TSV for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "tsv"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--t2b",
 			help: "Use TSV for input, PPRINT with `--barred` for output.",
@@ -1527,6 +1568,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--d2m",
+			help: "Use DKVP for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "dkvp"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--d2b",
 			help: "Use DKVP for input, PPRINT with `--barred` for output.",
@@ -1642,6 +1695,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--n2m",
+			help: "Use NIDX for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "nidx"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--n2b",
 			help: "Use NIDX for input, PPRINT with `--barred` for output.",
@@ -1756,6 +1821,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--j2m",
+			help: "Use JSON for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "json"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--j2b",
 			help: "Use JSON for input, PPRINT with --barred for output.",
@@ -1867,6 +1944,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--l2m",
+			help: "Use JSON Lines for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "json"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--l2b",
 			help: "Use JSON Lines for input, PPRINT with --barred for output.",
@@ -2023,6 +2112,115 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 			},
 		},
 
+		{
+			name: "--m2c",
+			help: "Use markdown-tabular for input, CSV for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "csv"
+				options.ReaderOptions.ifsWasSpecified = true
+				options.WriterOptions.orsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2t",
+			help: "Use markdown-tabular for input, TSV for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "tsv"
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2d",
+			help: "Use markdown-tabular for input, DKVP for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "dkvp"
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2n",
+			help: "Use markdown-tabular for input, NIDX for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "nidx"
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2j",
+			help: "Use markdown-tabular for input, JSON for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "json"
+				options.WriterOptions.WrapJSONOutputInOuterList = true
+				options.WriterOptions.JSONOutputMultiline = true
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2l",
+			help: "Use markdown-tabular for input, JSON Lines for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "json"
+				options.WriterOptions.WrapJSONOutputInOuterList = false
+				options.WriterOptions.JSONOutputMultiline = false
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2x",
+			help: "Use markdown-tabular for input, XTAB for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "xtab"
+				options.ReaderOptions.ifsWasSpecified = true
+				*pargi += 1
+			},
+		},
+		{
+			name: "--m2p",
+			help: "Use markdown-tabular for input, PPRINT for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "markdown"
+				options.WriterOptions.OutputFileFormat = "pprint"
+				*pargi += 1
+			},
+		},
+
 		{
 			name: "--x2c",
 			help: "Use XTAB for input, CSV for output.",
@@ -2112,6 +2310,18 @@ var FormatConversionKeystrokeSaverFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+		{
+			name: "--x2m",
+			help: "Use XTAB for input, markdown-tabular for output.",
+			// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+			// need to print a tedious 60-line list.
+			suppressFlagEnumeration: true,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.InputFileFormat = "xtab"
+				options.WriterOptions.OutputFileFormat = "markdown"
+				*pargi += 1
+			},
+		},
 		{
 			name: "--x2b",
 			help: "Use XTAB for input, PPRINT with `--barred` for output.",
diff --git a/pkg/input/record_reader_factory.go b/pkg/input/record_reader_factory.go
index e8328fd51..26d2f81ed 100644
--- a/pkg/input/record_reader_factory.go
+++ b/pkg/input/record_reader_factory.go
@@ -18,6 +18,10 @@ func Create(readerOptions *cli.TReaderOptions, recordsPerBatch int64) (IRecordRe
 		return NewRecordReaderJSON(readerOptions, recordsPerBatch)
 	case "nidx":
 		return NewRecordReaderNIDX(readerOptions, recordsPerBatch)
+	case "md":
+		return NewRecordReaderMarkdown(readerOptions, recordsPerBatch)
+	case "markdown":
+		return NewRecordReaderMarkdown(readerOptions, recordsPerBatch)
 	case "pprint":
 		return NewRecordReaderPPRINT(readerOptions, recordsPerBatch)
 	case "tsv":
diff --git a/pkg/input/record_reader_markdown.go b/pkg/input/record_reader_markdown.go
new file mode 100644
index 000000000..22cc3a078
--- /dev/null
+++ b/pkg/input/record_reader_markdown.go
@@ -0,0 +1,30 @@
+package input
+
+import (
+	"regexp"
+
+	"github.com/johnkerl/miller/pkg/cli"
+)
+
+// NewRecordReaderMarkdown returns a markdown-table reader: it forces IFS to "|" (no repeats);
+// separatorMatcher matches "| --- |" rows, GFM alignment colons ("| :-- | --: |") included.
+func NewRecordReaderMarkdown(
+	readerOptions *cli.TReaderOptions,
+	recordsPerBatch int64,
+) (IRecordReader, error) {
+	readerOptions.IFS = "|"
+	readerOptions.AllowRepeatIFS = false
+
+	reader := &RecordReaderPprintBarredOrMarkdown{
+		readerOptions:    readerOptions,
+		recordsPerBatch:  recordsPerBatch,
+		separatorMatcher: regexp.MustCompile(`^\|[-:\| ]+\|$`),
+		fieldSplitter:    newFieldSplitter(readerOptions),
+	}
+	if reader.readerOptions.UseImplicitHeader {
+		reader.recordBatchGetter = getRecordBatchImplicitPprintHeader
+	} else {
+		reader.recordBatchGetter = getRecordBatchExplicitPprintHeader
+	}
+	return reader, nil
+}
diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go
index 8adad8756..7495a8d80 100644
--- a/pkg/input/record_reader_pprint.go
+++ b/pkg/input/record_reader_pprint.go
@@ -1,23 +1,5 @@
 package input
 
-// Multi-file cases:
-//
-// a,a        a,b        c          d
-// -- FILE1:  -- FILE1:  -- FILE1:  -- FILE1:
-// a,b,c      a,b,c      a,b,c      a,b,c
-// 1,2,3      1,2,3      1,2,3      1,2,3
-// 4,5,6      4,5,6      4,5,6      4,5,6
-// -- FILE2:  -- FILE2:
-// a,b,c      d,e,f,g    a,b,c      d,e,f
-// 7,8,9      3,4,5,6    7,8,9      3,4,5
-// --OUTPUT:  --OUTPUT:  --OUTPUT:  --OUTPUT:
-// a,b,c      a,b,c      a,b,c      a,b,c
-// 1,2,3      1,2,3      1,2,3      1,2,3
-// 4,5,6      4,5,6      4,5,6      4,5,6
-// 7,8,9                 7,8,9
-//            d,e,f,g               d,e,f
-//            3,4,5,6               3,4,5
-
 import (
 	"container/list"
 	"fmt"
@@ -39,14 +21,13 @@ func NewRecordReaderPPRINT(
 	if readerOptions.BarredPprintInput {
 		// Implemented in this file
 
-		// XXX TEMP
 		readerOptions.IFS = "|"
 		readerOptions.AllowRepeatIFS = false
 
-		reader := &RecordReaderPprintBarred{
+		reader := &RecordReaderPprintBarredOrMarkdown{
 			readerOptions:    readerOptions,
 			recordsPerBatch:  recordsPerBatch,
-			separatorMatcher: regexp.MustCompile(`^\+[-+]*\+`),
+			separatorMatcher: regexp.MustCompile(`^\+[-+]*\+$`),
 			fieldSplitter:    newFieldSplitter(readerOptions),
 		}
 		if reader.readerOptions.UseImplicitHeader {
@@ -76,7 +57,7 @@ func NewRecordReaderPPRINT(
 	}
 }
 
-type RecordReaderPprintBarred struct {
+type RecordReaderPprintBarredOrMarkdown struct {
 	readerOptions   *cli.TReaderOptions
 	recordsPerBatch int64 // distinct from readerOptions.RecordsPerBatch for join/repl
 
@@ -91,7 +72,7 @@ type RecordReaderPprintBarred struct {
 // recordBatchGetterPprint points to either an explicit-PPRINT-header or
 // implicit-PPRINT-header record-batch getter.
 type recordBatchGetterPprint func(
-	reader *RecordReaderPprintBarred,
+	reader *RecordReaderPprintBarredOrMarkdown,
 	linesChannel <-chan *list.List,
 	filename string,
 	context *types.Context,
@@ -101,7 +82,7 @@ type recordBatchGetterPprint func(
 	eof bool,
 )
 
-func (reader *RecordReaderPprintBarred) Read(
+func (reader *RecordReaderPprintBarredOrMarkdown) Read(
 	filenames []string,
 	context types.Context,
 	readerChannel chan<- *list.List, // list of *types.RecordAndContext
@@ -154,7 +135,7 @@ func (reader *RecordReaderPprintBarred) Read(
 	readerChannel <- types.NewEndOfStreamMarkerList(&context)
 }
 
-func (reader *RecordReaderPprintBarred) processHandle(
+func (reader *RecordReaderPprintBarredOrMarkdown) processHandle(
 	handle io.Reader,
 	filename string,
 	context *types.Context,
@@ -183,7 +164,7 @@ func (reader *RecordReaderPprintBarred) processHandle(
 }
 
 func getRecordBatchExplicitPprintHeader(
-	reader *RecordReaderPprintBarred,
+	reader *RecordReaderPprintBarredOrMarkdown,
 	linesChannel <-chan *list.List,
 	filename string,
 	context *types.Context,
@@ -242,6 +223,9 @@ func getRecordBatchExplicitPprintHeader(
 		// Skip the leading and trailing pipes
 		paddedFields := reader.fieldSplitter.Split(line)
 		npad := len(paddedFields)
+		if npad < 2 {
+			continue
+		}
 		fields := make([]string, npad-2)
 		for i, _ := range paddedFields {
 			if i == 0 || i == npad-1 {
@@ -318,7 +302,7 @@ func getRecordBatchExplicitPprintHeader(
 }
 
 func getRecordBatchImplicitPprintHeader(
-	reader *RecordReaderPprintBarred,
+	reader *RecordReaderPprintBarredOrMarkdown,
 	linesChannel <-chan *list.List,
 	filename string,
 	context *types.Context,
diff --git a/pkg/output/record_writer_factory.go b/pkg/output/record_writer_factory.go
index b706f21bd..ae7941490 100644
--- a/pkg/output/record_writer_factory.go
+++ b/pkg/output/record_writer_factory.go
@@ -16,6 +16,8 @@ func Create(writerOptions *cli.TWriterOptions) (IRecordWriter, error) {
 		return NewRecordWriterDKVP(writerOptions)
 	case "json":
 		return NewRecordWriterJSON(writerOptions)
+	case "md":
+		return NewRecordWriterMarkdown(writerOptions)
 	case "markdown":
 		return NewRecordWriterMarkdown(writerOptions)
 	case "nidx":
diff --git a/pkg/terminals/help/entry.go b/pkg/terminals/help/entry.go
index a9148c385..4666adfa9 100644
--- a/pkg/terminals/help/entry.go
+++ b/pkg/terminals/help/entry.go
@@ -408,7 +408,7 @@ PPRINT: pretty-printed tabular
 | 4     5   6         | Record 2: "apple":"4", "bat":"5", "cog":"6"
 +---------------------+
 
-Markdown tabular (supported for output only):
+Markdown tabular:
 +-----------------------+
 | | apple | bat | cog | |
 | | ---   | --- | --- | |
diff --git a/test/cases/io-format-conversion-keystroke-savers/0049/cmd b/test/cases/io-format-conversion-keystroke-savers/c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0049/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0001/experr b/test/cases/io-format-conversion-keystroke-savers/c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0001/experr
rename to test/cases/io-format-conversion-keystroke-savers/c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0014/expout b/test/cases/io-format-conversion-keystroke-savers/c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0014/expout
rename to test/cases/io-format-conversion-keystroke-savers/c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0008/cmd b/test/cases/io-format-conversion-keystroke-savers/c2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0008/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0002/experr b/test/cases/io-format-conversion-keystroke-savers/c2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0002/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0008/expout b/test/cases/io-format-conversion-keystroke-savers/c2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0008/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0010/cmd b/test/cases/io-format-conversion-keystroke-savers/c2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0010/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0003/experr b/test/cases/io-format-conversion-keystroke-savers/c2j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0003/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0003/expout b/test/cases/io-format-conversion-keystroke-savers/c2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0003/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0011/cmd b/test/cases/io-format-conversion-keystroke-savers/c2m/c2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0011/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2m/c2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0004/experr b/test/cases/io-format-conversion-keystroke-savers/c2m/c2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0004/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2m/c2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0004/expout b/test/cases/io-format-conversion-keystroke-savers/c2m/c2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0004/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2m/c2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0013/cmd b/test/cases/io-format-conversion-keystroke-savers/c2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0013/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0005/experr b/test/cases/io-format-conversion-keystroke-savers/c2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0005/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0006/expout b/test/cases/io-format-conversion-keystroke-savers/c2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0006/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0009/cmd b/test/cases/io-format-conversion-keystroke-savers/c2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0009/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0006/experr b/test/cases/io-format-conversion-keystroke-savers/c2n/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0006/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2n/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0002/expout b/test/cases/io-format-conversion-keystroke-savers/c2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0002/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/c2p/cmd b/test/cases/io-format-conversion-keystroke-savers/c2p/cmd
new file mode 100644
index 000000000..8779b448d
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/c2p/cmd
@@ -0,0 +1 @@
+mlr --c2p cat test/input/abixy.csv
diff --git a/test/cases/io-format-conversion-keystroke-savers/0007/experr b/test/cases/io-format-conversion-keystroke-savers/c2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0007/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0011/expout b/test/cases/io-format-conversion-keystroke-savers/c2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0011/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0007/cmd b/test/cases/io-format-conversion-keystroke-savers/c2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0007/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0008/experr b/test/cases/io-format-conversion-keystroke-savers/c2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0008/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0001/expout b/test/cases/io-format-conversion-keystroke-savers/c2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0001/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0012/cmd b/test/cases/io-format-conversion-keystroke-savers/c2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0012/cmd
rename to test/cases/io-format-conversion-keystroke-savers/c2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0009/experr b/test/cases/io-format-conversion-keystroke-savers/c2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0009/experr
rename to test/cases/io-format-conversion-keystroke-savers/c2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0005/expout b/test/cases/io-format-conversion-keystroke-savers/c2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0005/expout
rename to test/cases/io-format-conversion-keystroke-savers/c2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0003/cmd b/test/cases/io-format-conversion-keystroke-savers/d2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0003/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0010/experr b/test/cases/io-format-conversion-keystroke-savers/d2j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0010/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0010/expout b/test/cases/io-format-conversion-keystroke-savers/d2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0010/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0006/cmd b/test/cases/io-format-conversion-keystroke-savers/d2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0006/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0004/cmd b/test/cases/io-format-conversion-keystroke-savers/d2m/d2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0004/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2m/d2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0011/experr b/test/cases/io-format-conversion-keystroke-savers/d2m/d2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0011/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2m/d2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0018/expout b/test/cases/io-format-conversion-keystroke-savers/d2m/d2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0018/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2m/d2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0012/experr b/test/cases/io-format-conversion-keystroke-savers/d2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0012/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0013/expout b/test/cases/io-format-conversion-keystroke-savers/d2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0013/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0002/cmd b/test/cases/io-format-conversion-keystroke-savers/d2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0002/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0013/experr b/test/cases/io-format-conversion-keystroke-savers/d2n/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0013/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2n/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0009/expout b/test/cases/io-format-conversion-keystroke-savers/d2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0009/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/d2p/cmd b/test/cases/io-format-conversion-keystroke-savers/d2p/cmd
new file mode 100644
index 000000000..7ee7ebd21
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/d2p/cmd
@@ -0,0 +1 @@
+mlr --d2p cat test/input/abixy.dkvp
diff --git a/test/cases/io-format-conversion-keystroke-savers/0014/experr b/test/cases/io-format-conversion-keystroke-savers/d2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0014/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0032/expout b/test/cases/io-format-conversion-keystroke-savers/d2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0032/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0001/cmd b/test/cases/io-format-conversion-keystroke-savers/d2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0001/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0015/experr b/test/cases/io-format-conversion-keystroke-savers/d2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0015/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0007/expout b/test/cases/io-format-conversion-keystroke-savers/d2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0007/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0005/cmd b/test/cases/io-format-conversion-keystroke-savers/d2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0005/cmd
rename to test/cases/io-format-conversion-keystroke-savers/d2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0016/experr b/test/cases/io-format-conversion-keystroke-savers/d2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0016/experr
rename to test/cases/io-format-conversion-keystroke-savers/d2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0012/expout b/test/cases/io-format-conversion-keystroke-savers/d2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0012/expout
rename to test/cases/io-format-conversion-keystroke-savers/d2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0053/cmd b/test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0053/cmd
rename to test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0017/experr b/test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0017/experr
rename to test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0053/expout b/test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0053/expout
rename to test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0052/input.tsv b/test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/input.tsv
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0052/input.tsv
rename to test/cases/io-format-conversion-keystroke-savers/itsv-odkvp/input.tsv
diff --git a/test/cases/io-format-conversion-keystroke-savers/0052/cmd b/test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0052/cmd
rename to test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0018/experr b/test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0018/experr
rename to test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0052/expout b/test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0052/expout
rename to test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0053/input.tsv b/test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/input.tsv
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0053/input.tsv
rename to test/cases/io-format-conversion-keystroke-savers/itsvlite-odkvp/input.tsv
diff --git a/test/cases/io-format-conversion-keystroke-savers/0051/cmd b/test/cases/io-format-conversion-keystroke-savers/j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0051/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0019/experr b/test/cases/io-format-conversion-keystroke-savers/j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0019/experr
rename to test/cases/io-format-conversion-keystroke-savers/j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0017/expout b/test/cases/io-format-conversion-keystroke-savers/j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0017/expout
rename to test/cases/io-format-conversion-keystroke-savers/j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0028/cmd b/test/cases/io-format-conversion-keystroke-savers/j2c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0028/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0020/experr b/test/cases/io-format-conversion-keystroke-savers/j2c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0020/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0028/expout b/test/cases/io-format-conversion-keystroke-savers/j2c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0028/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0030/cmd b/test/cases/io-format-conversion-keystroke-savers/j2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0030/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0021/experr b/test/cases/io-format-conversion-keystroke-savers/j2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0021/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0015/expout b/test/cases/io-format-conversion-keystroke-savers/j2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0015/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0034/cmd b/test/cases/io-format-conversion-keystroke-savers/j2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0034/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0022/experr b/test/cases/io-format-conversion-keystroke-savers/j2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0022/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0020/expout b/test/cases/io-format-conversion-keystroke-savers/j2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0020/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0032/cmd b/test/cases/io-format-conversion-keystroke-savers/j2m/j2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0032/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2m/j2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0023/experr b/test/cases/io-format-conversion-keystroke-savers/j2m/j2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0023/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2m/j2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0047/expout b/test/cases/io-format-conversion-keystroke-savers/j2m/j2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0047/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2m/j2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0031/cmd b/test/cases/io-format-conversion-keystroke-savers/j2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0031/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0024/experr b/test/cases/io-format-conversion-keystroke-savers/j2n/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0024/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2n/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0016/expout b/test/cases/io-format-conversion-keystroke-savers/j2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0016/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/j2p/cmd b/test/cases/io-format-conversion-keystroke-savers/j2p/cmd
new file mode 100644
index 000000000..1c11e7e36
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/j2p/cmd
@@ -0,0 +1 @@
+mlr --j2p cat test/input/abixy.json
diff --git a/test/cases/io-format-conversion-keystroke-savers/0025/experr b/test/cases/io-format-conversion-keystroke-savers/j2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0025/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/j2p/expout b/test/cases/io-format-conversion-keystroke-savers/j2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/j2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0029/cmd b/test/cases/io-format-conversion-keystroke-savers/j2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0029/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0026/experr b/test/cases/io-format-conversion-keystroke-savers/j2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0026/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0029/expout b/test/cases/io-format-conversion-keystroke-savers/j2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0029/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0033/cmd b/test/cases/io-format-conversion-keystroke-savers/j2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0033/cmd
rename to test/cases/io-format-conversion-keystroke-savers/j2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0027/experr b/test/cases/io-format-conversion-keystroke-savers/j2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0027/experr
rename to test/cases/io-format-conversion-keystroke-savers/j2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0019/expout b/test/cases/io-format-conversion-keystroke-savers/j2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0019/expout
rename to test/cases/io-format-conversion-keystroke-savers/j2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/l2m/cmd b/test/cases/io-format-conversion-keystroke-savers/l2m/cmd
new file mode 100644
index 000000000..462e4a9a2
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/l2m/cmd
@@ -0,0 +1 @@
+mlr --l2m cat test/input/abixy.json
diff --git a/test/cases/io-format-conversion-keystroke-savers/0028/experr b/test/cases/io-format-conversion-keystroke-savers/l2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0028/experr
rename to test/cases/io-format-conversion-keystroke-savers/l2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0034/expout b/test/cases/io-format-conversion-keystroke-savers/l2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0034/expout
rename to test/cases/io-format-conversion-keystroke-savers/l2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2c/cmd b/test/cases/io-format-conversion-keystroke-savers/m2c/cmd
new file mode 100644
index 000000000..029dc93d3
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2c/cmd
@@ -0,0 +1 @@
+mlr --m2c cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0029/experr b/test/cases/io-format-conversion-keystroke-savers/m2c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0029/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0035/expout b/test/cases/io-format-conversion-keystroke-savers/m2c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0035/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2d/cmd b/test/cases/io-format-conversion-keystroke-savers/m2d/cmd
new file mode 100644
index 000000000..9619267ee
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2d/cmd
@@ -0,0 +1 @@
+mlr --m2d cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0030/experr b/test/cases/io-format-conversion-keystroke-savers/m2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0030/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0030/expout b/test/cases/io-format-conversion-keystroke-savers/m2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0030/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2j/cmd b/test/cases/io-format-conversion-keystroke-savers/m2j/cmd
new file mode 100644
index 000000000..ae6a63c33
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2j/cmd
@@ -0,0 +1 @@
+mlr --m2j cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0031/experr b/test/cases/io-format-conversion-keystroke-savers/m2j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0031/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0039/expout b/test/cases/io-format-conversion-keystroke-savers/m2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0039/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2l/cmd b/test/cases/io-format-conversion-keystroke-savers/m2l/cmd
new file mode 100644
index 000000000..bf8cea46c
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2l/cmd
@@ -0,0 +1 @@
+mlr --m2l cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0032/experr b/test/cases/io-format-conversion-keystroke-savers/m2l/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0032/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2l/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2l/expout b/test/cases/io-format-conversion-keystroke-savers/m2l/expout
new file mode 100644
index 000000000..5a5818e0f
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2l/expout
@@ -0,0 +1,10 @@
+{"a": "pan", "b": "pan", "i": 1, "x": 0.34679014, "y": 0.72680286}
+{"a": "eks", "b": "pan", "i": 2, "x": 0.75867996, "y": 0.52215111}
+{"a": "wye", "b": "wye", "i": 3, "x": 0.20460331, "y": 0.33831853}
+{"a": "eks", "b": "wye", "i": 4, "x": 0.38139939, "y": 0.13418874}
+{"a": "wye", "b": "pan", "i": 5, "x": 0.57328892, "y": 0.86362447}
+{"a": "zee", "b": "pan", "i": 6, "x": 0.52712616, "y": 0.49322129}
+{"a": "eks", "b": "zee", "i": 7, "x": 0.61178406, "y": 0.18788492}
+{"a": "zee", "b": "wye", "i": 8, "x": 0.59855401, "y": 0.97618139}
+{"a": "hat", "b": "wye", "i": 9, "x": 0.03144188, "y": 0.74955076}
+{"a": "pan", "b": "wye", "i": 10, "x": 0.50262601, "y": 0.95261836}
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2n/cmd b/test/cases/io-format-conversion-keystroke-savers/m2n/cmd
new file mode 100644
index 000000000..ba7179b2a
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2n/cmd
@@ -0,0 +1 @@
+mlr --m2n cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0033/experr b/test/cases/io-format-conversion-keystroke-savers/m2n/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0033/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2n/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0031/expout b/test/cases/io-format-conversion-keystroke-savers/m2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0031/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2p/cmd b/test/cases/io-format-conversion-keystroke-savers/m2p/cmd
new file mode 100644
index 000000000..5dfd5e425
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2p/cmd
@@ -0,0 +1 @@
+mlr --m2p cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0034/experr b/test/cases/io-format-conversion-keystroke-savers/m2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0034/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2p/expout b/test/cases/io-format-conversion-keystroke-savers/m2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2t/cmd b/test/cases/io-format-conversion-keystroke-savers/m2t/cmd
new file mode 100644
index 000000000..b24a15801
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2t/cmd
@@ -0,0 +1 @@
+mlr --m2t cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0035/experr b/test/cases/io-format-conversion-keystroke-savers/m2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0035/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0036/expout b/test/cases/io-format-conversion-keystroke-savers/m2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0036/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/m2x/cmd b/test/cases/io-format-conversion-keystroke-savers/m2x/cmd
new file mode 100644
index 000000000..dfff2ec6b
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/m2x/cmd
@@ -0,0 +1 @@
+mlr --m2x cat test/input/abixy.md
diff --git a/test/cases/io-format-conversion-keystroke-savers/0036/experr b/test/cases/io-format-conversion-keystroke-savers/m2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0036/experr
rename to test/cases/io-format-conversion-keystroke-savers/m2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0033/expout b/test/cases/io-format-conversion-keystroke-savers/m2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0033/expout
rename to test/cases/io-format-conversion-keystroke-savers/m2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0021/cmd b/test/cases/io-format-conversion-keystroke-savers/n2c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0021/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0037/experr b/test/cases/io-format-conversion-keystroke-savers/n2c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0037/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0021/expout b/test/cases/io-format-conversion-keystroke-savers/n2c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0021/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0023/cmd b/test/cases/io-format-conversion-keystroke-savers/n2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0023/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0038/experr b/test/cases/io-format-conversion-keystroke-savers/n2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0038/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0023/expout b/test/cases/io-format-conversion-keystroke-savers/n2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0023/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0024/cmd b/test/cases/io-format-conversion-keystroke-savers/n2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0024/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0039/experr b/test/cases/io-format-conversion-keystroke-savers/n2j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0039/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0024/expout b/test/cases/io-format-conversion-keystroke-savers/n2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0024/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0027/cmd b/test/cases/io-format-conversion-keystroke-savers/n2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0027/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0040/experr b/test/cases/io-format-conversion-keystroke-savers/n2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0040/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0027/expout b/test/cases/io-format-conversion-keystroke-savers/n2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0027/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0025/cmd b/test/cases/io-format-conversion-keystroke-savers/n2m/n2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0025/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2m/n2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0041/experr b/test/cases/io-format-conversion-keystroke-savers/n2m/n2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0041/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2m/n2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0025/expout b/test/cases/io-format-conversion-keystroke-savers/n2m/n2p/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0025/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2m/n2p/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/n2p/cmd b/test/cases/io-format-conversion-keystroke-savers/n2p/cmd
new file mode 100644
index 000000000..d1e5973fb
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/n2p/cmd
@@ -0,0 +1 @@
+mlr --n2p cat test/input/abixy.nidx
diff --git a/test/cases/io-format-conversion-keystroke-savers/0042/experr b/test/cases/io-format-conversion-keystroke-savers/n2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0042/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/n2p/expout b/test/cases/io-format-conversion-keystroke-savers/n2p/expout
new file mode 100644
index 000000000..4deb25a0b
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/n2p/expout
@@ -0,0 +1,11 @@
+1   2   3  4          5
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0022/cmd b/test/cases/io-format-conversion-keystroke-savers/n2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0022/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0043/experr b/test/cases/io-format-conversion-keystroke-savers/n2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0043/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0022/expout b/test/cases/io-format-conversion-keystroke-savers/n2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0022/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0026/cmd b/test/cases/io-format-conversion-keystroke-savers/n2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0026/cmd
rename to test/cases/io-format-conversion-keystroke-savers/n2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0044/experr b/test/cases/io-format-conversion-keystroke-savers/n2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0044/experr
rename to test/cases/io-format-conversion-keystroke-savers/n2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0026/expout b/test/cases/io-format-conversion-keystroke-savers/n2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0026/expout
rename to test/cases/io-format-conversion-keystroke-savers/n2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0035/cmd b/test/cases/io-format-conversion-keystroke-savers/p2c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0035/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0045/experr b/test/cases/io-format-conversion-keystroke-savers/p2c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0045/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0042/expout b/test/cases/io-format-conversion-keystroke-savers/p2c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0042/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0037/cmd b/test/cases/io-format-conversion-keystroke-savers/p2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0037/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0046/experr b/test/cases/io-format-conversion-keystroke-savers/p2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0046/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0037/expout b/test/cases/io-format-conversion-keystroke-savers/p2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0037/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0039/cmd b/test/cases/io-format-conversion-keystroke-savers/p2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0039/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0047/experr b/test/cases/io-format-conversion-keystroke-savers/p2j/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0047/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0046/expout b/test/cases/io-format-conversion-keystroke-savers/p2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0046/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0041/cmd b/test/cases/io-format-conversion-keystroke-savers/p2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0041/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0048/experr b/test/cases/io-format-conversion-keystroke-savers/p2m/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0048/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0041/expout b/test/cases/io-format-conversion-keystroke-savers/p2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0041/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0047/cmd b/test/cases/io-format-conversion-keystroke-savers/p2m/x2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0047/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2m/x2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0049/experr b/test/cases/io-format-conversion-keystroke-savers/p2m/x2p/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0049/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2m/x2p/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/p2m/x2p/expout b/test/cases/io-format-conversion-keystroke-savers/p2m/x2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/p2m/x2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0038/cmd b/test/cases/io-format-conversion-keystroke-savers/p2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0038/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0050/experr b/test/cases/io-format-conversion-keystroke-savers/p2n/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0050/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2n/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0038/expout b/test/cases/io-format-conversion-keystroke-savers/p2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0038/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0036/cmd b/test/cases/io-format-conversion-keystroke-savers/p2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0036/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0051/experr b/test/cases/io-format-conversion-keystroke-savers/p2t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0051/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0043/expout b/test/cases/io-format-conversion-keystroke-savers/p2t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0043/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0040/cmd b/test/cases/io-format-conversion-keystroke-savers/p2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0040/cmd
rename to test/cases/io-format-conversion-keystroke-savers/p2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0052/experr b/test/cases/io-format-conversion-keystroke-savers/p2x/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0052/experr
rename to test/cases/io-format-conversion-keystroke-savers/p2x/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0040/expout b/test/cases/io-format-conversion-keystroke-savers/p2x/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0040/expout
rename to test/cases/io-format-conversion-keystroke-savers/p2x/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0050/cmd b/test/cases/io-format-conversion-keystroke-savers/t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0050/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0053/experr b/test/cases/io-format-conversion-keystroke-savers/t/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0053/experr
rename to test/cases/io-format-conversion-keystroke-savers/t/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0050/expout b/test/cases/io-format-conversion-keystroke-savers/t/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0050/expout
rename to test/cases/io-format-conversion-keystroke-savers/t/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0014/cmd b/test/cases/io-format-conversion-keystroke-savers/t2c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0014/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0054/experr b/test/cases/io-format-conversion-keystroke-savers/t2c/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0054/experr
rename to test/cases/io-format-conversion-keystroke-savers/t2c/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0049/expout b/test/cases/io-format-conversion-keystroke-savers/t2c/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0049/expout
rename to test/cases/io-format-conversion-keystroke-savers/t2c/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0015/cmd b/test/cases/io-format-conversion-keystroke-savers/t2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0015/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/0055/experr b/test/cases/io-format-conversion-keystroke-savers/t2d/experr
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0055/experr
rename to test/cases/io-format-conversion-keystroke-savers/t2d/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0044/expout b/test/cases/io-format-conversion-keystroke-savers/t2d/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0044/expout
rename to test/cases/io-format-conversion-keystroke-savers/t2d/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0017/cmd b/test/cases/io-format-conversion-keystroke-savers/t2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0017/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2j/cmd
diff --git a/test/cases/io-markdown-output/0001/experr b/test/cases/io-format-conversion-keystroke-savers/t2j/experr
similarity index 100%
rename from test/cases/io-markdown-output/0001/experr
rename to test/cases/io-format-conversion-keystroke-savers/t2j/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0051/expout b/test/cases/io-format-conversion-keystroke-savers/t2j/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0051/expout
rename to test/cases/io-format-conversion-keystroke-savers/t2j/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0020/cmd b/test/cases/io-format-conversion-keystroke-savers/t2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0020/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2m/cmd
diff --git a/test/cases/io-markdown-output/0002/experr b/test/cases/io-format-conversion-keystroke-savers/t2m/experr
similarity index 100%
rename from test/cases/io-markdown-output/0002/experr
rename to test/cases/io-format-conversion-keystroke-savers/t2m/experr
diff --git a/test/cases/io-format-conversion-keystroke-savers/0048/expout b/test/cases/io-format-conversion-keystroke-savers/t2m/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0048/expout
rename to test/cases/io-format-conversion-keystroke-savers/t2m/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0018/cmd b/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0018/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2m/t2p/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/experr b/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/expout b/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/t2m/t2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0016/cmd b/test/cases/io-format-conversion-keystroke-savers/t2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0016/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2n/experr b/test/cases/io-format-conversion-keystroke-savers/t2n/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/0045/expout b/test/cases/io-format-conversion-keystroke-savers/t2n/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0045/expout
rename to test/cases/io-format-conversion-keystroke-savers/t2n/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2p/cmd b/test/cases/io-format-conversion-keystroke-savers/t2p/cmd
new file mode 100644
index 000000000..7963d101b
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/t2p/cmd
@@ -0,0 +1 @@
+mlr --t2p cat test/input/abixy.tsv
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2p/experr b/test/cases/io-format-conversion-keystroke-savers/t2p/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2p/expout b/test/cases/io-format-conversion-keystroke-savers/t2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/t2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0019/cmd b/test/cases/io-format-conversion-keystroke-savers/t2x/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0019/cmd
rename to test/cases/io-format-conversion-keystroke-savers/t2x/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2x/experr b/test/cases/io-format-conversion-keystroke-savers/t2x/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/t2x/expout b/test/cases/io-format-conversion-keystroke-savers/t2x/expout
new file mode 100644
index 000000000..9c955fb87
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/t2x/expout
@@ -0,0 +1,59 @@
+a pan
+b pan
+i 1
+x 0.34679014
+y 0.72680286
+
+a eks
+b pan
+i 2
+x 0.75867996
+y 0.52215111
+
+a wye
+b wye
+i 3
+x 0.20460331
+y 0.33831853
+
+a eks
+b wye
+i 4
+x 0.38139939
+y 0.13418874
+
+a wye
+b pan
+i 5
+x 0.57328892
+y 0.86362447
+
+a zee
+b pan
+i 6
+x 0.52712616
+y 0.49322129
+
+a eks
+b zee
+i 7
+x 0.61178406
+y 0.18788492
+
+a zee
+b wye
+i 8
+x 0.59855401
+y 0.97618139
+
+a hat
+b wye
+i 9
+x 0.03144188
+y 0.74955076
+
+a pan
+b wye
+i 10
+x 0.50262601
+y 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0055/cmd b/test/cases/io-format-conversion-keystroke-savers/tsv/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0055/cmd
rename to test/cases/io-format-conversion-keystroke-savers/tsv/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/tsv/experr b/test/cases/io-format-conversion-keystroke-savers/tsv/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/0055/expout b/test/cases/io-format-conversion-keystroke-savers/tsv/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0055/expout
rename to test/cases/io-format-conversion-keystroke-savers/tsv/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0054/input.tsv b/test/cases/io-format-conversion-keystroke-savers/tsv/input.tsv
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0054/input.tsv
rename to test/cases/io-format-conversion-keystroke-savers/tsv/input.tsv
diff --git a/test/cases/io-format-conversion-keystroke-savers/0054/cmd b/test/cases/io-format-conversion-keystroke-savers/tsvlite/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0054/cmd
rename to test/cases/io-format-conversion-keystroke-savers/tsvlite/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/tsvlite/experr b/test/cases/io-format-conversion-keystroke-savers/tsvlite/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/0054/expout b/test/cases/io-format-conversion-keystroke-savers/tsvlite/expout
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0054/expout
rename to test/cases/io-format-conversion-keystroke-savers/tsvlite/expout
diff --git a/test/cases/io-format-conversion-keystroke-savers/0055/input.tsv b/test/cases/io-format-conversion-keystroke-savers/tsvlite/input.tsv
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0055/input.tsv
rename to test/cases/io-format-conversion-keystroke-savers/tsvlite/input.tsv
diff --git a/test/cases/io-format-conversion-keystroke-savers/0042/cmd b/test/cases/io-format-conversion-keystroke-savers/x2c/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0042/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2c/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2c/experr b/test/cases/io-format-conversion-keystroke-savers/x2c/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2c/expout b/test/cases/io-format-conversion-keystroke-savers/x2c/expout
new file mode 100644
index 000000000..37eac50e4
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2c/expout
@@ -0,0 +1,11 @@
+a,b,i,x,y
+pan,pan,1,0.34679014,0.72680286
+eks,pan,2,0.75867996,0.52215111
+wye,wye,3,0.20460331,0.33831853
+eks,wye,4,0.38139939,0.13418874
+wye,pan,5,0.57328892,0.86362447
+zee,pan,6,0.52712616,0.49322129
+eks,zee,7,0.61178406,0.18788492
+zee,wye,8,0.59855401,0.97618139
+hat,wye,9,0.03144188,0.74955076
+pan,wye,10,0.50262601,0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0044/cmd b/test/cases/io-format-conversion-keystroke-savers/x2d/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0044/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2d/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2d/experr b/test/cases/io-format-conversion-keystroke-savers/x2d/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2d/expout b/test/cases/io-format-conversion-keystroke-savers/x2d/expout
new file mode 100644
index 000000000..940df3d11
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2d/expout
@@ -0,0 +1,10 @@
+a=pan,b=pan,i=1,x=0.34679014,y=0.72680286
+a=eks,b=pan,i=2,x=0.75867996,y=0.52215111
+a=wye,b=wye,i=3,x=0.20460331,y=0.33831853
+a=eks,b=wye,i=4,x=0.38139939,y=0.13418874
+a=wye,b=pan,i=5,x=0.57328892,y=0.86362447
+a=zee,b=pan,i=6,x=0.52712616,y=0.49322129
+a=eks,b=zee,i=7,x=0.61178406,y=0.18788492
+a=zee,b=wye,i=8,x=0.59855401,y=0.97618139
+a=hat,b=wye,i=9,x=0.03144188,y=0.74955076
+a=pan,b=wye,i=10,x=0.50262601,y=0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0046/cmd b/test/cases/io-format-conversion-keystroke-savers/x2j/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0046/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2j/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2j/experr b/test/cases/io-format-conversion-keystroke-savers/x2j/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2j/expout b/test/cases/io-format-conversion-keystroke-savers/x2j/expout
new file mode 100644
index 000000000..48da25210
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2j/expout
@@ -0,0 +1,72 @@
+[
+{
+  "a": "pan",
+  "b": "pan",
+  "i": 1,
+  "x": 0.34679014,
+  "y": 0.72680286
+},
+{
+  "a": "eks",
+  "b": "pan",
+  "i": 2,
+  "x": 0.75867996,
+  "y": 0.52215111
+},
+{
+  "a": "wye",
+  "b": "wye",
+  "i": 3,
+  "x": 0.20460331,
+  "y": 0.33831853
+},
+{
+  "a": "eks",
+  "b": "wye",
+  "i": 4,
+  "x": 0.38139939,
+  "y": 0.13418874
+},
+{
+  "a": "wye",
+  "b": "pan",
+  "i": 5,
+  "x": 0.57328892,
+  "y": 0.86362447
+},
+{
+  "a": "zee",
+  "b": "pan",
+  "i": 6,
+  "x": 0.52712616,
+  "y": 0.49322129
+},
+{
+  "a": "eks",
+  "b": "zee",
+  "i": 7,
+  "x": 0.61178406,
+  "y": 0.18788492
+},
+{
+  "a": "zee",
+  "b": "wye",
+  "i": 8,
+  "x": 0.59855401,
+  "y": 0.97618139
+},
+{
+  "a": "hat",
+  "b": "wye",
+  "i": 9,
+  "x": 0.03144188,
+  "y": 0.74955076
+},
+{
+  "a": "pan",
+  "b": "wye",
+  "i": 10,
+  "x": 0.50262601,
+  "y": 0.95261836
+}
+]
diff --git a/test/cases/io-format-conversion-keystroke-savers/0048/cmd b/test/cases/io-format-conversion-keystroke-savers/x2m/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0048/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2m/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2m/experr b/test/cases/io-format-conversion-keystroke-savers/x2m/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2m/expout b/test/cases/io-format-conversion-keystroke-savers/x2m/expout
new file mode 100644
index 000000000..3d77a0324
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2m/expout
@@ -0,0 +1,12 @@
+| a | b | i | x | y |
+| --- | --- | --- | --- | --- |
+| pan | pan | 1 | 0.34679014 | 0.72680286 |
+| eks | pan | 2 | 0.75867996 | 0.52215111 |
+| wye | wye | 3 | 0.20460331 | 0.33831853 |
+| eks | wye | 4 | 0.38139939 | 0.13418874 |
+| wye | pan | 5 | 0.57328892 | 0.86362447 |
+| zee | pan | 6 | 0.52712616 | 0.49322129 |
+| eks | zee | 7 | 0.61178406 | 0.18788492 |
+| zee | wye | 8 | 0.59855401 | 0.97618139 |
+| hat | wye | 9 | 0.03144188 | 0.74955076 |
+| pan | wye | 10 | 0.50262601 | 0.95261836 |
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/cmd b/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/cmd
new file mode 100644
index 000000000..2dad8a232
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/cmd
@@ -0,0 +1 @@
+mlr --x2p cat test/input/abixy.xtab
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/experr b/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/expout b/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2m/x2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0045/cmd b/test/cases/io-format-conversion-keystroke-savers/x2n/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0045/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2n/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2n/experr b/test/cases/io-format-conversion-keystroke-savers/x2n/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2n/expout b/test/cases/io-format-conversion-keystroke-savers/x2n/expout
new file mode 100644
index 000000000..17f7e1ee6
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2n/expout
@@ -0,0 +1,10 @@
+pan pan 1 0.34679014 0.72680286
+eks pan 2 0.75867996 0.52215111
+wye wye 3 0.20460331 0.33831853
+eks wye 4 0.38139939 0.13418874
+wye pan 5 0.57328892 0.86362447
+zee pan 6 0.52712616 0.49322129
+eks zee 7 0.61178406 0.18788492
+zee wye 8 0.59855401 0.97618139
+hat wye 9 0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2p/cmd b/test/cases/io-format-conversion-keystroke-savers/x2p/cmd
new file mode 100644
index 000000000..2dad8a232
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2p/cmd
@@ -0,0 +1 @@
+mlr --x2p cat test/input/abixy.xtab
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2p/experr b/test/cases/io-format-conversion-keystroke-savers/x2p/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2p/expout b/test/cases/io-format-conversion-keystroke-savers/x2p/expout
new file mode 100644
index 000000000..b8ac13481
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2p/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+eks pan 2  0.75867996 0.52215111
+wye wye 3  0.20460331 0.33831853
+eks wye 4  0.38139939 0.13418874
+wye pan 5  0.57328892 0.86362447
+zee pan 6  0.52712616 0.49322129
+eks zee 7  0.61178406 0.18788492
+zee wye 8  0.59855401 0.97618139
+hat wye 9  0.03144188 0.74955076
+pan wye 10 0.50262601 0.95261836
diff --git a/test/cases/io-format-conversion-keystroke-savers/0043/cmd b/test/cases/io-format-conversion-keystroke-savers/x2t/cmd
similarity index 100%
rename from test/cases/io-format-conversion-keystroke-savers/0043/cmd
rename to test/cases/io-format-conversion-keystroke-savers/x2t/cmd
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2t/experr b/test/cases/io-format-conversion-keystroke-savers/x2t/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-format-conversion-keystroke-savers/x2t/expout b/test/cases/io-format-conversion-keystroke-savers/x2t/expout
new file mode 100644
index 000000000..03ac8f384
--- /dev/null
+++ b/test/cases/io-format-conversion-keystroke-savers/x2t/expout
@@ -0,0 +1,11 @@
+a	b	i	x	y
+pan	pan	1	0.34679014	0.72680286
+eks	pan	2	0.75867996	0.52215111
+wye	wye	3	0.20460331	0.33831853
+eks	wye	4	0.38139939	0.13418874
+wye	pan	5	0.57328892	0.86362447
+zee	pan	6	0.52712616	0.49322129
+eks	zee	7	0.61178406	0.18788492
+zee	wye	8	0.59855401	0.97618139
+hat	wye	9	0.03144188	0.74955076
+pan	wye	10	0.50262601	0.95261836
diff --git a/test/cases/io-markdown-output/0001/cmd b/test/cases/io-markdown/0001/cmd
similarity index 100%
rename from test/cases/io-markdown-output/0001/cmd
rename to test/cases/io-markdown/0001/cmd
diff --git a/test/cases/io-markdown/0001/experr b/test/cases/io-markdown/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-markdown-output/0001/expout b/test/cases/io-markdown/0001/expout
similarity index 100%
rename from test/cases/io-markdown-output/0001/expout
rename to test/cases/io-markdown/0001/expout
diff --git a/test/cases/io-markdown-output/0002/cmd b/test/cases/io-markdown/0002/cmd
similarity index 100%
rename from test/cases/io-markdown-output/0002/cmd
rename to test/cases/io-markdown/0002/cmd
diff --git a/test/cases/io-markdown/0002/experr b/test/cases/io-markdown/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-markdown-output/0002/expout b/test/cases/io-markdown/0002/expout
similarity index 100%
rename from test/cases/io-markdown-output/0002/expout
rename to test/cases/io-markdown/0002/expout
diff --git a/test/cases/io-markdown-output/0002/input b/test/cases/io-markdown/0002/input
similarity index 100%
rename from test/cases/io-markdown-output/0002/input
rename to test/cases/io-markdown/0002/input
diff --git a/test/cases/io-markdown/markdown-input-headerless/cmd b/test/cases/io-markdown/markdown-input-headerless/cmd
new file mode 100644
index 000000000..f38836b97
--- /dev/null
+++ b/test/cases/io-markdown/markdown-input-headerless/cmd
@@ -0,0 +1 @@
+mlr --hi -i markdown -o json cat test/input/abixy.md
diff --git a/test/cases/io-markdown/markdown-input-headerless/experr b/test/cases/io-markdown/markdown-input-headerless/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-markdown/markdown-input-headerless/expout b/test/cases/io-markdown/markdown-input-headerless/expout
new file mode 100644
index 000000000..e08b56a90
--- /dev/null
+++ b/test/cases/io-markdown/markdown-input-headerless/expout
@@ -0,0 +1,79 @@
+[
+{
+  "1": "a",
+  "2": "b",
+  "3": "i",
+  "4": "x",
+  "5": "y"
+},
+{
+  "1": "pan",
+  "2": "pan",
+  "3": 1,
+  "4": 0.34679014,
+  "5": 0.72680286
+},
+{
+  "1": "eks",
+  "2": "pan",
+  "3": 2,
+  "4": 0.75867996,
+  "5": 0.52215111
+},
+{
+  "1": "wye",
+  "2": "wye",
+  "3": 3,
+  "4": 0.20460331,
+  "5": 0.33831853
+},
+{
+  "1": "eks",
+  "2": "wye",
+  "3": 4,
+  "4": 0.38139939,
+  "5": 0.13418874
+},
+{
+  "1": "wye",
+  "2": "pan",
+  "3": 5,
+  "4": 0.57328892,
+  "5": 0.86362447
+},
+{
+  "1": "zee",
+  "2": "pan",
+  "3": 6,
+  "4": 0.52712616,
+  "5": 0.49322129
+},
+{
+  "1": "eks",
+  "2": "zee",
+  "3": 7,
+  "4": 0.61178406,
+  "5": 0.18788492
+},
+{
+  "1": "zee",
+  "2": "wye",
+  "3": 8,
+  "4": 0.59855401,
+  "5": 0.97618139
+},
+{
+  "1": "hat",
+  "2": "wye",
+  "3": 9,
+  "4": 0.03144188,
+  "5": 0.74955076
+},
+{
+  "1": "pan",
+  "2": "wye",
+  "3": 10,
+  "4": 0.50262601,
+  "5": 0.95261836
+}
+]
diff --git a/test/cases/io-markdown/markdown-input/cmd b/test/cases/io-markdown/markdown-input/cmd
new file mode 100644
index 000000000..5dd4673c5
--- /dev/null
+++ b/test/cases/io-markdown/markdown-input/cmd
@@ -0,0 +1 @@
+mlr -i markdown -o json cat test/input/abixy.md
diff --git a/test/cases/io-markdown/markdown-input/experr b/test/cases/io-markdown/markdown-input/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-markdown/markdown-input/expout b/test/cases/io-markdown/markdown-input/expout
new file mode 100644
index 000000000..48da25210
--- /dev/null
+++ b/test/cases/io-markdown/markdown-input/expout
@@ -0,0 +1,72 @@
+[
+{
+  "a": "pan",
+  "b": "pan",
+  "i": 1,
+  "x": 0.34679014,
+  "y": 0.72680286
+},
+{
+  "a": "eks",
+  "b": "pan",
+  "i": 2,
+  "x": 0.75867996,
+  "y": 0.52215111
+},
+{
+  "a": "wye",
+  "b": "wye",
+  "i": 3,
+  "x": 0.20460331,
+  "y": 0.33831853
+},
+{
+  "a": "eks",
+  "b": "wye",
+  "i": 4,
+  "x": 0.38139939,
+  "y": 0.13418874
+},
+{
+  "a": "wye",
+  "b": "pan",
+  "i": 5,
+  "x": 0.57328892,
+  "y": 0.86362447
+},
+{
+  "a": "zee",
+  "b": "pan",
+  "i": 6,
+  "x": 0.52712616,
+  "y": 0.49322129
+},
+{
+  "a": "eks",
+  "b": "zee",
+  "i": 7,
+  "x": 0.61178406,
+  "y": 0.18788492
+},
+{
+  "a": "zee",
+  "b": "wye",
+  "i": 8,
+  "x": 0.59855401,
+  "y": 0.97618139
+},
+{
+  "a": "hat",
+  "b": "wye",
+  "i": 9,
+  "x": 0.03144188,
+  "y": 0.74955076
+},
+{
+  "a": "pan",
+  "b": "wye",
+  "i": 10,
+  "x": 0.50262601,
+  "y": 0.95261836
+}
+]

From ac65675ab1e346d6730d9cd7b092820b63f9471a Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 20 Jan 2024 18:43:49 -0500
Subject: [PATCH 117/456] Auto-unsparsify CSV and TSV on output (#1479)

* Auto-unsparsify CSV

* Update unit-test cases

* More unit-test cases

* Key-change handling for CSV output

* Same for TSV, with unit-test and doc updates
---
 docs/src/data/key-change.json                 |  5 +
 docs/src/data/under-over.json                 |  6 ++
 docs/src/file-formats.md                      | 68 +++++++++++++
 docs/src/file-formats.md.in                   | 25 +++++
 docs/src/questions-about-joins.md             |  4 +-
 docs/src/record-heterogeneity.md              | 51 +++++++---
 docs/src/record-heterogeneity.md.in           | 27 +++--
 pkg/output/channel_writer.go                  | 15 ++-
 pkg/output/record_writer.go                   |  2 +-
 pkg/output/record_writer_csv.go               | 88 ++++++++---------
 pkg/output/record_writer_csvlite.go           |  8 +-
 pkg/output/record_writer_dkvp.go              |  8 +-
 pkg/output/record_writer_json.go              |  3 +-
 pkg/output/record_writer_markdown.go          |  6 +-
 pkg/output/record_writer_nidx.go              |  6 +-
 pkg/output/record_writer_pprint.go            |  4 +-
 pkg/output/record_writer_tsv.go               | 98 ++++++++++---------
 pkg/output/record_writer_xtab.go              |  6 +-
 test/cases/io-csv-auto-unsparsify/at/cmd      |  1 +
 test/cases/io-csv-auto-unsparsify/at/experr   |  0
 test/cases/io-csv-auto-unsparsify/at/expout   |  4 +
 .../io-csv-auto-unsparsify/at/input.json      | 17 ++++
 .../io-csv-auto-unsparsify/key-change/cmd     |  1 +
 .../io-csv-auto-unsparsify/key-change/experr  |  2 +
 .../io-csv-auto-unsparsify/key-change/expout  |  3 +
 .../key-change/input.json                     | 17 ++++
 .../key-change/should-fail                    |  0
 test/cases/io-csv-auto-unsparsify/over/cmd    |  1 +
 test/cases/io-csv-auto-unsparsify/over/experr |  0
 test/cases/io-csv-auto-unsparsify/over/expout |  4 +
 .../io-csv-auto-unsparsify/over/input.json    | 18 ++++
 test/cases/io-csv-auto-unsparsify/under/cmd   |  1 +
 .../cases/io-csv-auto-unsparsify/under/experr |  0
 .../cases/io-csv-auto-unsparsify/under/expout |  4 +
 .../io-csv-auto-unsparsify/under/input.json   | 16 +++
 test/cases/io-multi/0010/experr               |  2 +
 test/cases/io-multi/0010/expout               | 33 -------
 test/cases/io-multi/0010/should-fail          |  0
 test/cases/io-multi/0033/experr               |  2 +
 test/cases/io-multi/0033/expout               | 33 -------
 test/cases/io-multi/0033/should-fail          |  0
 test/cases/io-multi/0034/experr               |  2 +
 test/cases/io-multi/0034/expout               | 22 -----
 test/cases/io-multi/0034/should-fail          |  0
 test/cases/io-tsv-auto-unsparsify/at/cmd      |  1 +
 test/cases/io-tsv-auto-unsparsify/at/experr   |  0
 test/cases/io-tsv-auto-unsparsify/at/expout   |  4 +
 .../io-tsv-auto-unsparsify/at/input.json      | 17 ++++
 .../io-tsv-auto-unsparsify/key-change/cmd     |  1 +
 .../io-tsv-auto-unsparsify/key-change/experr  |  2 +
 .../io-tsv-auto-unsparsify/key-change/expout  |  3 +
 .../key-change/input.json                     | 17 ++++
 .../key-change/should-fail                    |  0
 test/cases/io-tsv-auto-unsparsify/over/cmd    |  1 +
 test/cases/io-tsv-auto-unsparsify/over/experr |  0
 test/cases/io-tsv-auto-unsparsify/over/expout |  4 +
 .../io-tsv-auto-unsparsify/over/input.json    | 18 ++++
 test/cases/io-tsv-auto-unsparsify/under/cmd   |  1 +
 .../cases/io-tsv-auto-unsparsify/under/experr |  0
 .../cases/io-tsv-auto-unsparsify/under/expout |  4 +
 .../io-tsv-auto-unsparsify/under/input.json   | 16 +++
 61 files changed, 481 insertions(+), 221 deletions(-)
 create mode 100644 docs/src/data/key-change.json
 create mode 100644 docs/src/data/under-over.json
 create mode 100644 test/cases/io-csv-auto-unsparsify/at/cmd
 create mode 100644 test/cases/io-csv-auto-unsparsify/at/experr
 create mode 100644 test/cases/io-csv-auto-unsparsify/at/expout
 create mode 100644 test/cases/io-csv-auto-unsparsify/at/input.json
 create mode 100644 test/cases/io-csv-auto-unsparsify/key-change/cmd
 create mode 100644 test/cases/io-csv-auto-unsparsify/key-change/experr
 create mode 100644 test/cases/io-csv-auto-unsparsify/key-change/expout
 create mode 100644 test/cases/io-csv-auto-unsparsify/key-change/input.json
 create mode 100644 test/cases/io-csv-auto-unsparsify/key-change/should-fail
 create mode 100644 test/cases/io-csv-auto-unsparsify/over/cmd
 create mode 100644 test/cases/io-csv-auto-unsparsify/over/experr
 create mode 100644 test/cases/io-csv-auto-unsparsify/over/expout
 create mode 100644 test/cases/io-csv-auto-unsparsify/over/input.json
 create mode 100644 test/cases/io-csv-auto-unsparsify/under/cmd
 create mode 100644 test/cases/io-csv-auto-unsparsify/under/experr
 create mode 100644 test/cases/io-csv-auto-unsparsify/under/expout
 create mode 100644 test/cases/io-csv-auto-unsparsify/under/input.json
 create mode 100644 test/cases/io-multi/0010/should-fail
 create mode 100644 test/cases/io-multi/0033/should-fail
 create mode 100644 test/cases/io-multi/0034/should-fail
 create mode 100644 test/cases/io-tsv-auto-unsparsify/at/cmd
 create mode 100644 test/cases/io-tsv-auto-unsparsify/at/experr
 create mode 100644 test/cases/io-tsv-auto-unsparsify/at/expout
 create mode 100644 test/cases/io-tsv-auto-unsparsify/at/input.json
 create mode 100644 test/cases/io-tsv-auto-unsparsify/key-change/cmd
 create mode 100644 test/cases/io-tsv-auto-unsparsify/key-change/experr
 create mode 100644 test/cases/io-tsv-auto-unsparsify/key-change/expout
 create mode 100644 test/cases/io-tsv-auto-unsparsify/key-change/input.json
 create mode 100644 test/cases/io-tsv-auto-unsparsify/key-change/should-fail
 create mode 100644 test/cases/io-tsv-auto-unsparsify/over/cmd
 create mode 100644 test/cases/io-tsv-auto-unsparsify/over/experr
 create mode 100644 test/cases/io-tsv-auto-unsparsify/over/expout
 create mode 100644 test/cases/io-tsv-auto-unsparsify/over/input.json
 create mode 100644 test/cases/io-tsv-auto-unsparsify/under/cmd
 create mode 100644 test/cases/io-tsv-auto-unsparsify/under/experr
 create mode 100644 test/cases/io-tsv-auto-unsparsify/under/expout
 create mode 100644 test/cases/io-tsv-auto-unsparsify/under/input.json

diff --git a/docs/src/data/key-change.json b/docs/src/data/key-change.json
new file mode 100644
index 000000000..c2719c54f
--- /dev/null
+++ b/docs/src/data/key-change.json
@@ -0,0 +1,5 @@
+[
+  { "a": 1, "b": 2, "c": 3 },
+  { "a": 4, "b": 5, "c": 6 },
+  { "a": 7, "X": 8, "c": 9 }
+]
diff --git a/docs/src/data/under-over.json b/docs/src/data/under-over.json
new file mode 100644
index 000000000..0de486a83
--- /dev/null
+++ b/docs/src/data/under-over.json
@@ -0,0 +1,6 @@
+[
+  { "a": 1, "b": 2, "c": 3 },
+  { "a": 4, "b": 5, "c": 6, "d": 7 },
+  { "a": 7, "b": 8 },
+  { "a": 9, "b": 10, "c": 11 }
+]
diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md
index 3af248ce5..7064b9b49 100644
--- a/docs/src/file-formats.md
+++ b/docs/src/file-formats.md
@@ -130,6 +130,74 @@ In particular, no encode/decode of  `\r`, `\n`, `\t`, or `\\` is done.
 
 * CSV-lite allows changing FS and/or RS to any values, perhaps multi-character.
 
+* CSV-lite and TSV-lite handle schema changes ("schema" meaning "ordered list of field names in a given record") by adding a newline and re-emitting the header. CSV and TSV, by contrast, do the following:
+  * If there are too few keys, but these match the header, empty fields are emitted.
+  * If there are too many keys, but these match the header up to the number of header fields, the extra fields are emitted.
+  * If keys don't match the header, this is an error.
+
+
+cat data/under-over.json
+
+
+[
+  { "a": 1, "b": 2, "c": 3 },
+  { "a": 4, "b": 5, "c": 6, "d": 7 },
+  { "a": 7, "b": 8 },
+  { "a": 9, "b": 10, "c": 11 }
+]
+
+ +
+mlr --ijson --ocsvlite cat data/under-over.json
+
+
+a,b,c
+1,2,3
+
+a,b,c,d
+4,5,6,7
+
+a,b
+7,8
+
+a,b,c
+9,10,11
+
+ +
+mlr --ijson --ocsvlite cat data/key-change.json
+
+
+a,b,c
+1,2,3
+4,5,6
+
+a,X,c
+7,8,9
+
+ +
+mlr --ijson --ocsv cat data/under-over.json
+
+
+a,b,c
+1,2,3
+4,5,6,7
+7,8,
+9,10,11
+
+ +
+mlr --ijson --ocsv cat data/key-change.json
+
+
+a,b,c
+1,2,3
+4,5,6
+mlr: CSV schema change: first keys "a,b,c"; current keys "a,X,c"
+mlr: exiting due to data error.
+
+ * In short, use-cases for CSV-lite and TSV-lite are often found when dealing with CSV/TSV files which are formatted in some non-standard way -- you have a little more flexibility available to you. (As an example of this flexibility: ASV and USV are nothing more than CSV-lite with different values for FS and RS.) CSV, TSV, CSV-lite, and TSV-lite have in common the `--implicit-csv-header` flag for input and the `--headerless-csv-output` flag for output. diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in index 7e3d50308..36365a1fb 100644 --- a/docs/src/file-formats.md.in +++ b/docs/src/file-formats.md.in @@ -42,6 +42,31 @@ In particular, no encode/decode of `\r`, `\n`, `\t`, or `\\` is done. * CSV-lite allows changing FS and/or RS to any values, perhaps multi-character. +* CSV-lite and TSV-lite handle schema changes ("schema" meaning "ordered list of field names in a given record") by adding a newline and re-emitting the header. CSV and TSV, by contrast, do the following: + * If there are too few keys, but these match the header, empty fields are emitted. + * If there are too many keys, but these match the header up to the number of header fields, the extra fields are emitted. + * If keys don't match the header, this is an error. + +GENMD-RUN-COMMAND +cat data/under-over.json +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --ijson --ocsvlite cat data/under-over.json +GENMD-EOF + +GENMD-RUN-COMMAND-TOLERATING-ERROR +mlr --ijson --ocsvlite cat data/key-change.json +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --ijson --ocsv cat data/under-over.json +GENMD-EOF + +GENMD-RUN-COMMAND-TOLERATING-ERROR +mlr --ijson --ocsv cat data/key-change.json +GENMD-EOF + * In short, use-cases for CSV-lite and TSV-lite are often found when dealing with CSV/TSV files which are formatted in some non-standard way -- you have a little more flexibility available to you. (As an example of this flexibility: ASV and USV are nothing more than CSV-lite with different values for FS and RS.) 
CSV, TSV, CSV-lite, and TSV-lite have in common the `--implicit-csv-header` flag for input and the `--headerless-csv-output` flag for output. diff --git a/docs/src/questions-about-joins.md b/docs/src/questions-about-joins.md index b8bde2d46..e3974877e 100644 --- a/docs/src/questions-about-joins.md +++ b/docs/src/questions-about-joins.md @@ -118,9 +118,7 @@ However, if we ask for left-unpaireds, since there's no `color` column, we get a id,code,color 4,ff0000,red 2,00ff00,green - -id,code -3,0000ff +3,0000ff,
To fix this, we can use **unsparsify**: diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index d02a52448..de96ae69c 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -375,13 +375,12 @@ record_count=150,resource=/path/to/second/file CSV and pretty-print formats expect rectangular structure. But Miller lets you process non-rectangular using CSV and pretty-print. -Miller simply prints a newline and a new header when there is a schema change --- where by _schema_ we mean simply the list of record keys in the order they -are encountered. When there is no schema change, you get CSV per se as a -special case. Likewise, Miller reads heterogeneous CSV or pretty-print input -the same way. The difference between CSV and CSV-lite is that the former is -[RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter readily -handles heterogeneous data (which is non-compliant). For example: +For CSV-lite and TSV-lite, Miller simply prints a newline and a new header when there is a schema +change -- where by _schema_ we mean simply the list of record keys in the order they are +encountered. When there is no schema change, you get CSV per se as a special case. Likewise, Miller +reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite +is that the former is [RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter +readily handles heterogeneous data (which is non-compliant). For example:
 cat data/het.json
@@ -446,19 +445,43 @@ record_count resource
 150          /path/to/second/file
 
-Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). +
+mlr --ijson --ocsvlite group-like data/het.json
+
+
+resource,loadsec,ok
+/path/to/file,0.45,true
+/path/to/second/file,0.32,true
+/some/other/path,0.97,false
+
+record_count,resource
+100,/path/to/file
+150,/path/to/second/file
+
-mlr --csv --ragged cat data/het/ragged.csv
+mlr --ijson --ocsv group-like data/het.json
+
+
+resource,loadsec,ok
+/path/to/file,0.45,true
+/path/to/second/file,0.32,true
+/some/other/path,0.97,false
+mlr: CSV schema change: first keys "resource,loadsec,ok"; current keys "record_count,resource"
+mlr: exiting due to data error.
+
+ +Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if +there are implicit header changes (no intervening blank line and new header line) as seen above -- +you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). + +
+mlr --csv --allow-ragged-csv-input cat data/het/ragged.csv
 
 a,b,c
 1,2,3
-
-a,b
-4,5
-
-a,b,c,4
+4,5,
 7,8,9,10
 
diff --git a/docs/src/record-heterogeneity.md.in b/docs/src/record-heterogeneity.md.in index 1aab9dfaa..677098ee8 100644 --- a/docs/src/record-heterogeneity.md.in +++ b/docs/src/record-heterogeneity.md.in @@ -180,13 +180,12 @@ GENMD-EOF CSV and pretty-print formats expect rectangular structure. But Miller lets you process non-rectangular using CSV and pretty-print. -Miller simply prints a newline and a new header when there is a schema change --- where by _schema_ we mean simply the list of record keys in the order they -are encountered. When there is no schema change, you get CSV per se as a -special case. Likewise, Miller reads heterogeneous CSV or pretty-print input -the same way. The difference between CSV and CSV-lite is that the former is -[RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter readily -handles heterogeneous data (which is non-compliant). For example: +For CSV-lite and TSV-lite, Miller simply prints a newline and a new header when there is a schema +change -- where by _schema_ we mean simply the list of record keys in the order they are +encountered. When there is no schema change, you get CSV per se as a special case. Likewise, Miller +reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite +is that the former is [RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter +readily handles heterogeneous data (which is non-compliant). For example: GENMD-RUN-COMMAND cat data/het.json @@ -200,10 +199,20 @@ GENMD-RUN-COMMAND mlr --ijson --opprint group-like data/het.json GENMD-EOF -Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). 
+GENMD-RUN-COMMAND +mlr --ijson --ocsvlite group-like data/het.json +GENMD-EOF GENMD-RUN-COMMAND-TOLERATING-ERROR -mlr --csv --ragged cat data/het/ragged.csv +mlr --ijson --ocsv group-like data/het.json +GENMD-EOF + +Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if +there are implicit header changes (no intervening blank line and new header line) as seen above -- +you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). + +GENMD-RUN-COMMAND +mlr --csv --allow-ragged-csv-input cat data/het/ragged.csv GENMD-EOF ## Processing heterogeneous data diff --git a/pkg/output/channel_writer.go b/pkg/output/channel_writer.go index 6805ad890..e7b0e802e 100644 --- a/pkg/output/channel_writer.go +++ b/pkg/output/channel_writer.go @@ -94,7 +94,11 @@ func channelWriterHandleBatch( } if record != nil { - recordWriter.Write(record, bufferedOutputStream, outputIsStdout) + err := recordWriter.Write(record, bufferedOutputStream, outputIsStdout) + if err != nil { + fmt.Fprintf(os.Stderr, "mlr: %v\n", err) + return true, true + } } outputString := recordAndContext.OutputString @@ -111,8 +115,13 @@ func channelWriterHandleBatch( // queued up. For example, PPRINT needs to see all same-schema // records before printing any, since it needs to compute max width // down columns. 
- recordWriter.Write(nil, bufferedOutputStream, outputIsStdout) - return true, false + err := recordWriter.Write(nil, bufferedOutputStream, outputIsStdout) + if err != nil { + fmt.Fprintf(os.Stderr, "mlr: %v\n", err) + return true, true + } else { + return true, false + } } } return false, false diff --git a/pkg/output/record_writer.go b/pkg/output/record_writer.go index 37d8a7780..3ce49743d 100644 --- a/pkg/output/record_writer.go +++ b/pkg/output/record_writer.go @@ -20,5 +20,5 @@ type IRecordWriter interface { outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, - ) + ) error } diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go index 947400275..b71af63d2 100644 --- a/pkg/output/record_writer_csv.go +++ b/pkg/output/record_writer_csv.go @@ -12,15 +12,13 @@ import ( ) type RecordWriterCSV struct { - writerOptions *cli.TWriterOptions - ofs0 byte // Go's CSV library only lets its 'Comma' be a single character - csvWriter *csv.Writer - // For reporting schema changes: we print a newline and the new header - lastJoinedHeader *string - // Only write one blank line for schema changes / blank input lines - justWroteEmptyLine bool - // For double-quote around all fields - quoteAll bool + writerOptions *cli.TWriterOptions + ofs0 byte // Go's CSV library only lets its 'Comma' be a single character + csvWriter *csv.Writer + needToPrintHeader bool + firstRecordKeys []string + firstRecordNF int64 + quoteAll bool // For double-quote around all fields } func NewRecordWriterCSV(writerOptions *cli.TWriterOptions) (*RecordWriterCSV, error) { @@ -30,23 +28,25 @@ func NewRecordWriterCSV(writerOptions *cli.TWriterOptions) (*RecordWriterCSV, er if writerOptions.ORS != "\n" && writerOptions.ORS != "\r\n" { return nil, fmt.Errorf("for CSV, ORS cannot be altered") } - return &RecordWriterCSV{ - writerOptions: writerOptions, - csvWriter: nil, // will be set on first Write() wherein we have the output stream - lastJoinedHeader: nil, - 
justWroteEmptyLine: false, - quoteAll: writerOptions.CSVQuoteAll, - }, nil + writer := &RecordWriterCSV{ + writerOptions: writerOptions, + csvWriter: nil, // will be set on first Write() wherein we have the output stream + needToPrintHeader: !writerOptions.HeaderlessOutput, + firstRecordKeys: nil, + firstRecordNF: -1, + quoteAll: writerOptions.CSVQuoteAll, + } + return writer, nil } func (writer *RecordWriterCSV) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // End of record stream: nothing special for this output format if outrec == nil { - return + return nil } if writer.csvWriter == nil { @@ -54,46 +54,46 @@ func (writer *RecordWriterCSV) Write( writer.csvWriter.Comma = rune(writer.writerOptions.OFS[0]) // xxx temp } - if outrec.IsEmpty() { - if !writer.justWroteEmptyLine { - bufferedOutputStream.WriteString("\n") - } - joinedHeader := "" - writer.lastJoinedHeader = &joinedHeader - writer.justWroteEmptyLine = true - return + if writer.firstRecordKeys == nil { + writer.firstRecordKeys = outrec.GetKeys() + writer.firstRecordNF = int64(len(writer.firstRecordKeys)) } - needToPrintHeader := false - joinedHeader := strings.Join(outrec.GetKeys(), ",") - if writer.lastJoinedHeader == nil || *writer.lastJoinedHeader != joinedHeader { - if writer.lastJoinedHeader != nil { - if !writer.justWroteEmptyLine { - bufferedOutputStream.WriteString("\n") - } - writer.justWroteEmptyLine = true - } - writer.lastJoinedHeader = &joinedHeader - needToPrintHeader = true - } - - if needToPrintHeader && !writer.writerOptions.HeaderlessOutput { + if writer.needToPrintHeader { fields := make([]string, outrec.FieldCount) i := 0 for pe := outrec.Head; pe != nil; pe = pe.Next { fields[i] = pe.Key i++ } - //////writer.csvWriter.Write(fields) writer.WriteCSVRecordMaybeColorized(fields, bufferedOutputStream, outputIsStdout, true, writer.quoteAll) + writer.needToPrintHeader = false } - fields := make([]string, outrec.FieldCount) - i := 0 + 
var outputNF int64 = outrec.FieldCount + if outputNF < writer.firstRecordNF { + outputNF = writer.firstRecordNF + } + + fields := make([]string, outputNF) + var i int64 = 0 for pe := outrec.Head; pe != nil; pe = pe.Next { + if i < writer.firstRecordNF && pe.Key != writer.firstRecordKeys[i] { + return fmt.Errorf( + "CSV schema change: first keys \"%s\"; current keys \"%s\"", + strings.Join(writer.firstRecordKeys, writer.writerOptions.OFS), + strings.Join(outrec.GetKeys(), writer.writerOptions.OFS), + ) + } fields[i] = pe.Value.String() i++ } + + for ; i < outputNF; i++ { + fields[i] = "" + } + writer.WriteCSVRecordMaybeColorized(fields, bufferedOutputStream, outputIsStdout, false, writer.quoteAll) - writer.justWroteEmptyLine = false + + return nil } diff --git a/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go index ced670c13..c59556b30 100644 --- a/pkg/output/record_writer_csvlite.go +++ b/pkg/output/record_writer_csvlite.go @@ -29,10 +29,10 @@ func (writer *RecordWriterCSVLite) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // End of record stream: nothing special for this output format if outrec == nil { - return + return nil } if outrec.IsEmpty() { @@ -42,7 +42,7 @@ func (writer *RecordWriterCSVLite) Write( joinedHeader := "" writer.lastJoinedHeader = &joinedHeader writer.justWroteEmptyLine = true - return + return nil } needToPrintHeader := false @@ -79,4 +79,6 @@ func (writer *RecordWriterCSVLite) Write( bufferedOutputStream.WriteString(writer.writerOptions.ORS) writer.justWroteEmptyLine = false + + return nil } diff --git a/pkg/output/record_writer_dkvp.go b/pkg/output/record_writer_dkvp.go index bc60868ca..d27420ede 100644 --- a/pkg/output/record_writer_dkvp.go +++ b/pkg/output/record_writer_dkvp.go @@ -22,15 +22,15 @@ func (writer *RecordWriterDKVP) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // End of record stream: 
nothing special for this output format if outrec == nil { - return + return nil } if outrec.IsEmpty() { bufferedOutputStream.WriteString(writer.writerOptions.ORS) - return + return nil } for pe := outrec.Head; pe != nil; pe = pe.Next { @@ -42,4 +42,6 @@ func (writer *RecordWriterDKVP) Write( } } bufferedOutputStream.WriteString(writer.writerOptions.ORS) + + return nil } diff --git a/pkg/output/record_writer_json.go b/pkg/output/record_writer_json.go index 578e9f8ba..e832f169e 100644 --- a/pkg/output/record_writer_json.go +++ b/pkg/output/record_writer_json.go @@ -39,7 +39,7 @@ func (writer *RecordWriterJSON) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { if outrec != nil && writer.jvQuoteAll { outrec.StringifyValuesRecursively() } @@ -49,6 +49,7 @@ func (writer *RecordWriterJSON) Write( } else { writer.writeWithoutListWrap(outrec, bufferedOutputStream, outputIsStdout) } + return nil } // ---------------------------------------------------------------- diff --git a/pkg/output/record_writer_markdown.go b/pkg/output/record_writer_markdown.go index 2688c2962..6c2983a59 100644 --- a/pkg/output/record_writer_markdown.go +++ b/pkg/output/record_writer_markdown.go @@ -31,9 +31,9 @@ func (writer *RecordWriterMarkdown) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { if outrec == nil { // end of record stream - return + return nil } currentJoinedHeader := outrec.GetKeysJoined() @@ -73,4 +73,6 @@ func (writer *RecordWriterMarkdown) Write( bufferedOutputStream.WriteString(" |") } bufferedOutputStream.WriteString(writer.writerOptions.ORS) + + return nil } diff --git a/pkg/output/record_writer_nidx.go b/pkg/output/record_writer_nidx.go index d3babd35a..551fe47aa 100644 --- a/pkg/output/record_writer_nidx.go +++ b/pkg/output/record_writer_nidx.go @@ -21,10 +21,10 @@ func (writer *RecordWriterNIDX) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, 
outputIsStdout bool, -) { +) error { // End of record stream: nothing special for this output format if outrec == nil { - return + return nil } for pe := outrec.Head; pe != nil; pe = pe.Next { @@ -34,4 +34,6 @@ func (writer *RecordWriterNIDX) Write( } } bufferedOutputStream.WriteString(writer.writerOptions.ORS) + + return nil } diff --git a/pkg/output/record_writer_pprint.go b/pkg/output/record_writer_pprint.go index 2fd4aaa70..b9f48cd93 100644 --- a/pkg/output/record_writer_pprint.go +++ b/pkg/output/record_writer_pprint.go @@ -37,7 +37,7 @@ func (writer *RecordWriterPPRINT) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // Group records by have-same-schema or not. Pretty-print each // homoegeneous sublist, or "batch". // @@ -83,6 +83,8 @@ func (writer *RecordWriterPPRINT) Write( bufferedOutputStream, outputIsStdout) } } + + return nil } // ---------------------------------------------------------------- diff --git a/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go index 48db403d8..2a79793b2 100644 --- a/pkg/output/record_writer_tsv.go +++ b/pkg/output/record_writer_tsv.go @@ -12,11 +12,10 @@ import ( ) type RecordWriterTSV struct { - writerOptions *cli.TWriterOptions - // For reporting schema changes: we print a newline and the new header - lastJoinedHeader *string - // Only write one blank line for schema changes / blank input lines - justWroteEmptyLine bool + writerOptions *cli.TWriterOptions + needToPrintHeader bool + firstRecordKeys []string + firstRecordNF int64 } func NewRecordWriterTSV(writerOptions *cli.TWriterOptions) (*RecordWriterTSV, error) { @@ -27,9 +26,10 @@ func NewRecordWriterTSV(writerOptions *cli.TWriterOptions) (*RecordWriterTSV, er return nil, fmt.Errorf("for CSV, ORS cannot be altered") } return &RecordWriterTSV{ - writerOptions: writerOptions, - lastJoinedHeader: nil, - justWroteEmptyLine: false, + writerOptions: writerOptions, + needToPrintHeader: 
!writerOptions.HeaderlessOutput, + firstRecordKeys: nil, + firstRecordNF: -1, }, nil } @@ -37,42 +37,28 @@ func (writer *RecordWriterTSV) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // End of record stream: nothing special for this output format if outrec == nil { - return + return nil } - if outrec.IsEmpty() { - if !writer.justWroteEmptyLine { - bufferedOutputStream.WriteString(writer.writerOptions.ORS) + if writer.firstRecordKeys == nil { + writer.firstRecordKeys = outrec.GetKeys() + writer.firstRecordNF = int64(len(writer.firstRecordKeys)) + } + + if writer.needToPrintHeader { + fields := make([]string, outrec.FieldCount) + i := 0 + for pe := outrec.Head; pe != nil; pe = pe.Next { + fields[i] = pe.Key + i++ } - joinedHeader := "" - writer.lastJoinedHeader = &joinedHeader - writer.justWroteEmptyLine = true - return - } - - needToPrintHeader := false - joinedHeader := strings.Join(outrec.GetKeys(), ",") - if writer.lastJoinedHeader == nil || *writer.lastJoinedHeader != joinedHeader { - if writer.lastJoinedHeader != nil { - if !writer.justWroteEmptyLine { - bufferedOutputStream.WriteString(writer.writerOptions.ORS) - } - writer.justWroteEmptyLine = true - } - writer.lastJoinedHeader = &joinedHeader - needToPrintHeader = true - } - - if needToPrintHeader && !writer.writerOptions.HeaderlessOutput { for pe := outrec.Head; pe != nil; pe = pe.Next { bufferedOutputStream.WriteString( colorizer.MaybeColorizeKey( - lib.TSVEncodeField( - pe.Key, - ), + lib.TSVEncodeField(pe.Key), outputIsStdout, ), ) @@ -83,22 +69,44 @@ func (writer *RecordWriterTSV) Write( } bufferedOutputStream.WriteString(writer.writerOptions.ORS) + + writer.needToPrintHeader = false } + var outputNF int64 = outrec.FieldCount + if outputNF < writer.firstRecordNF { + outputNF = writer.firstRecordNF + } + + fields := make([]string, outputNF) + var i int64 = 0 for pe := outrec.Head; pe != nil; pe = pe.Next { - bufferedOutputStream.WriteString( - 
colorizer.MaybeColorizeValue( - lib.TSVEncodeField( - pe.Value.String(), - ), - outputIsStdout, - ), + if i < writer.firstRecordNF && pe.Key != writer.firstRecordKeys[i] { + return fmt.Errorf( + "TSV schema change: first keys \"%s\"; current keys \"%s\"", + strings.Join(writer.firstRecordKeys, writer.writerOptions.OFS), + strings.Join(outrec.GetKeys(), writer.writerOptions.OFS), + ) + } + fields[i] = colorizer.MaybeColorizeValue( + lib.TSVEncodeField(pe.Value.String()), + outputIsStdout, ) - if pe.Next != nil { + i++ + } + + for ; i < outputNF; i++ { + fields[i] = "" + } + + for j, field := range fields { + if j > 0 { bufferedOutputStream.WriteString(writer.writerOptions.OFS) } + bufferedOutputStream.WriteString(field) } + bufferedOutputStream.WriteString(writer.writerOptions.ORS) - writer.justWroteEmptyLine = false + return nil } diff --git a/pkg/output/record_writer_xtab.go b/pkg/output/record_writer_xtab.go index 9093935e9..27f3b1bcb 100644 --- a/pkg/output/record_writer_xtab.go +++ b/pkg/output/record_writer_xtab.go @@ -45,10 +45,10 @@ func (writer *RecordWriterXTAB) Write( outrec *mlrval.Mlrmap, bufferedOutputStream *bufio.Writer, outputIsStdout bool, -) { +) error { // End of record stream: nothing special for this output format if outrec == nil { - return + return nil } maxKeyLength := 1 @@ -64,6 +64,8 @@ func (writer *RecordWriterXTAB) Write( } else { writer.writeWithLeftAlignedValues(outrec, bufferedOutputStream, outputIsStdout, maxKeyLength) } + + return nil } func (writer *RecordWriterXTAB) writeWithLeftAlignedValues( diff --git a/test/cases/io-csv-auto-unsparsify/at/cmd b/test/cases/io-csv-auto-unsparsify/at/cmd new file mode 100644 index 000000000..64a5e8c77 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/at/cmd @@ -0,0 +1 @@ +mlr -i json -o csv cat ${CASEDIR}/input.json diff --git a/test/cases/io-csv-auto-unsparsify/at/experr b/test/cases/io-csv-auto-unsparsify/at/experr new file mode 100644 index 000000000..e69de29bb diff --git 
a/test/cases/io-csv-auto-unsparsify/at/expout b/test/cases/io-csv-auto-unsparsify/at/expout new file mode 100644 index 000000000..29e4b3171 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/at/expout @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +4,5,6 +7,8,9 diff --git a/test/cases/io-csv-auto-unsparsify/at/input.json b/test/cases/io-csv-auto-unsparsify/at/input.json new file mode 100644 index 000000000..832be9c9e --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/at/input.json @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/io-csv-auto-unsparsify/key-change/cmd b/test/cases/io-csv-auto-unsparsify/key-change/cmd new file mode 100644 index 000000000..64a5e8c77 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/key-change/cmd @@ -0,0 +1 @@ +mlr -i json -o csv cat ${CASEDIR}/input.json diff --git a/test/cases/io-csv-auto-unsparsify/key-change/experr b/test/cases/io-csv-auto-unsparsify/key-change/experr new file mode 100644 index 000000000..699fbb70f --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/key-change/experr @@ -0,0 +1,2 @@ +mlr: CSV schema change: first keys "a,b,c"; current keys "a,X,c" +mlr: exiting due to data error. 
diff --git a/test/cases/io-csv-auto-unsparsify/key-change/expout b/test/cases/io-csv-auto-unsparsify/key-change/expout new file mode 100644 index 000000000..88700c714 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/key-change/expout @@ -0,0 +1,3 @@ +a,b,c +1,2,3 +4,5,6 diff --git a/test/cases/io-csv-auto-unsparsify/key-change/input.json b/test/cases/io-csv-auto-unsparsify/key-change/input.json new file mode 100644 index 000000000..841abab57 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/key-change/input.json @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6 +}, +{ + "a": 7, + "X": 8, + "c": 9 +} +] diff --git a/test/cases/io-csv-auto-unsparsify/key-change/should-fail b/test/cases/io-csv-auto-unsparsify/key-change/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-csv-auto-unsparsify/over/cmd b/test/cases/io-csv-auto-unsparsify/over/cmd new file mode 100644 index 000000000..64a5e8c77 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/over/cmd @@ -0,0 +1 @@ +mlr -i json -o csv cat ${CASEDIR}/input.json diff --git a/test/cases/io-csv-auto-unsparsify/over/experr b/test/cases/io-csv-auto-unsparsify/over/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-csv-auto-unsparsify/over/expout b/test/cases/io-csv-auto-unsparsify/over/expout new file mode 100644 index 000000000..44ad0219a --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/over/expout @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +4,5,6,7 +7,8,9 diff --git a/test/cases/io-csv-auto-unsparsify/over/input.json b/test/cases/io-csv-auto-unsparsify/over/input.json new file mode 100644 index 000000000..38b47c2f0 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/over/input.json @@ -0,0 +1,18 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6, + "d": 7 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/io-csv-auto-unsparsify/under/cmd 
b/test/cases/io-csv-auto-unsparsify/under/cmd new file mode 100644 index 000000000..64a5e8c77 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/under/cmd @@ -0,0 +1 @@ +mlr -i json -o csv cat ${CASEDIR}/input.json diff --git a/test/cases/io-csv-auto-unsparsify/under/experr b/test/cases/io-csv-auto-unsparsify/under/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-csv-auto-unsparsify/under/expout b/test/cases/io-csv-auto-unsparsify/under/expout new file mode 100644 index 000000000..48f0b0017 --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/under/expout @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +4,5, +7,8,9 diff --git a/test/cases/io-csv-auto-unsparsify/under/input.json b/test/cases/io-csv-auto-unsparsify/under/input.json new file mode 100644 index 000000000..e90f7439a --- /dev/null +++ b/test/cases/io-csv-auto-unsparsify/under/input.json @@ -0,0 +1,16 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/io-multi/0010/experr b/test/cases/io-multi/0010/experr index e69de29bb..15e296abb 100644 --- a/test/cases/io-multi/0010/experr +++ b/test/cases/io-multi/0010/experr @@ -0,0 +1,2 @@ +mlr: CSV schema change: first keys "host"; current keys "df/tmp,uptime" +mlr: exiting due to data error. 
diff --git a/test/cases/io-multi/0010/expout b/test/cases/io-multi/0010/expout index 0d20e38d9..57d47ff76 100644 --- a/test/cases/io-multi/0010/expout +++ b/test/cases/io-multi/0010/expout @@ -1,35 +1,2 @@ host jupiter - -df/tmp,uptime -2.43MB,32345sec - -host -saturn - -df/tmp,uptime -1.34MB,234214132sec - -host -mars - -df/tmp,uptime -4.97MB,345089805sec - -host -jupiter - -df/tmp,uptime -0.04MB,890sec - -host -mars - -df/tmp,uptime -8.55MB,787897777sec - -host -saturn - -df/tmp,uptime -9.47MB,234289080sec diff --git a/test/cases/io-multi/0010/should-fail b/test/cases/io-multi/0010/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-multi/0033/experr b/test/cases/io-multi/0033/experr index e69de29bb..15e296abb 100644 --- a/test/cases/io-multi/0033/experr +++ b/test/cases/io-multi/0033/experr @@ -0,0 +1,2 @@ +mlr: CSV schema change: first keys "host"; current keys "df/tmp,uptime" +mlr: exiting due to data error. diff --git a/test/cases/io-multi/0033/expout b/test/cases/io-multi/0033/expout index 0d20e38d9..57d47ff76 100644 --- a/test/cases/io-multi/0033/expout +++ b/test/cases/io-multi/0033/expout @@ -1,35 +1,2 @@ host jupiter - -df/tmp,uptime -2.43MB,32345sec - -host -saturn - -df/tmp,uptime -1.34MB,234214132sec - -host -mars - -df/tmp,uptime -4.97MB,345089805sec - -host -jupiter - -df/tmp,uptime -0.04MB,890sec - -host -mars - -df/tmp,uptime -8.55MB,787897777sec - -host -saturn - -df/tmp,uptime -9.47MB,234289080sec diff --git a/test/cases/io-multi/0033/should-fail b/test/cases/io-multi/0033/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-multi/0034/experr b/test/cases/io-multi/0034/experr index e69de29bb..15e296abb 100644 --- a/test/cases/io-multi/0034/experr +++ b/test/cases/io-multi/0034/experr @@ -0,0 +1,2 @@ +mlr: CSV schema change: first keys "host"; current keys "df/tmp,uptime" +mlr: exiting due to data error. 
diff --git a/test/cases/io-multi/0034/expout b/test/cases/io-multi/0034/expout index 2a14e7a0b..9ad9ee391 100644 --- a/test/cases/io-multi/0034/expout +++ b/test/cases/io-multi/0034/expout @@ -1,23 +1 @@ jupiter - -2.43MB,32345sec - -saturn - -1.34MB,234214132sec - -mars - -4.97MB,345089805sec - -jupiter - -0.04MB,890sec - -mars - -8.55MB,787897777sec - -saturn - -9.47MB,234289080sec diff --git a/test/cases/io-multi/0034/should-fail b/test/cases/io-multi/0034/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-tsv-auto-unsparsify/at/cmd b/test/cases/io-tsv-auto-unsparsify/at/cmd new file mode 100644 index 000000000..818cba82b --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/at/cmd @@ -0,0 +1 @@ +mlr -i json -o tsv cat ${CASEDIR}/input.json diff --git a/test/cases/io-tsv-auto-unsparsify/at/experr b/test/cases/io-tsv-auto-unsparsify/at/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-tsv-auto-unsparsify/at/expout b/test/cases/io-tsv-auto-unsparsify/at/expout new file mode 100644 index 000000000..c0232182d --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/at/expout @@ -0,0 +1,4 @@ +a b c +1 2 3 +4 5 6 +7 8 9 diff --git a/test/cases/io-tsv-auto-unsparsify/at/input.json b/test/cases/io-tsv-auto-unsparsify/at/input.json new file mode 100644 index 000000000..832be9c9e --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/at/input.json @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/io-tsv-auto-unsparsify/key-change/cmd b/test/cases/io-tsv-auto-unsparsify/key-change/cmd new file mode 100644 index 000000000..818cba82b --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/key-change/cmd @@ -0,0 +1 @@ +mlr -i json -o tsv cat ${CASEDIR}/input.json diff --git a/test/cases/io-tsv-auto-unsparsify/key-change/experr b/test/cases/io-tsv-auto-unsparsify/key-change/experr new file mode 100644 index 
000000000..ce615563a --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/key-change/experr @@ -0,0 +1,2 @@ +mlr: TSV schema change: first keys "a b c"; current keys "a X c" +mlr: exiting due to data error. diff --git a/test/cases/io-tsv-auto-unsparsify/key-change/expout b/test/cases/io-tsv-auto-unsparsify/key-change/expout new file mode 100644 index 000000000..c96a25f19 --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/key-change/expout @@ -0,0 +1,3 @@ +a b c +1 2 3 +4 5 6 diff --git a/test/cases/io-tsv-auto-unsparsify/key-change/input.json b/test/cases/io-tsv-auto-unsparsify/key-change/input.json new file mode 100644 index 000000000..841abab57 --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/key-change/input.json @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6 +}, +{ + "a": 7, + "X": 8, + "c": 9 +} +] diff --git a/test/cases/io-tsv-auto-unsparsify/key-change/should-fail b/test/cases/io-tsv-auto-unsparsify/key-change/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-tsv-auto-unsparsify/over/cmd b/test/cases/io-tsv-auto-unsparsify/over/cmd new file mode 100644 index 000000000..818cba82b --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/over/cmd @@ -0,0 +1 @@ +mlr -i json -o tsv cat ${CASEDIR}/input.json diff --git a/test/cases/io-tsv-auto-unsparsify/over/experr b/test/cases/io-tsv-auto-unsparsify/over/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-tsv-auto-unsparsify/over/expout b/test/cases/io-tsv-auto-unsparsify/over/expout new file mode 100644 index 000000000..0a61a2406 --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/over/expout @@ -0,0 +1,4 @@ +a b c +1 2 3 +4 5 6 7 +7 8 9 diff --git a/test/cases/io-tsv-auto-unsparsify/over/input.json b/test/cases/io-tsv-auto-unsparsify/over/input.json new file mode 100644 index 000000000..38b47c2f0 --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/over/input.json @@ -0,0 +1,18 @@ +[ +{ + "a": 
1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": 6, + "d": 7 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/io-tsv-auto-unsparsify/under/cmd b/test/cases/io-tsv-auto-unsparsify/under/cmd new file mode 100644 index 000000000..818cba82b --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/under/cmd @@ -0,0 +1 @@ +mlr -i json -o tsv cat ${CASEDIR}/input.json diff --git a/test/cases/io-tsv-auto-unsparsify/under/experr b/test/cases/io-tsv-auto-unsparsify/under/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-tsv-auto-unsparsify/under/expout b/test/cases/io-tsv-auto-unsparsify/under/expout new file mode 100644 index 000000000..7b24f5bdb --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/under/expout @@ -0,0 +1,4 @@ +a b c +1 2 3 +4 5 +7 8 9 diff --git a/test/cases/io-tsv-auto-unsparsify/under/input.json b/test/cases/io-tsv-auto-unsparsify/under/input.json new file mode 100644 index 000000000..e90f7439a --- /dev/null +++ b/test/cases/io-tsv-auto-unsparsify/under/input.json @@ -0,0 +1,16 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] From 81d11365a0a1a859e72ae318df26aa0e9842cdd0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 21 Jan 2024 15:17:33 -0500 Subject: [PATCH 118/456] `mlr reorder` with regex support [WIP] (#1473) * mlr reorder with regex support for field-name selection * neaten * -r -b/-a; unit-test cases --- pkg/transformers/aaa_record_transformer.go | 6 + pkg/transformers/reorder.go | 363 ++++++++++++-------- test/cases/verb-reorder/regex-after/cmd | 1 + test/cases/verb-reorder/regex-after/experr | 0 test/cases/verb-reorder/regex-after/expout | 10 + test/cases/verb-reorder/regex-before/cmd | 1 + test/cases/verb-reorder/regex-before/experr | 0 test/cases/verb-reorder/regex-before/expout | 10 + test/cases/verb-reorder/regex-end/cmd | 1 + test/cases/verb-reorder/regex-end/experr | 0 test/cases/verb-reorder/regex-end/expout | 10 + 
test/cases/verb-reorder/regex-start/cmd | 1 + test/cases/verb-reorder/regex-start/experr | 0 test/cases/verb-reorder/regex-start/expout | 10 + test/input/reorder-regex.nidx | 1 + 15 files changed, 269 insertions(+), 145 deletions(-) create mode 100644 test/cases/verb-reorder/regex-after/cmd create mode 100644 test/cases/verb-reorder/regex-after/experr create mode 100644 test/cases/verb-reorder/regex-after/expout create mode 100644 test/cases/verb-reorder/regex-before/cmd create mode 100644 test/cases/verb-reorder/regex-before/experr create mode 100644 test/cases/verb-reorder/regex-before/expout create mode 100644 test/cases/verb-reorder/regex-end/cmd create mode 100644 test/cases/verb-reorder/regex-end/experr create mode 100644 test/cases/verb-reorder/regex-end/expout create mode 100644 test/cases/verb-reorder/regex-start/cmd create mode 100644 test/cases/verb-reorder/regex-start/experr create mode 100644 test/cases/verb-reorder/regex-start/expout create mode 100644 test/input/reorder-regex.nidx diff --git a/pkg/transformers/aaa_record_transformer.go b/pkg/transformers/aaa_record_transformer.go index 1f9bae7dd..1be4fc917 100644 --- a/pkg/transformers/aaa_record_transformer.go +++ b/pkg/transformers/aaa_record_transformer.go @@ -27,6 +27,12 @@ type RecordTransformerFunc func( outputDownstreamDoneChannel chan<- bool, ) +// Used within some verbs +type RecordTransformerHelperFunc func( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) + type TransformerUsageFunc func( ostream *os.File, ) diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go index 216dd714d..141b55c19 100644 --- a/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -4,6 +4,7 @@ import ( "container/list" "fmt" "os" + "regexp" "strings" "github.com/johnkerl/miller/pkg/cli" @@ -61,9 +62,9 @@ func transformerReorderParseCLI( argi++ var fieldNames []string = nil - putAtEnd := false - beforeFieldName := "" - 
afterFieldName := "" + doRegexes := false + putAfter := false + centerFieldName := "" for argi < argc /* variable increment: 1 or 2 depending on flag */ { opt := args[argi] @@ -81,21 +82,23 @@ func transformerReorderParseCLI( } else if opt == "-f" { fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + doRegexes = false + + } else if opt == "-r" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + doRegexes = true } else if opt == "-b" { - beforeFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - afterFieldName = "" - putAtEnd = false + centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + putAfter = false } else if opt == "-a" { - afterFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - beforeFieldName = "" - putAtEnd = false + centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + putAfter = true } else if opt == "-e" { - putAtEnd = true - beforeFieldName = "" - afterFieldName = "" + putAfter = true + centerFieldName = "" } else { transformerReorderUsage(os.Stderr) @@ -115,9 +118,9 @@ func transformerReorderParseCLI( transformer, err := NewTransformerReorder( fieldNames, - putAtEnd, - beforeFieldName, - afterFieldName, + doRegexes, + putAfter, + centerFieldName, ) if err != nil { fmt.Fprintln(os.Stderr, err) @@ -132,43 +135,71 @@ type TransformerReorder struct { // input fieldNames []string fieldNamesSet map[string]bool - beforeFieldName string - afterFieldName string + regexes []*regexp.Regexp + centerFieldName string + putAfter bool // state - recordTransformerFunc RecordTransformerFunc + recordTransformerFunc RecordTransformerHelperFunc } func NewTransformerReorder( fieldNames []string, - putAtEnd bool, - beforeFieldName string, - afterFieldName string, + doRegexes bool, + putAfter bool, + centerFieldName string, ) (*TransformerReorder, error) { tr := &TransformerReorder{ fieldNames: fieldNames, fieldNamesSet: 
lib.StringListToSet(fieldNames), - beforeFieldName: beforeFieldName, - afterFieldName: afterFieldName, + centerFieldName: centerFieldName, + putAfter: putAfter, } - if putAtEnd { - tr.recordTransformerFunc = tr.reorderToEnd - } else if beforeFieldName != "" { - tr.recordTransformerFunc = tr.reorderBefore - } else if afterFieldName != "" { - tr.recordTransformerFunc = tr.reorderAfter + if centerFieldName == "" { + if putAfter { + if doRegexes { + tr.recordTransformerFunc = tr.reorderToEndWithRegex + } else { + tr.recordTransformerFunc = tr.reorderToEndNoRegex + } + } else { + if doRegexes { + tr.recordTransformerFunc = tr.reorderToStartWithRegex + } else { + tr.recordTransformerFunc = tr.reorderToStartNoRegex + lib.ReverseStringList(tr.fieldNames) + } + } } else { - tr.recordTransformerFunc = tr.reorderToStart - lib.ReverseStringList(tr.fieldNames) + if doRegexes { + tr.recordTransformerFunc = tr.reorderBeforeOrAfterWithRegex + } else { + tr.recordTransformerFunc = tr.reorderBeforeOrAfterNoRegex + } + } + + if doRegexes { + tr.regexes = make([]*regexp.Regexp, len(fieldNames)) + for i, regexString := range fieldNames { + // Handles "a.*b"i Miller case-insensitive-regex specification + regex, err := lib.CompileMillerRegex(regexString) + if err != nil { + fmt.Fprintf( + os.Stderr, + "%s %s: cannot compile regex [%s]\n", + "mlr", verbNameCut, regexString, + ) + os.Exit(1) + } + tr.regexes[i] = regex + } } return tr, nil } -// ---------------------------------------------------------------- - func (tr *TransformerReorder) Transform( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext @@ -176,156 +207,198 @@ func (tr *TransformerReorder) Transform( outputDownstreamDoneChannel chan<- bool, ) { HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel) -} - -// 
---------------------------------------------------------------- -func (tr *TransformerReorder) reorderToStart( - inrecAndContext *types.RecordAndContext, - outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, -) { if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } - outputRecordsAndContexts.PushBack(inrecAndContext) - + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + ) } else { outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker } } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderToEnd( +func (tr *TransformerReorder) reorderToStartNoRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - for _, fieldName := range tr.fieldNames { - inrec.MoveToTail(fieldName) - } - outputRecordsAndContexts.PushBack(inrecAndContext) - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + inrec := inrecAndContext.Record + for _, fieldName := range tr.fieldNames { + inrec.MoveToHead(fieldName) } + outputRecordsAndContexts.PushBack(inrecAndContext) } -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderBefore( +func (tr *TransformerReorder) reorderToStartWithRegex( inrecAndContext *types.RecordAndContext, outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, ) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - if inrec.Get(tr.beforeFieldName) == nil { - 
outputRecordsAndContexts.PushBack(inrecAndContext) - return - } + inrec := inrecAndContext.Record - outrec := mlrval.NewMlrmapAsRecord() - pe := inrec.Head - - // * inrec will be GC'ed - // * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed - - for ; pe != nil; pe = pe.Next { - if pe.Key == tr.beforeFieldName { + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + outrec.PutReference(pe.Key, pe.Value) + found = true break } - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) + } + if !found { + atEnds.PushBack(pe) + } + } + + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + // Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} + +func (tr *TransformerReorder) reorderToEndNoRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + for _, fieldName := range tr.fieldNames { + inrec.MoveToTail(fieldName) + } + outputRecordsAndContexts.PushBack(inrecAndContext) + +} + +func (tr *TransformerReorder) reorderToEndWithRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + atEnds := list.New() + for pe := inrec.Head; pe != nil; pe = pe.Next { + found := false + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + atEnds.PushBack(pe) + found = true + break } } + if !found { + outrec.PutReference(pe.Key, pe.Value) + } + } + for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() { + 
// Ownership transfer; no copy needed + pe := atEnd.Value.(*mlrval.MlrmapEntry) + outrec.PutReference(pe.Key, pe.Value) + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} + +func (tr *TransformerReorder) reorderBeforeOrAfterNoRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + if inrec.Get(tr.centerFieldName) == nil { + outputRecordsAndContexts.PushBack(inrecAndContext) + return + } + + outrec := mlrval.NewMlrmapAsRecord() + pe := inrec.Head + + // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed + + for ; pe != nil; pe = pe.Next { + if pe.Key == tr.centerFieldName { + break + } + if !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } + } + + if !tr.putAfter { for _, fieldName := range tr.fieldNames { value := inrec.Get(fieldName) if value != nil { outrec.PutReference(fieldName, value) } } - - value := inrec.Get(tr.beforeFieldName) - if value != nil { - outrec.PutReference(tr.beforeFieldName, value) - } - - for ; pe != nil; pe = pe.Next { - if pe.Key != tr.beforeFieldName && !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } - } - - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } - outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) - - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker } -} -// ---------------------------------------------------------------- -func (tr *TransformerReorder) reorderAfter( - inrecAndContext *types.RecordAndContext, - outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, -) { - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - if 
inrec.Get(tr.afterFieldName) == nil { - outputRecordsAndContexts.PushBack(inrecAndContext) - return - } - - outrec := mlrval.NewMlrmapAsRecord() - pe := inrec.Head - - // * inrec will be GC'ed - // * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed - - for ; pe != nil; pe = pe.Next { - if pe.Key == tr.afterFieldName { - break - } - if !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) - } - } - - value := inrec.Get(tr.afterFieldName) - if value != nil { - outrec.PutReference(tr.afterFieldName, value) - } + value := inrec.Get(tr.centerFieldName) + if value != nil { + outrec.PutReference(tr.centerFieldName, value) + } + if tr.putAfter { for _, fieldName := range tr.fieldNames { value := inrec.Get(fieldName) if value != nil { outrec.PutReference(fieldName, value) } } + } - for ; pe != nil; pe = pe.Next { - if pe.Key != tr.afterFieldName && !tr.fieldNamesSet[pe.Key] { - outrec.PutReference(pe.Key, pe.Value) + for ; pe != nil; pe = pe.Next { + if pe.Key != tr.centerFieldName && !tr.fieldNamesSet[pe.Key] { + outrec.PutReference(pe.Key, pe.Value) + } + } + + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) + +} + +func (tr *TransformerReorder) reorderBeforeOrAfterWithRegex( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext +) { + inrec := inrecAndContext.Record + if inrec.Get(tr.centerFieldName) == nil { + outputRecordsAndContexts.PushBack(inrecAndContext) + return + } + + matchingFieldNamesSet := lib.NewOrderedMap() + for pe := inrec.Head; pe != nil; pe = pe.Next { + for _, regex := range tr.regexes { + if regex.MatchString(pe.Key) { + if pe.Key != tr.centerFieldName { + matchingFieldNamesSet.Put(pe.Key, pe.Value) + break + } } } - - for _, fieldName := range tr.fieldNames { - inrec.MoveToHead(fieldName) - } - outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) - - } else { 
- outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker } + + // We use outrec.PutReference not output.PutCopy since inrec will be GC'ed + outrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + if pe.Key == tr.centerFieldName { + if tr.putAfter { + outrec.PutReference(pe.Key, pe.Value) + } + for pf := matchingFieldNamesSet.Head; pf != nil; pf = pf.Next { + outrec.PutReference(pf.Key, pf.Value.(*mlrval.Mlrval)) + } + if !tr.putAfter { + outrec.PutReference(pe.Key, pe.Value) + } + } else if !matchingFieldNamesSet.Has(pe.Key) { + outrec.PutReference(pe.Key, pe.Value) + } + } + + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context)) } diff --git a/test/cases/verb-reorder/regex-after/cmd b/test/cases/verb-reorder/regex-after/cmd new file mode 100644 index 000000000..59a79f7f6 --- /dev/null +++ b/test/cases/verb-reorder/regex-after/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -a 6 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-after/experr b/test/cases/verb-reorder/regex-after/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-reorder/regex-after/expout b/test/cases/verb-reorder/regex-after/expout new file mode 100644 index 000000000..62cb82ad5 --- /dev/null +++ b/test/cases/verb-reorder/regex-after/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +6 f +3 c +8 h +9 i +7 g +10 j diff --git a/test/cases/verb-reorder/regex-before/cmd b/test/cases/verb-reorder/regex-before/cmd new file mode 100644 index 000000000..f207567a8 --- /dev/null +++ b/test/cases/verb-reorder/regex-before/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -b 6 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-before/experr b/test/cases/verb-reorder/regex-before/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-reorder/regex-before/expout b/test/cases/verb-reorder/regex-before/expout new file mode 
100644 index 000000000..ef4d4f166 --- /dev/null +++ b/test/cases/verb-reorder/regex-before/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +3 c +8 h +9 i +6 f +7 g +10 j diff --git a/test/cases/verb-reorder/regex-end/cmd b/test/cases/verb-reorder/regex-end/cmd new file mode 100644 index 000000000..8c3e21c81 --- /dev/null +++ b/test/cases/verb-reorder/regex-end/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 -e test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-end/experr b/test/cases/verb-reorder/regex-end/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-reorder/regex-end/expout b/test/cases/verb-reorder/regex-end/expout new file mode 100644 index 000000000..7a7424aa9 --- /dev/null +++ b/test/cases/verb-reorder/regex-end/expout @@ -0,0 +1,10 @@ +1 a +2 b +4 d +5 e +6 f +7 g +10 j +3 c +8 h +9 i diff --git a/test/cases/verb-reorder/regex-start/cmd b/test/cases/verb-reorder/regex-start/cmd new file mode 100644 index 000000000..2020a1393 --- /dev/null +++ b/test/cases/verb-reorder/regex-start/cmd @@ -0,0 +1 @@ +mlr --n2x reorder -r 3,9,8 test/input/reorder-regex.nidx diff --git a/test/cases/verb-reorder/regex-start/experr b/test/cases/verb-reorder/regex-start/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-reorder/regex-start/expout b/test/cases/verb-reorder/regex-start/expout new file mode 100644 index 000000000..ee16332d9 --- /dev/null +++ b/test/cases/verb-reorder/regex-start/expout @@ -0,0 +1,10 @@ +3 c +8 h +9 i +1 a +2 b +4 d +5 e +6 f +7 g +10 j diff --git a/test/input/reorder-regex.nidx b/test/input/reorder-regex.nidx new file mode 100644 index 000000000..6a76ef8fa --- /dev/null +++ b/test/input/reorder-regex.nidx @@ -0,0 +1 @@ +a b c d e f g h i j From e5ec9f67bd01c8ade7d773bd6382805ef9a4f2c2 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 23 Jan 2024 17:18:13 -0500 Subject: [PATCH 119/456] Implement all/by-regex field selection (`-a`/`-r`) for `mlr sub`, `gsub`, and 
`ssub` (#1480) * Code-dedupe `sub`, `gsub`, and `ssub` verbs * More dedupe * Start with -a * Implement -r * unit-test cases * Windows command-line parsing --- pkg/transformers/gsub.go | 157 -------- pkg/transformers/ssub.go | 156 -------- pkg/transformers/sub.go | 157 -------- pkg/transformers/subs.go | 355 ++++++++++++++++++ test/cases/verb-sub-gsub-ssub/0001/cmd | 1 - test/cases/verb-sub-gsub-ssub/0001/expout | 11 - test/cases/verb-sub-gsub-ssub/0002/cmd | 1 - test/cases/verb-sub-gsub-ssub/0002/expout | 11 - test/cases/verb-sub-gsub-ssub/0003/cmd | 1 - test/cases/verb-sub-gsub-ssub/0003/expout | 11 - test/cases/verb-sub-gsub-ssub/0004/cmd | 1 - test/cases/verb-sub-gsub-ssub/0004/expout | 11 - test/cases/verb-sub-gsub-ssub/gsub-a/cmd | 1 + .../{0001 => gsub-a}/experr | 0 test/cases/verb-sub-gsub-ssub/gsub-a/expout | 11 + test/cases/verb-sub-gsub-ssub/gsub-f/cmd | 1 + .../{0002 => gsub-f}/experr | 0 test/cases/verb-sub-gsub-ssub/gsub-f/expout | 11 + .../verb-sub-gsub-ssub/non-windows/gsub-r/cmd | 1 + .../{0003 => non-windows/gsub-r}/experr | 0 .../non-windows/gsub-r/expout | 11 + .../verb-sub-gsub-ssub/non-windows/ssub-r/cmd | 1 + .../{0004 => non-windows/ssub-r}/experr | 0 .../non-windows/ssub-r/expout | 11 + .../verb-sub-gsub-ssub/non-windows/sub-r/cmd | 1 + .../non-windows/sub-r/experr | 0 .../non-windows/sub-r/expout | 11 + test/cases/verb-sub-gsub-ssub/ssub-a/cmd | 1 + test/cases/verb-sub-gsub-ssub/ssub-a/experr | 0 test/cases/verb-sub-gsub-ssub/ssub-a/expout | 11 + test/cases/verb-sub-gsub-ssub/ssub-f/cmd | 1 + test/cases/verb-sub-gsub-ssub/ssub-f/experr | 0 test/cases/verb-sub-gsub-ssub/ssub-f/expout | 11 + test/cases/verb-sub-gsub-ssub/sub-a/cmd | 1 + test/cases/verb-sub-gsub-ssub/sub-a/experr | 0 test/cases/verb-sub-gsub-ssub/sub-a/expout | 11 + test/cases/verb-sub-gsub-ssub/sub-f-2/cmd | 1 + test/cases/verb-sub-gsub-ssub/sub-f-2/experr | 0 test/cases/verb-sub-gsub-ssub/sub-f-2/expout | 11 + test/cases/verb-sub-gsub-ssub/sub-f/cmd | 1 + 
test/cases/verb-sub-gsub-ssub/sub-f/experr | 0 test/cases/verb-sub-gsub-ssub/sub-f/expout | 11 + 42 files changed, 475 insertions(+), 518 deletions(-) delete mode 100644 pkg/transformers/gsub.go delete mode 100644 pkg/transformers/ssub.go delete mode 100644 pkg/transformers/sub.go create mode 100644 pkg/transformers/subs.go delete mode 100644 test/cases/verb-sub-gsub-ssub/0001/cmd delete mode 100644 test/cases/verb-sub-gsub-ssub/0001/expout delete mode 100644 test/cases/verb-sub-gsub-ssub/0002/cmd delete mode 100644 test/cases/verb-sub-gsub-ssub/0002/expout delete mode 100644 test/cases/verb-sub-gsub-ssub/0003/cmd delete mode 100644 test/cases/verb-sub-gsub-ssub/0003/expout delete mode 100644 test/cases/verb-sub-gsub-ssub/0004/cmd delete mode 100644 test/cases/verb-sub-gsub-ssub/0004/expout create mode 100644 test/cases/verb-sub-gsub-ssub/gsub-a/cmd rename test/cases/verb-sub-gsub-ssub/{0001 => gsub-a}/experr (100%) create mode 100644 test/cases/verb-sub-gsub-ssub/gsub-a/expout create mode 100644 test/cases/verb-sub-gsub-ssub/gsub-f/cmd rename test/cases/verb-sub-gsub-ssub/{0002 => gsub-f}/experr (100%) create mode 100644 test/cases/verb-sub-gsub-ssub/gsub-f/expout create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/cmd rename test/cases/verb-sub-gsub-ssub/{0003 => non-windows/gsub-r}/experr (100%) create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/expout create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/cmd rename test/cases/verb-sub-gsub-ssub/{0004 => non-windows/ssub-r}/experr (100%) create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/expout create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/sub-r/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/sub-r/experr create mode 100644 test/cases/verb-sub-gsub-ssub/non-windows/sub-r/expout create mode 100644 test/cases/verb-sub-gsub-ssub/ssub-a/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/ssub-a/experr create mode 100644 
test/cases/verb-sub-gsub-ssub/ssub-a/expout create mode 100644 test/cases/verb-sub-gsub-ssub/ssub-f/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/ssub-f/experr create mode 100644 test/cases/verb-sub-gsub-ssub/ssub-f/expout create mode 100644 test/cases/verb-sub-gsub-ssub/sub-a/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/sub-a/experr create mode 100644 test/cases/verb-sub-gsub-ssub/sub-a/expout create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f-2/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f-2/experr create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f-2/expout create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f/cmd create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f/experr create mode 100644 test/cases/verb-sub-gsub-ssub/sub-f/expout diff --git a/pkg/transformers/gsub.go b/pkg/transformers/gsub.go deleted file mode 100644 index 0b188505b..000000000 --- a/pkg/transformers/gsub.go +++ /dev/null @@ -1,157 +0,0 @@ -package transformers - -import ( - "container/list" - "fmt" - "os" - "strings" - - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" -) - -// ---------------------------------------------------------------- -const verbNameGsub = "gsub" - -var GsubSetup = TransformerSetup{ - Verb: verbNameGsub, - UsageFunc: transformerGsubUsage, - ParseCLIFunc: transformerGsubParseCLI, - IgnoresInput: false, -} - -func transformerGsubUsage( - o *os.File, -) { - fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub) - fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") - fmt.Fprintf(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n") - fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n") - fmt.Fprintf(o, "Options:\n") - fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") - fmt.Fprintf(o, "-h|--help Show this message.\n") -} - -func 
transformerGsubParseCLI( - pargi *int, - argc int, - args []string, - _ *cli.TOptions, - doConstruct bool, // false for first pass of CLI-parse, true for second pass -) IRecordTransformer { - - // Skip the verb name from the current spot in the mlr command line - argi := *pargi - verb := args[argi] - argi++ - - // Parse local flags - var fieldNames []string = nil - var oldText string - var newText string - - for argi < argc /* variable increment: 1 or 2 depending on flag */ { - opt := args[argi] - if !strings.HasPrefix(opt, "-") { - break // No more flag options to process - } - if args[argi] == "--" { - break // All transformers must do this so main-flags can follow verb-flags - } - argi++ - - if opt == "-h" || opt == "--help" { - transformerGsubUsage(os.Stdout) - os.Exit(0) - - } else if opt == "-f" { - fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) - } else { - transformerGsubUsage(os.Stderr) - os.Exit(1) - } - } - - if fieldNames == nil { - transformerGsubUsage(os.Stderr) - os.Exit(1) - } - - // Get the old and new text from the command line - if (argc - argi) < 2 { - transformerGsubUsage(os.Stderr) - os.Exit(1) - } - oldText = args[argi] - newText = args[argi+1] - - argi += 2 - - *pargi = argi - if !doConstruct { // All transformers must do this for main command-line parsing - return nil - } - - transformer, err := NewTransformerGsub( - fieldNames, - oldText, - newText, - ) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - - return transformer -} - -// ---------------------------------------------------------------- -type TransformerGsub struct { - fieldNames []string - oldText *mlrval.Mlrval - newText *mlrval.Mlrval -} - -// ---------------------------------------------------------------- -func NewTransformerGsub( - fieldNames []string, - oldText string, - newText string, -) (*TransformerGsub, error) { - tr := &TransformerGsub{ - fieldNames: fieldNames, - oldText: mlrval.FromString(oldText), - newText: 
mlrval.FromString(newText), - } - return tr, nil -} - -func (tr *TransformerGsub) Transform( - inrecAndContext *types.RecordAndContext, - outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, -) { - HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - - for _, fieldName := range tr.fieldNames { - oldValue := inrec.Get(fieldName) - if oldValue == nil { - continue - } - - newValue := bifs.BIF_gsub(oldValue, tr.oldText, tr.newText) - - inrec.PutReference(fieldName, newValue) - } - - outputRecordsAndContexts.PushBack(inrecAndContext) - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker - } -} diff --git a/pkg/transformers/ssub.go b/pkg/transformers/ssub.go deleted file mode 100644 index a31864711..000000000 --- a/pkg/transformers/ssub.go +++ /dev/null @@ -1,156 +0,0 @@ -package transformers - -import ( - "container/list" - "fmt" - "os" - "strings" - - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" -) - -// ---------------------------------------------------------------- -const verbNameSsub = "ssub" - -var SsubSetup = TransformerSetup{ - Verb: verbNameSsub, - UsageFunc: transformerSsubUsage, - ParseCLIFunc: transformerSsubParseCLI, - IgnoresInput: false, -} - -func transformerSsubUsage( - o *os.File, -) { - fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub) - fmt.Fprintf(o, "Replaces old string with new string in specified field(s), without regex support for\n") - fmt.Fprintf(o, "the old string, like the `ssub` DSL function. 
See also the `gsub` and `sub` verbs.\n") - fmt.Fprintf(o, "Options:\n") - fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") - fmt.Fprintf(o, "-h|--help Show this message.\n") -} - -func transformerSsubParseCLI( - pargi *int, - argc int, - args []string, - _ *cli.TOptions, - doConstruct bool, // false for first pass of CLI-parse, true for second pass -) IRecordTransformer { - - // Skip the verb name from the current spot in the mlr command line - argi := *pargi - verb := args[argi] - argi++ - - // Parse local flags - var fieldNames []string = nil - var oldText string - var newText string - - for argi < argc /* variable increment: 1 or 2 depending on flag */ { - opt := args[argi] - if !strings.HasPrefix(opt, "-") { - break // No more flag options to process - } - if args[argi] == "--" { - break // All transformers must do this so main-flags can follow verb-flags - } - argi++ - - if opt == "-h" || opt == "--help" { - transformerSsubUsage(os.Stdout) - os.Exit(0) - - } else if opt == "-f" { - fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) - } else { - transformerSsubUsage(os.Stderr) - os.Exit(1) - } - } - - if fieldNames == nil { - transformerSsubUsage(os.Stderr) - os.Exit(1) - } - - // Get the old and new text from the command line - if (argc - argi) < 2 { - transformerSsubUsage(os.Stderr) - os.Exit(1) - } - oldText = args[argi] - newText = args[argi+1] - - argi += 2 - - *pargi = argi - if !doConstruct { // All transformers must do this for main command-line parsing - return nil - } - - transformer, err := NewTransformerSsub( - fieldNames, - oldText, - newText, - ) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - - return transformer -} - -// ---------------------------------------------------------------- -type TransformerSsub struct { - fieldNames []string - oldText *mlrval.Mlrval - newText *mlrval.Mlrval -} - -// ---------------------------------------------------------------- -func NewTransformerSsub( - fieldNames 
[]string, - oldText string, - newText string, -) (*TransformerSsub, error) { - tr := &TransformerSsub{ - fieldNames: fieldNames, - oldText: mlrval.FromString(oldText), - newText: mlrval.FromString(newText), - } - return tr, nil -} - -func (tr *TransformerSsub) Transform( - inrecAndContext *types.RecordAndContext, - outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, -) { - HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - - for _, fieldName := range tr.fieldNames { - oldValue := inrec.Get(fieldName) - if oldValue == nil { - continue - } - - newValue := bifs.BIF_ssub(oldValue, tr.oldText, tr.newText) - - inrec.PutReference(fieldName, newValue) - } - - outputRecordsAndContexts.PushBack(inrecAndContext) - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker - } -} diff --git a/pkg/transformers/sub.go b/pkg/transformers/sub.go deleted file mode 100644 index 1c96b45fc..000000000 --- a/pkg/transformers/sub.go +++ /dev/null @@ -1,157 +0,0 @@ -package transformers - -import ( - "container/list" - "fmt" - "os" - "strings" - - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" -) - -// ---------------------------------------------------------------- -const verbNameSub = "sub" - -var SubSetup = TransformerSetup{ - Verb: verbNameSub, - UsageFunc: transformerSubUsage, - ParseCLIFunc: transformerSubParseCLI, - IgnoresInput: false, -} - -func transformerSubUsage( - o *os.File, -) { - fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub) - fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") - fmt.Fprintf(o, "for the old string and not handling multiple matches, like the 
`sub` DSL function.\n") - fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n") - fmt.Fprintf(o, "Options:\n") - fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") - fmt.Fprintf(o, "-h|--help Show this message.\n") -} - -func transformerSubParseCLI( - pargi *int, - argc int, - args []string, - _ *cli.TOptions, - doConstruct bool, // false for first pass of CLI-parse, true for second pass -) IRecordTransformer { - - // Skip the verb name from the current spot in the mlr command line - argi := *pargi - verb := args[argi] - argi++ - - // Parse local flags - var fieldNames []string = nil - var oldText string - var newText string - - for argi < argc /* variable increment: 1 or 2 depending on flag */ { - opt := args[argi] - if !strings.HasPrefix(opt, "-") { - break // No more flag options to process - } - if args[argi] == "--" { - break // All transformers must do this so main-flags can follow verb-flags - } - argi++ - - if opt == "-h" || opt == "--help" { - transformerSubUsage(os.Stdout) - os.Exit(0) - - } else if opt == "-f" { - fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) - } else { - transformerSubUsage(os.Stderr) - os.Exit(1) - } - } - - if fieldNames == nil { - transformerSubUsage(os.Stderr) - os.Exit(1) - } - - // Get the old and new text from the command line - if (argc - argi) < 2 { - transformerSubUsage(os.Stderr) - os.Exit(1) - } - oldText = args[argi] - newText = args[argi+1] - - argi += 2 - - *pargi = argi - if !doConstruct { // All transformers must do this for main command-line parsing - return nil - } - - transformer, err := NewTransformerSub( - fieldNames, - oldText, - newText, - ) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - - return transformer -} - -// ---------------------------------------------------------------- -type TransformerSub struct { - fieldNames []string - oldText *mlrval.Mlrval - newText *mlrval.Mlrval -} - -// ---------------------------------------------------------------- 
-func NewTransformerSub( - fieldNames []string, - oldText string, - newText string, -) (*TransformerSub, error) { - tr := &TransformerSub{ - fieldNames: fieldNames, - oldText: mlrval.FromString(oldText), - newText: mlrval.FromString(newText), - } - return tr, nil -} - -func (tr *TransformerSub) Transform( - inrecAndContext *types.RecordAndContext, - outputRecordsAndContexts *list.List, // list of *types.RecordAndContext - inputDownstreamDoneChannel <-chan bool, - outputDownstreamDoneChannel chan<- bool, -) { - HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - - if !inrecAndContext.EndOfStream { - inrec := inrecAndContext.Record - - for _, fieldName := range tr.fieldNames { - oldValue := inrec.Get(fieldName) - if oldValue == nil { - continue - } - - newValue := bifs.BIF_sub(oldValue, tr.oldText, tr.newText) - - inrec.PutReference(fieldName, newValue) - } - - outputRecordsAndContexts.PushBack(inrecAndContext) - } else { - outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker - } -} diff --git a/pkg/transformers/subs.go b/pkg/transformers/subs.go new file mode 100644 index 000000000..e57469d91 --- /dev/null +++ b/pkg/transformers/subs.go @@ -0,0 +1,355 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "regexp" + "strings" + + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameSub = "sub" +const verbNameGsub = "gsub" +const verbNameSsub = "ssub" + +var SubSetup = TransformerSetup{ + Verb: verbNameSub, + UsageFunc: transformerSubUsage, + ParseCLIFunc: transformerSubParseCLI, + IgnoresInput: false, +} + +var GsubSetup = TransformerSetup{ + Verb: verbNameGsub, + UsageFunc: transformerGsubUsage, + ParseCLIFunc: transformerGsubParseCLI, + IgnoresInput: 
false, +} + +var SsubSetup = TransformerSetup{ + Verb: verbNameSsub, + UsageFunc: transformerSsubUsage, + ParseCLIFunc: transformerSsubParseCLI, + IgnoresInput: false, +} + +func transformerSubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and not handling multiple matches, like the `sub` DSL function.\n") + fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerGsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n") + fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerSsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), without regex support for\n") + fmt.Fprintf(o, "the old string, like the `ssub` DSL function. 
See also the `gsub` and `sub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +type subConstructorFunc func( + fieldNames []string, + doAllFieldNames bool, + doRegexes bool, + oldText string, + newText string, +) (IRecordTransformer, error) + +type fieldAcceptorFunc func( + fieldName string, +) bool + +func transformerSubParseCLI( + pargi *int, + argc int, + args []string, + opts *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + return transformerSubsParseCLI(pargi, argc, args, opts, doConstruct, transformerSubUsage, NewTransformerSub) +} + +func transformerGsubParseCLI( + pargi *int, + argc int, + args []string, + opts *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + return transformerSubsParseCLI(pargi, argc, args, opts, doConstruct, transformerGsubUsage, NewTransformerGsub) +} + +func transformerSsubParseCLI( + pargi *int, + argc int, + args []string, + opts *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + return transformerSubsParseCLI(pargi, argc, args, opts, doConstruct, transformerSsubUsage, NewTransformerSsub) +} + +// transformerSubsParseCLI is a shared CLI-parser for the sub, gsub, and ssub verbs. 
+func transformerSubsParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass + usageFunc TransformerUsageFunc, + constructorFunc subConstructorFunc, +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + doAllFieldNames := false + doRegexes := false + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + usageFunc(os.Stdout) + os.Exit(0) + + } else if opt == "-a" { + doAllFieldNames = true + doRegexes = false + fieldNames = nil + + } else if opt == "-r" { + doRegexes = true // -f values are then treated as regexes rather than literal field names + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + doAllFieldNames = false + } else { + usageFunc(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil && !doAllFieldNames { + usageFunc(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + usageFunc(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := constructorFunc( + fieldNames, + doAllFieldNames, + doRegexes, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +type TransformerSubs struct { + fieldNamesSet map[string]bool // for -f + regexes []*regexp.Regexp // for -r + oldText *mlrval.Mlrval // text to be replaced, from the command line + newText *mlrval.Mlrval // replacement text, from the command line + fieldAcceptor 
fieldAcceptorFunc // for -f, -r, -a + subber bifs.TernaryFunc // for sub, gsub, ssub +} + +func NewTransformerSub( + fieldNames []string, + doAllFieldNames bool, + doRegexes bool, + oldText string, + newText string, +) (IRecordTransformer, error) { + return NewTransformerSubs(fieldNames, doAllFieldNames, doRegexes, oldText, newText, safe_sub) +} + +func NewTransformerGsub( + fieldNames []string, + doAllFieldNames bool, + doRegexes bool, + oldText string, + newText string, +) (IRecordTransformer, error) { + return NewTransformerSubs(fieldNames, doAllFieldNames, doRegexes, oldText, newText, safe_gsub) +} + +func NewTransformerSsub( + fieldNames []string, + doAllFieldNames bool, + doRegexes bool, + oldText string, + newText string, +) (IRecordTransformer, error) { + return NewTransformerSubs(fieldNames, doAllFieldNames, doRegexes, oldText, newText, safe_ssub) +} + +func NewTransformerSubs( + fieldNames []string, + doAllFieldNames bool, + doRegexes bool, + oldText string, + newText string, + subber bifs.TernaryFunc, +) (IRecordTransformer, error) { + tr := &TransformerSubs{ + fieldNamesSet: lib.StringListToSet(fieldNames), + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + subber: subber, + } + if doAllFieldNames { + tr.fieldAcceptor = tr.fieldAcceptorAll + } else if doRegexes { + tr.fieldAcceptor = tr.fieldAcceptorByRegexes + + tr.regexes = make([]*regexp.Regexp, len(fieldNames)) + for i, regexString := range fieldNames { + // Handles "a.*b"i Miller case-insensitive-regex specification + regex, err := lib.CompileMillerRegex(regexString) + if err != nil { + fmt.Fprintf(os.Stderr, "%s %s: cannot compile regex [%s]\n", "mlr", verbNameCut, regexString) // NOTE(review): verbNameCut is presumably the cut verb's name, but this constructor serves sub/gsub/ssub -- looks copy-pasted; confirm + os.Exit(1) // NOTE(review): exits here although this function returns an error the caller already reports; confirm intent + } + tr.regexes[i] = regex + } + } else { + tr.fieldAcceptor = tr.fieldAcceptorByNames + } + return tr, nil +} + +func (tr *TransformerSubs) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + 
inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + // Run sub, gsub, or ssub on the user-specified field names + for pe := inrec.Head; pe != nil; pe = pe.Next { + if tr.fieldAcceptor(pe.Key) { + pe.Value = tr.subber(pe.Value, tr.oldText, tr.newText) + } + } + } + // Including emit of end-of-stream marker + outputRecordsAndContexts.PushBack(inrecAndContext) +} + +// fieldAcceptorByNames implements -f +func (tr *TransformerSubs) fieldAcceptorByNames( + fieldName string, +) bool { + return tr.fieldNamesSet[fieldName] +} + +// fieldAcceptorByRegexes implements -r +func (tr *TransformerSubs) fieldAcceptorByRegexes( + fieldName string, +) bool { + for _, regex := range tr.regexes { + if regex.MatchString(fieldName) { + return true + } + } + return false +} + +// fieldAcceptorAll implements -a +func (tr *TransformerSubs) fieldAcceptorAll( + fieldName string, +) bool { + return true +} + +// safe_sub implements sub, but doesn't produce error-type on non-string input. +func safe_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + if input1.IsString() { + return bifs.BIF_sub(input1, input2, input3) + } else { + return input1 + } +} + +// safe_gsub implements gsub, but doesn't produce error-type on non-string input. +func safe_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + if input1.IsString() { + return bifs.BIF_gsub(input1, input2, input3) + } else { + return input1 + } +} + +// safe_ssub implements ssub, but doesn't produce error-type on non-string input. 
+func safe_ssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + if input1.IsString() { + return bifs.BIF_ssub(input1, input2, input3) + } else { + return input1 + } +} diff --git a/test/cases/verb-sub-gsub-ssub/0001/cmd b/test/cases/verb-sub-gsub-ssub/0001/cmd deleted file mode 100644 index 7d4cec775..000000000 --- a/test/cases/verb-sub-gsub-ssub/0001/cmd +++ /dev/null @@ -1 +0,0 @@ -mlr --d2p --from test/input/abixy sub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0001/expout b/test/cases/verb-sub-gsub-ssub/0001/expout deleted file mode 100644 index 917c3f5ed..000000000 --- a/test/cases/verb-sub-gsub-ssub/0001/expout +++ /dev/null @@ -1,11 +0,0 @@ -a b i x y -pan pan 1 0.34679014 0.72680286 -Xks pan 2 0.75867996 0.52215111 -wyX wyX 3 0.20460331 0.33831853 -Xks wyX 4 0.38139939 0.13418874 -wyX pan 5 0.57328892 0.86362447 -zXe pan 6 0.52712616 0.49322129 -Xks zXe 7 0.61178406 0.18788492 -zXe wyX 8 0.59855401 0.97618139 -hat wyX 9 0.03144188 0.74955076 -pan wyX 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0002/cmd b/test/cases/verb-sub-gsub-ssub/0002/cmd deleted file mode 100644 index f33200891..000000000 --- a/test/cases/verb-sub-gsub-ssub/0002/cmd +++ /dev/null @@ -1 +0,0 @@ -mlr --d2p --from test/input/abixy gsub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0002/expout b/test/cases/verb-sub-gsub-ssub/0002/expout deleted file mode 100644 index 49d53727b..000000000 --- a/test/cases/verb-sub-gsub-ssub/0002/expout +++ /dev/null @@ -1,11 +0,0 @@ -a b i x y -pan pan 1 0.34679014 0.72680286 -Xks pan 2 0.75867996 0.52215111 -wyX wyX 3 0.20460331 0.33831853 -Xks wyX 4 0.38139939 0.13418874 -wyX pan 5 0.57328892 0.86362447 -zXX pan 6 0.52712616 0.49322129 -Xks zXX 7 0.61178406 0.18788492 -zXX wyX 8 0.59855401 0.97618139 -hat wyX 9 0.03144188 0.74955076 -pan wyX 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0003/cmd b/test/cases/verb-sub-gsub-ssub/0003/cmd deleted file mode 100644 index 
ff6b15c4a..000000000 --- a/test/cases/verb-sub-gsub-ssub/0003/cmd +++ /dev/null @@ -1 +0,0 @@ -mlr --d2p --from test/input/abixy sub -f a,b . X diff --git a/test/cases/verb-sub-gsub-ssub/0003/expout b/test/cases/verb-sub-gsub-ssub/0003/expout deleted file mode 100644 index a8b8e8643..000000000 --- a/test/cases/verb-sub-gsub-ssub/0003/expout +++ /dev/null @@ -1,11 +0,0 @@ -a b i x y -Xan Xan 1 0.34679014 0.72680286 -Xks Xan 2 0.75867996 0.52215111 -Xye Xye 3 0.20460331 0.33831853 -Xks Xye 4 0.38139939 0.13418874 -Xye Xan 5 0.57328892 0.86362447 -Xee Xan 6 0.52712616 0.49322129 -Xks Xee 7 0.61178406 0.18788492 -Xee Xye 8 0.59855401 0.97618139 -Xat Xye 9 0.03144188 0.74955076 -Xan Xye 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0004/cmd b/test/cases/verb-sub-gsub-ssub/0004/cmd deleted file mode 100644 index 8770d578d..000000000 --- a/test/cases/verb-sub-gsub-ssub/0004/cmd +++ /dev/null @@ -1 +0,0 @@ -mlr --d2p --from test/input/abixy ssub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0004/expout b/test/cases/verb-sub-gsub-ssub/0004/expout deleted file mode 100644 index 917c3f5ed..000000000 --- a/test/cases/verb-sub-gsub-ssub/0004/expout +++ /dev/null @@ -1,11 +0,0 @@ -a b i x y -pan pan 1 0.34679014 0.72680286 -Xks pan 2 0.75867996 0.52215111 -wyX wyX 3 0.20460331 0.33831853 -Xks wyX 4 0.38139939 0.13418874 -wyX pan 5 0.57328892 0.86362447 -zXe pan 6 0.52712616 0.49322129 -Xks zXe 7 0.61178406 0.18788492 -zXe wyX 8 0.59855401 0.97618139 -hat wyX 9 0.03144188 0.74955076 -pan wyX 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/gsub-a/cmd b/test/cases/verb-sub-gsub-ssub/gsub-a/cmd new file mode 100644 index 000000000..21a9e342c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/gsub-a/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv gsub -a l X diff --git a/test/cases/verb-sub-gsub-ssub/0001/experr b/test/cases/verb-sub-gsub-ssub/gsub-a/experr similarity index 100% rename from 
test/cases/verb-sub-gsub-ssub/0001/experr rename to test/cases/verb-sub-gsub-ssub/gsub-a/experr diff --git a/test/cases/verb-sub-gsub-ssub/gsub-a/expout b/test/cases/verb-sub-gsub-ssub/gsub-a/expout new file mode 100644 index 000000000..b75a98d8f --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/gsub-a/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yeXXow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square faXse 4 48 77.55420000 7.46700000 +purpXe triangXe faXse 5 51 81.22900000 8.59100000 +red square faXse 6 64 77.19910000 9.53100000 +purpXe triangXe faXse 7 65 80.14050000 5.82400000 +yeXXow circXe true 8 73 63.97850000 4.23700000 +yeXXow circXe true 9 87 63.50580000 8.33500000 +purpXe square faXse 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/gsub-f/cmd b/test/cases/verb-sub-gsub-ssub/gsub-f/cmd new file mode 100644 index 000000000..a4c3ffc4b --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/gsub-f/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv gsub -f color,shape,index l X diff --git a/test/cases/verb-sub-gsub-ssub/0002/experr b/test/cases/verb-sub-gsub-ssub/gsub-f/experr similarity index 100% rename from test/cases/verb-sub-gsub-ssub/0002/experr rename to test/cases/verb-sub-gsub-ssub/gsub-f/experr diff --git a/test/cases/verb-sub-gsub-ssub/gsub-f/expout b/test/cases/verb-sub-gsub-ssub/gsub-f/expout new file mode 100644 index 000000000..fda761674 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/gsub-f/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yeXXow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purpXe triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purpXe triangXe false 7 65 80.14050000 5.82400000 +yeXXow circXe true 8 
73 63.97850000 4.23700000 +yeXXow circXe true 9 87 63.50580000 8.33500000 +purpXe square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/cmd b/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/cmd new file mode 100644 index 000000000..14c697154 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv gsub -r -f '.*e' l X diff --git a/test/cases/verb-sub-gsub-ssub/0003/experr b/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/experr similarity index 100% rename from test/cases/verb-sub-gsub-ssub/0003/experr rename to test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/experr diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/expout b/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/expout new file mode 100644 index 000000000..93b24ea0c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/gsub-r/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yellow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purple triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purple triangXe false 7 65 80.14050000 5.82400000 +yellow circXe true 8 73 63.97850000 4.23700000 +yellow circXe true 9 87 63.50580000 8.33500000 +purple square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/cmd b/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/cmd new file mode 100644 index 000000000..f6cf74d5e --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv ssub -r -f '.*e' l X diff --git a/test/cases/verb-sub-gsub-ssub/0004/experr b/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/experr similarity index 100% rename from test/cases/verb-sub-gsub-ssub/0004/experr rename 
to test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/experr diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/expout b/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/expout new file mode 100644 index 000000000..93b24ea0c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/ssub-r/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yellow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purple triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purple triangXe false 7 65 80.14050000 5.82400000 +yellow circXe true 8 73 63.97850000 4.23700000 +yellow circXe true 9 87 63.50580000 8.33500000 +purple square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/cmd b/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/cmd new file mode 100644 index 000000000..cae049e51 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv sub -r -f '.*e' l X diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/experr b/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/expout b/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/expout new file mode 100644 index 000000000..93b24ea0c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/non-windows/sub-r/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yellow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purple triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purple triangXe false 7 65 80.14050000 5.82400000 +yellow circXe true 8 73 
63.97850000 4.23700000 +yellow circXe true 9 87 63.50580000 8.33500000 +purple square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/ssub-a/cmd b/test/cases/verb-sub-gsub-ssub/ssub-a/cmd new file mode 100644 index 000000000..f0af9a1c9 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/ssub-a/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv ssub -a l X diff --git a/test/cases/verb-sub-gsub-ssub/ssub-a/experr b/test/cases/verb-sub-gsub-ssub/ssub-a/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/ssub-a/expout b/test/cases/verb-sub-gsub-ssub/ssub-a/expout new file mode 100644 index 000000000..643a0290f --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/ssub-a/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yeXlow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square faXse 4 48 77.55420000 7.46700000 +purpXe triangXe faXse 5 51 81.22900000 8.59100000 +red square faXse 6 64 77.19910000 9.53100000 +purpXe triangXe faXse 7 65 80.14050000 5.82400000 +yeXlow circXe true 8 73 63.97850000 4.23700000 +yeXlow circXe true 9 87 63.50580000 8.33500000 +purpXe square faXse 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/ssub-f/cmd b/test/cases/verb-sub-gsub-ssub/ssub-f/cmd new file mode 100644 index 000000000..26b395415 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/ssub-f/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv ssub -f color,shape,index l X diff --git a/test/cases/verb-sub-gsub-ssub/ssub-f/experr b/test/cases/verb-sub-gsub-ssub/ssub-f/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/ssub-f/expout b/test/cases/verb-sub-gsub-ssub/ssub-f/expout new file mode 100644 index 000000000..f4bf55be4 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/ssub-f/expout @@ -0,0 +1,11 @@ +color shape flag k index 
quantity rate +yeXlow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purpXe triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purpXe triangXe false 7 65 80.14050000 5.82400000 +yeXlow circXe true 8 73 63.97850000 4.23700000 +yeXlow circXe true 9 87 63.50580000 8.33500000 +purpXe square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/sub-a/cmd b/test/cases/verb-sub-gsub-ssub/sub-a/cmd new file mode 100644 index 000000000..b0ca748b5 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-a/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv sub -a l X diff --git a/test/cases/verb-sub-gsub-ssub/sub-a/experr b/test/cases/verb-sub-gsub-ssub/sub-a/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/sub-a/expout b/test/cases/verb-sub-gsub-ssub/sub-a/expout new file mode 100644 index 000000000..643a0290f --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-a/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yeXlow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square faXse 4 48 77.55420000 7.46700000 +purpXe triangXe faXse 5 51 81.22900000 8.59100000 +red square faXse 6 64 77.19910000 9.53100000 +purpXe triangXe faXse 7 65 80.14050000 5.82400000 +yeXlow circXe true 8 73 63.97850000 4.23700000 +yeXlow circXe true 9 87 63.50580000 8.33500000 +purpXe square faXse 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/sub-f-2/cmd b/test/cases/verb-sub-gsub-ssub/sub-f-2/cmd new file mode 100644 index 000000000..8d5de9b90 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-f-2/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv sub -f a,b l X diff --git a/test/cases/verb-sub-gsub-ssub/sub-f-2/experr 
b/test/cases/verb-sub-gsub-ssub/sub-f-2/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/sub-f-2/expout b/test/cases/verb-sub-gsub-ssub/sub-f-2/expout new file mode 100644 index 000000000..c3c56133c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-f-2/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yellow triangle true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circle true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purple triangle false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purple triangle false 7 65 80.14050000 5.82400000 +yellow circle true 8 73 63.97850000 4.23700000 +yellow circle true 9 87 63.50580000 8.33500000 +purple square false 10 91 72.37350000 8.24300000 diff --git a/test/cases/verb-sub-gsub-ssub/sub-f/cmd b/test/cases/verb-sub-gsub-ssub/sub-f/cmd new file mode 100644 index 000000000..605605ad0 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-f/cmd @@ -0,0 +1 @@ +mlr --c2p --from test/input/example.csv sub -f color,shape,index l X diff --git a/test/cases/verb-sub-gsub-ssub/sub-f/experr b/test/cases/verb-sub-gsub-ssub/sub-f/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sub-gsub-ssub/sub-f/expout b/test/cases/verb-sub-gsub-ssub/sub-f/expout new file mode 100644 index 000000000..f4bf55be4 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/sub-f/expout @@ -0,0 +1,11 @@ +color shape flag k index quantity rate +yeXlow triangXe true 1 11 43.64980000 9.88700000 +red square true 2 15 79.27780000 0.01300000 +red circXe true 3 16 13.81030000 2.90100000 +red square false 4 48 77.55420000 7.46700000 +purpXe triangXe false 5 51 81.22900000 8.59100000 +red square false 6 64 77.19910000 9.53100000 +purpXe triangXe false 7 65 80.14050000 5.82400000 +yeXlow circXe true 8 73 63.97850000 4.23700000 +yeXlow circXe true 9 87 63.50580000 8.33500000 +purpXe square 
false 10 91 72.37350000 8.24300000 From 1834a925b39089d9dc23cee5affcb4149d0aab2e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 23 Jan 2024 17:31:58 -0500 Subject: [PATCH 120/456] Miller 6.11.0 (#1481) * miller 6.11.0 * Artifacts from `make dev` --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/how-to-release.md | 6 ++--- docs/src/how-to-release.md.in | 6 ++--- docs/src/manpage.md | 12 ++++++--- docs/src/manpage.txt | 12 ++++++--- docs/src/reference-verbs.md | 38 +++++++++++++------------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 12 ++++++--- man/mlr.1 | 6 ++--- miller.spec | 5 +++- pkg/version/version.go | 2 +- 11 files changed, 82 insertions(+), 67 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md index 2833f1417..57d39b2ff 100644 --- a/docs/src/how-to-release.md +++ b/docs/src/how-to-release.md @@ -30,9 +30,9 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * If Go version is being updated: edit all three of - * `go.mod` - * `.github/workflows/go.yml` - * `.github/workflows/release.yml` + * `go.mod` + * `.github/workflows/go.yml` + * `.github/workflows/release.yml` * Create the release tarball: diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in index fac0248c1..b54b1be26 100644 --- a/docs/src/how-to-release.md.in +++ b/docs/src/how-to-release.md.in @@ -14,9 +14,9 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * If Go version is being updated: edit all three of - * `go.mod` - * `.github/workflows/go.yml` - * `.github/workflows/release.yml` + * `go.mod` + * `.github/workflows/go.yml` + * `.github/workflows/release.yml` * Create the release tarball: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index e2123e752..967d18d7f 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,7 +19,9 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-4mMILLER24m(1)                                                            4mMILLER24m(1)
+MILLER(1)                                                            MILLER(1)
+
+
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -48,7 +50,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.10.0-dev.
+       manpage documents mlr 6.11.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -808,7 +810,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "     "    N/A    "\n"
+               tsv      "  "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -3688,5 +3690,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-01-20                         4mMILLER24m(1)
+
+
+                                  2024-01-23                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index ce0a53994..eee5e7837 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,4 +1,6 @@ -4mMILLER24m(1) 4mMILLER24m(1) +MILLER(1) MILLER(1) + + 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -27,7 +29,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.10.0-dev. + manpage documents mlr 6.11.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -787,7 +789,7 @@ markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -3667,4 +3669,6 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-01-20 4mMILLER24m(1) + + + 2024-01-23 MILLER(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 9e5fff6df..cfa66dd82 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3409,14 +3409,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3425,12 +3425,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3516,11 +3516,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297
## step @@ -3797,9 +3797,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index ce0a53994..eee5e7837 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1,4 +1,6 @@
-4mMILLER24m(1)                                                            4mMILLER24m(1)
+MILLER(1)                                                            MILLER(1)
+
+
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -27,7 +29,7 @@
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.10.0-dev.
+       manpage documents mlr 6.11.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -787,7 +789,7 @@
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "     "    N/A    "\n"
+               tsv      "  "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -3667,4 +3669,6 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-01-20                         4mMILLER24m(1)
+
+
+                                  2024-01-23                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index f7dde70fe..18a426704 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-01-20
+.\"      Date: 2024-01-23
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-01-20" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-01-23" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the
 array, then Miller's natural data structure is the insertion-ordered hash map.
 This encompasses a variety of data formats, including but not limited to the
 familiar CSV, TSV, and JSON.  (Miller can handle positionally-indexed data as
-a special case.) This manpage documents mlr 6.10.0-dev.
+a special case.) This manpage documents mlr 6.11.0.
 .SH "EXAMPLES"
 .sp
 
diff --git a/miller.spec b/miller.spec
index 413f6cdce..137618c56 100644
--- a/miller.spec
+++ b/miller.spec
@@ -1,6 +1,6 @@
 Summary: Name-indexed data processing tool
 Name: miller
-Version: 6.10.0
+Version: 6.11.0
 Release: 1%{?dist}
 License: BSD
 Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz
@@ -36,6 +36,9 @@ make install
 %{_mandir}/man1/mlr.1*
 
 %changelog
+* Tue Jan 23 2023 John Kerl  - 6.11.0-1
+- 6.11.0 release
+
 * Wed Dec 13 2023 John Kerl  - 6.10.0-1
 - 6.10.0 release
 
diff --git a/pkg/version/version.go b/pkg/version/version.go
index 0e6389aae..88e4fdfeb 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.10.0-dev"
+var STRING string = "6.11.0"

From 6f24fb399977e3cd037cd0a9140a1364be836799 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Tue, 23 Jan 2024 17:35:31 -0500
Subject: [PATCH 121/456] miller.spec typofix

---
 miller.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/miller.spec b/miller.spec
index 137618c56..2ac4f6265 100644
--- a/miller.spec
+++ b/miller.spec
@@ -36,7 +36,7 @@ make install
 %{_mandir}/man1/mlr.1*
 
 %changelog
-* Tue Jan 23 2023 John Kerl  - 6.11.0-1
+* Tue Jan 23 2024 John Kerl  - 6.11.0-1
 - 6.11.0 release
 
 * Wed Dec 13 2023 John Kerl  - 6.10.0-1

From f26bc0d9a13ef63903359a745de925278a4f9a57 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Tue, 23 Jan 2024 18:32:56 -0500
Subject: [PATCH 122/456] update release docs

---
 docs/src/release-docs.md    | 3 ++-
 docs/src/release-docs.md.in | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md
index 85a13d804..ca6404042 100644
--- a/docs/src/release-docs.md
+++ b/docs/src/release-docs.md
@@ -24,7 +24,8 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho
 | Release | Docs                                                                | Release notes |
 |---------|---------------------------------------------------------------------|---------------|
 main      | [main branch](https://miller.readthedocs.io/en/main)                | N/A |
-6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.110](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
+6.11.0     | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0)           | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) |
+6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
 6.9.0     | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0)              | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) |
 6.8.0     | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0)              | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) |
 6.7.0     | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0)              | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) |
diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in
index e82b42755..bac3ef2ad 100644
--- a/docs/src/release-docs.md.in
+++ b/docs/src/release-docs.md.in
@@ -8,7 +8,8 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho
 | Release | Docs                                                                | Release notes |
 |---------|---------------------------------------------------------------------|---------------|
 main      | [main branch](https://miller.readthedocs.io/en/main)                | N/A |
-6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.110](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
+6.11.0     | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0)           | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) |
+6.10.0     | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0)           | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) |
 6.9.0     | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0)              | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) |
 6.8.0     | [Miller 6.8.0](https://miller.readthedocs.io/en/6.8.0)              | [New case verb, index DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.8.0) |
 6.7.0     | [Miller 6.7.0](https://miller.readthedocs.io/en/6.7.0)              | [New leftpad/rightpad DSL functions, unspace verb, and more](https://github.com/johnkerl/miller/releases/tag/v6.7.0) |

From 02ff56bd21022189fab3bf816ae73e9dd56dff07 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 24 Jan 2024 09:40:39 -0500
Subject: [PATCH 123/456] Bump actions/upload-artifact from 4.2.0 to 4.3.0
 (#1483)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.2.0 to 4.3.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/694cdabd8bdb0f10b2cea11669e1bf5453eed0a6...26f96dfa697d77e81fd5907df203aa23a56210a8)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 9ff2f41a6..04a9f7c45 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@694cdabd8bdb0f10b2cea11669e1bf5453eed0a6
+    - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From c0e9be0e0c060d79b1d666589c04ef40fbb5af1f Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 24 Jan 2024 13:27:04 -0500
Subject: [PATCH 124/456] 6.11.0-dev (#1484)

* 6.11.0-dev

* 6.11.0-dev
---
 docs/src/manpage.md    | 2 +-
 docs/src/manpage.txt   | 2 +-
 man/manpage.txt        | 2 +-
 man/mlr.1              | 2 +-
 pkg/version/version.go | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 967d18d7f..442f006dc 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -50,7 +50,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.11.0.
+       manpage documents mlr 6.11.0-dev.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index eee5e7837..bc525f8de 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -29,7 +29,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.11.0.
+       manpage documents mlr 6.11.0-dev.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
diff --git a/man/manpage.txt b/man/manpage.txt
index eee5e7837..bc525f8de 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -29,7 +29,7 @@ MILLER(1)                                                            MILLER(1)
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.11.0.
+       manpage documents mlr 6.11.0-dev.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
diff --git a/man/mlr.1 b/man/mlr.1
index 18a426704..9a5cb0487 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the
 array, then Miller's natural data structure is the insertion-ordered hash map.
 This encompasses a variety of data formats, including but not limited to the
 familiar CSV, TSV, and JSON.  (Miller can handle positionally-indexed data as
-a special case.) This manpage documents mlr 6.11.0.
+a special case.) This manpage documents mlr 6.11.0-dev.
 .SH "EXAMPLES"
 .sp
 
diff --git a/pkg/version/version.go b/pkg/version/version.go
index 88e4fdfeb..f3d43c46e 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.11.0"
+var STRING string = "6.11.0-dev"

From 3a2149b9aed26c7d1b1faa3c4030019c83933f08 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 29 Jan 2024 08:33:10 -0500
Subject: [PATCH 125/456] Bump github.com/klauspost/compress from 1.16.7 to
 1.17.5 (#1486)

Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.16.7 to 1.17.5.
- [Release notes](https://github.com/klauspost/compress/releases)
- [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml)
- [Commits](https://github.com/klauspost/compress/compare/v1.16.7...v1.17.5)

---
updated-dependencies:
- dependency-name: github.com/klauspost/compress
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index c5a7600fd..19a377883 100644
--- a/go.mod
+++ b/go.mod
@@ -20,7 +20,7 @@ require (
 	github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
 	github.com/johnkerl/lumin v1.0.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
-	github.com/klauspost/compress v1.16.7
+	github.com/klauspost/compress v1.17.5
 	github.com/lestrrat-go/strftime v1.0.6
 	github.com/mattn/go-isatty v0.0.20
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
diff --git a/go.sum b/go.sum
index 82013f687..6a9f1daa4 100644
--- a/go.sum
+++ b/go.sum
@@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
-github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
-github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E=
+github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ=

From 62220ca0fae9dd332138c1373fadb1bdf22c3cac Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Mon, 5 Feb 2024 09:39:49 -0500
Subject: [PATCH 126/456] sort-link doc update

---
 docs/src/sorting.md    | 2 ++
 docs/src/sorting.md.in | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/docs/src/sorting.md b/docs/src/sorting.md
index 68e1f4a02..7d876eda2 100644
--- a/docs/src/sorting.md
+++ b/docs/src/sorting.md
@@ -214,6 +214,8 @@ a b c
 
 ## The sort function by example
 
+The Miller DSL has a [`sort`](reference-dsl-builtin-functions.md#sort) function:
+
 * It returns a sorted copy of an input array or map.
 * Without second argument, uses Miller's default ordering which is numbers numerically, then strings lexically.
 * With second which is string, takes sorting flags from it: `"f"` for lexical or `"c"` for case-folded lexical, or `"t"` for natural sort order. An additional `"r"` in this string is for reverse/descending.
diff --git a/docs/src/sorting.md.in b/docs/src/sorting.md.in
index 28617c697..0d59836e9 100644
--- a/docs/src/sorting.md.in
+++ b/docs/src/sorting.md.in
@@ -66,6 +66,8 @@ GENMD-EOF
 
 ## The sort function by example
 
+The Miller DSL has a [`sort`](reference-dsl-builtin-functions.md#sort) function:
+
 * It returns a sorted copy of an input array or map.
 * Without second argument, uses Miller's default ordering which is numbers numerically, then strings lexically.
 * With second which is string, takes sorting flags from it: `"f"` for lexical or `"c"` for case-folded lexical, or `"t"` for natural sort order. An additional `"r"` in this string is for reverse/descending.

From 2ea00b0e40c4006b294f36652a6ef4ff091865bf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 09:32:55 -0500
Subject: [PATCH 127/456] Bump actions/upload-artifact from 4.3.0 to 4.3.1
 (#1491)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.0 to 4.3.1.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/26f96dfa697d77e81fd5907df203aa23a56210a8...5d5d22a31266ced268874388b861e4b58bb5c2f3)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 04a9f7c45..428c57a4d 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+    - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 56d6730f213c478af64b5b84c37e979f8e39d8dc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 7 Feb 2024 20:39:08 -0500
Subject: [PATCH 128/456] Bump github.com/klauspost/compress from 1.17.5 to
 1.17.6 (#1492)

Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.5 to 1.17.6.
- [Release notes](https://github.com/klauspost/compress/releases)
- [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml)
- [Commits](https://github.com/klauspost/compress/compare/v1.17.5...v1.17.6)

---
updated-dependencies:
- dependency-name: github.com/klauspost/compress
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 19a377883..5a1ae56b3 100644
--- a/go.mod
+++ b/go.mod
@@ -20,7 +20,7 @@ require (
 	github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
 	github.com/johnkerl/lumin v1.0.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
-	github.com/klauspost/compress v1.17.5
+	github.com/klauspost/compress v1.17.6
 	github.com/lestrrat-go/strftime v1.0.6
 	github.com/mattn/go-isatty v0.0.20
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
diff --git a/go.sum b/go.sum
index 6a9f1daa4..5c6c5a382 100644
--- a/go.sum
+++ b/go.sum
@@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
-github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E=
-github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
+github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ=

From cd6d42736f5a60439f2aaec0caccad9814f3f0f4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 8 Feb 2024 09:34:39 -0500
Subject: [PATCH 129/456] Bump golang.org/x/term from 0.16.0 to 0.17.0 (#1494)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.16.0 to 0.17.0.
- [Commits](https://github.com/golang/term/compare/v0.16.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 4 ++--
 go.sum | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/go.mod b/go.mod
index 5a1ae56b3..5e1953883 100644
--- a/go.mod
+++ b/go.mod
@@ -26,8 +26,8 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/sys v0.16.0
-	golang.org/x/term v0.16.0
+	golang.org/x/sys v0.17.0
+	golang.org/x/term v0.17.0
 	golang.org/x/text v0.14.0
 )
 
diff --git a/go.sum b/go.sum
index 5c6c5a382..6bf3be875 100644
--- a/go.sum
+++ b/go.sum
@@ -40,10 +40,10 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
-golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
-golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
+golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
 golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From f5eaf290cf9d2822224bcf1e7c0749de49503bef Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 18 Feb 2024 10:56:26 -0500
Subject: [PATCH 130/456] mlr sparsify (#1498)

* mlr sparsify

* regression-test cases

* typofix

* Remove mods due to processor-architecture change
---
 docs/src/reference-verbs.md               |  55 ++++---
 docs/src/reference-verbs.md.in            |   6 +
 pkg/transformers/aaa_transformer_table.go |   1 +
 pkg/transformers/sparsify.go              | 192 ++++++++++++++++++++++
 test/cases/cli-help/0001/expout           |  12 ++
 test/cases/verb-sparsify/0001/cmd         |   1 +
 test/cases/verb-sparsify/0001/experr      |   0
 test/cases/verb-sparsify/0001/expout      |  17 ++
 test/cases/verb-sparsify/0002/cmd         |   1 +
 test/cases/verb-sparsify/0002/experr      |   0
 test/cases/verb-sparsify/0002/expout      |  21 +++
 test/cases/verb-sparsify/0003/cmd         |   1 +
 test/cases/verb-sparsify/0003/experr      |   0
 test/cases/verb-sparsify/0003/expout      |  21 +++
 test/cases/verb-sparsify/0004/cmd         |   1 +
 test/cases/verb-sparsify/0004/experr      |   0
 test/cases/verb-sparsify/0004/expout      |  19 +++
 test/cases/verb-sparsify/0005/cmd         |   1 +
 test/cases/verb-sparsify/0005/experr      |   0
 test/cases/verb-sparsify/0005/expout      |  21 +++
 test/cases/verb-sparsify/0006/cmd         |   1 +
 test/cases/verb-sparsify/0006/experr      |   0
 test/cases/verb-sparsify/0006/expout      |  21 +++
 test/cases/verb-sparsify/0007/cmd         |   1 +
 test/cases/verb-sparsify/0007/experr      |   0
 test/cases/verb-sparsify/0007/expout      |  22 +++
 test/cases/verb-sparsify/0008/cmd         |   1 +
 test/cases/verb-sparsify/0008/experr      |   0
 test/cases/verb-sparsify/0008/expout      |  22 +++
 test/input/sparsify-input.csv             |   5 +
 30 files changed, 424 insertions(+), 19 deletions(-)
 create mode 100644 pkg/transformers/sparsify.go
 create mode 100644 test/cases/verb-sparsify/0001/cmd
 create mode 100644 test/cases/verb-sparsify/0001/experr
 create mode 100644 test/cases/verb-sparsify/0001/expout
 create mode 100644 test/cases/verb-sparsify/0002/cmd
 create mode 100644 test/cases/verb-sparsify/0002/experr
 create mode 100644 test/cases/verb-sparsify/0002/expout
 create mode 100644 test/cases/verb-sparsify/0003/cmd
 create mode 100644 test/cases/verb-sparsify/0003/experr
 create mode 100644 test/cases/verb-sparsify/0003/expout
 create mode 100644 test/cases/verb-sparsify/0004/cmd
 create mode 100644 test/cases/verb-sparsify/0004/experr
 create mode 100644 test/cases/verb-sparsify/0004/expout
 create mode 100644 test/cases/verb-sparsify/0005/cmd
 create mode 100644 test/cases/verb-sparsify/0005/experr
 create mode 100644 test/cases/verb-sparsify/0005/expout
 create mode 100644 test/cases/verb-sparsify/0006/cmd
 create mode 100644 test/cases/verb-sparsify/0006/experr
 create mode 100644 test/cases/verb-sparsify/0006/expout
 create mode 100644 test/cases/verb-sparsify/0007/cmd
 create mode 100644 test/cases/verb-sparsify/0007/experr
 create mode 100644 test/cases/verb-sparsify/0007/expout
 create mode 100644 test/cases/verb-sparsify/0008/cmd
 create mode 100644 test/cases/verb-sparsify/0008/experr
 create mode 100644 test/cases/verb-sparsify/0008/expout
 create mode 100644 test/input/sparsify-input.csv

diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index cfa66dd82..417aa4d9b 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -3126,6 +3126,23 @@ a b c
 9 8 7
 
+## sparsify + +
+mlr sparsify --help
+
+
+Usage: mlr sparsify [options]
+Unsets fields for which the key is the empty string (or, optionally, another
+specified value). Only makes sense with output format not being CSV or TSV.
+Options:
+-s {filler string} What values to remove. Defaults to the empty string.
+-f {a,b,c} Specify field names to be operated on; any other fields won't be
+           modified. The default is to modify all fields.
+-h|--help  Show this message.
+Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+
+ ## split
@@ -3409,14 +3426,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3425,12 +3442,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3516,11 +3533,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943
## step @@ -3797,9 +3814,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in index 44feda3de..8959ebf6b 100644 --- a/docs/src/reference-verbs.md.in +++ b/docs/src/reference-verbs.md.in @@ -995,6 +995,12 @@ GENMD-RUN-COMMAND mlr --ijson --opprint sort-within-records data/sort-within-records.json GENMD-EOF +## sparsify + +GENMD-RUN-COMMAND +mlr sparsify --help +GENMD-EOF + ## split GENMD-RUN-COMMAND diff --git a/pkg/transformers/aaa_transformer_table.go b/pkg/transformers/aaa_transformer_table.go index ed98af07f..34a5b6ea8 100644 --- a/pkg/transformers/aaa_transformer_table.go +++ b/pkg/transformers/aaa_transformer_table.go @@ -62,6 +62,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ SkipTrivialRecordsSetup, SortSetup, SortWithinRecordsSetup, + SparsifySetup, SplitSetup, SsubSetup, Stats1Setup, diff --git a/pkg/transformers/sparsify.go b/pkg/transformers/sparsify.go new file mode 100644 index 000000000..b6ae40c51 --- /dev/null +++ b/pkg/transformers/sparsify.go @@ -0,0 +1,192 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" +) + +// 
---------------------------------------------------------------- +const verbNameSparsify = "sparsify" + +var SparsifySetup = TransformerSetup{ + Verb: verbNameSparsify, + UsageFunc: transformerSparsifyUsage, + ParseCLIFunc: transformerSparsifyParseCLI, + IgnoresInput: false, +} + +func transformerSparsifyUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSparsify) + fmt.Fprint(o, + `Unsets fields for which the key is the empty string (or, optionally, another +specified value). Only makes sense with output format not being CSV or TSV. +`) + + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-s {filler string} What values to remove. Defaults to the empty string.\n") + fmt.Fprintf(o, "-f {a,b,c} Specify field names to be operated on; any other fields won't be\n") + fmt.Fprintf(o, " modified. The default is to modify all fields.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") + + fmt.Fprint(o, + `Example: if input is a=1,b=,c=3 then output is a=1,c=3. 
+`) +} + +func transformerSparsifyParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + fillerString := "" + var specifiedFieldNames []string = nil + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSparsifyUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-s" { + fillerString = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + + } else if opt == "-f" { + specifiedFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + + } else { + transformerSparsifyUsage(os.Stderr) + os.Exit(1) + } + } + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSparsify( + fillerString, + specifiedFieldNames, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSparsify struct { + fillerString string + fieldNamesSet map[string]bool + recordTransformerFunc RecordTransformerFunc +} + +func NewTransformerSparsify( + fillerString string, + specifiedFieldNames []string, +) (*TransformerSparsify, error) { + + tr := &TransformerSparsify{ + fillerString: fillerString, + fieldNamesSet: lib.StringListToSet(specifiedFieldNames), + } + if specifiedFieldNames == nil { + tr.recordTransformerFunc = tr.transformAll + } else { + tr.recordTransformerFunc = tr.transformSome + } + + return tr, nil +} + +func (tr 
*TransformerSparsify) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + inputDownstreamDoneChannel, + outputDownstreamDoneChannel, + ) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + } +} + +func (tr *TransformerSparsify) transformAll( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + + for pe := inrec.Head; pe != nil; pe = pe.Next { + if pe.Value.String() != tr.fillerString { + // Reference OK because ownership transfer + outrec.PutReference(pe.Key, pe.Value) + } + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} + +// ---------------------------------------------------------------- +func (tr *TransformerSparsify) transformSome( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + + for pe := inrec.Head; pe != nil; pe = pe.Next { + if tr.fieldNamesSet[pe.Key] { + if pe.Value.String() != tr.fillerString { + // Reference OK because ownership transfer + outrec.PutReference(pe.Key, pe.Value) + } + } else { + outrec.PutReference(pe.Key, pe.Value) + } + } + + outrecAndContext := types.NewRecordAndContext(outrec, 
&inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index b25e4a56d..95b4d3f14 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -988,6 +988,18 @@ Options: -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. +================================================================ +sparsify +Usage: mlr sparsify [options] +Unsets fields for which the key is the empty string (or, optionally, another +specified value). Only makes sense with output format not being CSV or TSV. +Options: +-s {filler string} What values to remove. Defaults to the empty string. +-f {a,b,c} Specify field names to be operated on; any other fields won't be + modified. The default is to modify all fields. +-h|--help Show this message. +Example: if input is a=1,b=,c=3 then output is a=1,c=3. + ================================================================ split Usage: mlr split [options] {filename} diff --git a/test/cases/verb-sparsify/0001/cmd b/test/cases/verb-sparsify/0001/cmd new file mode 100644 index 000000000..38ec29b15 --- /dev/null +++ b/test/cases/verb-sparsify/0001/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify diff --git a/test/cases/verb-sparsify/0001/experr b/test/cases/verb-sparsify/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0001/expout b/test/cases/verb-sparsify/0001/expout new file mode 100644 index 000000000..e9c9893a9 --- /dev/null +++ b/test/cases/verb-sparsify/0001/expout @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0002/cmd b/test/cases/verb-sparsify/0002/cmd new file mode 100644 index 000000000..3ac1c9630 --- /dev/null +++ b/test/cases/verb-sparsify/0002/cmd @@ -0,0 +1 @@ +mlr --c2j --from 
test/input/sparsify-input.csv sparsify -f a diff --git a/test/cases/verb-sparsify/0002/experr b/test/cases/verb-sparsify/0002/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0002/expout b/test/cases/verb-sparsify/0002/expout new file mode 100644 index 000000000..8bc89d0aa --- /dev/null +++ b/test/cases/verb-sparsify/0002/expout @@ -0,0 +1,21 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0003/cmd b/test/cases/verb-sparsify/0003/cmd new file mode 100644 index 000000000..fc08ebef9 --- /dev/null +++ b/test/cases/verb-sparsify/0003/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b diff --git a/test/cases/verb-sparsify/0003/experr b/test/cases/verb-sparsify/0003/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0003/expout b/test/cases/verb-sparsify/0003/expout new file mode 100644 index 000000000..b607e3893 --- /dev/null +++ b/test/cases/verb-sparsify/0003/expout @@ -0,0 +1,21 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0004/cmd b/test/cases/verb-sparsify/0004/cmd new file mode 100644 index 000000000..5ea1aa7bd --- /dev/null +++ b/test/cases/verb-sparsify/0004/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b,c diff --git a/test/cases/verb-sparsify/0004/experr b/test/cases/verb-sparsify/0004/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0004/expout b/test/cases/verb-sparsify/0004/expout new file mode 100644 index 000000000..ebf9878cd --- /dev/null +++ b/test/cases/verb-sparsify/0004/expout @@ -0,0 +1,19 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{ + "a": "" +}, +{ + "a": 7, + "b": 
8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0005/cmd b/test/cases/verb-sparsify/0005/cmd new file mode 100644 index 000000000..012aee2b6 --- /dev/null +++ b/test/cases/verb-sparsify/0005/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -s 1 diff --git a/test/cases/verb-sparsify/0005/experr b/test/cases/verb-sparsify/0005/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0005/expout b/test/cases/verb-sparsify/0005/expout new file mode 100644 index 000000000..839476d58 --- /dev/null +++ b/test/cases/verb-sparsify/0005/expout @@ -0,0 +1,21 @@ +[ +{ + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0006/cmd b/test/cases/verb-sparsify/0006/cmd new file mode 100644 index 000000000..42567786a --- /dev/null +++ b/test/cases/verb-sparsify/0006/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f a -s 1 diff --git a/test/cases/verb-sparsify/0006/experr b/test/cases/verb-sparsify/0006/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0006/expout b/test/cases/verb-sparsify/0006/expout new file mode 100644 index 000000000..839476d58 --- /dev/null +++ b/test/cases/verb-sparsify/0006/expout @@ -0,0 +1,21 @@ +[ +{ + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0007/cmd b/test/cases/verb-sparsify/0007/cmd new file mode 100644 index 000000000..99b590da4 --- /dev/null +++ b/test/cases/verb-sparsify/0007/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b -s 1 diff --git a/test/cases/verb-sparsify/0007/experr b/test/cases/verb-sparsify/0007/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0007/expout 
b/test/cases/verb-sparsify/0007/expout new file mode 100644 index 000000000..d7f95feba --- /dev/null +++ b/test/cases/verb-sparsify/0007/expout @@ -0,0 +1,22 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0008/cmd b/test/cases/verb-sparsify/0008/cmd new file mode 100644 index 000000000..b943d2c79 --- /dev/null +++ b/test/cases/verb-sparsify/0008/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b,c -s 1 diff --git a/test/cases/verb-sparsify/0008/experr b/test/cases/verb-sparsify/0008/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-sparsify/0008/expout b/test/cases/verb-sparsify/0008/expout new file mode 100644 index 000000000..d7f95feba --- /dev/null +++ b/test/cases/verb-sparsify/0008/expout @@ -0,0 +1,22 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/input/sparsify-input.csv b/test/input/sparsify-input.csv new file mode 100644 index 000000000..16916596e --- /dev/null +++ b/test/input/sparsify-input.csv @@ -0,0 +1,5 @@ +a,b,c +1,2,3 +4,5, +,, +7,8,9 From 0424320199d43182dd8810e17fe6dd5a7ced9453 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 18 Feb 2024 13:54:42 -0500 Subject: [PATCH 131/456] make dev artifacts for sparsify --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/manpage.md | 27 ++++++++++++------- docs/src/manpage.txt | 27 ++++++++++++------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 27 ++++++++++++------- man/mlr.1 | 27 +++++++++++++++---- 6 files changed, 98 insertions(+), 60 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193..100716ec2 100644 --- a/docs/src/data-diving-examples.md +++ 
b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 442f006dc..2d7935bd6 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,9 +19,7 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -199,9 +197,9 @@ MILLER(1)                                                            MILLER(1)
        json-parse json-stringify join label latin1-to-utf8 least-frequent
        merge-fields most-frequent nest nothing put regularize remove-empty-columns
        rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-       unsparsify
+       skip-trivial-records sort sort-within-records sparsify split ssub stats1
+       stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+       uniq unspace unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
@@ -810,7 +808,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1840,6 +1838,17 @@ MILLER(1)                                                            MILLER(1)
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   1msparsify0m
+       Usage: mlr sparsify [options]
+       Unsets fields for which the key is the empty string (or, optionally, another
+       specified value). Only makes sense with output format not being CSV or TSV.
+       Options:
+       -s {filler string} What values to remove. Defaults to the empty string.
+       -f {a,b,c} Specify field names to be operated on; any other fields won't be
+                  modified. The default is to modify all fields.
+       -h|--help  Show this message.
+       Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+
    1msplit0m
        Usage: mlr split [options] {filename}
        Options:
@@ -3690,7 +3699,5 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-01-23                         MILLER(1)
+                                  2024-02-18                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index bc525f8de..151b0fc33 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,6 +1,4 @@ -MILLER(1) MILLER(1) - - +4mMILLER24m(1) 4mMILLER24m(1) 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -178,9 +176,9 @@ MILLER(1) MILLER(1) json-parse json-stringify join label latin1-to-utf8 least-frequent merge-fields most-frequent nest nothing put regularize remove-empty-columns rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle - skip-trivial-records sort sort-within-records split ssub stats1 stats2 step - sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace - unsparsify + skip-trivial-records sort sort-within-records sparsify split ssub stats1 + stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten + uniq unspace unsparsify 1mFUNCTION LIST0m abs acos acosh antimode any append apply arrayify asin asinh asserting_absent @@ -789,7 +787,7 @@ MILLER(1) MILLER(1) markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -1819,6 +1817,17 @@ MILLER(1) MILLER(1) -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. + 1msparsify0m + Usage: mlr sparsify [options] + Unsets fields for which the key is the empty string (or, optionally, another + specified value). Only makes sense with output format not being CSV or TSV. + Options: + -s {filler string} What values to remove. Defaults to the empty string. + -f {a,b,c} Specify field names to be operated on; any other fields won't be + modified. The default is to modify all fields. + -h|--help Show this message. + Example: if input is a=1,b=,c=3 then output is a=1,c=3. 
+ 1msplit0m Usage: mlr split [options] {filename} Options: @@ -3669,6 +3678,4 @@ MILLER(1) MILLER(1) MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - - - 2024-01-23 MILLER(1) + 2024-02-18 4mMILLER24m(1) diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/man/manpage.txt b/man/manpage.txt
index bc525f8de..151b0fc33 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1,6 +1,4 @@
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -178,9 +176,9 @@ MILLER(1)                                                            MILLER(1)
        json-parse json-stringify join label latin1-to-utf8 least-frequent
        merge-fields most-frequent nest nothing put regularize remove-empty-columns
        rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-       unsparsify
+       skip-trivial-records sort sort-within-records sparsify split ssub stats1
+       stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+       uniq unspace unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
@@ -789,7 +787,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1819,6 +1817,17 @@ MILLER(1)                                                            MILLER(1)
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   1msparsify0m
+       Usage: mlr sparsify [options]
+       Unsets fields for which the key is the empty string (or, optionally, another
+       specified value). Only makes sense with output format not being CSV or TSV.
+       Options:
+       -s {filler string} What values to remove. Defaults to the empty string.
+       -f {a,b,c} Specify field names to be operated on; any other fields won't be
+                  modified. The default is to modify all fields.
+       -h|--help  Show this message.
+       Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+
    1msplit0m
        Usage: mlr split [options] {filename}
        Options:
@@ -3669,6 +3678,4 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-01-23                         MILLER(1)
+                                  2024-02-18                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 9a5cb0487..3d5c75b4e 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-01-23
+.\"      Date: 2024-02-18
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-01-23" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-02-18" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -219,9 +219,9 @@ fraction gap grep group-by group-like gsub having-fields head histogram
 json-parse json-stringify join label latin1-to-utf8 least-frequent
 merge-fields most-frequent nest nothing put regularize remove-empty-columns
 rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-unsparsify
+skip-trivial-records sort sort-within-records sparsify split ssub stats1
+stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+uniq unspace unsparsify
 .fi
 .if n \{\
 .RE
@@ -2298,6 +2298,23 @@ Options:
 .fi
 .if n \{\
 .RE
+.SS "sparsify"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr sparsify [options]
+Unsets fields for which the key is the empty string (or, optionally, another
+specified value). Only makes sense with output format not being CSV or TSV.
+Options:
+-s {filler string} What values to remove. Defaults to the empty string.
+-f {a,b,c} Specify field names to be operated on; any other fields won't be
+           modified. The default is to modify all fields.
+-h|--help  Show this message.
+Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+.fi
+.if n \{\
+.RE
 .SS "split"
 .if n \{\
 .RS 0

From 7bd460a3b8c4c86535f31527f7355150a95a6bf5 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 18 Feb 2024 14:01:46 -0500
Subject: [PATCH 132/456] Support thousands separator in `fmtnum` (#1499)

* Support thousands separator in `fmtnum`

* doc bits
---
 docs/src/manpage.md                         | 11 ++-
 docs/src/manpage.txt                        | 11 ++-
 docs/src/reference-dsl-builtin-functions.md | 11 ++-
 man/manpage.txt                             | 11 ++-
 man/mlr.1                                   | 11 ++-
 pkg/dsl/cst/builtin_function_manager.go     |  9 +-
 pkg/mlrval/mlrval_format.go                 | 93 ++++++++++++++++++++-
 7 files changed, 138 insertions(+), 19 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 2d7935bd6..4b80cdeee 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -2416,9 +2416,14 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone
 
    1mfmtnum0m
-        (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.
-       Example:
-       $x = fmtnum($x, "%.6f")
+        (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values.
+       Examples:
+       $y = fmtnum($x, "%.6f")
+       $o = fmtnum($n, "%d")
+       $o = fmtnum($n, "%12d")
+       $y = fmtnum($x, "%.6_f")
+       $o = fmtnum($n, "%_d")
+       $o = fmtnum($n, "%12_d")
 
    1mfold0m
         (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 151b0fc33..15c9d6232 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -2395,9 +2395,14 @@
        $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone
 
    1mfmtnum0m
-        (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.
-       Example:
-       $x = fmtnum($x, "%.6f")
+        (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values.
+       Examples:
+       $y = fmtnum($x, "%.6f")
+       $o = fmtnum($n, "%d")
+       $o = fmtnum($n, "%12d")
+       $y = fmtnum($x, "%.6_f")
+       $o = fmtnum($n, "%_d")
+       $o = fmtnum($n, "%12_d")
 
    1mfold0m
         (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument.
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md
index 3a55821f3..dbfafc04f 100644
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@@ -534,9 +534,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving
 
 ### fmtnum
 
-fmtnum  (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.
-Example:
-$x = fmtnum($x, "%.6f")
+fmtnum  (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values.
+Examples:
+$y = fmtnum($x, "%.6f")
+$o = fmtnum($n, "%d")
+$o = fmtnum($n, "%12d")
+$y = fmtnum($x, "%.6_f")
+$o = fmtnum($n, "%_d")
+$o = fmtnum($n, "%12_d")
 
diff --git a/man/manpage.txt b/man/manpage.txt index 151b0fc33..15c9d6232 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2395,9 +2395,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone 1mfmtnum0m - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. - Example: - $x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. + Examples: + $y = fmtnum($x, "%.6f") + $o = fmtnum($n, "%d") + $o = fmtnum($n, "%12d") + $y = fmtnum($x, "%.6_f") + $o = fmtnum($n, "%_d") + $o = fmtnum($n, "%12_d") 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. diff --git a/man/mlr.1 b/man/mlr.1 index 3d5c75b4e..505cb8d08 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -3338,9 +3338,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving .RS 0 .\} .nf - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. 
-Example: -$x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. +Examples: +$y = fmtnum($x, "%.6f") +$o = fmtnum($n, "%d") +$o = fmtnum($n, "%12d") +$y = fmtnum($x, "%.6_f") +$o = fmtnum($n, "%_d") +$o = fmtnum($n, "%12_d") .fi .if n \{\ .RE diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index 965c9529b..90f854fe0 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -2000,10 +2000,15 @@ Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather t name: "fmtnum", class: FUNC_CLASS_CONVERSION, help: `Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. -'$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.`, +'$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. 
This function recurses on array and map values.`, binaryFunc: bifs.BIF_fmtnum, examples: []string{ - `$x = fmtnum($x, "%.6f")`, + `$y = fmtnum($x, "%.6f")`, + `$o = fmtnum($n, "%d")`, + `$o = fmtnum($n, "%12d")`, + `$y = fmtnum($x, "%.6_f")`, + `$o = fmtnum($n, "%_d")`, + `$o = fmtnum($n, "%12_d")`, }, }, diff --git a/pkg/mlrval/mlrval_format.go b/pkg/mlrval/mlrval_format.go index 9f4cb2fe6..661cad153 100644 --- a/pkg/mlrval/mlrval_format.go +++ b/pkg/mlrval/mlrval_format.go @@ -2,8 +2,12 @@ package mlrval import ( "fmt" + "os" "strconv" "strings" + + "golang.org/x/text/language" + "golang.org/x/text/message" ) //---------------------------------------------------------------- @@ -103,9 +107,14 @@ func newFormatter( goFormatString = strings.ReplaceAll(goFormatString, "le", "e") goFormatString = strings.ReplaceAll(goFormatString, "lg", "g") - // MIller 5 and below required C format strings compatible with 64-bit ints + // Miller 5 and below required C format strings compatible with 64-bit ints // and double-precision floats: e.g. "%08lld" and "%9.6lf". For Miller 6, - // We must still accept these for backward compatibility. + // we must still accept these for backward compatibility. 
+ if strings.HasSuffix(goFormatString, "_d") { + // Special sub-case of "d"; must be checked first + n := len(goFormatString) + return newFormatterToSeparatedInt(goFormatString[:n-2] + "d"), nil + } if strings.HasSuffix(goFormatString, "d") { return newFormatterToInt(goFormatString), nil } @@ -113,6 +122,11 @@ func newFormatter( return newFormatterToInt(goFormatString), nil } + if strings.HasSuffix(goFormatString, "_f") { + // Special sub-case of "f"; must be checked first + n := len(goFormatString) + return newFormatterToSeparatedFloat(goFormatString[:n-2] + "f"), nil + } if strings.HasSuffix(goFormatString, "f") { return newFormatterToFloat(goFormatString), nil } @@ -164,6 +178,81 @@ func (formatter *formatterToFloat) FormatFloat(floatValue float64) string { // ---------------------------------------------------------------- +func getLanguageTag() language.Tag { + v, ok := os.LookupEnv("LANG") + if ok { + return language.Make(v) + } else { + return language.Make("en") + } +} + +// ---------------------------------------------------------------- + +type formatterToSeparatedInt struct { + goFormatString string + printer *message.Printer +} + +func newFormatterToSeparatedInt(goFormatString string) IFormatter { + return &formatterToSeparatedInt{ + goFormatString: goFormatString, + printer: message.NewPrinter(getLanguageTag()), + } +} + +func (formatter *formatterToSeparatedInt) Format(mv *Mlrval) *Mlrval { + intValue, isInt := mv.GetIntValue() + if isInt { + formatted := formatter.printer.Sprintf(formatter.goFormatString, intValue) + return TryFromIntString(formatted) + } + floatValue, isFloat := mv.GetFloatValue() + if isFloat { + formatted := formatter.printer.Sprintf(formatter.goFormatString, int(floatValue)) + return TryFromIntString(formatted) + } + return mv +} + +func (formatter *formatterToSeparatedInt) FormatFloat(floatValue float64) string { + return formatter.printer.Sprintf(formatter.goFormatString, int(floatValue)) +} + +// 
---------------------------------------------------------------- + +type formatterToSeparatedFloat struct { + goFormatString string + printer *message.Printer +} + +func newFormatterToSeparatedFloat(goFormatString string) IFormatter { + return &formatterToSeparatedFloat{ + goFormatString: goFormatString, + printer: message.NewPrinter(getLanguageTag()), + } +} + +func (formatter *formatterToSeparatedFloat) Format(mv *Mlrval) *Mlrval { + floatValue, isFloat := mv.GetFloatValue() + if isFloat { + formatted := formatter.printer.Sprintf(formatter.goFormatString, floatValue) + return TryFromFloatString(formatted) + } + intValue, isInt := mv.GetIntValue() + if isInt { + formatted := formatter.printer.Sprintf(formatter.goFormatString, float64(intValue)) + return TryFromFloatString(formatted) + } + return mv +} + +func (formatter *formatterToSeparatedFloat) FormatFloat(floatValue float64) string { + return formatter.printer.Sprintf(formatter.goFormatString, floatValue) +} + +// ---------------------------------------------------------------- + type formatterToInt struct { goFormatString string } From 296ff87ae246e9d2b7703d20f9792c9ecdcf4267 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 Feb 2024 09:08:13 -0500 Subject: [PATCH 133/456] Bump github.com/klauspost/compress from 1.17.6 to 1.17.7 (#1502) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.6 to 1.17.7. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.6...v1.17.7) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 5e1953883..947ce4857 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.17.6 + github.com/klauspost/compress v1.17.7 github.com/lestrrat-go/strftime v1.0.6 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 diff --git a/go.sum b/go.sum index 6bf3be875..71047a014 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= -github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= From 57b32c3e9b182b5e10341ffc9e2cef365134c2d3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 24 
Feb 2024 22:07:56 -0500 Subject: [PATCH 134/456] Separate out `ILineReader` abstraction (#1504) * Split up pkg/input/record_reader.go * new ILineReader/TLineReader --- pkg/input/constants.go | 3 + pkg/input/line_reader.go | 126 ++++++++++++++++++++ pkg/input/record_reader.go | 171 --------------------------- pkg/input/record_reader_csvlite.go | 4 +- pkg/input/record_reader_dkvp_nidx.go | 4 +- pkg/input/record_reader_json.go | 9 +- pkg/input/record_reader_pprint.go | 4 +- pkg/input/record_reader_tsv.go | 4 +- pkg/input/record_reader_xtab.go | 11 +- pkg/input/splitters.go | 77 ++++++++++++ 10 files changed, 223 insertions(+), 190 deletions(-) create mode 100644 pkg/input/constants.go create mode 100644 pkg/input/line_reader.go create mode 100644 pkg/input/splitters.go diff --git a/pkg/input/constants.go b/pkg/input/constants.go new file mode 100644 index 000000000..42030c3eb --- /dev/null +++ b/pkg/input/constants.go @@ -0,0 +1,3 @@ +package input + +const CSV_BOM = "\xef\xbb\xbf" diff --git a/pkg/input/line_reader.go b/pkg/input/line_reader.go new file mode 100644 index 000000000..c6b272609 --- /dev/null +++ b/pkg/input/line_reader.go @@ -0,0 +1,126 @@ +// This file contains the interface for file-format-specific record-readers, as +// well as a collection of utility functions. + +package input + +import ( + "bufio" + "container/list" + "io" +) + +type ILineReader interface { + Scan() bool + Text() string +} + +type TLineReader struct { + scanner *bufio.Scanner +} + +// NewLineReader handles reading lines which may be delimited by multi-line separators, +// e.g. "\xe2\x90\x9e" for USV. +func NewLineReader(handle io.Reader, irs string) *TLineReader { + scanner := bufio.NewScanner(handle) + + if irs == "\n" || irs == "\r\n" { + // Handled by default scanner. 
+ } else { + irsbytes := []byte(irs) + irslen := len(irsbytes) + + // Custom splitter + recordSplitter := func( + data []byte, + atEOF bool, + ) ( + advance int, + token []byte, + err error, + ) { + datalen := len(data) + end := datalen - irslen + for i := 0; i <= end; i++ { + if data[i] == irsbytes[0] { + match := true + for j := 1; j < irslen; j++ { + if data[i+j] != irsbytes[j] { + match = false + break + } + } + if match { + return i + irslen, data[:i], nil + } + } + } + if !atEOF { + return 0, nil, nil + } + // There is one final token to be delivered, which may be the empty string. + // Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this + // but does not trigger an error to be returned from Scan itself. + return 0, data, bufio.ErrFinalToken + } + + scanner.Split(recordSplitter) + } + + return &TLineReader{ + scanner: scanner, + } +} + +func (r *TLineReader) Scan() bool { + return r.scanner.Scan() +} + +func (r *TLineReader) Text() string { + return r.scanner.Text() +} + +// TODO: comment copiously +// +// Lines are written to the channel with their trailing newline (or whatever +// IRS) stripped off. So, callers get "a=1,b=2" rather than "a=1,b=2\n". +func channelizedLineReader( + lineReader ILineReader, + linesChannel chan<- *list.List, + downstreamDoneChannel <-chan bool, // for mlr head + recordsPerBatch int64, +) { + i := int64(0) + done := false + + lines := list.New() + + for lineReader.Scan() { + i++ + + lines.PushBack(lineReader.Text()) + + // See if downstream processors will be ignoring further data (e.g. mlr + // head). If so, stop reading. This makes 'mlr head hugefile' exit + // quickly, as it should. 
+ if i%recordsPerBatch == 0 { + select { + case _ = <-downstreamDoneChannel: + done = true + break + default: + break + } + if done { + break + } + linesChannel <- lines + lines = list.New() + } + + if done { + break + } + } + linesChannel <- lines + close(linesChannel) // end-of-stream marker +} diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go index 096060e62..62a411f22 100644 --- a/pkg/input/record_reader.go +++ b/pkg/input/record_reader.go @@ -4,19 +4,11 @@ package input import ( - "bufio" "container/list" - "io" - "regexp" - "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" "github.com/johnkerl/miller/pkg/types" ) -const CSV_BOM = "\xef\xbb\xbf" - // Since Go is concurrent, the context struct (AWK-like variables such as // FILENAME, NF, NF, FNR, etc.) needs to be duplicated and passed through the // channels along with each record. Hence the initial context, which readers @@ -32,166 +24,3 @@ type IRecordReader interface { downstreamDoneChannel <-chan bool, // for mlr head ) } - -// NewLineScanner handles read lines which may be delimited by multi-line separators, -// e.g. "\xe2\x90\x9e" for USV. -func NewLineScanner(handle io.Reader, irs string) *bufio.Scanner { - scanner := bufio.NewScanner(handle) - - // Handled by default scanner. - if irs == "\n" || irs == "\r\n" { - return scanner - } - - irsbytes := []byte(irs) - irslen := len(irsbytes) - - // Custom splitter - recordSplitter := func( - data []byte, - atEOF bool, - ) ( - advance int, - token []byte, - err error, - ) { - datalen := len(data) - end := datalen - irslen - for i := 0; i <= end; i++ { - if data[i] == irsbytes[0] { - match := true - for j := 1; j < irslen; j++ { - if data[i+j] != irsbytes[j] { - match = false - break - } - } - if match { - return i + irslen, data[:i], nil - } - } - } - if !atEOF { - return 0, nil, nil - } - // There is one final token to be delivered, which may be the empty string. 
- // Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this - // but does not trigger an error to be returned from Scan itself. - return 0, data, bufio.ErrFinalToken - } - - scanner.Split(recordSplitter) - - return scanner -} - -// TODO: comment copiously -// -// Lines are written to the channel with their trailing newline (or whatever -// IRS) stripped off. So, callers get "a=1,b=2" rather than "a=1,b=2\n". -func channelizedLineScanner( - lineScanner *bufio.Scanner, - linesChannel chan<- *list.List, - downstreamDoneChannel <-chan bool, // for mlr head - recordsPerBatch int64, -) { - i := int64(0) - done := false - - lines := list.New() - - for lineScanner.Scan() { - i++ - - lines.PushBack(lineScanner.Text()) - - // See if downstream processors will be ignoring further data (e.g. mlr - // head). If so, stop reading. This makes 'mlr head hugefile' exit - // quickly, as it should. - if i%recordsPerBatch == 0 { - select { - case _ = <-downstreamDoneChannel: - done = true - break - default: - break - } - if done { - break - } - linesChannel <- lines - lines = list.New() - } - - if done { - break - } - } - linesChannel <- lines - close(linesChannel) // end-of-stream marker -} - -// IPairSplitter splits a string into left and right, e.g. for IPS. -// This helps us reuse code for splitting by IPS string, or IPS regex. 
-type iPairSplitter interface { - Split(input string) []string -} - -func newPairSplitter(options *cli.TReaderOptions) iPairSplitter { - if options.IPSRegex == nil { - return &tIPSSplitter{ips: options.IPS} - } else { - return &tIPSRegexSplitter{ipsRegex: options.IPSRegex} - } -} - -type tIPSSplitter struct { - ips string -} - -func (s *tIPSSplitter) Split(input string) []string { - return strings.SplitN(input, s.ips, 2) -} - -type tIPSRegexSplitter struct { - ipsRegex *regexp.Regexp -} - -func (s *tIPSRegexSplitter) Split(input string) []string { - return lib.RegexCompiledSplitString(s.ipsRegex, input, 2) -} - -// IFieldSplitter splits a string into pieces, e.g. for IFS. -// This helps us reuse code for splitting by IFS string, or IFS regex. -type iFieldSplitter interface { - Split(input string) []string -} - -func newFieldSplitter(options *cli.TReaderOptions) iFieldSplitter { - if options.IFSRegex == nil { - return &tIFSSplitter{ifs: options.IFS, allowRepeatIFS: options.AllowRepeatIFS} - } else { - return &tIFSRegexSplitter{ifsRegex: options.IFSRegex} - } -} - -type tIFSSplitter struct { - ifs string - allowRepeatIFS bool -} - -func (s *tIFSSplitter) Split(input string) []string { - fields := lib.SplitString(input, s.ifs) - if s.allowRepeatIFS { - fields = lib.StripEmpties(fields) // left/right trim - } - return fields -} - -type tIFSRegexSplitter struct { - ifsRegex *regexp.Regexp -} - -func (s *tIFSRegexSplitter) Split(input string) []string { - return lib.RegexCompiledSplitString(s.ifsRegex, input, -1) -} diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go index 3664d6ea3..bfc188719 100644 --- a/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go @@ -144,9 +144,9 @@ func (reader *RecordReaderCSVLite) processHandle( reader.headerStrings = nil recordsPerBatch := reader.recordsPerBatch - lineScanner := NewLineScanner(handle, reader.readerOptions.IRS) + lineReader := NewLineReader(handle, 
reader.readerOptions.IRS) linesChannel := make(chan *list.List, recordsPerBatch) - go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) + go channelizedLineReader(lineReader, linesChannel, downstreamDoneChannel, recordsPerBatch) for { recordsAndContexts, eof := reader.recordBatchGetter(reader, linesChannel, filename, context, errorChannel) diff --git a/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go index 5cd92f77d..943fbcb5e 100644 --- a/pkg/input/record_reader_dkvp_nidx.go +++ b/pkg/input/record_reader_dkvp_nidx.go @@ -101,9 +101,9 @@ func (reader *RecordReaderDKVPNIDX) processHandle( context.UpdateForStartOfFile(filename) recordsPerBatch := reader.recordsPerBatch - lineScanner := NewLineScanner(handle, reader.readerOptions.IRS) + lineReader := NewLineReader(handle, reader.readerOptions.IRS) linesChannel := make(chan *list.List, recordsPerBatch) - go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) + go channelizedLineReader(lineReader, linesChannel, downstreamDoneChannel, recordsPerBatch) for { recordsAndContexts, eof := reader.getRecordBatch(linesChannel, errorChannel, context) diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 27b9b8e2c..1607fb0a5 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -1,7 +1,6 @@ package input import ( - "bufio" "container/list" "fmt" "io" @@ -203,7 +202,7 @@ func (reader *RecordReaderJSON) processHandle( // JSONCommentEnabledReader implements io.Reader to strip comment lines // off of CSV data. 
type JSONCommentEnabledReader struct { - lineScanner *bufio.Scanner + lineReader ILineReader readerOptions *cli.TReaderOptions context *types.Context // Needed for channelized stdout-printing logic readerChannel chan<- *list.List // list of *types.RecordAndContext @@ -220,7 +219,7 @@ func NewJSONCommentEnabledReader( readerChannel chan<- *list.List, // list of *types.RecordAndContext ) *JSONCommentEnabledReader { return &JSONCommentEnabledReader{ - lineScanner: bufio.NewScanner(underlying), + lineReader: NewLineReader(underlying, "\n"), readerOptions: readerOptions, context: types.NewNilContext(), readerChannel: readerChannel, @@ -237,10 +236,10 @@ func (bsr *JSONCommentEnabledReader) Read(p []byte) (n int, err error) { // Loop until we can get a non-comment line to pass on, or end of file. for { // EOF - if !bsr.lineScanner.Scan() { + if !bsr.lineReader.Scan() { return 0, io.EOF } - line := bsr.lineScanner.Text() + line := bsr.lineReader.Text() // Non-comment line if !strings.HasPrefix(line, bsr.readerOptions.CommentString) { diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go index 7495a8d80..3fa9cd6da 100644 --- a/pkg/input/record_reader_pprint.go +++ b/pkg/input/record_reader_pprint.go @@ -148,9 +148,9 @@ func (reader *RecordReaderPprintBarredOrMarkdown) processHandle( reader.headerStrings = nil recordsPerBatch := reader.recordsPerBatch - lineScanner := NewLineScanner(handle, reader.readerOptions.IRS) + lineReader := NewLineReader(handle, reader.readerOptions.IRS) linesChannel := make(chan *list.List, recordsPerBatch) - go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) + go channelizedLineReader(lineReader, linesChannel, downstreamDoneChannel, recordsPerBatch) for { recordsAndContexts, eof := reader.recordBatchGetter(reader, linesChannel, filename, context, errorChannel) diff --git a/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go index a0d77aec4..635dc0840 100644 --- 
a/pkg/input/record_reader_tsv.go +++ b/pkg/input/record_reader_tsv.go @@ -126,9 +126,9 @@ func (reader *RecordReaderTSV) processHandle( reader.headerStrings = nil recordsPerBatch := reader.recordsPerBatch - lineScanner := NewLineScanner(handle, reader.readerOptions.IRS) + lineReader := NewLineReader(handle, reader.readerOptions.IRS) linesChannel := make(chan *list.List, recordsPerBatch) - go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) + go channelizedLineReader(lineReader, linesChannel, downstreamDoneChannel, recordsPerBatch) for { recordsAndContexts, eof := reader.recordBatchGetter(reader, linesChannel, filename, context, errorChannel) diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go index 74d8dac41..e683294cb 100644 --- a/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -1,7 +1,6 @@ package input import ( - "bufio" "container/list" "fmt" "io" @@ -105,10 +104,10 @@ func (reader *RecordReaderXTAB) processHandle( recordsPerBatch := reader.recordsPerBatch // XTAB uses repeated IFS, rather than IRS, to delimit records - lineScanner := NewLineScanner(handle, reader.readerOptions.IFS) + lineReader := NewLineReader(handle, reader.readerOptions.IFS) stanzasChannel := make(chan *list.List, recordsPerBatch) - go channelizedStanzaScanner(lineScanner, reader.readerOptions, stanzasChannel, downstreamDoneChannel, + go channelizedStanzaScanner(lineReader, reader.readerOptions, stanzasChannel, downstreamDoneChannel, recordsPerBatch) for { @@ -137,7 +136,7 @@ func (reader *RecordReaderXTAB) processHandle( // start or end of file. A single stanza, once parsed, will become a single // record. 
func channelizedStanzaScanner( - lineScanner *bufio.Scanner, + lineReader ILineReader, readerOptions *cli.TReaderOptions, stanzasChannel chan<- *list.List, // list of list of string downstreamDoneChannel <-chan bool, // for mlr head @@ -150,8 +149,8 @@ func channelizedStanzaScanner( stanzas := list.New() stanza := newStanza() - for lineScanner.Scan() { - line := lineScanner.Text() + for lineReader.Scan() { + line := lineReader.Text() // Check for comments-in-data feature // TODO: function-pointer this away diff --git a/pkg/input/splitters.go b/pkg/input/splitters.go new file mode 100644 index 000000000..aa3e43b59 --- /dev/null +++ b/pkg/input/splitters.go @@ -0,0 +1,77 @@ +// This file contains the interface for file-format-specific record-readers, as +// well as a collection of utility functions. + +package input + +import ( + "regexp" + "strings" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" +) + +// IPairSplitter splits a string into left and right, e.g. for IPS. +// This helps us reuse code for splitting by IPS string, or IPS regex. +type iPairSplitter interface { + Split(input string) []string +} + +func newPairSplitter(options *cli.TReaderOptions) iPairSplitter { + if options.IPSRegex == nil { + return &tIPSSplitter{ips: options.IPS} + } else { + return &tIPSRegexSplitter{ipsRegex: options.IPSRegex} + } +} + +type tIPSSplitter struct { + ips string +} + +func (s *tIPSSplitter) Split(input string) []string { + return strings.SplitN(input, s.ips, 2) +} + +type tIPSRegexSplitter struct { + ipsRegex *regexp.Regexp +} + +func (s *tIPSRegexSplitter) Split(input string) []string { + return lib.RegexCompiledSplitString(s.ipsRegex, input, 2) +} + +// IFieldSplitter splits a string into pieces, e.g. for IFS. +// This helps us reuse code for splitting by IFS string, or IFS regex. 
+type iFieldSplitter interface { + Split(input string) []string +} + +func newFieldSplitter(options *cli.TReaderOptions) iFieldSplitter { + if options.IFSRegex == nil { + return &tIFSSplitter{ifs: options.IFS, allowRepeatIFS: options.AllowRepeatIFS} + } else { + return &tIFSRegexSplitter{ifsRegex: options.IFSRegex} + } +} + +type tIFSSplitter struct { + ifs string + allowRepeatIFS bool +} + +func (s *tIFSSplitter) Split(input string) []string { + fields := lib.SplitString(input, s.ifs) + if s.allowRepeatIFS { + fields = lib.StripEmpties(fields) // left/right trim + } + return fields +} + +type tIFSRegexSplitter struct { + ifsRegex *regexp.Regexp +} + +func (s *tIFSRegexSplitter) Split(input string) []string { + return lib.RegexCompiledSplitString(s.ifsRegex, input, -1) +} From 3ff43fa8185a5d85242a8bc4635d654e7f671ae3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Feb 2024 15:50:50 -0500 Subject: [PATCH 135/456] Miller produces no output on TSV with > 64K characters per line (#1505) * Switch to bufio.Reader, first pass * temp * Simplify ILineReader by making it stateless * Interface not necessary; ILineReader -> TLineReader * neaten * iterating --- pkg/cli/option_parse.go | 12 +- pkg/cli/separators.go | 4 + pkg/climain/mlrcli_parse.go | 10 +- pkg/input/line_reader.go | 209 +++++++++++++++++++++++--------- pkg/input/record_reader_json.go | 15 ++- pkg/input/record_reader_xtab.go | 14 ++- 6 files changed, 198 insertions(+), 66 deletions(-) diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 3cec34dc6..f4c455366 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -8,6 +8,7 @@ package cli import ( "bufio" + "errors" "fmt" "io" "os" @@ -29,7 +30,7 @@ import ( // - IFS/IPS can have escapes like "\x1f" which aren't valid regex literals // so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f' // -- to the single byte with hex code 0x1f. 
-func FinalizeReaderOptions(readerOptions *TReaderOptions) { +func FinalizeReaderOptions(readerOptions *TReaderOptions) error { readerOptions.IFS = lib.UnhexStringLiteral(readerOptions.IFS) readerOptions.IPS = lib.UnhexStringLiteral(readerOptions.IPS) @@ -57,12 +58,17 @@ func FinalizeReaderOptions(readerOptions *TReaderOptions) { readerOptions.IFS = lib.UnbackslashStringLiteral(readerOptions.IFS) readerOptions.IPS = lib.UnbackslashStringLiteral(readerOptions.IPS) readerOptions.IRS = lib.UnbackslashStringLiteral(readerOptions.IRS) + + if readerOptions.IRS == "" { + return errors.New("empty IRS") + } + return nil } // FinalizeWriterOptions unbackslashes OPS, OFS, and ORS. This is because // because the '\n' at the command line which is Go "\\n" (a backslash and an // n) needs to become the single newline character., and likewise for "\t", etc. -func FinalizeWriterOptions(writerOptions *TWriterOptions) { +func FinalizeWriterOptions(writerOptions *TWriterOptions) error { if !writerOptions.ofsWasSpecified { writerOptions.OFS = defaultFSes[writerOptions.OutputFileFormat] } @@ -84,6 +90,8 @@ func FinalizeWriterOptions(writerOptions *TWriterOptions) { writerOptions.OFS = lib.UnbackslashStringLiteral(writerOptions.OFS) writerOptions.OPS = lib.UnbackslashStringLiteral(writerOptions.OPS) writerOptions.ORS = lib.UnbackslashStringLiteral(writerOptions.ORS) + + return nil } // ================================================================ diff --git a/pkg/cli/separators.go b/pkg/cli/separators.go index 6a52c3f2c..0a5278f64 100644 --- a/pkg/cli/separators.go +++ b/pkg/cli/separators.go @@ -82,6 +82,7 @@ var SEPARATOR_REGEX_NAMES_TO_VALUES = map[string]string{ // E.g. if IFS isn't specified, it's space for NIDX and comma for DKVP, etc. 
var defaultFSes = map[string]string{ + "gen": ",", "csv": ",", "csvlite": ",", "dkvp": ",", @@ -94,6 +95,7 @@ var defaultFSes = map[string]string{ } var defaultPSes = map[string]string{ + "gen": "N/A", "csv": "N/A", "csvlite": "N/A", "dkvp": "=", @@ -106,6 +108,7 @@ var defaultPSes = map[string]string{ } var defaultRSes = map[string]string{ + "gen": "\n", "csv": "\n", "csvlite": "\n", "dkvp": "\n", @@ -118,6 +121,7 @@ var defaultRSes = map[string]string{ } var defaultAllowRepeatIFSes = map[string]bool{ + "gen": false, "csv": false, "csvlite": false, "dkvp": false, diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index 586c94d78..eeecfa6dc 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -306,8 +306,14 @@ func parseCommandLinePassTwo( return nil, nil, err } - cli.FinalizeReaderOptions(&options.ReaderOptions) - cli.FinalizeWriterOptions(&options.WriterOptions) + err = cli.FinalizeReaderOptions(&options.ReaderOptions) + if err != nil { + return nil, nil, err + } + err = cli.FinalizeWriterOptions(&options.WriterOptions) + if err != nil { + return nil, nil, err + } // Set an optional global formatter for floating-point values if options.WriterOptions.FPOFMT != "" { diff --git a/pkg/input/line_reader.go b/pkg/input/line_reader.go index c6b272609..6779b65db 100644 --- a/pkg/input/line_reader.go +++ b/pkg/input/line_reader.go @@ -7,79 +7,166 @@ import ( "bufio" "container/list" "io" + "strings" + + "github.com/johnkerl/miller/pkg/lib" ) type ILineReader interface { - Scan() bool - Text() string + // Read returns the string without the final newline (or whatever terminator). + // The error condition io.EOF as non-error "error" case. + // EOF is always returned with empty line: the code here is structured so that + // we do not return a non-empty line along with an EOF indicator. 
+ Read() (string, error) } -type TLineReader struct { - scanner *bufio.Scanner +type DefaultLineReader struct { + underlying *bufio.Reader + eof bool } -// NewLineReader handles reading lines which may be delimited by multi-line separators, -// e.g. "\xe2\x90\x9e" for USV. -func NewLineReader(handle io.Reader, irs string) *TLineReader { - scanner := bufio.NewScanner(handle) +// SingleIRSLineReader handles reading lines with a single-character terminator. +type SingleIRSLineReader struct { + underlying *bufio.Reader + end_irs byte + eof bool +} - if irs == "\n" || irs == "\r\n" { - // Handled by default scanner. - } else { - irsbytes := []byte(irs) - irslen := len(irsbytes) +// MultiIRSLineReader handles reading lines which may be delimited by multi-line separators, e.g. +// "\xe2\x90\x9e" for USV. +type MultiIRSLineReader struct { + underlying *bufio.Reader + irs string + irs_len int + end_irs byte + eof bool +} - // Custom splitter - recordSplitter := func( - data []byte, - atEOF bool, - ) ( - advance int, - token []byte, - err error, - ) { - datalen := len(data) - end := datalen - irslen - for i := 0; i <= end; i++ { - if data[i] == irsbytes[0] { - match := true - for j := 1; j < irslen; j++ { - if data[i+j] != irsbytes[j] { - match = false - break - } - } - if match { - return i + irslen, data[:i], nil - } - } - } - if !atEOF { - return 0, nil, nil - } - // There is one final token to be delivered, which may be the empty string. - // Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this - // but does not trigger an error to be returned from Scan itself. - return 0, data, bufio.ErrFinalToken +func NewLineReader(handle io.Reader, irs string) ILineReader { + underlying := bufio.NewReader(handle) + + irs_len := len(irs) + + // Not worth complicating the API by adding an error return. + // Empty IRS is checked elsewhere. 
+ if irs_len < 1 { + panic("Empty IRS") + + } else if irs == "\n" || irs == "\r\n" { + return &DefaultLineReader{ + underlying: underlying, } - scanner.Split(recordSplitter) - } + } else if irs_len == 1 { + return &SingleIRSLineReader{ + underlying: underlying, + end_irs: irs[0], + } - return &TLineReader{ - scanner: scanner, + } else { + return &MultiIRSLineReader{ + underlying: underlying, + irs: irs, + irs_len: irs_len, + end_irs: irs[irs_len-1], + } } } -func (r *TLineReader) Scan() bool { - return r.scanner.Scan() +func (r *DefaultLineReader) Read() (string, error) { + + if r.eof { + return "", io.EOF + } + + line, err := r.underlying.ReadString('\n') + + // If we have EOF and a non-empty line, defer the EOF return to the next Read call. + if len(line) > 0 && lib.IsEOF(err) { + r.eof = true + err = nil + } + + n := len(line) + if strings.HasSuffix(line, "\r\n") { + line = line[:n-2] + } else if strings.HasSuffix(line, "\n") { + line = line[:n-1] + } + + return line, err } -func (r *TLineReader) Text() string { - return r.scanner.Text() +func (r *SingleIRSLineReader) Read() (string, error) { + + if r.eof { + return "", io.EOF + } + + line, err := r.underlying.ReadString(r.end_irs) + + // If we have EOF and a non-empty line, defer the EOF return to the next Read call. + if len(line) > 0 && lib.IsEOF(err) { + r.eof = true + err = nil + } + + n := len(line) + if n > 0 && line[n-1] == r.end_irs { + line = line[:n-1] + } + + return line, err } -// TODO: comment copiously +func (r *MultiIRSLineReader) Read() (string, error) { + + // bufio.Reader.ReadString supports only a single-character terminator. So we read lines ending + // in the final character, until we get a line that ends in the entire sequence or EOF. + // + // Note that bufio.Scanner has a very nice bufio.Scanner.Split method which can be overridden to + // support custom line-ending logic. 
Sadly, though, bufio.Scanner _only_ supports a fixed + // maximum line length, and misbehaves badly when presented with longer lines. So we cannot use + // bufio.Scanner. See also https://github.com/johnkerl/miller/issues/1501. + + if r.eof { + return "", io.EOF + } + + line := "" + + for { + + piece, err := r.underlying.ReadString(r.end_irs) + + // If we have EOF and a non-empty line, defer the EOF return to the next Read call. + if len(piece) > 0 && lib.IsEOF(err) { + r.eof = true + err = nil + } + + if err != nil { + return line, err // includes io.EOF as a non-error "error" case + } + + if strings.HasSuffix(piece, r.irs) { + piece = piece[:len(piece)-r.irs_len] + line += piece + break + } + + if r.eof { + line += piece + break + } + + } + + return line, nil +} + +// channelizedLineReader puts the line reading/splitting into its own goroutine in order to pipeline +// the I/O with regard to further processing. Used by record-readers for multiple file formats. // // Lines are written to the channel with their trailing newline (or whatever // IRS) stripped off. So, callers get "a=1,b=2" rather than "a=1,b=2\n". @@ -94,10 +181,20 @@ func channelizedLineReader( lines := list.New() - for lineReader.Scan() { + for { + line, err := lineReader.Read() + if err != nil { + if lib.IsEOF(err) { + done = true + break + } else { + break + } + } + i++ - lines.PushBack(lineReader.Text()) + lines.PushBack(line) // See if downstream processors will be ignoring further data (e.g. mlr // head). If so, stop reading. This makes 'mlr head hugefile' exit diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 1607fb0a5..ecc44e061 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -233,13 +233,15 @@ func (bsr *JSONCommentEnabledReader) Read(p []byte) (n int, err error) { return bsr.populateFromLine(p), nil } + done := false + // Loop until we can get a non-comment line to pass on, or end of file. 
- for { + for !done { // EOF - if !bsr.lineReader.Scan() { - return 0, io.EOF + line, err := bsr.lineReader.Read() + if err != nil { + return 0, err } - line := bsr.lineReader.Text() // Non-comment line if !strings.HasPrefix(line, bsr.readerOptions.CommentString) { @@ -255,7 +257,12 @@ func (bsr *JSONCommentEnabledReader) Read(p []byte) (n int, err error) { ell.PushBack(types.NewOutputString(line+"\n", bsr.context)) bsr.readerChannel <- ell } + + if done { + break + } } + return 0, nil } // populateFromLine is a helper for Read. It takes a full line from the diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go index e683294cb..8dd88c308 100644 --- a/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -4,6 +4,7 @@ import ( "container/list" "fmt" "io" + "os" "regexp" "strings" @@ -149,8 +150,17 @@ func channelizedStanzaScanner( stanzas := list.New() stanza := newStanza() - for lineReader.Scan() { - line := lineReader.Text() + for { + line, err := lineReader.Read() + if err != nil { + if lib.IsEOF(err) { + done = true + break + } else { + fmt.Fprintf(os.Stderr, "mlr: %#v\n", err) + break + } + } // Check for comments-in-data feature // TODO: function-pointer this away From fb1f7f8421342aa4c44ded5e392f048f067a8631 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Feb 2024 21:51:41 -0500 Subject: [PATCH 136/456] Enable record-hashing by default (#1507) * Enable record-hashing by default * comments --- pkg/mlrval/mlrmap.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pkg/mlrval/mlrmap.go b/pkg/mlrval/mlrmap.go index e2596f09d..562a91def 100644 --- a/pkg/mlrval/mlrmap.go +++ b/pkg/mlrval/mlrmap.go @@ -10,8 +10,8 @@ // // * It keeps a doubly-linked list of key-value pairs. // -// * By default, no hash functions are computed when the map is written to or -// read from. 
+// * With hash-records set to false, no hash functions are computed when the map +// is written to or read from. // // * Gets are implemented by sequential scan through the list: given a key, // the key-value pairs are scanned through until a match is (or is not) found. @@ -20,6 +20,10 @@ // was found in the Go implementation. Test data was million-line CSV and // DKVP, with a dozen columns or so. // +// * However, with higher column-count (see https://github.com/johnkerl/miller/issues/1506 +// and https://github.com/johnkerl/miller/pull/1507), non-hashing becomes +// a substantial penalty. +// // Note however that an auxiliary constructor is provided which does use // a key-to-entry hashmap in place of linear search for get/put/has/delete. // This may be useful in certain contexts, even though it's not the default @@ -53,12 +57,11 @@ package mlrval -// For the C port having this off was a noticeable performance improvement (10-15%). -// For the Go port having it off is a less-noticeable performance improvement (5%). -// Both these figures are for just doing mlr cat. At the moment I'm leaving this -// default-on pending more profiling on more complex record-processing operations -// such as mlr sort. -var hashRecords = false +// As noted above, hashing has a minor penalty for low column count: computing +// hashmaps takes more time than is saved later on. But for higher column-count, +// non-hashing has a huge penalty. Therefore we default to on. And users can +// use `mlr --no-hash-records` or `mlr --hash-records` to flip the behavior. +var hashRecords = true func HashRecords(onOff bool) { hashRecords = onOff @@ -70,9 +73,7 @@ type Mlrmap struct { Head *MlrmapEntry Tail *MlrmapEntry - // Surprisingly, using this costs about 25% for cat/cut/etc tests - // on million-line data files (CSV, DKVP) with a dozen or so columns. - // So, the constructor allows callsites to use it, or not. + // This can be nil if hashRecords is off. 
keysToEntries map[string]*MlrmapEntry } From 900409849965269b98e34c8a2e02d0632f6f2c46 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Feb 2024 21:56:52 -0500 Subject: [PATCH 137/456] python/make-tsv.py --- python/make-tsv.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 python/make-tsv.py diff --git a/python/make-tsv.py b/python/make-tsv.py new file mode 100755 index 000000000..bb55d0ba0 --- /dev/null +++ b/python/make-tsv.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +import sys + +nrow = 2 +ncol = 100 +if len(sys.argv) == 2: + ncol = int(sys.argv[1]) +if len(sys.argv) == 3: + nrow = int(sys.argv[1]) + ncol = int(sys.argv[2]) + +prefix = "k" +for i in range(nrow): + for j in range(ncol): + if j == 0: + sys.stdout.write("%s%07d" % (prefix, j)) + else: + sys.stdout.write("\t%s%07d" % (prefix, j)) + sys.stdout.write("\n") + prefix = "v" From aff4b9f32dde3ee75aca260f822bfd17d91aa4a0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 26 Feb 2024 00:12:31 -0500 Subject: [PATCH 138/456] Improved file-not-found handling (#1508) --- pkg/input/record_reader_csv.go | 3 +- pkg/input/record_reader_csvlite.go | 38 +++++++++---------- pkg/input/record_reader_dkvp_nidx.go | 3 +- pkg/input/record_reader_json.go | 3 +- pkg/input/record_reader_pprint.go | 38 +++++++++---------- pkg/input/record_reader_tsv.go | 38 +++++++++---------- pkg/input/record_reader_xtab.go | 3 +- test/cases/non-windows/file-not-found/csv/cmd | 1 + .../non-windows/file-not-found/csv/experr | 1 + .../non-windows/file-not-found/csv/expout | 0 .../file-not-found/csv/should-fail | 0 .../cases/non-windows/file-not-found/dkvp/cmd | 1 + .../non-windows/file-not-found/dkvp/experr | 1 + .../non-windows/file-not-found/dkvp/expout | 0 .../file-not-found/dkvp/should-fail | 0 test/cases/non-windows/file-not-found/imd/cmd | 1 + .../non-windows/file-not-found/imd/experr | 1 + .../non-windows/file-not-found/imd/expout | 0 .../file-not-found/imd/should-fail | 0 
.../cases/non-windows/file-not-found/json/cmd | 1 + .../non-windows/file-not-found/json/experr | 1 + .../non-windows/file-not-found/json/expout | 2 + .../file-not-found/json/should-fail | 0 .../non-windows/file-not-found/jsonl/cmd | 1 + .../non-windows/file-not-found/jsonl/experr | 1 + .../non-windows/file-not-found/jsonl/expout | 0 .../file-not-found/jsonl/should-fail | 0 .../cases/non-windows/file-not-found/nidx/cmd | 1 + .../non-windows/file-not-found/nidx/experr | 1 + .../non-windows/file-not-found/nidx/expout | 0 .../file-not-found/nidx/should-fail | 0 .../non-windows/file-not-found/pprint/cmd | 1 + .../non-windows/file-not-found/pprint/experr | 1 + .../non-windows/file-not-found/pprint/expout | 0 .../file-not-found/pprint/should-fail | 0 test/cases/non-windows/file-not-found/tsv/cmd | 1 + .../non-windows/file-not-found/tsv/experr | 1 + .../non-windows/file-not-found/tsv/expout | 0 .../file-not-found/tsv/should-fail | 0 .../cases/non-windows/file-not-found/xtab/cmd | 1 + .../non-windows/file-not-found/xtab/experr | 1 + .../non-windows/file-not-found/xtab/expout | 0 .../file-not-found/xtab/should-fail | 0 43 files changed, 85 insertions(+), 61 deletions(-) create mode 100644 test/cases/non-windows/file-not-found/csv/cmd create mode 100644 test/cases/non-windows/file-not-found/csv/experr create mode 100644 test/cases/non-windows/file-not-found/csv/expout create mode 100644 test/cases/non-windows/file-not-found/csv/should-fail create mode 100644 test/cases/non-windows/file-not-found/dkvp/cmd create mode 100644 test/cases/non-windows/file-not-found/dkvp/experr create mode 100644 test/cases/non-windows/file-not-found/dkvp/expout create mode 100644 test/cases/non-windows/file-not-found/dkvp/should-fail create mode 100644 test/cases/non-windows/file-not-found/imd/cmd create mode 100644 test/cases/non-windows/file-not-found/imd/experr create mode 100644 test/cases/non-windows/file-not-found/imd/expout create mode 100644 
test/cases/non-windows/file-not-found/imd/should-fail create mode 100644 test/cases/non-windows/file-not-found/json/cmd create mode 100644 test/cases/non-windows/file-not-found/json/experr create mode 100644 test/cases/non-windows/file-not-found/json/expout create mode 100644 test/cases/non-windows/file-not-found/json/should-fail create mode 100644 test/cases/non-windows/file-not-found/jsonl/cmd create mode 100644 test/cases/non-windows/file-not-found/jsonl/experr create mode 100644 test/cases/non-windows/file-not-found/jsonl/expout create mode 100644 test/cases/non-windows/file-not-found/jsonl/should-fail create mode 100644 test/cases/non-windows/file-not-found/nidx/cmd create mode 100644 test/cases/non-windows/file-not-found/nidx/experr create mode 100644 test/cases/non-windows/file-not-found/nidx/expout create mode 100644 test/cases/non-windows/file-not-found/nidx/should-fail create mode 100644 test/cases/non-windows/file-not-found/pprint/cmd create mode 100644 test/cases/non-windows/file-not-found/pprint/experr create mode 100644 test/cases/non-windows/file-not-found/pprint/expout create mode 100644 test/cases/non-windows/file-not-found/pprint/should-fail create mode 100644 test/cases/non-windows/file-not-found/tsv/cmd create mode 100644 test/cases/non-windows/file-not-found/tsv/experr create mode 100644 test/cases/non-windows/file-not-found/tsv/expout create mode 100644 test/cases/non-windows/file-not-found/tsv/should-fail create mode 100644 test/cases/non-windows/file-not-found/xtab/cmd create mode 100644 test/cases/non-windows/file-not-found/xtab/experr create mode 100644 test/cases/non-windows/file-not-found/xtab/expout create mode 100644 test/cases/non-windows/file-not-found/xtab/should-fail diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index ab7c1d761..505020cee 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -65,8 +65,9 @@ func (reader *RecordReaderCSV) Read( ) if err != nil { 
errorChannel <- err + } else { + reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } - reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go index bfc188719..5109770df 100644 --- a/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go @@ -94,16 +94,16 @@ func (reader *RecordReaderCSVLite) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + "(stdin)", + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) } - reader.processHandle( - handle, - "(stdin)", - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( @@ -114,17 +114,17 @@ func (reader *RecordReaderCSVLite) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + filename, + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) + handle.Close() } - reader.processHandle( - handle, - filename, - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) - handle.Close() } } } diff --git a/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go index 943fbcb5e..a5509a23d 100644 --- a/pkg/input/record_reader_dkvp_nidx.go +++ b/pkg/input/record_reader_dkvp_nidx.go @@ -68,8 +68,9 @@ func (reader *RecordReaderDKVPNIDX) Read( ) if err != nil { errorChannel <- err + } else { + reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } - reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( diff --git 
a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index ecc44e061..80ce17440 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -45,8 +45,9 @@ func (reader *RecordReaderJSON) Read( ) if err != nil { errorChannel <- err + } else { + reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } - reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go index 3fa9cd6da..5cb4bfbad 100644 --- a/pkg/input/record_reader_pprint.go +++ b/pkg/input/record_reader_pprint.go @@ -98,16 +98,16 @@ func (reader *RecordReaderPprintBarredOrMarkdown) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + "(stdin)", + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) } - reader.processHandle( - handle, - "(stdin)", - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( @@ -118,17 +118,17 @@ func (reader *RecordReaderPprintBarredOrMarkdown) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + filename, + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) + handle.Close() } - reader.processHandle( - handle, - filename, - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) - handle.Close() } } } diff --git a/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go index 635dc0840..02a3c4f6e 100644 --- a/pkg/input/record_reader_tsv.go +++ b/pkg/input/record_reader_tsv.go @@ -76,16 +76,16 @@ func (reader *RecordReaderTSV) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + "(stdin)", + 
&context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) } - reader.processHandle( - handle, - "(stdin)", - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( @@ -96,17 +96,17 @@ func (reader *RecordReaderTSV) Read( ) if err != nil { errorChannel <- err - return + } else { + reader.processHandle( + handle, + filename, + &context, + readerChannel, + errorChannel, + downstreamDoneChannel, + ) + handle.Close() } - reader.processHandle( - handle, - filename, - &context, - readerChannel, - errorChannel, - downstreamDoneChannel, - ) - handle.Close() } } } diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go index 8dd88c308..31294012c 100644 --- a/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -71,8 +71,9 @@ func (reader *RecordReaderXTAB) Read( ) if err != nil { errorChannel <- err + } else { + reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } - reader.processHandle(handle, "(stdin)", &context, readerChannel, errorChannel, downstreamDoneChannel) } else { for _, filename := range filenames { handle, err := lib.OpenFileForRead( diff --git a/test/cases/non-windows/file-not-found/csv/cmd b/test/cases/non-windows/file-not-found/csv/cmd new file mode 100644 index 000000000..c0111df1c --- /dev/null +++ b/test/cases/non-windows/file-not-found/csv/cmd @@ -0,0 +1 @@ +mlr --csv cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/csv/experr b/test/cases/non-windows/file-not-found/csv/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/csv/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. 
diff --git a/test/cases/non-windows/file-not-found/csv/expout b/test/cases/non-windows/file-not-found/csv/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/csv/should-fail b/test/cases/non-windows/file-not-found/csv/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/dkvp/cmd b/test/cases/non-windows/file-not-found/dkvp/cmd new file mode 100644 index 000000000..e0a95d06c --- /dev/null +++ b/test/cases/non-windows/file-not-found/dkvp/cmd @@ -0,0 +1 @@ +mlr --dkvp cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/dkvp/experr b/test/cases/non-windows/file-not-found/dkvp/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/dkvp/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. diff --git a/test/cases/non-windows/file-not-found/dkvp/expout b/test/cases/non-windows/file-not-found/dkvp/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/dkvp/should-fail b/test/cases/non-windows/file-not-found/dkvp/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/imd/cmd b/test/cases/non-windows/file-not-found/imd/cmd new file mode 100644 index 000000000..53adb3fe5 --- /dev/null +++ b/test/cases/non-windows/file-not-found/imd/cmd @@ -0,0 +1 @@ +mlr --imd cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/imd/experr b/test/cases/non-windows/file-not-found/imd/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/imd/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. 
diff --git a/test/cases/non-windows/file-not-found/imd/expout b/test/cases/non-windows/file-not-found/imd/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/imd/should-fail b/test/cases/non-windows/file-not-found/imd/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/json/cmd b/test/cases/non-windows/file-not-found/json/cmd new file mode 100644 index 000000000..92c89b17f --- /dev/null +++ b/test/cases/non-windows/file-not-found/json/cmd @@ -0,0 +1 @@ +mlr --json cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/json/experr b/test/cases/non-windows/file-not-found/json/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/json/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. diff --git a/test/cases/non-windows/file-not-found/json/expout b/test/cases/non-windows/file-not-found/json/expout new file mode 100644 index 000000000..0d4f101c7 --- /dev/null +++ b/test/cases/non-windows/file-not-found/json/expout @@ -0,0 +1,2 @@ +[ +] diff --git a/test/cases/non-windows/file-not-found/json/should-fail b/test/cases/non-windows/file-not-found/json/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/jsonl/cmd b/test/cases/non-windows/file-not-found/jsonl/cmd new file mode 100644 index 000000000..551c0b0a7 --- /dev/null +++ b/test/cases/non-windows/file-not-found/jsonl/cmd @@ -0,0 +1 @@ +mlr --jsonl cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/jsonl/experr b/test/cases/non-windows/file-not-found/jsonl/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/jsonl/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. 
diff --git a/test/cases/non-windows/file-not-found/jsonl/expout b/test/cases/non-windows/file-not-found/jsonl/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/jsonl/should-fail b/test/cases/non-windows/file-not-found/jsonl/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/nidx/cmd b/test/cases/non-windows/file-not-found/nidx/cmd new file mode 100644 index 000000000..b5b1a2316 --- /dev/null +++ b/test/cases/non-windows/file-not-found/nidx/cmd @@ -0,0 +1 @@ +mlr --nidx cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/nidx/experr b/test/cases/non-windows/file-not-found/nidx/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/nidx/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. diff --git a/test/cases/non-windows/file-not-found/nidx/expout b/test/cases/non-windows/file-not-found/nidx/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/nidx/should-fail b/test/cases/non-windows/file-not-found/nidx/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/pprint/cmd b/test/cases/non-windows/file-not-found/pprint/cmd new file mode 100644 index 000000000..51a3a4d6e --- /dev/null +++ b/test/cases/non-windows/file-not-found/pprint/cmd @@ -0,0 +1 @@ +mlr --pprint cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/pprint/experr b/test/cases/non-windows/file-not-found/pprint/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/pprint/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. 
diff --git a/test/cases/non-windows/file-not-found/pprint/expout b/test/cases/non-windows/file-not-found/pprint/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/pprint/should-fail b/test/cases/non-windows/file-not-found/pprint/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/tsv/cmd b/test/cases/non-windows/file-not-found/tsv/cmd new file mode 100644 index 000000000..fbb231b7e --- /dev/null +++ b/test/cases/non-windows/file-not-found/tsv/cmd @@ -0,0 +1 @@ +mlr --tsv cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/tsv/experr b/test/cases/non-windows/file-not-found/tsv/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/tsv/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. diff --git a/test/cases/non-windows/file-not-found/tsv/expout b/test/cases/non-windows/file-not-found/tsv/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/tsv/should-fail b/test/cases/non-windows/file-not-found/tsv/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/xtab/cmd b/test/cases/non-windows/file-not-found/xtab/cmd new file mode 100644 index 000000000..dca5d1681 --- /dev/null +++ b/test/cases/non-windows/file-not-found/xtab/cmd @@ -0,0 +1 @@ +mlr --xtab cat /nonesuch/nope/never diff --git a/test/cases/non-windows/file-not-found/xtab/experr b/test/cases/non-windows/file-not-found/xtab/experr new file mode 100644 index 000000000..486e326b3 --- /dev/null +++ b/test/cases/non-windows/file-not-found/xtab/experr @@ -0,0 +1 @@ +mlr: open /nonesuch/nope/never: no such file or directory. 
diff --git a/test/cases/non-windows/file-not-found/xtab/expout b/test/cases/non-windows/file-not-found/xtab/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/non-windows/file-not-found/xtab/should-fail b/test/cases/non-windows/file-not-found/xtab/should-fail new file mode 100644 index 000000000..e69de29bb From e528b9e112d0a793c93cf4f1a30267ce82bf1ec3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 09:54:59 -0500 Subject: [PATCH 139/456] Bump actions/cache from 4.0.0 to 4.0.1 (#1511) Bumps [actions/cache](https://github.com/actions/cache) from 4.0.0 to 4.0.1. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/13aacd865c20de90d75de3b17ebe84f7a17d57d2...ab5e6d0c87105b4c9c2047343972218f562e4319) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aa4c73aba..98eeee613 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 + uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 with: path: | ~/.cache/go-build From 8d6455dfab557f1ba913186bae00b1d68e0dd4a7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 09:11:04 -0500 Subject: [PATCH 140/456] Bump github.com/stretchr/testify from 1.8.4 to 1.9.0 (#1516) Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.4 to 1.9.0. - [Release notes](https://github.com/stretchr/testify/releases) - [Commits](https://github.com/stretchr/testify/compare/v1.8.4...v1.9.0) --- updated-dependencies: - dependency-name: github.com/stretchr/testify dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 947ce4857..58400f006 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.17.0 golang.org/x/term v0.17.0 golang.org/x/text v0.14.0 diff --git a/go.sum b/go.sum index 71047a014..c0a8a969a 100644 --- a/go.sum +++ b/go.sum @@ -36,8 +36,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= From 99e13f6105db21e03b3b68f0be2b136d6f0e3b10 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 09:32:39 -0500 Subject: [PATCH 141/456] Bump golang.org/x/sys from 0.17.0 to 0.18.0 (#1521) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.17.0 to 
0.18.0. - [Commits](https://github.com/golang/sys/compare/v0.17.0...v0.18.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 58400f006..07da1dcf8 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.17.0 + golang.org/x/sys v0.18.0 golang.org/x/term v0.17.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index c0a8a969a..157353ecb 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= From 78aa768cbe78a813cf4259931c8e4bf4be262f6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:13:20 -0500 Subject: [PATCH 142/456] Bump 
golang.org/x/term from 0.17.0 to 0.18.0 (#1522) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.17.0 to 0.18.0. - [Commits](https://github.com/golang/term/compare/v0.17.0...v0.18.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 07da1dcf8..1a003057a 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.18.0 - golang.org/x/term v0.17.0 + golang.org/x/term v0.18.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index 157353ecb..3ae4002fb 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From d0a1acececea24986d2a14339b072f326df8d908 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Mar 2024 09:18:53 
-0400 Subject: [PATCH 143/456] Bump actions/checkout from 4.1.1 to 4.1.2 (#1526) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.1 to 4.1.2. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/b4ffde65f46336ab88eb53be808477a3936bae11...9bb56186c3b09b4f86b1c65136769dd318469633) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 71448c3a7..7e4d9296f 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 967a6cbcc..0e7ab4ed1 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 428c57a4d..9f87a9b99 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 - name: Set up Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 98eeee613..288e95a17 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out code into the Go module directory - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 with: fetch-depth: 0 From f01bb92da7f1bfa3962d9b0e0812c22e57bedae3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 16 Mar 2024 17:00:59 -0400 Subject: [PATCH 144/456] Avoid spurious `[]` on JSON output in some cases (#1528) * JSON empty vs `[]` handling [WIP] * unit-test mods --- pkg/input/record_reader_json.go | 6 ++++ pkg/output/channel_writer.go | 6 ++-- pkg/output/record_writer.go | 2 ++ pkg/output/record_writer_csv.go | 2 ++ pkg/output/record_writer_csvlite.go | 2 ++ pkg/output/record_writer_dkvp.go | 2 ++ pkg/output/record_writer_json.go | 36 +++++++++++++-------- 
pkg/output/record_writer_markdown.go | 2 ++ pkg/output/record_writer_nidx.go | 2 ++ pkg/output/record_writer_pprint.go | 2 ++ pkg/output/record_writer_tsv.go | 2 ++ pkg/output/record_writer_xtab.go | 2 ++ pkg/terminals/repl/verbs.go | 3 +- pkg/types/context.go | 3 ++ test/cases/dsl-functional-tests/0051/expout | 2 -- test/cases/dsl-output-redirects/0071/expout | 2 -- test/cases/dsl-sorts/sorta-natural/expout | 2 -- test/cases/dsl-sorts/sortmf-within/expout | 2 -- test/cases/io-multi/0053/expout | 2 -- test/cases/io-multi/0057/expout | 2 -- 20 files changed, 56 insertions(+), 28 deletions(-) diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 80ce17440..aaa49a178 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -17,6 +17,8 @@ import ( type RecordReaderJSON struct { readerOptions *cli.TReaderOptions recordsPerBatch int64 // distinct from readerOptions.RecordsPerBatch for join/repl + // XXX 1513 + sawBrackets bool } func NewRecordReaderJSON( @@ -65,6 +67,7 @@ func (reader *RecordReaderJSON) Read( } } } + context.JSONHadBrackets = reader.sawBrackets readerChannel <- types.NewEndOfStreamMarkerList(&context) } @@ -137,6 +140,9 @@ func (reader *RecordReaderJSON) processHandle( } } else if mlrval.IsArray() { + + reader.sawBrackets = true + records := mlrval.GetArray() if records == nil { errorChannel <- fmt.Errorf("internal coding error detected in JSON record-reader") diff --git a/pkg/output/channel_writer.go b/pkg/output/channel_writer.go index e7b0e802e..3eb8b1338 100644 --- a/pkg/output/channel_writer.go +++ b/pkg/output/channel_writer.go @@ -66,6 +66,7 @@ func channelWriterHandleBatch( if !recordAndContext.EndOfStream { record := recordAndContext.Record + context := &recordAndContext.Context // XXX more // XXX also make sure this results in exit 1 & goroutine cleanup @@ -94,7 +95,7 @@ func channelWriterHandleBatch( } if record != nil { - err := recordWriter.Write(record, bufferedOutputStream, 
outputIsStdout) + err := recordWriter.Write(record, context, bufferedOutputStream, outputIsStdout) if err != nil { fmt.Fprintf(os.Stderr, "mlr: %v\n", err) return true, true @@ -115,7 +116,8 @@ func channelWriterHandleBatch( // queued up. For example, PPRINT needs to see all same-schema // records before printing any, since it needs to compute max width // down columns. - err := recordWriter.Write(nil, bufferedOutputStream, outputIsStdout) + context := &recordAndContext.Context + err := recordWriter.Write(nil, context, bufferedOutputStream, outputIsStdout) if err != nil { fmt.Fprintf(os.Stderr, "mlr: %v\n", err) return true, true diff --git a/pkg/output/record_writer.go b/pkg/output/record_writer.go index 3ce49743d..ceb7522d2 100644 --- a/pkg/output/record_writer.go +++ b/pkg/output/record_writer.go @@ -4,6 +4,7 @@ import ( "bufio" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // IRecordWriter is the abstract interface for all record-writers. They are @@ -18,6 +19,7 @@ import ( type IRecordWriter interface { Write( outrec *mlrval.Mlrmap, + context *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go index b71af63d2..032a57f03 100644 --- a/pkg/output/record_writer_csv.go +++ b/pkg/output/record_writer_csv.go @@ -9,6 +9,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterCSV struct { @@ -41,6 +42,7 @@ func NewRecordWriterCSV(writerOptions *cli.TWriterOptions) (*RecordWriterCSV, er func (writer *RecordWriterCSV) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go index c59556b30..e3ecf9196 100644 --- a/pkg/output/record_writer_csvlite.go +++ b/pkg/output/record_writer_csvlite.go 
@@ -7,6 +7,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterCSVLite struct { @@ -27,6 +28,7 @@ func NewRecordWriterCSVLite(writerOptions *cli.TWriterOptions) (*RecordWriterCSV func (writer *RecordWriterCSVLite) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_dkvp.go b/pkg/output/record_writer_dkvp.go index d27420ede..d7a516955 100644 --- a/pkg/output/record_writer_dkvp.go +++ b/pkg/output/record_writer_dkvp.go @@ -6,6 +6,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterDKVP struct { @@ -20,6 +21,7 @@ func NewRecordWriterDKVP(writerOptions *cli.TWriterOptions) (*RecordWriterDKVP, func (writer *RecordWriterDKVP) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_json.go b/pkg/output/record_writer_json.go index e832f169e..1a1e7ed58 100644 --- a/pkg/output/record_writer_json.go +++ b/pkg/output/record_writer_json.go @@ -7,6 +7,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- @@ -17,7 +18,7 @@ type RecordWriterJSON struct { jvQuoteAll bool // State: - onFirst bool + wroteAnyRecords bool } // ---------------------------------------------------------------- @@ -27,16 +28,17 @@ func NewRecordWriterJSON(writerOptions *cli.TWriterOptions) (*RecordWriterJSON, jsonFormatting = mlrval.JSON_MULTILINE } return &RecordWriterJSON{ - writerOptions: writerOptions, - jsonFormatting: jsonFormatting, - jvQuoteAll: writerOptions.JVQuoteAll, - 
onFirst: true, + writerOptions: writerOptions, + jsonFormatting: jsonFormatting, + jvQuoteAll: writerOptions.JVQuoteAll, + wroteAnyRecords: false, }, nil } // ---------------------------------------------------------------- func (writer *RecordWriterJSON) Write( outrec *mlrval.Mlrmap, + context *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { @@ -45,9 +47,9 @@ func (writer *RecordWriterJSON) Write( } if writer.writerOptions.WrapJSONOutputInOuterList { - writer.writeWithListWrap(outrec, bufferedOutputStream, outputIsStdout) + writer.writeWithListWrap(outrec, context, bufferedOutputStream, outputIsStdout) } else { - writer.writeWithoutListWrap(outrec, bufferedOutputStream, outputIsStdout) + writer.writeWithoutListWrap(outrec, context, bufferedOutputStream, outputIsStdout) } return nil } @@ -55,11 +57,12 @@ func (writer *RecordWriterJSON) Write( // ---------------------------------------------------------------- func (writer *RecordWriterJSON) writeWithListWrap( outrec *mlrval.Mlrmap, + context *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) { if outrec != nil { // Not end of record stream - if writer.onFirst { + if !writer.wroteAnyRecords { bufferedOutputStream.WriteString("[\n") } @@ -71,25 +74,32 @@ func (writer *RecordWriterJSON) writeWithListWrap( os.Exit(1) } - if !writer.onFirst { + if writer.wroteAnyRecords { bufferedOutputStream.WriteString(",\n") } bufferedOutputStream.WriteString(s) - writer.onFirst = false + writer.wroteAnyRecords = true } else { // End of record stream - if writer.onFirst { // zero records in the entire output stream - bufferedOutputStream.WriteString("[") + + if !writer.wroteAnyRecords { + if context.JSONHadBrackets { + bufferedOutputStream.WriteString("[") + bufferedOutputStream.WriteString("\n]\n") + } + } else { + bufferedOutputStream.WriteString("\n]\n") } - bufferedOutputStream.WriteString("\n]\n") + } } // ---------------------------------------------------------------- 
func (writer *RecordWriterJSON) writeWithoutListWrap( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) { diff --git a/pkg/output/record_writer_markdown.go b/pkg/output/record_writer_markdown.go index 6c2983a59..94137822d 100644 --- a/pkg/output/record_writer_markdown.go +++ b/pkg/output/record_writer_markdown.go @@ -7,6 +7,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterMarkdown struct { @@ -29,6 +30,7 @@ func NewRecordWriterMarkdown(writerOptions *cli.TWriterOptions) (*RecordWriterMa // ---------------------------------------------------------------- func (writer *RecordWriterMarkdown) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_nidx.go b/pkg/output/record_writer_nidx.go index 551fe47aa..b8a5573c1 100644 --- a/pkg/output/record_writer_nidx.go +++ b/pkg/output/record_writer_nidx.go @@ -5,6 +5,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterNIDX struct { @@ -19,6 +20,7 @@ func NewRecordWriterNIDX(writerOptions *cli.TWriterOptions) (*RecordWriterNIDX, func (writer *RecordWriterNIDX) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_pprint.go b/pkg/output/record_writer_pprint.go index b9f48cd93..6b2f92f1f 100644 --- a/pkg/output/record_writer_pprint.go +++ b/pkg/output/record_writer_pprint.go @@ -10,6 +10,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterPPRINT struct { @@ -35,6 +36,7 @@ func 
NewRecordWriterPPRINT(writerOptions *cli.TWriterOptions) (*RecordWriterPPRI // ---------------------------------------------------------------- func (writer *RecordWriterPPRINT) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go index 2a79793b2..40f89350a 100644 --- a/pkg/output/record_writer_tsv.go +++ b/pkg/output/record_writer_tsv.go @@ -9,6 +9,7 @@ import ( "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/lib" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) type RecordWriterTSV struct { @@ -35,6 +36,7 @@ func NewRecordWriterTSV(writerOptions *cli.TWriterOptions) (*RecordWriterTSV, er func (writer *RecordWriterTSV) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/output/record_writer_xtab.go b/pkg/output/record_writer_xtab.go index 27f3b1bcb..cd014ddce 100644 --- a/pkg/output/record_writer_xtab.go +++ b/pkg/output/record_writer_xtab.go @@ -8,6 +8,7 @@ import ( "github.com/johnkerl/miller/pkg/cli" "github.com/johnkerl/miller/pkg/colorizer" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" ) // ---------------------------------------------------------------- @@ -43,6 +44,7 @@ func NewRecordWriterXTAB(writerOptions *cli.TWriterOptions) (*RecordWriterXTAB, func (writer *RecordWriterXTAB) Write( outrec *mlrval.Mlrmap, + _ *types.Context, bufferedOutputStream *bufio.Writer, outputIsStdout bool, ) error { diff --git a/pkg/terminals/repl/verbs.go b/pkg/terminals/repl/verbs.go index 92d9046ff..ac5440ffd 100644 --- a/pkg/terminals/repl/verbs.go +++ b/pkg/terminals/repl/verbs.go @@ -639,7 +639,8 @@ func writeRecord(repl *Repl, outrec *mlrval.Mlrmap) { outrec.Unflatten(repl.options.WriterOptions.FLATSEP) } } - repl.recordWriter.Write(outrec, 
repl.bufferedRecordOutputStream, true /*outputIsStdout*/) + // XXX TEMP + repl.recordWriter.Write(outrec, nil, repl.bufferedRecordOutputStream, true /*outputIsStdout*/) repl.bufferedRecordOutputStream.Flush() } diff --git a/pkg/types/context.go b/pkg/types/context.go index 08ba3cbb6..a3da4f71a 100644 --- a/pkg/types/context.go +++ b/pkg/types/context.go @@ -99,6 +99,9 @@ type Context struct { // NF int NR int64 FNR int64 + + // XXX 1513 + JSONHadBrackets bool } // TODO: comment: Remember command-line values to pass along to CST evaluators. diff --git a/test/cases/dsl-functional-tests/0051/expout b/test/cases/dsl-functional-tests/0051/expout index d14a2c4d2..97353ee3a 100644 --- a/test/cases/dsl-functional-tests/0051/expout +++ b/test/cases/dsl-functional-tests/0051/expout @@ -60,5 +60,3 @@ "zsgnt": "int" } ] -[ -] diff --git a/test/cases/dsl-output-redirects/0071/expout b/test/cases/dsl-output-redirects/0071/expout index 4a1435f7c..eed189aad 100644 --- a/test/cases/dsl-output-redirects/0071/expout +++ b/test/cases/dsl-output-redirects/0071/expout @@ -9,5 +9,3 @@ x 8 9 10 -[ -] diff --git a/test/cases/dsl-sorts/sorta-natural/expout b/test/cases/dsl-sorts/sorta-natural/expout index 01349be34..05972250a 100644 --- a/test/cases/dsl-sorts/sorta-natural/expout +++ b/test/cases/dsl-sorts/sorta-natural/expout @@ -2,5 +2,3 @@ ["X200", "X20", "X2", "X100", "X10", "X1"] ["X1", "X2", "X10", "X20", "X100", "X200"] ["X200", "X100", "X20", "X10", "X2", "X1"] -[ -] diff --git a/test/cases/dsl-sorts/sortmf-within/expout b/test/cases/dsl-sorts/sortmf-within/expout index c683738c5..acb15cce5 100644 --- a/test/cases/dsl-sorts/sortmf-within/expout +++ b/test/cases/dsl-sorts/sortmf-within/expout @@ -18,5 +18,3 @@ "b": 2, "c": 1 } -[ -] diff --git a/test/cases/io-multi/0053/expout b/test/cases/io-multi/0053/expout index 0d4f101c7..e69de29bb 100644 --- a/test/cases/io-multi/0053/expout +++ b/test/cases/io-multi/0053/expout @@ -1,2 +0,0 @@ -[ -] diff --git a/test/cases/io-multi/0057/expout 
b/test/cases/io-multi/0057/expout index 0d4f101c7..e69de29bb 100644 --- a/test/cases/io-multi/0057/expout +++ b/test/cases/io-multi/0057/expout @@ -1,2 +0,0 @@ -[ -] From 83c44e6d74d8fcaec0db712c713338a3f4b10a99 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 16 Mar 2024 17:09:01 -0400 Subject: [PATCH 145/456] Add descriptions for `put` and `filter` verbs (#1529) * Add more info in online help about what put/filter do * `make dev` artifacts --- docs/src/manpage.md | 10 +++++++++- docs/src/manpage.txt | 10 +++++++++- docs/src/reference-main-flag-list.md | 1 + docs/src/reference-verbs.md | 7 +++++++ man/manpage.txt | 10 +++++++++- man/mlr.1 | 12 ++++++++++-- pkg/transformers/put_or_filter.go | 8 ++++++++ test/cases/cli-help/0001/expout | 7 +++++++ 8 files changed, 60 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 4b80cdeee..f85e6bd57 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -804,6 +804,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p csv "," N/A "\n" csvlite "," N/A "\n" dkvp "," "=" "\n" + gen "," N/A "\n" json N/A N/A N/A markdown " " N/A "\n" nidx " " N/A "\n" @@ -1072,6 +1073,10 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mfilter0m Usage: mlr filter [options] {DSL expression} + Lets you use a domain-specific language to progamatically filter which + stream records will be output. + See also: https://miller.readthedocs.io/en/latest/reference-verbs + Options: -f {file name} File containing a DSL expression (see examples below). If the filename is a directory, all *.mlr files in that directory are loaded. @@ -1525,6 +1530,9 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mput0m Usage: mlr put [options] {DSL expression} + Lets you use a domain-specific language to progamatically alter stream records. 
+ See also: https://miller.readthedocs.io/en/latest/reference-verbs + Options: -f {file name} File containing a DSL expression (see examples below). If the filename is a directory, all *.mlr files in that directory are loaded. @@ -3704,5 +3712,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-02-18 4mMILLER24m(1) + 2024-03-16 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 15c9d6232..5f1695701 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -783,6 +783,7 @@ csv "," N/A "\n" csvlite "," N/A "\n" dkvp "," "=" "\n" + gen "," N/A "\n" json N/A N/A N/A markdown " " N/A "\n" nidx " " N/A "\n" @@ -1051,6 +1052,10 @@ 1mfilter0m Usage: mlr filter [options] {DSL expression} + Lets you use a domain-specific language to progamatically filter which + stream records will be output. + See also: https://miller.readthedocs.io/en/latest/reference-verbs + Options: -f {file name} File containing a DSL expression (see examples below). If the filename is a directory, all *.mlr files in that directory are loaded. @@ -1504,6 +1509,9 @@ 1mput0m Usage: mlr put [options] {DSL expression} + Lets you use a domain-specific language to progamatically alter stream records. + See also: https://miller.readthedocs.io/en/latest/reference-verbs + Options: -f {file name} File containing a DSL expression (see examples below). If the filename is a directory, all *.mlr files in that directory are loaded. @@ -3683,4 +3691,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-02-18 4mMILLER24m(1) + 2024-03-16 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index fde4d9496..11f07f9af 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -477,6 +477,7 @@ Notes about all other separators: csv "," N/A "\n" csvlite "," N/A "\n" dkvp "," "=" "\n" + gen "," N/A "\n" json N/A N/A N/A markdown " " N/A "\n" nidx " " N/A "\n" diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 417aa4d9b..485343ee7 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -970,6 +970,10 @@ a,b,c
 Usage: mlr filter [options] {DSL expression}
+Lets you use a domain-specific language to progamatically filter which
+stream records will be output.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.
@@ -2301,6 +2305,9 @@ Options:
 
 Usage: mlr put [options] {DSL expression}
+Lets you use a domain-specific language to progamatically alter stream records.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.
diff --git a/man/manpage.txt b/man/manpage.txt
index 15c9d6232..5f1695701 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -783,6 +783,7 @@
                csv      ","    N/A    "\n"
                csvlite  ","    N/A    "\n"
                dkvp     ","    "="    "\n"
+               gen      ","    N/A    "\n"
                json     N/A    N/A    N/A
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
@@ -1051,6 +1052,10 @@
 
    1mfilter0m
        Usage: mlr filter [options] {DSL expression}
+       Lets you use a domain-specific language to progamatically filter which
+       stream records will be output.
+       See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
        Options:
        -f {file name} File containing a DSL expression (see examples below). If the filename
           is a directory, all *.mlr files in that directory are loaded.
@@ -1504,6 +1509,9 @@
 
    1mput0m
        Usage: mlr put [options] {DSL expression}
+       Lets you use a domain-specific language to progamatically alter stream records.
+       See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
        Options:
        -f {file name} File containing a DSL expression (see examples below). If the filename
           is a directory, all *.mlr files in that directory are loaded.
@@ -3683,4 +3691,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-02-18                         4mMILLER24m(1)
+                                  2024-03-16                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 505cb8d08..9c78a653a 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-02-18
+.\"      Date: 2024-03-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-02-18" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-03-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -936,6 +936,7 @@ Notes about all other separators:
         csv      ","    N/A    "\en"
         csvlite  ","    N/A    "\en"
         dkvp     ","    "="    "\en"
+        gen      ","    N/A    "\en"
         json     N/A    N/A    N/A
         markdown " "    N/A    "\en"
         nidx     " "    N/A    "\en"
@@ -1314,6 +1315,10 @@ Options:
 .\}
 .nf
 Usage: mlr filter [options] {DSL expression}
+Lets you use a domain-specific language to progamatically filter which
+stream records will be output.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.
@@ -1899,6 +1904,9 @@ Options:
 .\}
 .nf
 Usage: mlr put [options] {DSL expression}
+Lets you use a domain-specific language to progamatically alter stream records.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.
diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go
index 1437c8b15..5ba42435b 100644
--- a/pkg/transformers/put_or_filter.go
+++ b/pkg/transformers/put_or_filter.go
@@ -52,6 +52,14 @@ func transformerPutOrFilterUsage(
 	verb string,
 ) {
 	fmt.Fprintf(o, "Usage: %s %s [options] {DSL expression}\n", "mlr", verb)
+	if verb == "put" {
+		fmt.Fprintf(o, "Lets you use a domain-specific language to progamatically alter stream records.\n")
+	} else if verb == "filter" {
+		fmt.Fprintf(o, "Lets you use a domain-specific language to progamatically filter which\n")
+		fmt.Fprintf(o, "stream records will be output.\n")
+	}
+	fmt.Fprintf(o, "See also: https://miller.readthedocs.io/en/latest/reference-verbs\n")
+	fmt.Fprintf(o, "\n")
 	fmt.Fprintf(o, "Options:\n")
 	fmt.Fprintf(o,
 		`-f {file name} File containing a DSL expression (see examples below). If the filename
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 95b4d3f14..33eed96d5 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -186,6 +186,10 @@ Options:
 ================================================================
 filter
 Usage: mlr filter [options] {DSL expression}
+Lets you use a domain-specific language to progamatically filter which
+stream records will be output.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.
@@ -661,6 +665,9 @@ Options:
 ================================================================
 put
 Usage: mlr put [options] {DSL expression}
+Lets you use a domain-specific language to progamatically alter stream records.
+See also: https://miller.readthedocs.io/en/latest/reference-verbs
+
 Options:
 -f {file name} File containing a DSL expression (see examples below). If the filename
    is a directory, all *.mlr files in that directory are loaded.

From a0bead409304a66dc2bdd1ea98e5db3c93ed3e5a Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 16 Mar 2024 17:19:05 -0400
Subject: [PATCH 146/456] miller 6.12.0

---
 docs/src/manpage.md                  | 2 +-
 docs/src/manpage.txt                 | 2 +-
 docs/src/operating-on-all-records.md | 2 --
 docs/src/reference-dsl-operators.md  | 8 --------
 man/manpage.txt                      | 2 +-
 man/mlr.1                            | 2 +-
 miller.spec                          | 5 ++++-
 pkg/version/version.go               | 2 +-
 8 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index f85e6bd57..032922f9f 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.11.0-dev.
+       manpage documents mlr 6.12.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 5f1695701..96de3b270 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -27,7 +27,7 @@
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.11.0-dev.
+       manpage documents mlr 6.12.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
diff --git a/docs/src/operating-on-all-records.md b/docs/src/operating-on-all-records.md
index 6663f1c18..668dcc367 100644
--- a/docs/src/operating-on-all-records.md
+++ b/docs/src/operating-on-all-records.md
@@ -274,8 +274,6 @@ array will have [null-gaps](reference-main-arrays.md) in it:
     "value": 54
   }
 ]
-[
-]
 
You can index `@records` by `@count` rather than `NR` to get a contiguous array: diff --git a/docs/src/reference-dsl-operators.md b/docs/src/reference-dsl-operators.md index 921a02913..1b1173444 100644 --- a/docs/src/reference-dsl-operators.md +++ b/docs/src/reference-dsl-operators.md @@ -109,8 +109,6 @@ However, in Miller 6 it has optional use for map traversal. Example:
 bar.baz
 bar.baz
-[
-]
 
This also works on the left-hand sides of assignment statements: @@ -157,8 +155,6 @@ A few caveats:
 6989
-[
-]
 
* However (awkwardly), if you want to use `.` for map-traversal as well as string-concatenation in the same statement, you'll need to insert parentheses, as the default associativity is left-to-right: @@ -170,8 +166,6 @@ A few caveats:
 (error)
-[
-]
 
@@ -181,6 +175,4 @@ A few caveats:
 
 GET -- api/check
-[
-]
 
diff --git a/man/manpage.txt b/man/manpage.txt index 5f1695701..96de3b270 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.11.0-dev. + manpage documents mlr 6.12.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index 9c78a653a..3d6bb2666 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.11.0-dev. +a special case.) This manpage documents mlr 6.12.0. .SH "EXAMPLES" .sp diff --git a/miller.spec b/miller.spec index 2ac4f6265..325dbe988 100644 --- a/miller.spec +++ b/miller.spec @@ -1,6 +1,6 @@ Summary: Name-indexed data processing tool Name: miller -Version: 6.11.0 +Version: 6.12.0 Release: 1%{?dist} License: BSD Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz @@ -36,6 +36,9 @@ make install %{_mandir}/man1/mlr.1* %changelog +* Sat Mar 16 2024 John Kerl - 6.12.0-1 +- 6.12.0 release + * Tue Jan 23 2024 John Kerl - 6.11.0-1 - 6.11.0 release diff --git a/pkg/version/version.go b/pkg/version/version.go index f3d43c46e..ffce1a19a 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. 
-var STRING string = "6.11.0-dev" +var STRING string = "6.12.0" From b37c3a5e56f55ee63c6d7f7c634918edf33cc755 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 16 Mar 2024 17:51:17 -0400 Subject: [PATCH 147/456] 6.12.0 doc link --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/release-docs.md.in | 1 + man/manpage.txt | 2 +- man/mlr.1 | 2 +- pkg/version/version.go | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 032922f9f..a7754adc6 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0. + manpage documents mlr 6.12.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 96de3b270..98b71fe68 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0. + manpage documents mlr 6.12.0-dev. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index bac3ef2ad..ebbb10a02 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -8,6 +8,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | 6.10.0 | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0) | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) | 6.9.0 | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0) | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) | diff --git a/man/manpage.txt b/man/manpage.txt index 96de3b270..98b71fe68 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0. + manpage documents mlr 6.12.0-dev. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index 3d6bb2666..c45d59de0 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.12.0. +a special case.) This manpage documents mlr 6.12.0-dev. .SH "EXAMPLES" .sp diff --git a/pkg/version/version.go b/pkg/version/version.go index ffce1a19a..1d3f25e49 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. -var STRING string = "6.12.0" +var STRING string = "6.12.0-dev" From f6e378c8dfa63f0b637196040d506198a56011c4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 16 Mar 2024 17:54:32 -0400 Subject: [PATCH 148/456] build previous --- docs/src/release-docs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index ca6404042..235c48f57 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -24,6 +24,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 
6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | 6.10.0 | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0) | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) | 6.9.0 | [Miller 6.9.0](https://miller.readthedocs.io/en/6.9.0) | [Nanosecond timestamps, ZSTD compression, improved data-error handling, and more](https://github.com/johnkerl/miller/releases/tag/v6.9.0) | From 5f36b22f3f88c23f923dfb68ae47f47e6f1c295d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 08:54:19 -0400 Subject: [PATCH 149/456] Bump actions/cache from 4.0.1 to 4.0.2 (#1532) Bumps [actions/cache](https://github.com/actions/cache) from 4.0.1 to 4.0.2. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/ab5e6d0c87105b4c9c2047343972218f562e4319...0c45773b623bea8c8e75f6c82b208c3cf94ea4f9) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 288e95a17..35982fb06 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 with: path: | ~/.cache/go-build From 417009d257eed48aeea4821ca8d6efa6be711565 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 09:12:57 -0400 Subject: [PATCH 150/456] Bump golang.org/x/term from 0.18.0 to 0.19.0 (#1536) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.18.0 to 0.19.0. - [Commits](https://github.com/golang/term/compare/v0.18.0...v0.19.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 1a003057a..5557a860b 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.18.0 - golang.org/x/term v0.18.0 + golang.org/x/sys v0.19.0 + golang.org/x/term v0.19.0 golang.org/x/text v0.14.0 ) diff --git a/go.sum b/go.sum index 3ae4002fb..30a3604a9 100644 --- a/go.sum +++ b/go.sum @@ -40,10 +40,10 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 03b8cce04897a732dce96cd064ecd5f8de93d91f Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 07:29:35 -0400 Subject: [PATCH 151/456] Bump github.com/klauspost/compress from 1.17.7 to 1.17.8 (#1538) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.7 to 1.17.8. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.7...v1.17.8) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 5557a860b..2db3a9b43 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.17.7 + github.com/klauspost/compress v1.17.8 github.com/lestrrat-go/strftime v1.0.6 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 diff --git a/go.sum b/go.sum index 30a3604a9..98897fced 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= 
-github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= From e714738a7db3c1075fa21233408bccada6014385 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 08:12:45 -0400 Subject: [PATCH 152/456] Fix typo in online help for `--no-jlistwrap` (#1541) * Add --no-auto-unsparsify flag * Fix typo in online help for `--no-jlistwrap` * Artifacts from `make dev` --- docs/src/data-diving-examples.md | 46 ++++++++++++++-------------- docs/src/manpage.md | 18 ++++++++--- docs/src/manpage.txt | 18 ++++++++--- docs/src/reference-main-flag-list.md | 3 +- docs/src/reference-verbs.md | 38 +++++++++++------------ docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 18 ++++++++--- man/mlr.1 | 12 +++++--- pkg/cli/option_parse.go | 11 ++++++- pkg/cli/option_types.go | 34 ++++++++++++++++++++ pkg/output/record_writer_csv.go | 2 +- pkg/output/record_writer_csvlite.go | 2 +- pkg/output/record_writer_dkvp.go | 2 +- pkg/output/record_writer_nidx.go | 2 +- pkg/output/record_writer_tsv.go | 2 +- pkg/output/record_writer_xtab.go | 2 +- 16 files changed, 143 insertions(+), 71 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index a7754adc6..a9620eefc 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,7 +19,9 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-4mMILLER24m(1)                                                            4mMILLER24m(1)
+MILLER(1)                                                            MILLER(1)
+
+
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -338,6 +340,10 @@ This is simply a copy of what you should see on running `man mlr` at a command p
                                 recreate missing headers.
        --lazy-quotes            Accepts quotes appearing in unquoted fields, and
                                 non-doubled quotes appearing in quoted fields.
+       --no-auto-unsparsify     For CSV/TSV output: if the record keys change from
+                                one row to another, emit a blank line and a new
+                                header line. This is non-compliant with RFC 4180 but
+                                is helpful for heterogeneous data.
        --no-implicit-csv-header or --no-implicit-tsv-header
                                 Opposite of `--implicit-csv-header`. This is the
                                 default anyway -- the main use is for the flags to
@@ -468,8 +474,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        --jvstack                Put one key-value pair per line for JSON output
                                 (multi-line output). This is the default for JSON
                                 output format.
-       --no-jlistwrap           Wrap JSON output in outermost `[ ]`. This is the
-                                default for JSON Lines output format.
+       --no-jlistwrap           Do not wrap JSON output in outermost `[ ]`. This is
+                                the default for JSON Lines output format.
        --no-jvstack             Put objects/arrays all on one line for JSON output.
                                 This is the default for JSON Lines output format.
 
@@ -809,7 +815,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "     "    N/A    "\n"
+               tsv      "  "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -3712,5 +3718,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-03-16                         4mMILLER24m(1)
+
+
+                                  2024-04-11                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 98b71fe68..65aa890cd 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,4 +1,6 @@ -4mMILLER24m(1) 4mMILLER24m(1) +MILLER(1) MILLER(1) + + 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -317,6 +319,10 @@ recreate missing headers. --lazy-quotes Accepts quotes appearing in unquoted fields, and non-doubled quotes appearing in quoted fields. + --no-auto-unsparsify For CSV/TSV output: if the record keys change from + one row to another, emit a blank line and a new + header line. This is non-compliant with RFC 4180 but + it helpful for heterogeneous data. --no-implicit-csv-header or --no-implicit-tsv-header Opposite of `--implicit-csv-header`. This is the default anyway -- the main use is for the flags to @@ -447,8 +453,8 @@ --jvstack Put one key-value pair per line for JSON output (multi-line output). This is the default for JSON output format. - --no-jlistwrap Wrap JSON output in outermost `[ ]`. This is the - default for JSON Lines output format. + --no-jlistwrap Do not wrap JSON output in outermost `[ ]`. This is + the default for JSON Lines output format. --no-jvstack Put objects/arrays all on one line for JSON output. This is the default for JSON Lines output format. @@ -788,7 +794,7 @@ markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -3691,4 +3697,6 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-03-16 4mMILLER24m(1) + + + 2024-04-11 MILLER(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 11f07f9af..e684ef1f6 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -123,6 +123,7 @@ These are flags which are applicable to CSV format. 
* `--headerless-csv-output or --ho or --headerless-tsv-output`: Print only CSV/TSV data lines; do not print CSV/TSV header lines. * `--implicit-csv-header or --headerless-csv-input or --hi or --implicit-tsv-header`: Use 1,2,3,... as field labels, rather than from line 1 of input files. Tip: combine with `label` to recreate missing headers. * `--lazy-quotes`: Accepts quotes appearing in unquoted fields, and non-doubled quotes appearing in quoted fields. +* `--no-auto-unsparsify`: For CSV/TSV output: if the record keys change from one row to another, emit a blank line and a new header line. This is non-compliant with RFC 4180 but it helpful for heterogeneous data. * `--no-implicit-csv-header or --no-implicit-tsv-header`: Opposite of `--implicit-csv-header`. This is the default anyway -- the main use is for the flags to `mlr join` if you have main file(s) which are headerless but you want to join in on a file which does have a CSV/TSV header. Then you could use `mlr --csv --implicit-csv-header join --no-implicit-csv-header -l your-join-in-with-header.csv ... your-headerless.csv`. * `--quote-all`: Force double-quoting of CSV fields. * `-N`: Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. @@ -234,7 +235,7 @@ These are flags which are applicable to JSON output format. * `--jlistwrap or --jl`: Wrap JSON output in outermost `[ ]`. This is the default for JSON output format. * `--jvquoteall`: Force all JSON values -- recursively into lists and object -- to string. * `--jvstack`: Put one key-value pair per line for JSON output (multi-line output). This is the default for JSON output format. -* `--no-jlistwrap`: Wrap JSON output in outermost `[ ]`. This is the default for JSON Lines output format. +* `--no-jlistwrap`: Do not wrap JSON output in outermost `[ ]`. This is the default for JSON Lines output format. * `--no-jvstack`: Put objects/arrays all on one line for JSON output. This is the default for JSON Lines output format. 
## Legacy flags diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 485343ee7..d6589c459 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3433,14 +3433,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3449,12 +3449,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3540,11 +3540,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3821,9 +3821,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index 98b71fe68..65aa890cd 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1,4 +1,6 @@
-4mMILLER24m(1)                                                            4mMILLER24m(1)
+MILLER(1)                                                            MILLER(1)
+
+
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -317,6 +319,10 @@
                                 recreate missing headers.
        --lazy-quotes            Accepts quotes appearing in unquoted fields, and
                                 non-doubled quotes appearing in quoted fields.
+       --no-auto-unsparsify     For CSV/TSV output: if the record keys change from
+                                one row to another, emit a blank line and a new
+                                header line. This is non-compliant with RFC 4180 but
+                                it is helpful for heterogeneous data.
        --no-implicit-csv-header or --no-implicit-tsv-header
                                 Opposite of `--implicit-csv-header`. This is the
                                 default anyway -- the main use is for the flags to
@@ -447,8 +453,8 @@
        --jvstack                Put one key-value pair per line for JSON output
                                 (multi-line output). This is the default for JSON
                                 output format.
-       --no-jlistwrap           Wrap JSON output in outermost `[ ]`. This is the
-                                default for JSON Lines output format.
+       --no-jlistwrap           Do not wrap JSON output in outermost `[ ]`. This is
+                                the default for JSON Lines output format.
        --no-jvstack             Put objects/arrays all on one line for JSON output.
                                 This is the default for JSON Lines output format.
 
@@ -788,7 +794,7 @@
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "     "    N/A    "\n"
+               tsv      "  "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -3691,4 +3697,6 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-03-16                         4mMILLER24m(1)
+
+
+                                  2024-04-11                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index c45d59de0..153ef86bd 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-03-16
+.\"      Date: 2024-04-11
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-03-16" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-04-11" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -390,6 +390,10 @@ These are flags which are applicable to CSV format.
                          recreate missing headers.
 --lazy-quotes            Accepts quotes appearing in unquoted fields, and
                          non-doubled quotes appearing in quoted fields.
+--no-auto-unsparsify     For CSV/TSV output: if the record keys change from
+                         one row to another, emit a blank line and a new
+                         header line. This is non-compliant with RFC 4180 but
+                         it is helpful for heterogeneous data.
 --no-implicit-csv-header or --no-implicit-tsv-header
                          Opposite of `--implicit-csv-header`. This is the
                          default anyway -- the main use is for the flags to
@@ -552,8 +556,8 @@ These are flags which are applicable to JSON output format.
 --jvstack                Put one key-value pair per line for JSON output
                          (multi-line output). This is the default for JSON
                          output format.
---no-jlistwrap           Wrap JSON output in outermost `[ ]`. This is the
-                         default for JSON Lines output format.
+--no-jlistwrap           Do not wrap JSON output in outermost `[ ]`. This is
+                         the default for JSON Lines output format.
 --no-jvstack             Put objects/arrays all on one line for JSON output.
                          This is the default for JSON Lines output format.
 .fi
diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index f4c455366..24f525fcc 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -460,7 +460,7 @@ var JSONOnlyFlagSection = FlagSection{
 
 		{
 			name: "--no-jlistwrap",
-			help: "Wrap JSON output in outermost `[ ]`. This is the default for JSON Lines output format.",
+			help: "Do not wrap JSON output in outermost `[ ]`. This is the default for JSON Lines output format.",
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.WriterOptions.WrapJSONOutputInOuterList = false
 				*pargi += 1
@@ -2392,6 +2392,15 @@ var CSVTSVOnlyFlagSection = FlagSection{
 			},
 		},
 
+		{
+			name: "--no-auto-unsparsify",
+			help: "For CSV/TSV output: if the record keys change from one row to another, emit a blank line and a new header line. This is non-compliant with RFC 4180 but it is helpful for heterogeneous data.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.WriterOptions.NoAutoUnsparsify = true
+				*pargi += 1
+			},
+		},
+
 		{
 			name:     "--implicit-csv-header",
 			altNames: []string{"--headerless-csv-input", "--hi", "--implicit-tsv-header"},
diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go
index 22f32658f..7cbece965 100644
--- a/pkg/cli/option_types.go
+++ b/pkg/cli/option_types.go
@@ -134,6 +134,40 @@ type TWriterOptions struct {
 	// (all but JSON) -- unless the user explicitly asks to suppress that.
 	AutoFlatten bool
 
+	// Default CSV/TSV:
+	//   a=1,b=2,c=3
+	//   a=4,b=5
+	// leads to
+	//   a,b,c
+	//   1,2,3
+	//   4,5, <-- note trailing empty field
+	// and
+	//   a=1,b=2,c=3
+	//   d=4,e=5
+	// leads to
+	//   fatal error
+	//
+	// With this flag:
+	//   a=1,b=2,c=3
+	//   a=4,b=5
+	// leads to
+	//   a,b,c
+	//   1,2,3
+	//
+	//   a,b
+	//   4,5
+	//
+	// and
+	//   a=1,b=2,c=3
+	//   d=4,e=5
+	// leads to
+	//   a,b,c
+	//   1,2,3
+	//
+	//   d,e
+	//   4,5
+	NoAutoUnsparsify bool
+
 	// For floating-point numbers: "" means use the Go default.
 	FPOFMT string
 
diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go
index 032a57f03..efedd64bf 100644
--- a/pkg/output/record_writer_csv.go
+++ b/pkg/output/record_writer_csv.go
@@ -46,8 +46,8 @@ func (writer *RecordWriterCSV) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 
diff --git a/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go
index e3ecf9196..280abf38d 100644
--- a/pkg/output/record_writer_csvlite.go
+++ b/pkg/output/record_writer_csvlite.go
@@ -32,8 +32,8 @@ func (writer *RecordWriterCSVLite) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 
diff --git a/pkg/output/record_writer_dkvp.go b/pkg/output/record_writer_dkvp.go
index d7a516955..79ea8de05 100644
--- a/pkg/output/record_writer_dkvp.go
+++ b/pkg/output/record_writer_dkvp.go
@@ -25,8 +25,8 @@ func (writer *RecordWriterDKVP) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 
diff --git a/pkg/output/record_writer_nidx.go b/pkg/output/record_writer_nidx.go
index b8a5573c1..ac599e3a7 100644
--- a/pkg/output/record_writer_nidx.go
+++ b/pkg/output/record_writer_nidx.go
@@ -24,8 +24,8 @@ func (writer *RecordWriterNIDX) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 
diff --git a/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go
index 40f89350a..0e845be79 100644
--- a/pkg/output/record_writer_tsv.go
+++ b/pkg/output/record_writer_tsv.go
@@ -40,8 +40,8 @@ func (writer *RecordWriterTSV) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 
diff --git a/pkg/output/record_writer_xtab.go b/pkg/output/record_writer_xtab.go
index cd014ddce..bfacdde95 100644
--- a/pkg/output/record_writer_xtab.go
+++ b/pkg/output/record_writer_xtab.go
@@ -48,8 +48,8 @@ func (writer *RecordWriterXTAB) Write(
 	bufferedOutputStream *bufio.Writer,
 	outputIsStdout bool,
 ) error {
-	// End of record stream: nothing special for this output format
 	if outrec == nil {
+		// End of record stream: nothing special for this output format
 		return nil
 	}
 

From 12480c4ab5bc2b321acb94f2e19f6e646b53e23b Mon Sep 17 00:00:00 2001
From: camcui <166618273+camcui@users.noreply.github.com>
Date: Fri, 12 Apr 2024 21:38:31 +0800
Subject: [PATCH 153/456] chore: fix function name in comment (#1543)

Signed-off-by: camcui 
---
 pkg/bifs/regex.go          | 2 +-
 pkg/dsl/cst/hofs.go        | 2 +-
 pkg/mlrval/mlrval_infer.go | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go
index 331b07e76..25e0fe5c9 100644
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@@ -201,7 +201,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
 	return mlrval.FromBool(boolOutput), captures
 }
 
-// BIF_string_matches_regexp implements the !=~ operator.
+// BIF_string_does_not_match_regexp implements the !=~ operator.
 func BIF_string_does_not_match_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Mlrval, captures []string) {
 	output, captures := BIF_string_matches_regexp(input1, input2)
 	if output.IsBool() {
diff --git a/pkg/dsl/cst/hofs.go b/pkg/dsl/cst/hofs.go
index 87edc01b2..a7d94a7c2 100644
--- a/pkg/dsl/cst/hofs.go
+++ b/pkg/dsl/cst/hofs.go
@@ -875,7 +875,7 @@ func sortAF(
 	return mlrval.FromArray(outputArray)
 }
 
-// sortAF implements sort on arrays with callback UDF.
+// sortMF implements sort on maps with callback UDF.
 func sortMF(
 	input1 *mlrval.Mlrval,
 	input2 *mlrval.Mlrval,
diff --git a/pkg/mlrval/mlrval_infer.go b/pkg/mlrval/mlrval_infer.go
index ada3792bd..5be0abef2 100644
--- a/pkg/mlrval/mlrval_infer.go
+++ b/pkg/mlrval/mlrval_infer.go
@@ -34,7 +34,7 @@ func SetInferrerOctalAsInt() {
 	packageLevelInferrer = inferWithOctalAsInt
 }
 
-// SetInferrerStringOnly is for mlr -A.
+// SetInferrerIntAsFloat is for mlr -F.
 func SetInferrerIntAsFloat() {
 	packageLevelInferrer = inferWithIntAsFloat
 }

From cb5265e7961d0ad49462ff5914c1272bdffa9f06 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 19 Apr 2024 09:32:39 -0400
Subject: [PATCH 154/456] Bump actions/upload-artifact from 4.3.1 to 4.3.2
 (#1547)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.1 to 4.3.2.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/5d5d22a31266ced268874388b861e4b58bb5c2f3...1746f4ab65b179e0ea60a494b83293b640dd5bba)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 9f87a9b99..1fe754faf 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+    - uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From b3b097c40d6d75d0e6b525976f71e3684be0e61d Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 21 Apr 2024 21:44:07 -0400
Subject: [PATCH 155/456] Try to build readthedocs `.epub` and `.pdf` (#1548)

---
 .readthedocs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 8ad896314..57e91bb94 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -17,3 +17,5 @@ python:
 
 mkdocs:
   configuration: docs/mkdocs.yml
+
+formats: all

From 004fed3279200279b6ed2cdd2c2fc6bf96e59c5e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Apr 2024 08:23:06 -0400
Subject: [PATCH 156/456] Bump actions/checkout from 4.1.2 to 4.1.3 (#1550)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.2 to 4.1.3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/9bb56186c3b09b4f86b1c65136769dd318469633...1d96c772d19495a3b5c517cd2bc0cb401ea0529f)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 7e4d9296f..efe4d986f 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+      uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 0e7ab4ed1..a59301976 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 1fe754faf..483a2c359 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+    - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
 
     - name: Set up Go
       uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 35982fb06..8359527df 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
         with:
           fetch-depth: 0
 

From 0e3061996682bf8096ae97c1135cccf28ef64496 Mon Sep 17 00:00:00 2001
From: forcedebug <167591285+forcedebug@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:16:30 +0800
Subject: [PATCH 157/456] Fix mismatched method names in comments (#1549)

Signed-off-by: forcedebug 
---
 pkg/transformers/subs.go    | 4 ++--
 pkg/transformers/summary.go | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/transformers/subs.go b/pkg/transformers/subs.go
index e57469d91..578def1c0 100644
--- a/pkg/transformers/subs.go
+++ b/pkg/transformers/subs.go
@@ -308,7 +308,7 @@ func (tr *TransformerSubs) fieldAcceptorByNames(
 	return tr.fieldNamesSet[fieldName]
 }
 
-// fieldAcceptorByNames implements -r
+// fieldAcceptorByRegexes implements -r
 func (tr *TransformerSubs) fieldAcceptorByRegexes(
 	fieldName string,
 ) bool {
@@ -320,7 +320,7 @@ func (tr *TransformerSubs) fieldAcceptorByRegexes(
 	return false
 }
 
-// fieldAcceptorByNames implements -a
+// fieldAcceptorAll implements -a
 func (tr *TransformerSubs) fieldAcceptorAll(
 	fieldName string,
 ) bool {
diff --git a/pkg/transformers/summary.go b/pkg/transformers/summary.go
index 1b1db3df4..56d95b2d4 100644
--- a/pkg/transformers/summary.go
+++ b/pkg/transformers/summary.go
@@ -427,7 +427,7 @@ func (tr *TransformerSummary) emitTransposed(
 
 // ----------------------------------------------------------------
 
-// maybeEmitPercentileNameTransposed is a helper method for emitTransposed,
+// maybeEmitAccumulatorTransposed is a helper method for emitTransposed,
 // for "count", "sum", "mean", etc.
 func (tr *TransformerSummary) maybeEmitAccumulatorTransposed(
 	oracs *list.List, // list of *types.RecordAndContext

From 97debc3030428552b718448b408ac9881c1f8bd8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Apr 2024 08:09:40 -0400
Subject: [PATCH 158/456] Bump actions/upload-artifact from 4.3.2 to 4.3.3
 (#1551)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.2 to 4.3.3.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/1746f4ab65b179e0ea60a494b83293b640dd5bba...65462800fd760344b1a7b4382951275a0abb4808)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 483a2c359..6ddffe51f 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba
+    - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 4ee3a59aabeb102f125e1256f8cf69a8f869c5a9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 09:33:33 -0400
Subject: [PATCH 159/456] Bump actions/checkout from 4.1.3 to 4.1.4 (#1552)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.3 to 4.1.4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/1d96c772d19495a3b5c517cd2bc0cb401ea0529f...0ad4b8fadaa221de15dcec353f45205ec38ea70b)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index efe4d986f..058bfdc75 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
+      uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index a59301976..c37d980fa 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 6ddffe51f..812f75b9d 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
+    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
 
     - name: Set up Go
       uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8359527df..dd6832c28 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
         with:
           fetch-depth: 0
 

From 4e6f747a23ad6c24a57d8e3f64f395d69360ed03 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 3 May 2024 07:56:16 -0400
Subject: [PATCH 160/456] Bump actions/setup-go from 5.0.0 to 5.0.1 (#1553)

Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.0.0 to 5.0.1.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/0c52d547c9bc32b1aa3301fd7a9cb496313a4491...cdcb36043654635271a94b9a6d1392de5bb323a7)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml      | 2 +-
 .github/workflows/release.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 812f75b9d..5b1d51546 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -18,7 +18,7 @@ jobs:
     - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
 
     - name: Set up Go
-      uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491
+      uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7
       with:
         go-version: 1.19
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index dd6832c28..14bb5227c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ${{ matrix.platform }}
     steps:
       - name: Set up Go
-        uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491
+        uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7
         with:
           go-version: ${{ env.GO_VERSION }}
         id: go

From 729365d759e46d0f4065f7046d0d8851b77e2dc9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 08:10:55 -0400
Subject: [PATCH 161/456] Bump golang.org/x/sys from 0.19.0 to 0.20.0 (#1554)

Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.19.0 to 0.20.0.
- [Commits](https://github.com/golang/sys/compare/v0.19.0...v0.20.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 2db3a9b43..104059338 100644
--- a/go.mod
+++ b/go.mod
@@ -26,7 +26,7 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
-	golang.org/x/sys v0.19.0
+	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.19.0
 	golang.org/x/text v0.14.0
 )
diff --git a/go.sum b/go.sum
index 98897fced..10ce58b3a 100644
--- a/go.sum
+++ b/go.sum
@@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
-golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q=
 golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
 golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=

From f93089be3f9082a3b54092e2ff4f836fc84eba98 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 08:11:20 -0400
Subject: [PATCH 162/456] Bump golang.org/x/text from 0.14.0 to 0.15.0 (#1556)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.14.0 to 0.15.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.14.0...v0.15.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 104059338..9a058523f 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.19.0
-	golang.org/x/text v0.14.0
+	golang.org/x/text v0.15.0
 )
 
 require (
diff --git a/go.sum b/go.sum
index 10ce58b3a..7b3f3c7fd 100644
--- a/go.sum
+++ b/go.sum
@@ -44,8 +44,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q=
 golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From e0e7f3c7a95639ca878d7bf855524b2569a3eadc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 23:04:08 -0400
Subject: [PATCH 163/456] Bump golang.org/x/term from 0.19.0 to 0.20.0 (#1555)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.19.0 to 0.20.0.
- [Commits](https://github.com/golang/term/compare/v0.19.0...v0.20.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 9a058523f..bf4fbf827 100644
--- a/go.mod
+++ b/go.mod
@@ -27,7 +27,7 @@ require (
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.20.0
-	golang.org/x/term v0.19.0
+	golang.org/x/term v0.20.0
 	golang.org/x/text v0.15.0
 )
 
diff --git a/go.sum b/go.sum
index 7b3f3c7fd..1ae245763 100644
--- a/go.sum
+++ b/go.sum
@@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q=
-golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
+golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
 golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
 golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From 956e65c118e89047228b1cb9580ea7009abab689 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 7 May 2024 08:32:58 -0400
Subject: [PATCH 164/456] Bump actions/checkout from 4.1.4 to 4.1.5 (#1557)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.4 to 4.1.5.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/0ad4b8fadaa221de15dcec353f45205ec38ea70b...44c2b7a8a4ea60a981eaca3cf939b5f4305c123b)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 058bfdc75..97d314c49 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
+      uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index c37d980fa..619ed21bf 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
+        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 5b1d51546..1e3bc3093 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
+    - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
 
     - name: Set up Go
       uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 14bb5227c..ba435d577 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
+        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
         with:
           fetch-depth: 0
 

From 5ac48516f71560357c96fa9d09f4c872cc3eff73 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Thu, 9 May 2024 18:39:44 -0400
Subject: [PATCH 165/456] Add a `stat` DSL function (#1560)

* Add a `stat` DSL function [WIP]

* artifacts from `make dev`

* regression test
---
 docs/src/manpage.md                         | 16 ++++++++++++--
 docs/src/manpage.txt                        | 16 ++++++++++++--
 docs/src/reference-dsl-builtin-functions.md | 17 ++++++++++++++-
 man/manpage.txt                             | 16 ++++++++++++--
 man/mlr.1                                   | 24 ++++++++++++++++++---
 pkg/bifs/system.go                          | 22 +++++++++++++++++++
 pkg/dsl/cst/builtin_function_manager.go     | 17 +++++++++++++++
 pkg/mlrval/mlrval_new.go                    |  9 ++++++++
 test/cases/dsl-stat/0001/cmd                |  1 +
 test/cases/dsl-stat/0001/experr             |  0
 test/cases/dsl-stat/0001/expout             | 12 +++++++++++
 test/cases/dsl-stat/0001/input.csv          |  3 +++
 test/cases/dsl-stat/0001/mlr                |  3 +++
 13 files changed, 146 insertions(+), 10 deletions(-)
 create mode 100644 test/cases/dsl-stat/0001/cmd
 create mode 100644 test/cases/dsl-stat/0001/experr
 create mode 100644 test/cases/dsl-stat/0001/expout
 create mode 100644 test/cases/dsl-stat/0001/input.csv
 create mode 100644 test/cases/dsl-stat/0001/mlr

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index a9620eefc..3321f1798 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -225,7 +225,7 @@ MILLER(1)                                                            MILLER(1)
        percentiles pow qnorm reduce regextract regextract_or_else rightpad round
        roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
        select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
+       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stat stddev strfntime
        strfntime_local strftime strftime_local string strip strlen strmatch strmatchx
        strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
        sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
@@ -2990,6 +2990,18 @@ MILLER(1)                                                            MILLER(1)
        Example:
        ssub("abc.def", ".", "X") gives "abcXdef"
 
+   1mstat0m
+        (class=system #args=1) Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value.
+       Examples:
+       stat("./mlr") gives {
+         "name": "mlr",
+         "size": 38391584,
+         "mode": 0755,
+         "modtime": 1715207874,
+         "isdir": false
+       }
+       stat("./mlr")["size"] gives 38391584
+
    1mstddev0m
         (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
@@ -3720,5 +3732,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2024-04-11                         MILLER(1)
+                                  2024-05-09                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 65aa890cd..5ce10b960 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -204,7 +204,7 @@ MILLER(1) MILLER(1) percentiles pow qnorm reduce regextract regextract_or_else rightpad round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita - splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime + splitax splitkv splitkvx splitnv splitnvx sqrt ssub stat stddev strfntime strfntime_local strftime strftime_local string strip strlen strmatch strmatchx strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper @@ -2969,6 +2969,18 @@ MILLER(1) MILLER(1) Example: ssub("abc.def", ".", "X") gives "abcXdef" + 1mstat0m + (class=system #args=1) Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value. + Examples: + stat("./mlr") gives { + "name": "mlr", + "size": 38391584, + "mode": 0755, + "modtime": 1715207874, + "isdir": false + } + stat("./mlr")["size"] gives 38391584 + 1mstddev0m (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types. Example: @@ -3699,4 +3711,4 @@ MILLER(1) MILLER(1) - 2024-04-11 MILLER(1) + 2024-05-09 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index dbfafc04f..529eddd77 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -76,7 +76,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [strmatch](#strmatch), [strmatchx](#strmatchx), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). -* [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). +* [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [stat](#stat), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1502,6 +1502,21 @@ os (class=system #args=0) Returns the operating-system name as a string. +### stat +
+stat  (class=system #args=1) Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value.
+Examples:
+stat("./mlr") gives {
+  "name": "mlr",
+  "size": 38391584,
+  "mode": 0755,
+  "modtime": 1715207874,
+  "isdir": false
+}
+stat("./mlr")["size"] gives 38391584
+
+ + ### system
 system  (class=system #args=1) Run command string, yielding its stdout minus final carriage return.
diff --git a/man/manpage.txt b/man/manpage.txt
index 65aa890cd..5ce10b960 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -204,7 +204,7 @@ MILLER(1)                                                            MILLER(1)
        percentiles pow qnorm reduce regextract regextract_or_else rightpad round
        roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
        select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
+       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stat stddev strfntime
        strfntime_local strftime strftime_local string strip strlen strmatch strmatchx
        strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
        sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
@@ -2969,6 +2969,18 @@ MILLER(1)                                                            MILLER(1)
        Example:
        ssub("abc.def", ".", "X") gives "abcXdef"
 
+   1mstat0m
+        (class=system #args=1) Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value.
+       Examples:
+       stat("./mlr") gives {
+         "name": "mlr",
+         "size": 38391584,
+         "mode": 0755,
+         "modtime": 1715207874,
+         "isdir": false
+       }
+       stat("./mlr")["size"] gives 38391584
+
    1mstddev0m
         (class=stats #args=1) Returns the sample standard deviation of values in an array or map. Returns empty string AKA void for array/map of length less than two; returns error for non-array/non-map types.
        Example:
@@ -3699,4 +3711,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2024-04-11                         MILLER(1)
+                                  2024-05-09                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 153ef86bd..d660b99a8 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-04-11
+.\"      Date: 2024-05-09
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-04-11" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-05-09" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -251,7 +251,7 @@ nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
 percentiles pow qnorm reduce regextract regextract_or_else rightpad round
 roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
 select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
+splitax splitkv splitkvx splitnv splitnvx sqrt ssub stat stddev strfntime
 strfntime_local strftime strftime_local string strip strlen strmatch strmatchx
 strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
 sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
@@ -4602,6 +4602,24 @@ ssub("abc.def", ".", "X") gives "abcXdef"
 .fi
 .if n \{\
 .RE
+.SS "stat"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=system #args=1) Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value.
+Examples:
+stat("./mlr") gives {
+  "name": "mlr",
+  "size": 38391584,
+  "mode": 0755,
+  "modtime": 1715207874,
+  "isdir": false
+}
+stat("./mlr")["size"] gives 38391584
+.fi
+.if n \{\
+.RE
 .SS "stddev"
 .if n \{\
 .RS 0
diff --git a/pkg/bifs/system.go b/pkg/bifs/system.go
index d56f0bb66..e2044b4b8 100644
--- a/pkg/bifs/system.go
+++ b/pkg/bifs/system.go
@@ -102,3 +102,25 @@ func BIF_exec(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval {
 	outputString := strings.TrimRight(string(outputBytes), "\n")
 	return mlrval.FromString(outputString)
 }
+
+// BIF_stat calls os.Stat on the path in input1 and returns a map with keys
+// "name", "size", "mode" (the os.FileMode value, printed in octal), "modtime"
+// (epoch seconds), and "isdir". A failed os.Stat is returned as an error.
+func BIF_stat(input1 *mlrval.Mlrval) *mlrval.Mlrval {
+	if !input1.IsStringOrVoid() {
+		return mlrval.FromNotStringError("stat", input1)
+	}
+	path := input1.AcquireStringValue()
+	fileInfo, err := os.Stat(path)
+	if err != nil {
+		return mlrval.FromError(err)
+	}
+
+	output := mlrval.NewMlrmap()
+	output.PutReference("name", mlrval.FromString(fileInfo.Name()))
+	output.PutReference("size", mlrval.FromInt(fileInfo.Size()))
+	output.PutReference("mode", mlrval.FromIntShowingOctal(int64(fileInfo.Mode())))
+	output.PutReference("modtime", mlrval.FromInt(fileInfo.ModTime().UTC().Unix()))
+	output.PutReference("isdir", mlrval.FromBool(fileInfo.IsDir()))
+	return mlrval.FromMap(output)
+}
diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go
index 90f854fe0..ec4bfa980 100644
--- a/pkg/dsl/cst/builtin_function_manager.go
+++ b/pkg/dsl/cst/builtin_function_manager.go
@@ -2487,6 +2487,23 @@ Run a command via executable, path, args and environment, yielding its stdout mi
 			variadicFunc: bifs.BIF_exec,
 		},
 
+		{
+			name:      "stat",
+			class:     FUNC_CLASS_SYSTEM,
+			help:      `Returns a map containing information about the provided path: "name" with string value, "size" as decimal int value, "mode" as octal int value, "modtime" as int-valued epoch seconds, and "isdir" as boolean value.`,
+			unaryFunc: bifs.BIF_stat,
+			examples: []string{
+				`stat("./mlr") gives {`,
+				`  "name": "mlr",`,
+				`  "size": 38391584,`,
+				`  "mode": 0755,`,
+				`  "modtime": 1715207874,`,
+				`  "isdir": false`,
+				`}`,
+				`stat("./mlr")["size"] gives 38391584`,
+			},
+		},
+
 		{
 			name:     "version",
 			class:    FUNC_CLASS_SYSTEM,
diff --git a/pkg/mlrval/mlrval_new.go b/pkg/mlrval/mlrval_new.go
index cb548c3d0..c4109af26 100644
--- a/pkg/mlrval/mlrval_new.go
+++ b/pkg/mlrval/mlrval_new.go
@@ -197,6 +197,15 @@ func FromInt(input int64) *Mlrval {
 	}
 }
 
+// FromIntShowingOctal returns an MT_INT Mlrval for input whose print
+// representation is input in octal with a leading "0", e.g. 493 as "0755".
+func FromIntShowingOctal(input int64) *Mlrval {
+	octal := fmt.Sprintf("0%o", input)
+	return &Mlrval{
+		mvtype: MT_INT, printrepValid: true, printrep: octal, intf: input,
+	}
+}
+
 // TryFromIntString is used by the mlrval Formatter (fmtnum DSL function,
 // format-values verb, etc).  Each mlrval has printrep and a printrepValid for
 // its original string, then a type-code like MT_INT or MT_FLOAT, and
diff --git a/test/cases/dsl-stat/0001/cmd b/test/cases/dsl-stat/0001/cmd
new file mode 100644
index 000000000..94b141d0d
--- /dev/null
+++ b/test/cases/dsl-stat/0001/cmd
@@ -0,0 +1 @@
+mlr --icsv --ojson put -f ${CASEDIR}/mlr ${CASEDIR}/input.csv
diff --git a/test/cases/dsl-stat/0001/experr b/test/cases/dsl-stat/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/dsl-stat/0001/expout b/test/cases/dsl-stat/0001/expout
new file mode 100644
index 000000000..dcb25f94b
--- /dev/null
+++ b/test/cases/dsl-stat/0001/expout
@@ -0,0 +1,12 @@
+[
+{
+  "path": "test/cases/dsl-stat/0001/input.csv",
+  "name": "input.csv",
+  "isdir": false
+},
+{
+  "path": "test/cases/dsl-stat/0001/",
+  "name": "0001",
+  "isdir": true
+}
+]
diff --git a/test/cases/dsl-stat/0001/input.csv b/test/cases/dsl-stat/0001/input.csv
new file mode 100644
index 000000000..d2731b359
--- /dev/null
+++ b/test/cases/dsl-stat/0001/input.csv
@@ -0,0 +1,3 @@
+path
+test/cases/dsl-stat/0001/input.csv
+test/cases/dsl-stat/0001/
diff --git a/test/cases/dsl-stat/0001/mlr b/test/cases/dsl-stat/0001/mlr
new file mode 100644
index 000000000..195f1c6a7
--- /dev/null
+++ b/test/cases/dsl-stat/0001/mlr
@@ -0,0 +1,3 @@
+s = stat($path);
+$name  = s["name"];
+$isdir = s["isdir"];

From 16ab1991948213f88f66027d1a7f92e93e183b11 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 11 May 2024 15:55:27 -0400
Subject: [PATCH 166/456] Add `mad` accumulator for the `stats1` verb
 (#1561)

* Add `mad` accumulator for the `stats1` verb

* regression files

* make dev output
---
 docs/src/data-diving-examples.md              | 46 +++++++++----------
 docs/src/manpage.md                           | 12 ++---
 docs/src/manpage.txt                          | 12 ++---
 docs/src/reference-verbs.md                   | 40 ++++++++--------
 docs/src/two-pass-algorithms.md               |  4 +-
 man/manpage.txt                               | 12 ++---
 man/mlr.1                                     |  6 ++-
 pkg/transformers/utils/stats1_accumulators.go | 46 +++++++++++++++++++
 test/cases/cli-help/0001/expout               |  2 +
 test/cases/verb-stats1/0001/cmd               |  2 +-
 test/cases/verb-stats1/0001/expout            |  3 ++
 11 files changed, 117 insertions(+), 68 deletions(-)

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 39738f193..100716ec2 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -160,11 +160,11 @@ CITRUS COUNTY       1332.9                 79974.9                483785.1
   stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
 
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 3321f1798..6f4ab1bbd 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,9 +19,7 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -815,7 +813,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1430,6 +1428,7 @@ MILLER(1)                                                            MILLER(1)
          antimode Find least-frequently-occurring values for fields; first-found wins tie
          sum      Compute sums of specified fields
          mean     Compute averages (sample means) of specified fields
+         mad      Compute mean absolute deviation
          var      Compute sample variance of specified fields
          stddev   Compute sample standard deviation of specified fields
          meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -1928,6 +1927,7 @@ MILLER(1)                                                            MILLER(1)
          antimode Find least-frequently-occurring values for fields; first-found wins tie
          sum      Compute sums of specified fields
          mean     Compute averages (sample means) of specified fields
+         mad      Compute mean absolute deviation
          var      Compute sample variance of specified fields
          stddev   Compute sample standard deviation of specified fields
          meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -3730,7 +3730,5 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-05-09                         MILLER(1)
+                                  2024-05-11                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 5ce10b960..199946d45 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,6 +1,4 @@ -MILLER(1) MILLER(1) - - +4mMILLER24m(1) 4mMILLER24m(1) 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -794,7 +792,7 @@ MILLER(1) MILLER(1) markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -1409,6 +1407,7 @@ MILLER(1) MILLER(1) antimode Find least-frequently-occurring values for fields; first-found wins tie sum Compute sums of specified fields mean Compute averages (sample means) of specified fields + mad Compute mean absolute deviation var Compute sample variance of specified fields stddev Compute sample standard deviation of specified fields meaneb Estimate error bars for averages (assuming no sample autocorrelation) @@ -1907,6 +1906,7 @@ MILLER(1) MILLER(1) antimode Find least-frequently-occurring values for fields; first-found wins tie sum Compute sums of specified fields mean Compute averages (sample means) of specified fields + mad Compute mean absolute deviation var Compute sample variance of specified fields stddev Compute sample standard deviation of specified fields meaneb Estimate error bars for averages (assuming no sample autocorrelation) @@ -3709,6 +3709,4 @@ MILLER(1) MILLER(1) MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - - - 2024-05-09 MILLER(1) + 2024-05-11 4mMILLER24m(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index d6589c459..a90f0890f 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -2093,6 +2093,7 @@ Options: antimode Find least-frequently-occurring values for fields; first-found wins tie sum Compute sums of specified fields mean Compute averages (sample means) of specified fields + mad Compute mean absolute deviation 
var Compute sample variance of specified fields stddev Compute sample standard deviation of specified fields meaneb Estimate error bars for averages (assuming no sample autocorrelation) @@ -3266,6 +3267,7 @@ Options: antimode Find least-frequently-occurring values for fields; first-found wins tie sum Compute sums of specified fields mean Compute averages (sample means) of specified fields + mad Compute mean absolute deviation var Compute sample variance of specified fields stddev Compute sample standard deviation of specified fields meaneb Estimate error bars for averages (assuming no sample autocorrelation) @@ -3433,14 +3435,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3449,12 +3451,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3540,11 +3542,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3821,9 +3823,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/man/manpage.txt b/man/manpage.txt
index 5ce10b960..199946d45 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1,6 +1,4 @@
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -794,7 +792,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1409,6 +1407,7 @@ MILLER(1)                                                            MILLER(1)
          antimode Find least-frequently-occurring values for fields; first-found wins tie
          sum      Compute sums of specified fields
          mean     Compute averages (sample means) of specified fields
+         mad      Compute mean absolute deviation
          var      Compute sample variance of specified fields
          stddev   Compute sample standard deviation of specified fields
          meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -1907,6 +1906,7 @@ MILLER(1)                                                            MILLER(1)
          antimode Find least-frequently-occurring values for fields; first-found wins tie
          sum      Compute sums of specified fields
          mean     Compute averages (sample means) of specified fields
+         mad      Compute mean absolute deviation
          var      Compute sample variance of specified fields
          stddev   Compute sample standard deviation of specified fields
          meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -3709,6 +3709,4 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-05-09                         MILLER(1)
+                                  2024-05-11                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index d660b99a8..f5b66e015 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-05-09
+.\"      Date: 2024-05-11
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-05-09" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-05-11" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1778,6 +1778,7 @@ Options:
   antimode Find least-frequently-occurring values for fields; first-found wins tie
   sum      Compute sums of specified fields
   mean     Compute averages (sample means) of specified fields
+  mad      Compute mean absolute deviation
   var      Compute sample variance of specified fields
   stddev   Compute sample standard deviation of specified fields
   meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -2408,6 +2409,7 @@ Options:
   antimode Find least-frequently-occurring values for fields; first-found wins tie
   sum      Compute sums of specified fields
   mean     Compute averages (sample means) of specified fields
+  mad      Compute mean absolute deviation
   var      Compute sample variance of specified fields
   stddev   Compute sample standard deviation of specified fields
   meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
diff --git a/pkg/transformers/utils/stats1_accumulators.go b/pkg/transformers/utils/stats1_accumulators.go
index 02756a9a3..3e8aadd94 100644
--- a/pkg/transformers/utils/stats1_accumulators.go
+++ b/pkg/transformers/utils/stats1_accumulators.go
@@ -72,6 +72,11 @@ var stats1AccumulatorInfos []stats1AccumulatorInfo = []stats1AccumulatorInfo{
 		"Compute averages (sample means) of specified fields",
 		NewStats1MeanAccumulator,
 	},
+	{
+		"mad",
+		"Compute mean absolute deviation",
+		NewStats1MeanAbsDevAccumulator,
+	},
 
 	{
 		"var",
@@ -504,6 +509,47 @@ func (acc *Stats1MeanAccumulator) Reset() {
 	acc.count = 0
 }
 
+// ----------------------------------------------------------------
+type Stats1MeanAbsDevAccumulator struct {
+	samples []*mlrval.Mlrval
+}
+
+func NewStats1MeanAbsDevAccumulator() IStats1Accumulator {
+	return &Stats1MeanAbsDevAccumulator{
+		samples: make([]*mlrval.Mlrval, 0, 1000),
+	}
+}
+func (acc *Stats1MeanAbsDevAccumulator) Ingest(value *mlrval.Mlrval) {
+	if value.IsNumeric() {
+		acc.samples = append(acc.samples, value)
+	}
+}
+func (acc *Stats1MeanAbsDevAccumulator) Emit() *mlrval.Mlrval {
+	n := len(acc.samples)
+	if n == 0 {
+		return mlrval.VOID
+	}
+	mn := mlrval.FromInt(int64(n))
+
+	mean := mlrval.FromInt(0)
+	for i := 0; i < n; i++ {
+		mean = bifs.BIF_plus_binary(mean, acc.samples[i])
+	}
+	mean = bifs.BIF_divide(mean, mn)
+
+	meanAbsDev := mlrval.FromInt(0)
+	for i := 0; i < n; i++ {
+		diff := bifs.BIF_minus_binary(mean, acc.samples[i])
+		meanAbsDev = bifs.BIF_plus_binary(meanAbsDev, bifs.BIF_abs(diff))
+	}
+	meanAbsDev = bifs.BIF_divide(meanAbsDev, mn)
+
+	return meanAbsDev
+}
+func (acc *Stats1MeanAbsDevAccumulator) Reset() {
+	acc.samples = make([]*mlrval.Mlrval, 0, 1000)
+}
+
 // ----------------------------------------------------------------
 type Stats1MinAccumulator struct {
 	min *mlrval.Mlrval
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 33eed96d5..6d6cdea85 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -555,6 +555,7 @@ Options:
   antimode Find least-frequently-occurring values for fields; first-found wins tie
   sum      Compute sums of specified fields
   mean     Compute averages (sample means) of specified fields
+  mad      Compute mean absolute deviation
   var      Compute sample variance of specified fields
   stddev   Compute sample standard deviation of specified fields
   meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
@@ -1075,6 +1076,7 @@ Options:
   antimode Find least-frequently-occurring values for fields; first-found wins tie
   sum      Compute sums of specified fields
   mean     Compute averages (sample means) of specified fields
+  mad      Compute mean absolute deviation
   var      Compute sample variance of specified fields
   stddev   Compute sample standard deviation of specified fields
   meaneb   Estimate error bars for averages (assuming no sample autocorrelation)
diff --git a/test/cases/verb-stats1/0001/cmd b/test/cases/verb-stats1/0001/cmd
index 1e5931d0f..93753529c 100644
--- a/test/cases/verb-stats1/0001/cmd
+++ b/test/cases/verb-stats1/0001/cmd
@@ -1 +1 @@
-mlr --oxtab stats1 -a mean,sum,count,min,max,antimode,mode -f i,x,y test/input/abixy
+mlr --oxtab stats1 -a mean,sum,count,min,max,antimode,mode,mad -f i,x,y test/input/abixy
diff --git a/test/cases/verb-stats1/0001/expout b/test/cases/verb-stats1/0001/expout
index e99cdf2b0..8c52b5166 100644
--- a/test/cases/verb-stats1/0001/expout
+++ b/test/cases/verb-stats1/0001/expout
@@ -5,6 +5,7 @@ i_min      1
 i_max      10
 i_antimode 1
 i_mode     1
+i_mad      2.50000000
 x_mean     0.45362938
 x_sum      4.53629384
 x_count    10
@@ -12,6 +13,7 @@ x_min      0.03144188
 x_max      0.75867996
 x_antimode 0.34679014
 x_mode     0.34679014
+x_mad      0.17005656
 y_mean     0.59445424
 y_sum      5.94454242
 y_count    10
@@ -19,3 +21,4 @@ y_min      0.13418874
 y_max      0.97618139
 y_antimode 0.72680286
 y_mode     0.72680286
+y_mad      0.25930133

From cf6a80af4dedc97353e50b495d7fceeb8faab6c9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 13 May 2024 08:46:57 -0400
Subject: [PATCH 167/456] Bump goreleaser/goreleaser-action from 5.0.0 to 5.1.0
 (#1563)

Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 5.0.0 to 5.1.0.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/7ec5c2b0c6cdda6e8bbb49444bc797dd33d74dd8...5742e2a039330cbb23ebf35f046f814d4c6ff811)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ba435d577..d5128de26 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -40,7 +40,7 @@ jobs:
 
       # https://goreleaser.com/ci/actions/
       - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@7ec5c2b0c6cdda6e8bbb49444bc797dd33d74dd8
+        uses: goreleaser/goreleaser-action@5742e2a039330cbb23ebf35f046f814d4c6ff811
         #if: startsWith(github.ref, 'refs/tags/v')
         with:
           version: latest

From 8ca13caa146d8d5905e10028b1673d55e56c9514 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 17 May 2024 10:48:04 -0400
Subject: [PATCH 168/456] Bump actions/checkout from 4.1.5 to 4.1.6 (#1566)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.5 to 4.1.6.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b...a5ac7e51b41094c92402da3b24376905380afc29)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 97d314c49..eb5f454d9 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
+      uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 619ed21bf..b0b48afba 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
+        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 1e3bc3093..9fcf2a384 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
+    - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
 
     - name: Set up Go
       uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d5128de26..e0aaa22fb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b
+        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
         with:
           fetch-depth: 0
 

From 8c9d82f1f226709fe705e072961220b69769da67 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 20 May 2024 09:43:26 -0400
Subject: [PATCH 169/456] Bump github/codeql-action from 2.13.4 to 3.25.5
 (#1567)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.13.4 to 3.25.5.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/cdcdbb579706841c47f7063dda365e292e5cad7a...b7cec7526559c32f1616476ff32d17ba4c59b2d6)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index eb5f454d9..0cd589efd 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@cdcdbb579706841c47f7063dda365e292e5cad7a
+      uses: github/codeql-action/init@b7cec7526559c32f1616476ff32d17ba4c59b2d6
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@cdcdbb579706841c47f7063dda365e292e5cad7a
+      uses: github/codeql-action/autobuild@b7cec7526559c32f1616476ff32d17ba4c59b2d6
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@cdcdbb579706841c47f7063dda365e292e5cad7a
+      uses: github/codeql-action/analyze@b7cec7526559c32f1616476ff32d17ba4c59b2d6

From 589c5563c4fed04c6b0458b1cf6028c2869a6fc4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 21 May 2024 08:51:02 -0400
Subject: [PATCH 170/456] --- (#1568)

updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 0cd589efd..4e9cfd898 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@b7cec7526559c32f1616476ff32d17ba4c59b2d6
+      uses: github/codeql-action/init@9fdb3e49720b44c48891d036bb502feb25684276
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@b7cec7526559c32f1616476ff32d17ba4c59b2d6
+      uses: github/codeql-action/autobuild@9fdb3e49720b44c48891d036bb502feb25684276
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@b7cec7526559c32f1616476ff32d17ba4c59b2d6
+      uses: github/codeql-action/analyze@9fdb3e49720b44c48891d036bb502feb25684276

From 571801b05ce9f00200af1dfb8fcc53db8689c095 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 31 May 2024 09:28:19 -0400
Subject: [PATCH 171/456] Bump github/codeql-action from 3.25.6 to 3.25.7
 (#1570)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.6 to 3.25.7.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/9fdb3e49720b44c48891d036bb502feb25684276...f079b8493333aace61c81488f8bd40919487bd9f)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 4e9cfd898..ae52f608b 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@9fdb3e49720b44c48891d036bb502feb25684276
+      uses: github/codeql-action/init@f079b8493333aace61c81488f8bd40919487bd9f
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@9fdb3e49720b44c48891d036bb502feb25684276
+      uses: github/codeql-action/autobuild@f079b8493333aace61c81488f8bd40919487bd9f
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@9fdb3e49720b44c48891d036bb502feb25684276
+      uses: github/codeql-action/analyze@f079b8493333aace61c81488f8bd40919487bd9f

From 45ea27bce2a719de6563d2a23433f90d71d20475 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jun 2024 07:51:33 -0400
Subject: [PATCH 172/456] Bump goreleaser/goreleaser-action from 5.1.0 to 6.0.0
 (#1574)

Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 5.1.0 to 6.0.0.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/5742e2a039330cbb23ebf35f046f814d4c6ff811...286f3b13b1b49da4ac219696163fb8c1c93e1200)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e0aaa22fb..04ff18aa3 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -40,7 +40,7 @@ jobs:
 
       # https://goreleaser.com/ci/actions/
       - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@5742e2a039330cbb23ebf35f046f814d4c6ff811
+        uses: goreleaser/goreleaser-action@286f3b13b1b49da4ac219696163fb8c1c93e1200
         #if: startsWith(github.ref, 'refs/tags/v')
         with:
           version: latest

From 11d8a2647b86d9df04d3ef98f373f17615706536 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jun 2024 07:51:42 -0400
Subject: [PATCH 173/456] Bump github/codeql-action from 3.25.7 to 3.25.8
 (#1575)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.7 to 3.25.8.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/f079b8493333aace61c81488f8bd40919487bd9f...2e230e8fe0ad3a14a340ad0815ddb96d599d2aff)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index ae52f608b..d451747b2 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@f079b8493333aace61c81488f8bd40919487bd9f
+      uses: github/codeql-action/init@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@f079b8493333aace61c81488f8bd40919487bd9f
+      uses: github/codeql-action/autobuild@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@f079b8493333aace61c81488f8bd40919487bd9f
+      uses: github/codeql-action/analyze@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff

From e61247b02cc645d6d869baae3282575feb062f53 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jun 2024 08:13:51 -0400
Subject: [PATCH 174/456] Bump golang.org/x/text from 0.15.0 to 0.16.0 (#1576)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.15.0 to 0.16.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.15.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index bf4fbf827..c69c9f1aa 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.20.0
-	golang.org/x/text v0.15.0
+	golang.org/x/text v0.16.0
 )
 
 require (
diff --git a/go.sum b/go.sum
index 1ae245763..9c2fdad28 100644
--- a/go.sum
+++ b/go.sum
@@ -44,8 +44,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
 golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
-golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
-golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
+golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From d7df999d9b0c462ae46651c646c3d2179501b36e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jun 2024 08:14:00 -0400
Subject: [PATCH 175/456] Bump golang.org/x/sys from 0.20.0 to 0.21.0 (#1578)

Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.20.0 to 0.21.0.
- [Commits](https://github.com/golang/sys/compare/v0.20.0...v0.21.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index c69c9f1aa..e1e63eb11 100644
--- a/go.mod
+++ b/go.mod
@@ -26,7 +26,7 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
-	golang.org/x/sys v0.20.0
+	golang.org/x/sys v0.21.0
 	golang.org/x/term v0.20.0
 	golang.org/x/text v0.16.0
 )
diff --git a/go.sum b/go.sum
index 9c2fdad28..5d2d16da6 100644
--- a/go.sum
+++ b/go.sum
@@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
-golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
+golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
 golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
 golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=

From 51ed2cfa1dfddf3fb0d81852ba3cacf3785ba231 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jun 2024 08:14:41 -0400
Subject: [PATCH 176/456] Bump golang.org/x/term from 0.20.0 to 0.21.0 (#1577)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.20.0 to 0.21.0.
- [Commits](https://github.com/golang/term/compare/v0.20.0...v0.21.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index e1e63eb11..6b301adac 100644
--- a/go.mod
+++ b/go.mod
@@ -27,7 +27,7 @@ require (
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.21.0
-	golang.org/x/term v0.20.0
+	golang.org/x/term v0.21.0
 	golang.org/x/text v0.16.0
 )
 
diff --git a/go.sum b/go.sum
index 5d2d16da6..2b268a60e 100644
--- a/go.sum
+++ b/go.sum
@@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
 golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
-golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
+golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
 golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
 golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From 66abef670463367f429a7261327fc3948787a97d Mon Sep 17 00:00:00 2001
From: Andrew Onyshchuk 
Date: Thu, 6 Jun 2024 06:04:25 -0700
Subject: [PATCH 177/456] fraction bugfix (#1579)

---
 pkg/transformers/fraction.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pkg/transformers/fraction.go b/pkg/transformers/fraction.go
index fb90ab2bf..2216342fb 100644
--- a/pkg/transformers/fraction.go
+++ b/pkg/transformers/fraction.go
@@ -264,9 +264,12 @@ func (tr *TransformerFraction) Transform(
 						} else {
 							numerator = value
 						}
-
 						denominator := sumsForGroup[fractionFieldName]
-						if !mlrval.Equals(value, tr.zero) {
+
+						// Return 0 for 0/n
+						if mlrval.Equals(numerator, tr.zero) {
+							outputValue = tr.zero
+						} else if !mlrval.Equals(denominator, tr.zero) {
 							outputValue = bifs.BIF_divide(numerator, denominator)
 							outputValue = bifs.BIF_times(outputValue, tr.multiplier)
 						} else {

From 8223903621e166048dbdcd75b13a110e77df3032 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 8 Jun 2024 13:08:15 -0400
Subject: [PATCH 178/456] Support `$NO_COLOR` (#1580)

* Support `$NO_COLOR`

* run `make dev`
---
 docs/src/manpage.md                    | 6 +++---
 docs/src/manpage.txt                   | 6 +++---
 docs/src/output-colorization.md        | 2 +-
 docs/src/output-colorization.md.in     | 2 +-
 docs/src/reference-main-env-vars.md    | 2 +-
 docs/src/reference-main-env-vars.md.in | 2 +-
 docs/src/reference-main-flag-list.md   | 4 ++--
 man/manpage.txt                        | 6 +++---
 man/mlr.1                              | 8 ++++----
 pkg/cli/option_parse.go                | 4 ++--
 pkg/colorizer/colorizer.go             | 3 ++-
 11 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 6f4ab1bbd..db104861d 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -648,8 +648,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        How you can control colorization:
 
        * Suppression/unsuppression:
-           * Environment variable `export MLR_NO_COLOR=true` means don't color
-             even if stdout+TTY.
+           * Environment variable `export MLR_NO_COLOR=true` or `export NO_COLOR=true`
+              means don't color even if stdout+TTY.
            * Environment variable `export MLR_ALWAYS_COLOR=true` means do color
              even if not stdout+TTY.
              For example, you might want to use this when piping mlr output to `less -r`.
@@ -3730,5 +3730,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-05-11                         4mMILLER24m(1)
+                                  2024-06-08                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 199946d45..4ff577844 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -627,8 +627,8 @@ How you can control colorization: * Suppression/unsuppression: - * Environment variable `export MLR_NO_COLOR=true` means don't color - even if stdout+TTY. + * Environment variable `export MLR_NO_COLOR=true` or `export NO_COLOR=true` + means don't color even if stdout+TTY. * Environment variable `export MLR_ALWAYS_COLOR=true` means do color even if not stdout+TTY. For example, you might want to use this when piping mlr output to `less -r`. @@ -3709,4 +3709,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-05-11 4mMILLER24m(1) + 2024-06-08 4mMILLER24m(1) diff --git a/docs/src/output-colorization.md b/docs/src/output-colorization.md index 6282d2172..e94cfe91a 100644 --- a/docs/src/output-colorization.md +++ b/docs/src/output-colorization.md @@ -50,7 +50,7 @@ described below: * Suppression/unsuppression: - * `export MLR_NO_COLOR=true` means Miller won't color even when it normally would. + * `export MLR_NO_COLOR=true` or `export NO_COLOR=true` means Miller won't color even when it normally would. * `export MLR_ALWAYS_COLOR=true` means Miller will color even when it normally would not. For example, you might want to use this when piping `mlr` output to `less -r`. * Command-line flags `--no-color` or `-M`, `--always-color` or `-C`. * On Windows, replace `export` with `set` diff --git a/docs/src/output-colorization.md.in b/docs/src/output-colorization.md.in index e60a53887..8c32c3f9b 100644 --- a/docs/src/output-colorization.md.in +++ b/docs/src/output-colorization.md.in @@ -34,7 +34,7 @@ described below: * Suppression/unsuppression: - * `export MLR_NO_COLOR=true` means Miller won't color even when it normally would. + * `export MLR_NO_COLOR=true` or `export NO_COLOR=true` means Miller won't color even when it normally would. 
* `export MLR_ALWAYS_COLOR=true` means Miller will color even when it normally would not. For example, you might want to use this when piping `mlr` output to `less -r`. * Command-line flags `--no-color` or `-M`, `--always-color` or `-C`. * On Windows, replace `export` with `set` diff --git a/docs/src/reference-main-env-vars.md b/docs/src/reference-main-env-vars.md index 3b3302b8c..295973d58 100644 --- a/docs/src/reference-main-env-vars.md +++ b/docs/src/reference-main-env-vars.md @@ -19,6 +19,6 @@ Quick links: The following environment variables affect how Miller works: * `MLRRC`: see [Customization](customization.md). -* `MLR_NO_COLOR`, `MLR_ALWAYS_COLOR`, `MLR_KEY_COLOR`, `MLR_VALUE_COLOR`, `MLR_PASS_COLOR`, `MLR_FAIL_COLOR`, `MLR_REPL_PS1_COLOR`, `MLR_REPL_PS2_COLOR`, `MLR_HELP_COLOR`: see [Output Colorization](output-colorization.md). +* `MLR_NO_COLOR`, `NO_COLOR`, `MLR_ALWAYS_COLOR`, `MLR_KEY_COLOR`, `MLR_VALUE_COLOR`, `MLR_PASS_COLOR`, `MLR_FAIL_COLOR`, `MLR_REPL_PS1_COLOR`, `MLR_REPL_PS2_COLOR`, `MLR_HELP_COLOR`: see [Output Colorization](output-colorization.md). * `MLR_REPL_PS1`, `MLR_REPL_PS2`: see [REPL](repl.md). diff --git a/docs/src/reference-main-env-vars.md.in b/docs/src/reference-main-env-vars.md.in index 5c74638a1..869c73f3f 100644 --- a/docs/src/reference-main-env-vars.md.in +++ b/docs/src/reference-main-env-vars.md.in @@ -3,6 +3,6 @@ The following environment variables affect how Miller works: * `MLRRC`: see [Customization](customization.md). -* `MLR_NO_COLOR`, `MLR_ALWAYS_COLOR`, `MLR_KEY_COLOR`, `MLR_VALUE_COLOR`, `MLR_PASS_COLOR`, `MLR_FAIL_COLOR`, `MLR_REPL_PS1_COLOR`, `MLR_REPL_PS2_COLOR`, `MLR_HELP_COLOR`: see [Output Colorization](output-colorization.md). +* `MLR_NO_COLOR`, `NO_COLOR`, `MLR_ALWAYS_COLOR`, `MLR_KEY_COLOR`, `MLR_VALUE_COLOR`, `MLR_PASS_COLOR`, `MLR_FAIL_COLOR`, `MLR_REPL_PS1_COLOR`, `MLR_REPL_PS2_COLOR`, `MLR_HELP_COLOR`: see [Output Colorization](output-colorization.md). 
* `MLR_REPL_PS1`, `MLR_REPL_PS2`: see [REPL](repl.md). diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index e684ef1f6..e9a47af63 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -328,8 +328,8 @@ Mechanisms for coloring: How you can control colorization: * Suppression/unsuppression: - * Environment variable `export MLR_NO_COLOR=true` means don't color - even if stdout+TTY. + * Environment variable `export MLR_NO_COLOR=true` or `export NO_COLOR=true` + means don't color even if stdout+TTY. * Environment variable `export MLR_ALWAYS_COLOR=true` means do color even if not stdout+TTY. For example, you might want to use this when piping mlr output to `less -r`. diff --git a/man/manpage.txt b/man/manpage.txt index 199946d45..4ff577844 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -627,8 +627,8 @@ How you can control colorization: * Suppression/unsuppression: - * Environment variable `export MLR_NO_COLOR=true` means don't color - even if stdout+TTY. + * Environment variable `export MLR_NO_COLOR=true` or `export NO_COLOR=true` + means don't color even if stdout+TTY. * Environment variable `export MLR_ALWAYS_COLOR=true` means do color even if not stdout+TTY. For example, you might want to use this when piping mlr output to `less -r`. 
@@ -3709,4 +3709,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-05-11 4mMILLER24m(1) + 2024-06-08 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index f5b66e015..c39347089 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-05-11 +.\" Date: 2024-06-08 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-05-11" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-06-08" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -756,8 +756,8 @@ Mechanisms for coloring: How you can control colorization: * Suppression/unsuppression: - * Environment variable `export MLR_NO_COLOR=true` means don't color - even if stdout+TTY. + * Environment variable `export MLR_NO_COLOR=true` or `export NO_COLOR=true` + means don't color even if stdout+TTY. * Environment variable `export MLR_ALWAYS_COLOR=true` means do color even if not stdout+TTY. For example, you might want to use this when piping mlr output to `less -r`. diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 24f525fcc..a17d4658f 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -2715,8 +2715,8 @@ Mechanisms for coloring: How you can control colorization: * Suppression/unsuppression: - * Environment variable ` + "`export MLR_NO_COLOR=true`" + ` means don't color - even if stdout+TTY. + * Environment variable ` + "`export MLR_NO_COLOR=true` or `export NO_COLOR=true`" + ` + means don't color even if stdout+TTY. * Environment variable ` + "`export MLR_ALWAYS_COLOR=true`" + ` means do color even if not stdout+TTY. For example, you might want to use this when piping mlr output to ` + "`less -r`" + `. 
diff --git a/pkg/colorizer/colorizer.go b/pkg/colorizer/colorizer.go index 3bc4525ad..cd012cffa 100644 --- a/pkg/colorizer/colorizer.go +++ b/pkg/colorizer/colorizer.go @@ -198,7 +198,8 @@ var stdoutIsATTY = getStdoutIsATTY() // Read environment variables at startup time. These can be overridden // afterward using command-line flags. func init() { - if os.Getenv("MLR_NO_COLOR") != "" { + if os.Getenv("MLR_NO_COLOR") != "" || os.Getenv("NO_COLOR") != "" { + colorization = ColorizeOutputNever colorization = ColorizeOutputNever } else if os.Getenv("MLR_ALWAYS_COLOR") != "" { colorization = ColorizeOutputAlways From 202a79d0e24e7364e092a1cb293d03ef4633c9e1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 8 Jun 2024 13:37:07 -0400 Subject: [PATCH 179/456] On-line help for `mlr summary --transpose` (#1581) * On-line help for `mlr summary --transpose` * run `make dev` --- docs/src/manpage.md | 1 + docs/src/manpage.txt | 1 + docs/src/reference-verbs.md | 1 + man/manpage.txt | 1 + man/mlr.1 | 1 + pkg/transformers/summary.go | 1 + test/cases/cli-help/0001/expout | 1 + 7 files changed, 7 insertions(+) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index db104861d..a7929a937 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2099,6 +2099,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. + --transpose Show output with field names as column names.. -h|--help Show this message. 1mtac0m diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 4ff577844..4975b7646 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2078,6 +2078,7 @@ -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. + --transpose Show output with field names as column names.. 
-h|--help Show this message. 1mtac0m diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index a90f0890f..8105820c5 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3796,6 +3796,7 @@ Options: -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. +--transpose Show output with field names as column names.. -h|--help Show this message. diff --git a/man/manpage.txt b/man/manpage.txt index 4ff577844..4975b7646 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2078,6 +2078,7 @@ -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. + --transpose Show output with field names as column names.. -h|--help Show this message. 1mtac0m diff --git a/man/mlr.1 b/man/mlr.1 index c39347089..a123eaf20 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2605,6 +2605,7 @@ Options: -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. +--transpose Show output with field names as column names.. -h|--help Show this message. 
.fi .if n \{\ diff --git a/pkg/transformers/summary.go b/pkg/transformers/summary.go index 56d95b2d4..ea5f123bc 100644 --- a/pkg/transformers/summary.go +++ b/pkg/transformers/summary.go @@ -106,6 +106,7 @@ func transformerSummaryUsage( fmt.Fprintf(o, "-a {mean,sum,etc.} Use only the specified summarizers.\n") fmt.Fprintf(o, "-x {mean,sum,etc.} Use all summarizers, except the specified ones.\n") fmt.Fprintf(o, "--all Use all available summarizers.\n") + fmt.Fprintf(o, "--transpose Show output with field names as column names..\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 6d6cdea85..e6ed5ea33 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -1252,6 +1252,7 @@ Options: -a {mean,sum,etc.} Use only the specified summarizers. -x {mean,sum,etc.} Use all summarizers, except the specified ones. --all Use all available summarizers. +--transpose Show output with field names as column names.. -h|--help Show this message. ================================================================ From dc21fa3cd5c3c5bb1cdfb9cf0cba026de0b3bfb8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 8 Jun 2024 20:16:56 -0400 Subject: [PATCH 180/456] Note IANA TSV support (#1582) * Note IANA TSV support * run `make docs` --- docs/src/file-formats.md | 18 ++++++++++++------ docs/src/file-formats.md.in | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 7064b9b49..2d7a7d0d7 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -106,17 +106,23 @@ When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are foun Miller has record separator `RS` and field separator `FS`, just as `awk` does. (See also the [separators page](reference-main-separators.md).) 
-**TSV (tab-separated values):** `FS` is tab and `RS` is newline (or carriage return + linefeed for -Windows). On input, if fields have `\r`, `\n`, `\t`, or `\\`, those are decoded as carriage return, -newline, tab, and backslash, respectively. On output, the reverse is done -- for example, if a field -has an embedded newline, that newline is replaced by `\n`. +**CSV (comma-separated values):** Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180). + +* This includes CRLF line-terminators by default, regardless of platform. +* Any cell containing a comma or a carriage return within it must be double-quoted. + +**TSV (tab-separated values):** Miller's `--tsv` supports [IANA TSV](https://www.iana.org/assignments/media-types/text/tab-separated-values). + +* `FS` is tab and `RS` is newline (or carriage return + linefeed for Windows). +* On input, if fields have `\r`, `\n`, `\t`, or `\\`, those are decoded as carriage return, newline, tab, and backslash, respectively. +* On output, the reverse is done -- for example, if a field has an embedded newline, that newline is replaced by `\n`. +* A tab within a cell must be encoded as `\t`. +* A carriage return within a cell must be encoded as `\n`. **ASV (ASCII-separated values):** the flags `--asv`, `--iasv`, `--oasv`, `--asvlite`, `--iasvlite`, and `--oasvlite` are analogous except they use ASCII FS and RS `0x1f` and `0x1e`, respectively. **USV (Unicode-separated values):** likewise, the flags `--usv`, `--iusv`, `--ousv`, `--usvlite`, `--iusvlite`, and `--ousvlite` use Unicode FS and RS `U+241F` (UTF-8 `0x0xe2909f`) and `U+241E` (UTF-8 `0xe2909e`), respectively. -Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless of platform. - Here are the differences between CSV and CSV-lite: * CSV-lite naively splits lines on newline, and fields on comma -- embedded commas and newlines are not escaped in any way. 
diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in index 36365a1fb..f72f81387 100644 --- a/docs/src/file-formats.md.in +++ b/docs/src/file-formats.md.in @@ -18,17 +18,23 @@ When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are foun Miller has record separator `RS` and field separator `FS`, just as `awk` does. (See also the [separators page](reference-main-separators.md).) -**TSV (tab-separated values):** `FS` is tab and `RS` is newline (or carriage return + linefeed for -Windows). On input, if fields have `\r`, `\n`, `\t`, or `\\`, those are decoded as carriage return, -newline, tab, and backslash, respectively. On output, the reverse is done -- for example, if a field -has an embedded newline, that newline is replaced by `\n`. +**CSV (comma-separated values):** Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180). + +* This includes CRLF line-terminators by default, regardless of platform. +* Any cell containing a comma or a carriage return within it must be double-quoted. + +**TSV (tab-separated values):** Miller's `--tsv` supports [IANA TSV](https://www.iana.org/assignments/media-types/text/tab-separated-values). + +* `FS` is tab and `RS` is newline (or carriage return + linefeed for Windows). +* On input, if fields have `\r`, `\n`, `\t`, or `\\`, those are decoded as carriage return, newline, tab, and backslash, respectively. +* On output, the reverse is done -- for example, if a field has an embedded newline, that newline is replaced by `\n`. +* A tab within a cell must be encoded as `\t`. +* A carriage return within a cell must be encoded as `\n`. **ASV (ASCII-separated values):** the flags `--asv`, `--iasv`, `--oasv`, `--asvlite`, `--iasvlite`, and `--oasvlite` are analogous except they use ASCII FS and RS `0x1f` and `0x1e`, respectively. 
**USV (Unicode-separated values):** likewise, the flags `--usv`, `--iusv`, `--ousv`, `--usvlite`, `--iusvlite`, and `--ousvlite` use Unicode FS and RS `U+241F` (UTF-8 `0x0xe2909f`) and `U+241E` (UTF-8 `0xe2909e`), respectively. -Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless of platform. - Here are the differences between CSV and CSV-lite: * CSV-lite naively splits lines on newline, and fields on comma -- embedded commas and newlines are not escaped in any way. From 6520bf47588e4b2459bb2e74f56c9bc770a8571b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 8 Jun 2024 20:37:31 -0400 Subject: [PATCH 181/456] Bash process substitution not working with `put -f` (#1583) * Bash process substitution not working with `put -f` * run `make dev` --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 4 ++-- pkg/transformers/put_or_filter.go | 28 +++++++++++++++++++++------- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index a7929a937..12661b15e 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-06-08 4mMILLER24m(1) + 2024-06-09 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 4975b7646..93be575b4 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-06-08 4mMILLER24m(1) + 2024-06-09 4mMILLER24m(1) diff --git a/man/manpage.txt b/man/manpage.txt index 4975b7646..93be575b4 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite 
https://miller.readthedocs.io - 2024-06-08 4mMILLER24m(1) + 2024-06-09 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index a123eaf20..822d44f06 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-06-08 +.\" Date: 2024-06-09 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-06-08" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-06-09" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go index 5ba42435b..82f57e940 100644 --- a/pkg/transformers/put_or_filter.go +++ b/pkg/transformers/put_or_filter.go @@ -247,14 +247,28 @@ func transformerPutOrFilterParseCLI( } else if opt == "-f" { // Get a DSL string from the user-specified filename filename := cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) - theseDSLStrings, err := lib.LoadStringsFromFileOrDir(filename, ".mlr") - if err != nil { - fmt.Fprintf(os.Stderr, "%s %s: cannot load DSL expression from file \"%s\": ", - "mlr", verb, filename) - fmt.Println(err) - os.Exit(1) + + // Miller has a two-pass command-line parser. If the user does + // `mlr put -f foo.mlr` + // then that file can be parsed both times. But if the user does + // `mlr put -f <( echo 'some expression goes here' )` + // that will read stdin. (The filename will come in as "dev/fd/63" or what have you.) + // But this file _cannot_ be read twice. So, if doConstruct==false -- we're + // on the first pass of the command-line parser -- don't bother to parse + // the DSL-contents file. 
+ // + // See also https://github.com/johnkerl/miller/issues/1515 + + if doConstruct { + theseDSLStrings, err := lib.LoadStringsFromFileOrDir(filename, ".mlr") + if err != nil { + fmt.Fprintf(os.Stderr, "%s %s: cannot load DSL expression from file \"%s\": ", + "mlr", verb, filename) + fmt.Println(err) + os.Exit(1) + } + dslStrings = append(dslStrings, theseDSLStrings...) } - dslStrings = append(dslStrings, theseDSLStrings...) haveDSLStringsHere = true } else if opt == "-e" { From 71d9388bff6832230196c6c30ca3e01e2dbd18ea Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 8 Jun 2024 20:58:26 -0400 Subject: [PATCH 182/456] Be smarter about auto-unflatten (#1584) --- pkg/cli/flatten_unflatten.go | 21 ++++++++++++++++++++- pkg/climain/mlrcli_parse.go | 2 +- pkg/terminals/repl/entry.go | 2 +- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pkg/cli/flatten_unflatten.go b/pkg/cli/flatten_unflatten.go index cd92ec6f7..8744b10b8 100644 --- a/pkg/cli/flatten_unflatten.go +++ b/pkg/cli/flatten_unflatten.go @@ -52,6 +52,10 @@ package cli // * If input is non-JSON and output is JSON: // o Default is to auto-unflatten at output. // o There is a --no-auto-unflatten for those who want it. +// +// * Overrriding these: if the last verb the user has explicitly provided is +// flatten, don't undo that by putting an unflatten right after. 
+// // ================================================================ func DecideFinalFlatten(writerOptions *TWriterOptions) bool { @@ -64,7 +68,22 @@ func DecideFinalFlatten(writerOptions *TWriterOptions) bool { return false } -func DecideFinalUnflatten(options *TOptions) bool { +func DecideFinalUnflatten( + options *TOptions, + verbSequences [][]string, +) bool { + + numVerbs := len(verbSequences) + if numVerbs > 0 { + lastVerbSequence := verbSequences[numVerbs-1] + if len(lastVerbSequence) > 0 { + lastVerbName := lastVerbSequence[0] + if lastVerbName == "flatten" { + return false + } + } + } + ifmt := options.ReaderOptions.InputFileFormat ofmt := options.WriterOptions.OutputFileFormat diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index eeecfa6dc..52e772d4b 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -376,7 +376,7 @@ func parseCommandLinePassTwo( recordTransformers = append(recordTransformers, transformer) } - if cli.DecideFinalUnflatten(options) { + if cli.DecideFinalUnflatten(options, verbSequences) { // E.g. req.method=GET,req.path=/api/check becomes // '{"req": {"method": "GET", "path": "/api/check"}}' transformer, err := transformers.NewTransformerUnflatten(options.WriterOptions.FLATSEP, options, nil) diff --git a/pkg/terminals/repl/entry.go b/pkg/terminals/repl/entry.go index d2403ccdf..23a86941b 100644 --- a/pkg/terminals/repl/entry.go +++ b/pkg/terminals/repl/entry.go @@ -156,7 +156,7 @@ func ReplMain(args []string) int { // --auto-flatten is on by default. But if input and output formats are both JSON, // then we don't need to actually do anything. See also mlrcli_parse.go. 
options.WriterOptions.AutoFlatten = cli.DecideFinalFlatten(&options.WriterOptions) - options.WriterOptions.AutoUnflatten = cli.DecideFinalUnflatten(options) + options.WriterOptions.AutoUnflatten = cli.DecideFinalUnflatten(options, [][]string{}) recordOutputFileName := "(stdout)" recordOutputStream := os.Stdout From 97c299a491b57e583779fd7d44533ac297fb7fa5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:10:52 -0400 Subject: [PATCH 183/456] Bump github.com/klauspost/compress from 1.17.8 to 1.17.9 (#1585) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.8 to 1.17.9. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.8...v1.17.9) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6b301adac..c03e269ab 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.17.8 + github.com/klauspost/compress v1.17.9 github.com/lestrrat-go/strftime v1.0.6 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 diff --git a/go.sum b/go.sum index 2b268a60e..a2314b8b9 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= -github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= From 2ffbedf4c9e2274509e05032d642f8d4d3b370bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 13 Jun 2024 09:24:44 -0400 Subject: [PATCH 184/456] Bump actions/checkout from 4.1.6 to 4.1.7 (#1586) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.6 to 4.1.7. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/a5ac7e51b41094c92402da3b24376905380afc29...692973e3d937129bcbf40652eb9f2f61becf3332) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d451747b2..d651391c0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index b0b48afba..b10af575e 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 9fcf2a384..84f82d3d7 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 - name: Set up Go uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 04ff18aa3..92afb3b12 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out code into the Go module directory - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 with: fetch-depth: 0 From 2cd97939221483b903ee465bf6507921b0c6d23f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 13 Jun 2024 09:25:03 -0400 Subject: [PATCH 185/456] Bump github/codeql-action from 3.25.8 to 3.25.9 (#1587) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.8 to 3.25.9. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/2e230e8fe0ad3a14a340ad0815ddb96d599d2aff...530d4feaa9c62aaab2d250371e2061eb7a172363) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d651391c0..960bd15da 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff + uses: github/codeql-action/init@530d4feaa9c62aaab2d250371e2061eb7a172363 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff + uses: github/codeql-action/autobuild@530d4feaa9c62aaab2d250371e2061eb7a172363 # ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff + uses: github/codeql-action/analyze@530d4feaa9c62aaab2d250371e2061eb7a172363 From 95ade3c56fb1cea97c98844842efc1744b281cbf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:07:16 -0400 Subject: [PATCH 186/456] Bump github/codeql-action from 3.25.9 to 3.25.10 (#1588) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.9 to 3.25.10. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/530d4feaa9c62aaab2d250371e2061eb7a172363...23acc5c183826b7a8a97bce3cecc52db901f8251) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 960bd15da..ed6ab405e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@530d4feaa9c62aaab2d250371e2061eb7a172363 + uses: github/codeql-action/init@23acc5c183826b7a8a97bce3cecc52db901f8251 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@530d4feaa9c62aaab2d250371e2061eb7a172363 + uses: github/codeql-action/autobuild@23acc5c183826b7a8a97bce3cecc52db901f8251 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@530d4feaa9c62aaab2d250371e2061eb7a172363 + uses: github/codeql-action/analyze@23acc5c183826b7a8a97bce3cecc52db901f8251 From 13f4f7eb4a6b8f3948cc013edaaef192959f4c74 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 08:47:42 -0400 Subject: [PATCH 187/456] Bump github/codeql-action from 3.25.10 to 3.25.11 (#1593) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.10 to 3.25.11. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/23acc5c183826b7a8a97bce3cecc52db901f8251...b611370bb5703a7efb587f9d136a52ea24c5c38c) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ed6ab405e..b7dc28e17 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL - uses: github/codeql-action/init@23acc5c183826b7a8a97bce3cecc52db901f8251 + uses: github/codeql-action/init@b611370bb5703a7efb587f9d136a52ea24c5c38c with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@23acc5c183826b7a8a97bce3cecc52db901f8251 + uses: github/codeql-action/autobuild@b611370bb5703a7efb587f9d136a52ea24c5c38c # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@23acc5c183826b7a8a97bce3cecc52db901f8251 + uses: github/codeql-action/analyze@b611370bb5703a7efb587f9d136a52ea24c5c38c From 33cb41bc394a7dccbf553a28238fac6179080a02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Jul 2024 08:18:22 -0400 Subject: [PATCH 188/456] Bump golang.org/x/sys from 0.21.0 to 0.22.0 (#1595) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.21.0 to 0.22.0. - [Commits](https://github.com/golang/sys/compare/v0.21.0...v0.22.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c03e269ab..0f50500cf 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.21.0 + golang.org/x/sys v0.22.0 golang.org/x/term v0.21.0 golang.org/x/text v0.16.0 ) diff --git a/go.sum b/go.sum index a2314b8b9..e19ef7b0f 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= From ca6c09c6cf77898b1b2c87d8b46a8d6d6b6e5555 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Jul 2024 09:42:27 -0400 Subject: [PATCH 189/456] Bump golang.org/x/term from 0.21.0 to 0.22.0 (#1594) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.21.0 to 0.22.0. 
- [Commits](https://github.com/golang/term/compare/v0.21.0...v0.22.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0f50500cf..b484e27e6 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.22.0 - golang.org/x/term v0.21.0 + golang.org/x/term v0.22.0 golang.org/x/text v0.16.0 ) diff --git a/go.sum b/go.sum index e19ef7b0f..97c1eaf1e 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= -golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 1029c960e05f749a4db330a57315d0d1963e9614 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 09:00:57 -0400 Subject: [PATCH 190/456] Bump actions/upload-artifact from 4.3.3 to 4.3.4 (#1596) Bumps 
[actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 4.3.4. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/65462800fd760344b1a7b4382951275a0abb4808...0b2256b8c012f0828dc542b3febcab082c67f72b) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 84f82d3d7..a830b29d9 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 003c7aa44f98f4b060e8b9d955696d220385779c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 09:58:36 -0400 Subject: [PATCH 191/456] Bump actions/setup-go from 5.0.1 to 5.0.2 (#1597) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.0.1 to 5.0.2. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/cdcb36043654635271a94b9a6d1392de5bb323a7...0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a830b29d9..ff1b0e7a0 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 - name: Set up Go - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 + uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 with: go-version: 1.19 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 92afb3b12..d824fb219 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 + uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 with: go-version: ${{ env.GO_VERSION }} id: go From 44c5594310e37bcad796498044747be48db7fac7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Jul 2024 09:13:41 -0400 Subject: [PATCH 192/456] Bump github/codeql-action from 3.25.11 to 3.25.12 (#1598) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.11 to 3.25.12. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/b611370bb5703a7efb587f9d136a52ea24c5c38c...4fa2a7953630fd2f3fb380f21be14ede0169dd4f) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index b7dc28e17..b35118e4e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@b611370bb5703a7efb587f9d136a52ea24c5c38c + uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@b611370bb5703a7efb587f9d136a52ea24c5c38c + uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b611370bb5703a7efb587f9d136a52ea24c5c38c + uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f From c8c4759bb2987c380134ddd16e3e6ba6af4502d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 08:38:09 -0400 Subject: [PATCH 193/456] Bump github/codeql-action from 3.25.12 to 3.25.13 (#1602) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.12 to 3.25.13. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/4fa2a7953630fd2f3fb380f21be14ede0169dd4f...2d790406f505036ef40ecba973cc774a50395aac) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index b35118e4e..4cad617c0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f + uses: github/codeql-action/init@2d790406f505036ef40ecba973cc774a50395aac with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f + uses: github/codeql-action/autobuild@2d790406f505036ef40ecba973cc774a50395aac # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f + uses: github/codeql-action/analyze@2d790406f505036ef40ecba973cc774a50395aac From 9bac6b441314f0d063e9f257e0dddd94a976d913 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:08:28 -0400 Subject: [PATCH 194/456] Bump github/codeql-action from 3.25.13 to 3.25.14 (#1603) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.13 to 3.25.14. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/2d790406f505036ef40ecba973cc774a50395aac...5cf07d8b700b67e235fbb65cbc84f69c0cf10464) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 4cad617c0..80cde6922 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@2d790406f505036ef40ecba973cc774a50395aac + uses: github/codeql-action/init@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@2d790406f505036ef40ecba973cc774a50395aac + uses: github/codeql-action/autobuild@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@2d790406f505036ef40ecba973cc774a50395aac + uses: github/codeql-action/analyze@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 From 627e7bc510f9a9f80f85c2f377b11ac5aed088bb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:29:27 -0400 Subject: [PATCH 195/456] Bump github/codeql-action from 3.25.14 to 3.25.15 (#1604) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.14 to 3.25.15. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/5cf07d8b700b67e235fbb65cbc84f69c0cf10464...afb54ba388a7dca6ecae48f608c4ff05ff4cc77a) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 80cde6922..5f69b0925 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 + uses: github/codeql-action/init@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 + uses: github/codeql-action/autobuild@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@5cf07d8b700b67e235fbb65cbc84f69c0cf10464 + uses: github/codeql-action/analyze@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a From 018e3aa039efb0318ab3f669fc47dadfc466ea16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 08:50:01 -0400 Subject: [PATCH 196/456] Bump golang.org/x/sys from 0.22.0 to 0.23.0 (#1605) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.22.0 to 0.23.0. - [Commits](https://github.com/golang/sys/compare/v0.22.0...v0.23.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b484e27e6..aece52c7e 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.22.0 + golang.org/x/sys v0.23.0 golang.org/x/term v0.22.0 golang.org/x/text v0.16.0 ) diff --git a/go.sum b/go.sum index 97c1eaf1e..99c0859d8 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= From 93574580f90e88bbb0162f06e25c7bf6a28a208d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 08:50:49 -0400 Subject: [PATCH 197/456] Bump actions/upload-artifact from 4.3.4 to 4.3.5 (#1606) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.4 to 4.3.5. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/0b2256b8c012f0828dc542b3febcab082c67f72b...89ef406dd8d7e03cfd12d9e0a4a378f454709029) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index ff1b0e7a0..3980d5894 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b + - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 247a86c998d3fd65a06fdc81d510a19fe5ffeb01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:05:39 -0400 Subject: [PATCH 198/456] Bump golang.org/x/term from 0.22.0 to 0.23.0 (#1612) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.22.0 to 0.23.0. - [Commits](https://github.com/golang/term/compare/v0.22.0...v0.23.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index aece52c7e..e6877e9e2 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.23.0 - golang.org/x/term v0.22.0 + golang.org/x/term v0.23.0 golang.org/x/text v0.16.0 ) diff --git a/go.sum b/go.sum index 99c0859d8..777ea8eae 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From afca7388f737e49d88882c40e1f7a2c07fcc4f36 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:06:10 -0400 Subject: [PATCH 199/456] Bump actions/upload-artifact from 4.3.5 to 4.3.6 (#1609) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.5 to 4.3.6. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/89ef406dd8d7e03cfd12d9e0a4a378f454709029...834a144ee995460fba8ed112a2fc961b36a5ec5a) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3980d5894..22ead6832 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 + - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 0e2ed5fbefb01a4d03137f12a2c9bc21d04ee32b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:07:32 -0400 Subject: [PATCH 200/456] Bump github/codeql-action from 3.25.15 to 3.26.0 (#1610) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.15 to 3.26.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/afb54ba388a7dca6ecae48f608c4ff05ff4cc77a...eb055d739abdc2e8de2e5f4ba1a8b246daa779aa) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 5f69b0925..c9213198b 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a + uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a + uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a + uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa From cd91ab0a27977d7f65fe0ee53b836149bcb63d1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:55:21 -0400 Subject: [PATCH 201/456] Bump golang.org/x/text from 0.16.0 to 0.17.0 (#1611) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.16.0 to 0.17.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.16.0...v0.17.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e6877e9e2..1239da41c 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.23.0 golang.org/x/term v0.23.0 - golang.org/x/text v0.16.0 + golang.org/x/text v0.17.0 ) require ( diff --git a/go.sum b/go.sum index 777ea8eae..40c30869d 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From dfe1ca1164ddb32487d46cc0b354dac59ad53685 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 08:58:49 -0400 Subject: [PATCH 202/456] Bump 
golang.org/x/sys from 0.23.0 to 0.24.0 (#1613) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.23.0 to 0.24.0. - [Commits](https://github.com/golang/sys/compare/v0.23.0...v0.24.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 1239da41c..bd23cceca 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.23.0 + golang.org/x/sys v0.24.0 golang.org/x/term v0.23.0 golang.org/x/text v0.17.0 ) diff --git a/go.sum b/go.sum index 40c30869d..511e6a039 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= From 3966a6a0a18cfe9d0437e33618a103ae940df8e8 Mon Sep 17 00:00:00 2001 From: Eng Zer Jun Date: Sat, 10 Aug 
2024 01:09:53 +0800 Subject: [PATCH 203/456] lib/regex: use string version of regexp methods to reduce allocs (#1614) Both `(*Regexp).Match` and `(*Regexp).FindAllSubmatchIndex` have string-based equivalents: `(*Regexp).MatchString` and `(*Regexp).FindAllStringSubmatchIndex`. We should use the string version to avoid unnecessary `[]byte` conversions. Benchmark: var regex = regexp.MustCompile("foo.*") func BenchmarkMatch(b *testing.B) { for i := 0; i < b.N; i++ { if match := regex.Match([]byte("foo bar baz")); !match { b.Fail() } } } func BenchmarkMatchString(b *testing.B) { for i := 0; i < b.N; i++ { if match := regex.MatchString("foo bar baz"); !match { b.Fail() } } } func BenchmarkFindAllSubmatchIndex(b *testing.B) { for i := 0; i < b.N; i++ { if match := regex.FindAllSubmatchIndex([]byte("foo bar baz"), -1); len(match) == 0 { b.Fail() } } } func BenchmarkFindAllStringSubmatchIndex(b *testing.B) { for i := 0; i < b.N; i++ { if match := regex.FindAllStringSubmatchIndex("foo bar baz", -1); len(match) == 0 { b.Fail() } } } goos: linux goarch: amd64 pkg: github.com/johnkerl/miller/pkg/lib cpu: AMD Ryzen 7 PRO 4750U with Radeon Graphics BenchmarkMatch-16 2198350 517.5 ns/op 16 B/op 1 allocs/op BenchmarkMatchString-16 3143605 371.5 ns/op 0 B/op 0 allocs/op BenchmarkFindAllSubmatchIndex-16 921711 1199 ns/op 273 B/op 3 allocs/op BenchmarkFindAllStringSubmatchIndex-16 1212321 981.0 ns/op 257 B/op 2 allocs/op PASS coverage: 0.0% of statements ok github.com/johnkerl/miller/pkg/lib 6.576s Signed-off-by: Eng Zer Jun --- pkg/lib/regex.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go index af0a18805..d8d551167 100644 --- a/pkg/lib/regex.go +++ b/pkg/lib/regex.go @@ -202,7 +202,7 @@ func regexCompiledSubOrGsub( replacementCaptureMatrix [][]int, breakOnFirst bool, ) string { - matrix := regex.FindAllSubmatchIndex([]byte(input), -1) + matrix := regex.FindAllStringSubmatchIndex(input, -1) if matrix == nil || 
len(matrix) == 0 { return input } @@ -290,7 +290,7 @@ func RegexCompiledMatchSimple( input string, regex *regexp.Regexp, ) bool { - return regex.Match([]byte(input)) + return regex.MatchString(input) } // RegexStringMatchWithMapResults implements much of the `strmatchx` DSL function. This returns @@ -320,7 +320,7 @@ func RegexCompiledMatchWithMapResults( starts := make([]int, 0, 10) ends := make([]int, 0, 10) - matrix := regex.FindAllSubmatchIndex([]byte(input), -1) + matrix := regex.FindAllStringSubmatchIndex(input, -1) if matrix == nil || len(matrix) == 0 { return false, captures, starts, ends } @@ -406,7 +406,7 @@ func RegexCompiledMatchWithCaptures( input string, regex *regexp.Regexp, ) (bool, []string) { - matrix := regex.FindAllSubmatchIndex([]byte(input), -1) + matrix := regex.FindAllStringSubmatchIndex(input, -1) if matrix == nil || len(matrix) == 0 { // Set all captures to "" return false, make([]string, 10) @@ -474,7 +474,7 @@ func ReplacementHasCaptures( matrix [][]int, ) { if captureDetector.MatchString(replacement) { - return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1) + return true, captureSplitter.FindAllStringSubmatchIndex(replacement, -1) } else { return false, nil } From 753464d0f68fdeb7daa7e63055ec73d9f853b0be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 08:03:57 -0400 Subject: [PATCH 204/456] Bump github/codeql-action from 3.26.0 to 3.26.1 (#1615) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.0 to 3.26.1. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/eb055d739abdc2e8de2e5f4ba1a8b246daa779aa...29d86d22a34ea372b1bbf3b2dced2e25ca6b3384) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c9213198b..0038e56fd 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa + uses: github/codeql-action/init@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa + uses: github/codeql-action/autobuild@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa + uses: github/codeql-action/analyze@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 From 7a2fa0bf07d27a44b39e4744eace6203f0636fad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:03:54 -0400 Subject: [PATCH 205/456] Bump github/codeql-action from 3.26.1 to 3.26.2 (#1617) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.1 to 3.26.2. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/29d86d22a34ea372b1bbf3b2dced2e25ca6b3384...429e1977040da7a23b6822b13c129cd1ba93dbb2) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 0038e56fd..292042d80 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 + uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 + uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@29d86d22a34ea372b1bbf3b2dced2e25ca6b3384 + uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 From 6bee4ebbf25fddbd502b34b9c26d0ad4110a5b1f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 16 Aug 2024 10:25:25 -0400 Subject: [PATCH 206/456] RS aliases for ASCII top-of-table control characters are misnamed (#1620) * Fix misnames of ASCII control-character aliases * artifacts from `make dev` --- docs/src/data-diving-examples.md | 46 +++++++++++++-------------- docs/src/manpage.md | 10 +++--- docs/src/manpage.txt | 10 +++--- docs/src/reference-main-flag-list.md | 8 ++--- docs/src/reference-main-separators.md | 8 ++--- docs/src/reference-verbs.md | 38 +++++++++++----------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 10 +++--- man/mlr.1 | 12 +++---- pkg/cli/flatten_unflatten.go | 4 +-- pkg/cli/separators.go | 8 ++--- 11 files changed, 79 insertions(+), 79 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..39738f193 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 12661b15e..100db8483 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -768,13 +768,13 @@ This is simply a copy of what you should see on running `man mlr` at a command p - To avoid backslashing, you can use any of the following names: ascii_esc = "\x1b" - ascii_etx = "\x04" + ascii_etx = "\x03" ascii_fs = "\x1c" ascii_gs = "\x1d" - ascii_null = "\x01" + ascii_null = "\x00" ascii_rs = "\x1e" - ascii_soh = "\x02" - ascii_stx = "\x03" + ascii_soh = "\x01" + ascii_stx = "\x02" ascii_us = "\x1f" asv_fs = "\x1f" asv_rs = "\x1e" @@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-06-09 4mMILLER24m(1) + 2024-08-16 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 93be575b4..d0ba0cbb9 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -747,13 +747,13 @@ - To avoid backslashing, you can use any of the following names: ascii_esc = "\x1b" - ascii_etx = "\x04" + ascii_etx = "\x03" ascii_fs = "\x1c" ascii_gs = "\x1d" - ascii_null = "\x01" + ascii_null = "\x00" ascii_rs = "\x1e" - ascii_soh = "\x02" - ascii_stx = "\x03" + ascii_soh = "\x01" + ascii_stx = "\x02" ascii_us = "\x1f" asv_fs = "\x1f" asv_rs = "\x1e" @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-06-09 4mMILLER24m(1) + 2024-08-16 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index e9a47af63..fdea7b253 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -438,13 +438,13 @@ Notes about all other separators: - To avoid backslashing, you can use any of the following names: ascii_esc = "\x1b" - ascii_etx = "\x04" + ascii_etx = "\x03" ascii_fs = "\x1c" ascii_gs = "\x1d" - 
ascii_null = "\x01" + ascii_null = "\x00" ascii_rs = "\x1e" - ascii_soh = "\x02" - ascii_stx = "\x03" + ascii_soh = "\x01" + ascii_stx = "\x02" ascii_us = "\x1f" asv_fs = "\x1f" asv_rs = "\x1e" diff --git a/docs/src/reference-main-separators.md b/docs/src/reference-main-separators.md index c13241e65..8ed7612a7 100644 --- a/docs/src/reference-main-separators.md +++ b/docs/src/reference-main-separators.md @@ -187,13 +187,13 @@ Many things we'd like to write as separators need to be escaped from the shell
 ascii_esc  = "\x1b"
-ascii_etx  = "\x04"
+ascii_etx  = "\x03"
 ascii_fs   = "\x1c"
 ascii_gs   = "\x1d"
-ascii_null = "\x01"
+ascii_null = "\x00"
 ascii_rs   = "\x1e"
-ascii_soh  = "\x02"
-ascii_stx  = "\x03"
+ascii_soh  = "\x01"
+ascii_stx  = "\x02"
 ascii_us   = "\x1f"
 asv_fs     = "\x1f"
 asv_rs     = "\x1e"
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 8105820c5..991ecee80 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -3435,14 +3435,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.00004257482082749404
-x_y_corr   0.0005042001844473328
-y_y_cov    0.08461122467974005
+x_y_cov    0.000042574820827444476
+x_y_corr   0.0005042001844467462
+y_y_cov    0.08461122467974003
 y_y_corr   1
-x2_xy_cov  0.041883822817793716
-x2_xy_corr 0.6301743420379936
-x2_y2_cov  -0.0003095372596253918
-x2_y2_corr -0.003424908876111875
+x2_xy_cov  0.04188382281779374
+x2_xy_corr 0.630174342037994
+x2_y2_cov  -0.00030953725962542085
+x2_y2_corr -0.0034249088761121966
 
@@ -3451,12 +3451,12 @@ x2_y2_corr -0.003424908876111875
   data/medium
 
-a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
-eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
-wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
-zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
-hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
+a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
+eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
+wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
+zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
+hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
 
Here's an example simple line-fit. The `x` and `y` @@ -3542,11 +3542,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.030097447953594 -upsec_count_pca_b 979.9883413064917 +upsec_count_pca_m -39.03009744795354 +upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.108529196302943 +donesec 25.10852919630297 ## step @@ -3824,9 +3824,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 -var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 -skewness - - 0 -0.0006899591185517494 -0.01784976012013298 +stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 +var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 +skewness - - 0 -0.0006899591185521965 -0.017849760120133784 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3..146f3a81e 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007
diff --git a/man/manpage.txt b/man/manpage.txt
index 93be575b4..d0ba0cbb9 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -747,13 +747,13 @@
          - To avoid backslashing, you can use any of the following names:
 
                  ascii_esc  = "\x1b"
-                 ascii_etx  = "\x04"
+                 ascii_etx  = "\x03"
                  ascii_fs   = "\x1c"
                  ascii_gs   = "\x1d"
-                 ascii_null = "\x01"
+                 ascii_null = "\x00"
                  ascii_rs   = "\x1e"
-                 ascii_soh  = "\x02"
-                 ascii_stx  = "\x03"
+                 ascii_soh  = "\x01"
+                 ascii_stx  = "\x02"
                  ascii_us   = "\x1f"
                  asv_fs     = "\x1f"
                  asv_rs     = "\x1e"
@@ -3710,4 +3710,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-06-09                         4mMILLER24m(1)
+                                  2024-08-16                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 822d44f06..ed06ca5af 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-06-09
+.\"      Date: 2024-08-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-06-09" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-08-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -900,13 +900,13 @@ Notes about all other separators:
   - To avoid backslashing, you can use any of the following names:
 
           ascii_esc  = "\ex1b"
-          ascii_etx  = "\ex04"
+          ascii_etx  = "\ex03"
           ascii_fs   = "\ex1c"
           ascii_gs   = "\ex1d"
-          ascii_null = "\ex01"
+          ascii_null = "\ex00"
           ascii_rs   = "\ex1e"
-          ascii_soh  = "\ex02"
-          ascii_stx  = "\ex03"
+          ascii_soh  = "\ex01"
+          ascii_stx  = "\ex02"
           ascii_us   = "\ex1f"
           asv_fs     = "\ex1f"
           asv_rs     = "\ex1e"
diff --git a/pkg/cli/flatten_unflatten.go b/pkg/cli/flatten_unflatten.go
index 8744b10b8..d91602b43 100644
--- a/pkg/cli/flatten_unflatten.go
+++ b/pkg/cli/flatten_unflatten.go
@@ -55,7 +55,7 @@ package cli
 //
 // * Overrriding these: if the last verb the user has explicitly provided is
 //   flatten, don't undo that by putting an unflatten right after.
-// 
+//
 // ================================================================
 
 func DecideFinalFlatten(writerOptions *TWriterOptions) bool {
@@ -70,7 +70,7 @@ func DecideFinalFlatten(writerOptions *TWriterOptions) bool {
 
 func DecideFinalUnflatten(
 	options *TOptions,
-    verbSequences [][]string,
+	verbSequences [][]string,
 ) bool {
 
 	numVerbs := len(verbSequences)
diff --git a/pkg/cli/separators.go b/pkg/cli/separators.go
index 0a5278f64..26c976497 100644
--- a/pkg/cli/separators.go
+++ b/pkg/cli/separators.go
@@ -21,13 +21,13 @@ const TABS_REGEX = "(\\t)+"
 const WHITESPACE_REGEX = "([ \\t])+"
 
 const ASCII_ESC = "\\x1b"
-const ASCII_ETX = "\\x04"
+const ASCII_ETX = "\\x03"
 const ASCII_FS = "\\x1c"
 const ASCII_GS = "\\x1d"
-const ASCII_NULL = "\\x01"
+const ASCII_NULL = "\\x00"
 const ASCII_RS = "\\x1e"
-const ASCII_SOH = "\\x02"
-const ASCII_STX = "\\x03"
+const ASCII_SOH = "\\x01"
+const ASCII_STX = "\\x02"
 const ASCII_US = "\\x1f"
 
 const ASV_FS = "\\x1f"

From bdd26736a5f332a24ac4fb31bc680d0ed55bfd77 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 19 Aug 2024 07:42:56 -0400
Subject: [PATCH 207/456] Bump codespell-project/actions-codespell from 2.0 to
 2.1 (#1622)

Bumps [codespell-project/actions-codespell](https://github.com/codespell-project/actions-codespell) from 2.0 to 2.1.
- [Release notes](https://github.com/codespell-project/actions-codespell/releases)
- [Commits](https://github.com/codespell-project/actions-codespell/compare/94259cd8be02ad2903ba34a22d9c13de21a74461...406322ec52dd7b488e48c1c4b82e2a8b3a1bf630)

---
updated-dependencies:
- dependency-name: codespell-project/actions-codespell
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codespell.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index b10af575e..18cff3361 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -29,7 +29,7 @@ jobs:
       # Run linter against code base
       # https://github.com/codespell-project/codespell
       - name: Codespell
-        uses: codespell-project/actions-codespell@94259cd8be02ad2903ba34a22d9c13de21a74461
+        uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630
         with:
           check_filenames: true
           ignore_words_file: .codespellignore

From f5010f4605b7b9b93fac41b215fc88f43a7b01a8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 09:12:59 -0400
Subject: [PATCH 208/456] Bump github/codeql-action from 3.26.2 to 3.26.3
 (#1623)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.2 to 3.26.3.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/429e1977040da7a23b6822b13c129cd1ba93dbb2...883d8588e56d1753a8a58c1c86e88976f0c23449)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 292042d80..df53169a9 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2
+      uses: github/codeql-action/init@883d8588e56d1753a8a58c1c86e88976f0c23449
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2
+      uses: github/codeql-action/autobuild@883d8588e56d1753a8a58c1c86e88976f0c23449
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2
+      uses: github/codeql-action/analyze@883d8588e56d1753a8a58c1c86e88976f0c23449

From 60bdd6c9222198311e780e338fd7762c8a95c50a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 22 Aug 2024 09:03:18 -0400
Subject: [PATCH 209/456] Bump github/codeql-action from 3.26.3 to 3.26.4
 (#1624)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.3 to 3.26.4.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/883d8588e56d1753a8a58c1c86e88976f0c23449...f0f3afee809481da311ca3a6ff1ff51d81dbeb24)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index df53169a9..731b7e129 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@883d8588e56d1753a8a58c1c86e88976f0c23449
+      uses: github/codeql-action/init@f0f3afee809481da311ca3a6ff1ff51d81dbeb24
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@883d8588e56d1753a8a58c1c86e88976f0c23449
+      uses: github/codeql-action/autobuild@f0f3afee809481da311ca3a6ff1ff51d81dbeb24
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@883d8588e56d1753a8a58c1c86e88976f0c23449
+      uses: github/codeql-action/analyze@f0f3afee809481da311ca3a6ff1ff51d81dbeb24

From 16a898cff412cc7c93947b416bdea90089e8ab38 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 25 Aug 2024 15:00:51 -0400
Subject: [PATCH 210/456] Fix binary data in JSON output (#1626)

---
 docs/src/data-diving-examples.md  | 46 +++++++++++++++----------------
 docs/src/manpage.md               |  2 +-
 docs/src/manpage.txt              |  2 +-
 docs/src/reference-verbs.md       | 38 ++++++++++++-------------
 docs/src/two-pass-algorithms.md   |  4 +--
 man/manpage.txt                   |  2 +-
 man/mlr.1                         |  4 +--
 pkg/mlrval/mlrval_json.go         | 24 ++++++++++++----
 test/cases/io-json-io/0036/cmd    |  1 +
 test/cases/io-json-io/0036/experr |  0
 test/cases/io-json-io/0036/expout |  2 ++
 test/cases/io-json-io/0037/cmd    |  1 +
 test/cases/io-json-io/0037/experr |  0
 test/cases/io-json-io/0037/expout |  5 ++++
 test/input/binary.json            |  5 ++++
 15 files changed, 81 insertions(+), 55 deletions(-)
 create mode 100644 test/cases/io-json-io/0036/cmd
 create mode 100644 test/cases/io-json-io/0036/experr
 create mode 100644 test/cases/io-json-io/0036/expout
 create mode 100644 test/cases/io-json-io/0037/cmd
 create mode 100644 test/cases/io-json-io/0037/experr
 create mode 100644 test/cases/io-json-io/0037/expout
 create mode 100644 test/input/binary.json

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 39738f193..100716ec2 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -160,11 +160,11 @@ CITRUS COUNTY       1332.9                 79974.9                483785.1
   stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
 
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 100db8483..04e0ce349 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-08-16 4mMILLER24m(1) + 2024-08-25 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index d0ba0cbb9..1f8f35e95 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-08-16 4mMILLER24m(1) + 2024-08-25 4mMILLER24m(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 991ecee80..8105820c5 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3435,14 +3435,14 @@ fields, optionally categorized by one or more fields. data/medium
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3451,12 +3451,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3542,11 +3542,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3824,9 +3824,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/man/manpage.txt b/man/manpage.txt
index d0ba0cbb9..1f8f35e95 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -3710,4 +3710,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-08-16                         4mMILLER24m(1)
+                                  2024-08-25                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index ed06ca5af..50ccd2440 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-08-16
+.\"      Date: 2024-08-25
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-08-16" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-08-25" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go
index c657815ec..5dfd99624 100644
--- a/pkg/mlrval/mlrval_json.go
+++ b/pkg/mlrval/mlrval_json.go
@@ -352,9 +352,17 @@ func (mv *Mlrval) marshalJSONString(outputIsStdout bool) (string, error) {
 }
 
 // Wraps with double-quotes and escape-encoded JSON-special characters.
+//
+// Per https://www.json.org/json-en.html:
+//
+// * Escapes: \b \f \n \r \t \u
+// * Acceptable ranges: 0x20..0x10FFFF
+//
+// Since these are bytes here, we only need to check < 0x20, and special-case the five valid
+// escapes, and then \u the rest.
+
 func millerJSONEncodeString(input string) string {
 	var buffer bytes.Buffer
-
 	buffer.WriteByte('"')
 
 	for _, b := range []byte(input) {
@@ -362,15 +370,15 @@ func millerJSONEncodeString(input string) string {
 		case '\\':
 			buffer.WriteByte('\\')
 			buffer.WriteByte('\\')
-		case '\n':
-			buffer.WriteByte('\\')
-			buffer.WriteByte('n')
 		case '\b':
 			buffer.WriteByte('\\')
 			buffer.WriteByte('b')
 		case '\f':
 			buffer.WriteByte('\\')
 			buffer.WriteByte('f')
+		case '\n':
+			buffer.WriteByte('\\')
+			buffer.WriteByte('n')
 		case '\r':
 			buffer.WriteByte('\\')
 			buffer.WriteByte('r')
@@ -381,12 +389,16 @@ func millerJSONEncodeString(input string) string {
 			buffer.WriteByte('\\')
 			buffer.WriteByte('"')
 		default:
-			buffer.WriteByte(b)
+			if b < 0x20 {
+				s := fmt.Sprintf("\\u%04x", b)
+				buffer.WriteString(s)
+			} else {
+				buffer.WriteByte(b)
+			}
 		}
 	}
 
 	buffer.WriteByte('"')
-
 	return buffer.String()
 }
 
diff --git a/test/cases/io-json-io/0036/cmd b/test/cases/io-json-io/0036/cmd
new file mode 100644
index 000000000..a298f0f2e
--- /dev/null
+++ b/test/cases/io-json-io/0036/cmd
@@ -0,0 +1 @@
+mlr --ijson --opprint cat test/input/binary.json
diff --git a/test/cases/io-json-io/0036/experr b/test/cases/io-json-io/0036/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-json-io/0036/expout b/test/cases/io-json-io/0036/expout
new file mode 100644
index 000000000..dd883f4e5
--- /dev/null
+++ b/test/cases/io-json-io/0036/expout
@@ -0,0 +1,2 @@
+msg
+X๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝY
diff --git a/test/cases/io-json-io/0037/cmd b/test/cases/io-json-io/0037/cmd
new file mode 100644
index 000000000..abcffb242
--- /dev/null
+++ b/test/cases/io-json-io/0037/cmd
@@ -0,0 +1 @@
+mlr -j cat test/input/binary.json
diff --git a/test/cases/io-json-io/0037/experr b/test/cases/io-json-io/0037/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/io-json-io/0037/expout b/test/cases/io-json-io/0037/expout
new file mode 100644
index 000000000..9bf2f47be
--- /dev/null
+++ b/test/cases/io-json-io/0037/expout
@@ -0,0 +1,5 @@
+[
+{
+  "msg": "X\u0001\b๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ\u0012Y"
+}
+]
diff --git a/test/input/binary.json b/test/input/binary.json
new file mode 100644
index 000000000..9bf2f47be
--- /dev/null
+++ b/test/input/binary.json
@@ -0,0 +1,5 @@
+[
+{
+  "msg": "X\u0001\b๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ\u0012Y"
+}
+]

From 1015f18e7b2fb130398d1c8e49c4c8079900563f Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 25 Aug 2024 17:40:07 -0400
Subject: [PATCH 211/456] Fix prepipe handling when filenames have whitespace
 (#1627)

* Fix prepipe handling when filenames have whitespace

* unit-test data

* Windows-only unit-test item

* Fix Windows fails; neaten
---
 pkg/lib/file_readers.go                               | 11 +++++++----
 test/cases/non-windows/io-compressed-input/0017/cmd   |  1 +
 .../cases/non-windows/io-compressed-input/0017/experr |  0
 .../cases/non-windows/io-compressed-input/0017/expout |  3 +++
 test/input/whitespace 1.csv                           |  2 ++
 test/input/whitespace 2.csv                           |  2 ++
 6 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 test/cases/non-windows/io-compressed-input/0017/cmd
 create mode 100644 test/cases/non-windows/io-compressed-input/0017/experr
 create mode 100644 test/cases/non-windows/io-compressed-input/0017/expout
 create mode 100644 test/input/whitespace 1.csv
 create mode 100644 test/input/whitespace 2.csv

diff --git a/pkg/lib/file_readers.go b/pkg/lib/file_readers.go
index a348ff900..d4d3f60be 100644
--- a/pkg/lib/file_readers.go
+++ b/pkg/lib/file_readers.go
@@ -128,21 +128,24 @@ func openPrepipedHandleForRead(
 // Avoids shell-injection cases by replacing single-quote with backslash
 // single-quote and double-quote with backslack double-quote, then wrapping the
 // entire result in initial and final single-quote.
-//
-// TODO: test on Windows. Maybe needs move to pkg/platform.
+// Also wraps in single quotes in case the filename has whitespace in it
 func escapeFileNameForPopen(filename string) string {
 	var buffer bytes.Buffer
-	foundQuote := false
+	foundQuoteOrSpace := false
 	for _, c := range filename {
 		if c == '\'' || c == '"' {
 			buffer.WriteRune('\'')
 			buffer.WriteRune(c)
 			buffer.WriteRune('\'')
+			foundQuoteOrSpace = true
+		} else if c == ' ' {
+			buffer.WriteRune(c)
+			foundQuoteOrSpace = true
 		} else {
 			buffer.WriteRune(c)
 		}
 	}
-	if foundQuote {
+	if foundQuoteOrSpace {
 		return "'" + buffer.String() + "'"
 	} else {
 		return buffer.String()
diff --git a/test/cases/non-windows/io-compressed-input/0017/cmd b/test/cases/non-windows/io-compressed-input/0017/cmd
new file mode 100644
index 000000000..71f61cc5e
--- /dev/null
+++ b/test/cases/non-windows/io-compressed-input/0017/cmd
@@ -0,0 +1 @@
+mlr --csv cat test/input/whitespace*.csv
diff --git a/test/cases/non-windows/io-compressed-input/0017/experr b/test/cases/non-windows/io-compressed-input/0017/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/non-windows/io-compressed-input/0017/expout b/test/cases/non-windows/io-compressed-input/0017/expout
new file mode 100644
index 000000000..88700c714
--- /dev/null
+++ b/test/cases/non-windows/io-compressed-input/0017/expout
@@ -0,0 +1,3 @@
+a,b,c
+1,2,3
+4,5,6
diff --git a/test/input/whitespace 1.csv b/test/input/whitespace 1.csv
new file mode 100644
index 000000000..bfde6bfa0
--- /dev/null
+++ b/test/input/whitespace 1.csv	
@@ -0,0 +1,2 @@
+a,b,c
+1,2,3
diff --git a/test/input/whitespace 2.csv b/test/input/whitespace 2.csv
new file mode 100644
index 000000000..a9411aa9d
--- /dev/null
+++ b/test/input/whitespace 2.csv	
@@ -0,0 +1,2 @@
+a,b,c
+4,5,6

From 73e2117b43be3627754e9a6a5a630afb770dd3de Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 25 Aug 2024 17:40:57 -0400
Subject: [PATCH 212/456] Misc. codespell findings (#1628)

---
 docs/src/index.md                           | 2 +-
 docs/src/index.md.in                        | 2 +-
 docs/src/manpage.md                         | 4 ++--
 docs/src/manpage.txt                        | 4 ++--
 docs/src/reference-verbs.md                 | 4 ++--
 man/manpage.txt                             | 4 ++--
 man/mlr.1                                   | 4 ++--
 pkg/cli/flatten_unflatten.go                | 2 +-
 pkg/dsl/cst/for.go                          | 6 +++---
 pkg/dsl/cst/types.go                        | 2 +-
 pkg/input/record_reader_csv.go              | 2 +-
 pkg/transformers/put_or_filter.go           | 4 ++--
 test/cases/cli-help/0001/expout             | 4 ++--
 test/cases/dsl-for-variants/0006/experr     | 2 +-
 test/cases/dsl-for-variants/0008/experr     | 2 +-
 test/cases/dsl-for-variants/0009/experr     | 2 +-
 test/cases/dsl-for-variants/0010/experr     | 2 +-
 test/cases/dsl-triple-for-loops/0015/experr | 2 +-
 test/cases/dsl-triple-for-loops/0016/experr | 2 +-
 19 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 799123fca..fd39051a6 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -45,7 +45,7 @@ The `sort`, `head`, etc are called *verbs*. They're analogs of familiar command-
 mlr --csv put '$rate = $units / $seconds' input.csv
 
-which allow you to succintly express your own logic. +which allow you to succinctly express your own logic. **Multiple domains:** People use Miller for data analysis, data science, software engineering, devops/system-administration, journalism, scientific research, and more. diff --git a/docs/src/index.md.in b/docs/src/index.md.in index 6d16f6d19..3722d45e6 100644 --- a/docs/src/index.md.in +++ b/docs/src/index.md.in @@ -29,7 +29,7 @@ GENMD-SHOW-COMMAND mlr --csv put '$rate = $units / $seconds' input.csv GENMD-EOF -which allow you to succintly express your own logic. +which allow you to succinctly express your own logic. **Multiple domains:** People use Miller for data analysis, data science, software engineering, devops/system-administration, journalism, scientific research, and more. diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 04e0ce349..0783a57fa 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1077,7 +1077,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mfilter0m Usage: mlr filter [options] {DSL expression} - Lets you use a domain-specific language to progamatically filter which + Lets you use a domain-specific language to programatically filter which stream records will be output. See also: https://miller.readthedocs.io/en/latest/reference-verbs @@ -1535,7 +1535,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mput0m Usage: mlr put [options] {DSL expression} - Lets you use a domain-specific language to progamatically alter stream records. + Lets you use a domain-specific language to programatically alter stream records. 
See also: https://miller.readthedocs.io/en/latest/reference-verbs Options: diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 1f8f35e95..521ebb98e 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1056,7 +1056,7 @@ 1mfilter0m Usage: mlr filter [options] {DSL expression} - Lets you use a domain-specific language to progamatically filter which + Lets you use a domain-specific language to programatically filter which stream records will be output. See also: https://miller.readthedocs.io/en/latest/reference-verbs @@ -1514,7 +1514,7 @@ 1mput0m Usage: mlr put [options] {DSL expression} - Lets you use a domain-specific language to progamatically alter stream records. + Lets you use a domain-specific language to programatically alter stream records. See also: https://miller.readthedocs.io/en/latest/reference-verbs Options: diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 8105820c5..f11994d1a 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -970,7 +970,7 @@ a,b,c
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to progamatically filter which
+Lets you use a domain-specific language to programatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -2306,7 +2306,7 @@ Options:
 
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to progamatically alter stream records.
+Lets you use a domain-specific language to programatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:
diff --git a/man/manpage.txt b/man/manpage.txt
index 1f8f35e95..521ebb98e 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1056,7 +1056,7 @@
 
    1mfilter0m
        Usage: mlr filter [options] {DSL expression}
-       Lets you use a domain-specific language to progamatically filter which
+       Lets you use a domain-specific language to programatically filter which
        stream records will be output.
        See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -1514,7 +1514,7 @@
 
    1mput0m
        Usage: mlr put [options] {DSL expression}
-       Lets you use a domain-specific language to progamatically alter stream records.
+       Lets you use a domain-specific language to programatically alter stream records.
        See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
        Options:
diff --git a/man/mlr.1 b/man/mlr.1
index 50ccd2440..2ed7c3d2d 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -1319,7 +1319,7 @@ Options:
 .\}
 .nf
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to progamatically filter which
+Lets you use a domain-specific language to programatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -1909,7 +1909,7 @@ Options:
 .\}
 .nf
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to progamatically alter stream records.
+Lets you use a domain-specific language to programatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:
diff --git a/pkg/cli/flatten_unflatten.go b/pkg/cli/flatten_unflatten.go
index d91602b43..006b65f53 100644
--- a/pkg/cli/flatten_unflatten.go
+++ b/pkg/cli/flatten_unflatten.go
@@ -53,7 +53,7 @@ package cli
 //   o Default is to auto-unflatten at output.
 //   o There is a --no-auto-unflatten for those who want it.
 //
-// * Overrriding these: if the last verb the user has explicitly provided is
+// * Overriding these: if the last verb the user has explicitly provided is
 //   flatten, don't undo that by putting an unflatten right after.
 //
 // ================================================================
diff --git a/pkg/dsl/cst/for.go b/pkg/dsl/cst/for.go
index 34d6b808d..22c490c47 100644
--- a/pkg/dsl/cst/for.go
+++ b/pkg/dsl/cst/for.go
@@ -805,7 +805,7 @@ func (root *RootNode) BuildTripleForLoopNode(astNode *dsl.ASTNode) (*TripleForLo
 			for i := 0; i < n-1; i++ {
 				if continuationExpressionASTNode.Children[i].Type != dsl.NodeTypeAssignment {
 					return nil, fmt.Errorf(
-						"mlr: the non-final triple-for continutation statements must be assignments.",
+						"mlr: the non-final triple-for continuation statements must be assignments.",
 					)
 				}
 				precontinuationAssignment, err := root.BuildAssignmentNode(
@@ -822,11 +822,11 @@ func (root *RootNode) BuildTripleForLoopNode(astNode *dsl.ASTNode) (*TripleForLo
 		if bareBooleanASTNode.Type != dsl.NodeTypeBareBoolean {
 			if n == 1 {
 				return nil, fmt.Errorf(
-					"mlr: the triple-for continutation statement must be a bare boolean.",
+					"mlr: the triple-for continuation statement must be a bare boolean.",
 				)
 			} else {
 				return nil, fmt.Errorf(
-					"mlr: the final triple-for continutation statement must be a bare boolean.",
+					"mlr: the final triple-for continuation statement must be a bare boolean.",
 				)
 			}
 		}
diff --git a/pkg/dsl/cst/types.go b/pkg/dsl/cst/types.go
index 4fa935edf..f490ca02b 100644
--- a/pkg/dsl/cst/types.go
+++ b/pkg/dsl/cst/types.go
@@ -58,7 +58,7 @@ type RootNode struct {
 type NodeBuilder func(astNode *dsl.ASTNode) (IEvaluable, error)
 
 // ----------------------------------------------------------------
-// This is for all statements and statemnt blocks within the CST.
+// This is for all statements and statement blocks within the CST.
 type IExecutable interface {
 	Execute(state *runtime.State) (*BlockExitPayload, error)
 }
diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go
index 505020cee..68949b25a 100644
--- a/pkg/input/record_reader_csv.go
+++ b/pkg/input/record_reader_csv.go
@@ -358,7 +358,7 @@ func (wb WorkaroundBuffer) String() string {
 // ----------------------------------------------------------------
 // BOM-stripping
 //
-// Some CSVs start with a "byte-order mark" which is the 3-byte sequene
+// Some CSVs start with a "byte-order mark" which is the 3-byte sequence
 // \xef\xbb\xbf".  Any file with such contents trips up csv.Reader:
 //
 // * If a header line is not double-quoted then we can simply look at the first
diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go
index 82f57e940..36fed42e5 100644
--- a/pkg/transformers/put_or_filter.go
+++ b/pkg/transformers/put_or_filter.go
@@ -53,9 +53,9 @@ func transformerPutOrFilterUsage(
 ) {
 	fmt.Fprintf(o, "Usage: %s %s [options] {DSL expression}\n", "mlr", verb)
 	if verb == "put" {
-		fmt.Fprintf(o, "Lets you use a domain-specific language to progamatically alter stream records.\n")
+		fmt.Fprintf(o, "Lets you use a domain-specific language to programatically alter stream records.\n")
 	} else if verb == "filter" {
-		fmt.Fprintf(o, "Lets you use a domain-specific language to progamatically filter which\n")
+		fmt.Fprintf(o, "Lets you use a domain-specific language to programatically filter which\n")
 		fmt.Fprintf(o, "stream records will be output.\n")
 	}
 	fmt.Fprintf(o, "See also: https://miller.readthedocs.io/en/latest/reference-verbs\n")
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index e6ed5ea33..41079ee75 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -186,7 +186,7 @@ Options:
 ================================================================
 filter
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to progamatically filter which
+Lets you use a domain-specific language to programatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -666,7 +666,7 @@ Options:
 ================================================================
 put
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to progamatically alter stream records.
+Lets you use a domain-specific language to programatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:
diff --git a/test/cases/dsl-for-variants/0006/experr b/test/cases/dsl-for-variants/0006/experr
index 148fb6f19..3a9ad9c69 100644
--- a/test/cases/dsl-for-variants/0006/experr
+++ b/test/cases/dsl-for-variants/0006/experr
@@ -1 +1 @@
-mlr: the triple-for continutation statement must be a bare boolean.
+mlr: the triple-for continuation statement must be a bare boolean.
diff --git a/test/cases/dsl-for-variants/0008/experr b/test/cases/dsl-for-variants/0008/experr
index ad7b7a3ce..8c456cbf6 100644
--- a/test/cases/dsl-for-variants/0008/experr
+++ b/test/cases/dsl-for-variants/0008/experr
@@ -1 +1 @@
-mlr: the final triple-for continutation statement must be a bare boolean.
+mlr: the final triple-for continuation statement must be a bare boolean.
diff --git a/test/cases/dsl-for-variants/0009/experr b/test/cases/dsl-for-variants/0009/experr
index 48da09e23..aeff07c4a 100644
--- a/test/cases/dsl-for-variants/0009/experr
+++ b/test/cases/dsl-for-variants/0009/experr
@@ -1 +1 @@
-mlr: the non-final triple-for continutation statements must be assignments.
+mlr: the non-final triple-for continuation statements must be assignments.
diff --git a/test/cases/dsl-for-variants/0010/experr b/test/cases/dsl-for-variants/0010/experr
index 48da09e23..aeff07c4a 100644
--- a/test/cases/dsl-for-variants/0010/experr
+++ b/test/cases/dsl-for-variants/0010/experr
@@ -1 +1 @@
-mlr: the non-final triple-for continutation statements must be assignments.
+mlr: the non-final triple-for continuation statements must be assignments.
diff --git a/test/cases/dsl-triple-for-loops/0015/experr b/test/cases/dsl-triple-for-loops/0015/experr
index 148fb6f19..3a9ad9c69 100644
--- a/test/cases/dsl-triple-for-loops/0015/experr
+++ b/test/cases/dsl-triple-for-loops/0015/experr
@@ -1 +1 @@
-mlr: the triple-for continutation statement must be a bare boolean.
+mlr: the triple-for continuation statement must be a bare boolean.
diff --git a/test/cases/dsl-triple-for-loops/0016/experr b/test/cases/dsl-triple-for-loops/0016/experr
index 48da09e23..aeff07c4a 100644
--- a/test/cases/dsl-triple-for-loops/0016/experr
+++ b/test/cases/dsl-triple-for-loops/0016/experr
@@ -1 +1 @@
-mlr: the non-final triple-for continutation statements must be assignments.
+mlr: the non-final triple-for continuation statements must be assignments.

From f33c0b2cd66e7e5a29ec0b366d0c0594084acc18 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 25 Aug 2024 19:00:24 -0400
Subject: [PATCH 213/456] Error in `splita`/`splitax` when field contains a
 single non-string value (#1629)

---
 pkg/bifs/collections.go               | 15 ++++++++-------
 test/cases/dsl-split-join/0021/expout |  4 ----
 test/cases/dsl-split-join/0021/mlr    |  3 ++-
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/pkg/bifs/collections.go b/pkg/bifs/collections.go
index a734ee451..247622740 100644
--- a/pkg/bifs/collections.go
+++ b/pkg/bifs/collections.go
@@ -568,15 +568,16 @@ func BIF_splitnvx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 // ----------------------------------------------------------------
 // splita("3,4,5", ",") -> [3,4,5]
 func BIF_splita(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
-	if !input1.IsStringOrVoid() {
-		return mlrval.FromNotStringError("splita", input1)
+	if !input1.IsLegit() {
+		return input1
 	}
+	input1String := input1.String()
 	if !input2.IsString() {
 		return mlrval.FromNotStringError("splita", input2)
 	}
 	fieldSeparator := input2.AcquireStringValue()
 
-	fields := lib.SplitString(input1.AcquireStringValue(), fieldSeparator)
+	fields := lib.SplitString(input1String, fieldSeparator)
 
 	arrayval := make([]*mlrval.Mlrval, len(fields))
 
@@ -592,16 +593,16 @@ func BIF_splita(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
 // BIF_splitax splits a string to an array, without type-inference:
 // e.g. splitax("3,4,5", ",") -> ["3","4","5"]
 func BIF_splitax(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
-	if !input1.IsStringOrVoid() {
-		return mlrval.FromNotStringError("splitax", input1)
+	if !input1.IsLegit() {
+		return input1
 	}
+	input1String := input1.String()
 	if !input2.IsString() {
 		return mlrval.FromNotStringError("splitax", input2)
 	}
-	input := input1.AcquireStringValue()
 	fieldSeparator := input2.AcquireStringValue()
 
-	return bif_splitax_helper(input, fieldSeparator)
+	return bif_splitax_helper(input1String, fieldSeparator)
 }
 
 // bif_splitax_helper is split out for the benefit of BIF_splitax and
diff --git a/test/cases/dsl-split-join/0021/expout b/test/cases/dsl-split-join/0021/expout
index a49c0a717..e69de29bb 100644
--- a/test/cases/dsl-split-join/0021/expout
+++ b/test/cases/dsl-split-join/0021/expout
@@ -1,4 +0,0 @@
-[3, 4, 5]
-[3, 4]
-[3]
-[]
diff --git a/test/cases/dsl-split-join/0021/mlr b/test/cases/dsl-split-join/0021/mlr
index 86e3dd532..32232c023 100644
--- a/test/cases/dsl-split-join/0021/mlr
+++ b/test/cases/dsl-split-join/0021/mlr
@@ -1,6 +1,7 @@
-end {
+test/cases/dsl-split-join/0021/mlrend {
   print splita("3,4,5", ",");
   print splita("3,4", ",");
   print splita("3", ",");
+  print splita(3, ",");
   print splita("", ",");
 }

From ffa062adae3576376e36e654328974997849dfab Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 26 Aug 2024 08:00:44 -0400
Subject: [PATCH 214/456] Bump github/codeql-action from 3.26.4 to 3.26.5
 (#1630)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.4 to 3.26.5.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/f0f3afee809481da311ca3a6ff1ff51d81dbeb24...2c779ab0d087cd7fe7b826087247c2c81f27bfa6)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 731b7e129..ddfd13b7a 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@f0f3afee809481da311ca3a6ff1ff51d81dbeb24
+      uses: github/codeql-action/init@2c779ab0d087cd7fe7b826087247c2c81f27bfa6
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@f0f3afee809481da311ca3a6ff1ff51d81dbeb24
+      uses: github/codeql-action/autobuild@2c779ab0d087cd7fe7b826087247c2c81f27bfa6
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@f0f3afee809481da311ca3a6ff1ff51d81dbeb24
+      uses: github/codeql-action/analyze@2c779ab0d087cd7fe7b826087247c2c81f27bfa6

From 24e3c7728027c00466bb9208e1a648db72c889a6 Mon Sep 17 00:00:00 2001
From: Andrea Borruso 
Date: Mon, 26 Aug 2024 15:14:53 +0200
Subject: [PATCH 215/456] To realize which chapter and section are active
 (#1631)

---
 docs/src/extra.css | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/src/extra.css b/docs/src/extra.css
index 2f25087f2..0e75f2af7 100644
--- a/docs/src/extra.css
+++ b/docs/src/extra.css
@@ -236,3 +236,11 @@ img {
   --md-footer-fg-color: #800000;
   --md-footer-fg-color: #eae2cb;
 }
+
+md-nav__link--active {
+    text-decoration: underline;
+}
+
+md-nav__link--active {
+    text-decoration: underline;
+}

From d247fab73d5ccdebe6f04cde7eba2a2339b8dc40 Mon Sep 17 00:00:00 2001
From: Andrea Borruso 
Date: Mon, 26 Aug 2024 15:20:08 +0200
Subject: [PATCH 216/456] To have edit and copy code in each page (#1632)

---
 docs/mkdocs.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 287d929c7..a12fb64e8 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -8,6 +8,8 @@ theme:
     code: Lato Mono
   features:
     - navigation.top
+    - content.action.edit
+    - content.action.view
   custom_dir: overrides
 repo_url: https://github.com/johnkerl/miller
 repo_name: miller

From 807775c5194bdd2b24d90cd3a6f1958a293659c2 Mon Sep 17 00:00:00 2001
From: Andrea Borruso 
Date: Tue, 27 Aug 2024 14:44:20 +0200
Subject: [PATCH 217/456] Update extra.css (#1633)

removed a duplicate and corrected a typo
---
 docs/src/extra.css | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/src/extra.css b/docs/src/extra.css
index 0e75f2af7..e1395aff3 100644
--- a/docs/src/extra.css
+++ b/docs/src/extra.css
@@ -237,10 +237,7 @@ img {
   --md-footer-fg-color: #eae2cb;
 }
 
-md-nav__link--active {
+.md-nav__link--active {
     text-decoration: underline;
 }
 
-md-nav__link--active {
-    text-decoration: underline;
-}

From b63f66ff8c4755e3f06ba817ad7d5c4804deb6d1 Mon Sep 17 00:00:00 2001
From: Andrea Borruso 
Date: Tue, 27 Aug 2024 17:26:17 +0200
Subject: [PATCH 218/456] A note about positional field names (#1634)

The inspiration comes from this question
https://stackoverflow.com/q/78908146/757714
---
 docs/src/reference-dsl-variables.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/src/reference-dsl-variables.md b/docs/src/reference-dsl-variables.md
index 85ad66051..57f185be0 100644
--- a/docs/src/reference-dsl-variables.md
+++ b/docs/src/reference-dsl-variables.md
@@ -155,6 +155,10 @@ a=eks,b=wye,i=4,x=0.381399,y=0.134188
 a=wye,b=pan,i=5,x=0.573288,y=0.863624
 
+!!! note + + You can use positional field names only in `DSL` syntax, so only with the verbs `put` and `filter`. + ## Out-of-stream variables These are prefixed with an at-sign, e.g. `@sum`. Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression which evaluates to string or integer: e.g. `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc. From ab637328cd3ddd9389d376446464429bc083f13b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 27 Aug 2024 11:42:24 -0400 Subject: [PATCH 219/456] Source-file update for PR 1634 (#1635) --- docs/src/reference-dsl-variables.md | 2 +- docs/src/reference-dsl-variables.md.in | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/src/reference-dsl-variables.md b/docs/src/reference-dsl-variables.md index 57f185be0..37590a365 100644 --- a/docs/src/reference-dsl-variables.md +++ b/docs/src/reference-dsl-variables.md @@ -157,7 +157,7 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624 !!! note - You can use positional field names only in `DSL` syntax, so only with the verbs `put` and `filter`. + You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e. only with the verbs `put` and `filter`. ## Out-of-stream variables diff --git a/docs/src/reference-dsl-variables.md.in b/docs/src/reference-dsl-variables.md.in index 68fca60e4..7871b148e 100644 --- a/docs/src/reference-dsl-variables.md.in +++ b/docs/src/reference-dsl-variables.md.in @@ -80,6 +80,10 @@ GENMD-RUN-COMMAND mlr put '$[[[6]]] = "NEW"' data/small GENMD-EOF +!!! note + + You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e. only with the verbs `put` and `filter`. + ## Out-of-stream variables These are prefixed with an at-sign, e.g. `@sum`. 
Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression which evaluates to string or integer: e.g. `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc. From 1fe2645989d053c5da77048d9f3f4d0e621d0033 Mon Sep 17 00:00:00 2001 From: Andrea Borruso Date: Tue, 27 Aug 2024 18:02:27 +0200 Subject: [PATCH 220/456] Enable admonition extension (#1636) In PR #1634 I have added an admonition note. I assumed that the admonition extension was enabled, but it was not. I apologize John. I have now enabled it as per the documentation: https://squidfunk.github.io/mkdocs-material/reference/admonitions/?h=ad#admonitions --- docs/mkdocs.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index a12fb64e8..6b36e5a94 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -118,5 +118,9 @@ nav: - "What's new in Miller 6": "new-in-miller-6.md" markdown_extensions: -- toc: + - toc: permalink: true + - admonition + - pymdownx.details + - pymdownx.superfences + From 52f28538f47b76272531469213873b36fd310cea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 07:31:12 -0400 Subject: [PATCH 221/456] Bump github.com/lestrrat-go/strftime from 1.0.6 to 1.1.0 (#1637) Bumps [github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) from 1.0.6 to 1.1.0. - [Release notes](https://github.com/lestrrat-go/strftime/releases) - [Changelog](https://github.com/lestrrat-go/strftime/blob/master/Changes) - [Commits](https://github.com/lestrrat-go/strftime/compare/v1.0.6...v1.1.0) --- updated-dependencies: - dependency-name: github.com/lestrrat-go/strftime dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 3 +-- go.sum | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index bd23cceca..0bf4443fa 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/klauspost/compress v1.17.9 - github.com/lestrrat-go/strftime v1.0.6 + github.com/lestrrat-go/strftime v1.1.0 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 @@ -35,7 +35,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/fgprof v0.9.3 // indirect github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 511e6a039..bb5dd1884 100644 --- a/go.sum +++ b/go.sum @@ -18,22 +18,18 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= -github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= -github.com/lestrrat-go/strftime v1.0.6/go.mod h1:f7jQKgV5nnJpYgdEasS+/y7EsTb8ykN2z68n3TtcTaw= +github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= +github.com/lestrrat-go/strftime v1.1.0/go.mod h1:uzeIB52CeUJenCo1syghlugshMysrqUT51HlxphXVeI= github.com/mattn/go-isatty v0.0.20 
h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 h1:NC4H8hewgaktBqMI5yzy6L/Vln5/H7BEziyxaE2fX3Y= github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4/go.mod h1:eUQxpEiJy001RoaLXrNa5+QQLYiEgmEafwWuA3ppJSo= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA= github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= From bea792b1361d4b7374c149115d7101f8dd739843 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:39:58 -0400 Subject: [PATCH 222/456] Bump github/codeql-action from 3.26.5 to 3.26.6 (#1638) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.5 to 3.26.6. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/2c779ab0d087cd7fe7b826087247c2c81f27bfa6...4dd16135b69a43b6c8efb853346f8437d92d3c93) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ddfd13b7a..f998331bb 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 + uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 + uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # ℹ️ Command-line programs to run using the OS shell.
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 + uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 From d739298160c18f94aca89a338479d275a7b5ce22 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 10:10:52 -0400 Subject: [PATCH 223/456] Bump actions/upload-artifact from 4.3.6 to 4.4.0 (#1640) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.6 to 4.4.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/834a144ee995460fba8ed112a2fc961b36a5ec5a...50769540e7f4bd5e21e526ee35c689e35e0d6874) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 22ead6832..56bfde270 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a + - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From e1ac188f49c46269f1950745975a319b35745a64 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:03:18 -0400 Subject: [PATCH 224/456] Bump golang.org/x/text from 0.17.0 to 0.18.0 (#1641) Bumps [golang.org/x/text](https://github.com/golang/text) 
from 0.17.0 to 0.18.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.17.0...v0.18.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0bf4443fa..e14bcd31d 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.24.0 golang.org/x/term v0.23.0 - golang.org/x/text v0.17.0 + golang.org/x/text v0.18.0 ) require ( diff --git a/go.sum b/go.sum index bb5dd1884..ed1073ef9 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 73b1a4b40e006d15c1e11acb5b7e9c6904857a5c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:33:09 -0400 
Subject: [PATCH 225/456] Bump golang.org/x/term from 0.23.0 to 0.24.0 (#1642) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.23.0 to 0.24.0. - [Commits](https://github.com/golang/term/compare/v0.23.0...v0.24.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index e14bcd31d..f9f4db916 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.24.0 - golang.org/x/term v0.23.0 + golang.org/x/sys v0.25.0 + golang.org/x/term v0.24.0 golang.org/x/text v0.18.0 ) diff --git a/go.sum b/go.sum index ed1073ef9..da4f1028a 100644 --- a/go.sum +++ b/go.sum @@ -36,10 +36,10 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term 
v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 5ef01ca3567cf012d1f7565d8814b4a36fa440ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 08:13:24 -0400 Subject: [PATCH 226/456] Bump github/codeql-action from 3.26.6 to 3.26.7 (#1648) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.6 to 3.26.7. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/4dd16135b69a43b6c8efb853346f8437d92d3c93...8214744c546c1e5c8f03dde8fab3a7353211988d) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f998331bb..f26e3c3f0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 + uses: github/codeql-action/init@8214744c546c1e5c8f03dde8fab3a7353211988d with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 + uses: github/codeql-action/autobuild@8214744c546c1e5c8f03dde8fab3a7353211988d # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 + uses: github/codeql-action/analyze@8214744c546c1e5c8f03dde8fab3a7353211988d From d1767e7c180f4626473e8a22a9779f16071c431f Mon Sep 17 00:00:00 2001 From: Balki Date: Tue, 17 Sep 2024 14:58:09 +0000 Subject: [PATCH 227/456] Fix local time when TZ is not set (#1649) Do not override time.Local when TZ is empty or unset. It is already set correctly by go standard library. --- pkg/lib/time.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/lib/time.go b/pkg/lib/time.go index 4fa6818c5..8ceae8760 100644 --- a/pkg/lib/time.go +++ b/pkg/lib/time.go @@ -16,6 +16,9 @@ import ( // statement does 'ENV["TZ"] = Asia/Istanbul'. func SetTZFromEnv() error { tzenv := os.Getenv("TZ") + if tzenv == "" { + return nil + } location, err := time.LoadLocation(tzenv) if err != nil { return fmt.Errorf("TZ environment variable appears malformed: \"%s\"", tzenv) From a91abf5d5cafb883c82f398b73973a94a12ebacb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 19 Sep 2024 08:41:17 -0400 Subject: [PATCH 228/456] Bump github/codeql-action from 3.26.7 to 3.26.8 (#1652) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.7 to 3.26.8.
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/8214744c546c1e5c8f03dde8fab3a7353211988d...294a9d92911152fe08befb9ec03e240add280cb3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f26e3c3f0..5c13fbb23 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@8214744c546c1e5c8f03dde8fab3a7353211988d + uses: github/codeql-action/init@294a9d92911152fe08befb9ec03e240add280cb3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@8214744c546c1e5c8f03dde8fab3a7353211988d + uses: github/codeql-action/autobuild@294a9d92911152fe08befb9ec03e240add280cb3 # ℹ️ Command-line programs to run using the OS shell.
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@8214744c546c1e5c8f03dde8fab3a7353211988d + uses: github/codeql-action/analyze@294a9d92911152fe08befb9ec03e240add280cb3 From 085e8316685f89067906bfbb775f7cee8d8b176f Mon Sep 17 00:00:00 2001 From: Adam Lesperance Date: Fri, 20 Sep 2024 11:10:11 -0500 Subject: [PATCH 229/456] The package version must match the major tag version (#1654) * Update package version * Update makefile targets * Update readme packages * Remaining old packages via rg/sd --- Makefile | 24 +++++++++---------- README.md | 6 ++--- cmd/experiments/colors/main.go | 2 +- cmd/mlr/main.go | 2 +- cmd/scan/main.go | 2 +- cmd/sizes/main.go | 4 ++-- docs/src/build.md | 8 +++---- docs/src/build.md.in | 8 +++---- docs/src/miller-as-library.md | 14 +++++------ docs/src/miller-as-library/main1.go | 4 ++-- docs/src/miller-as-library/main2.go | 12 +++++----- docs/src/miller-as-library/main3.go | 10 ++++---- go.mod | 6 ++--- pkg/bifs/arithmetic.go | 4 ++-- pkg/bifs/arithmetic_test.go | 2 +- pkg/bifs/base.go | 6 ++--- pkg/bifs/bits.go | 2 +- pkg/bifs/bits_test.go | 2 +- pkg/bifs/booleans.go | 2 +- pkg/bifs/cmp.go | 4 ++-- pkg/bifs/collections.go | 4 ++-- pkg/bifs/collections_test.go | 2 +- pkg/bifs/datetime.go | 6 ++--- pkg/bifs/hashing.go | 2 +- pkg/bifs/hashing_test.go | 2 +- pkg/bifs/mathlib.go | 4 ++-- pkg/bifs/percentiles.go | 2 +- pkg/bifs/random.go | 4 ++-- pkg/bifs/regex.go | 4 ++-- pkg/bifs/relative_time.go | 2 +- pkg/bifs/stats.go | 4 ++-- pkg/bifs/stats_test.go | 2 +- pkg/bifs/strings.go | 4 ++-- pkg/bifs/system.go | 6 ++--- pkg/bifs/types.go | 6 ++--- pkg/cli/flag_types.go | 4 ++-- pkg/cli/option_parse.go | 6 ++--- pkg/cli/option_types.go | 2 +- pkg/cli/verb_utils.go | 2 +- pkg/climain/mlrcli_mlrrc.go | 2 +- pkg/climain/mlrcli_parse.go | 14 +++++------ pkg/climain/mlrcli_shebang.go | 2 +- pkg/dsl/ast_build.go | 4 ++-- pkg/dsl/ast_types.go | 2 +- 
pkg/dsl/cst/assignments.go | 6 ++--- pkg/dsl/cst/block_exit.go | 6 ++--- pkg/dsl/cst/blocks.go | 6 ++--- pkg/dsl/cst/builtin_function_manager.go | 6 ++--- pkg/dsl/cst/builtin_functions.go | 10 ++++---- pkg/dsl/cst/collections.go | 10 ++++---- pkg/dsl/cst/cond.go | 10 ++++---- pkg/dsl/cst/dump.go | 10 ++++---- pkg/dsl/cst/emit1.go | 8 +++---- pkg/dsl/cst/emit_emitp.go | 14 +++++------ pkg/dsl/cst/emitf.go | 12 +++++----- pkg/dsl/cst/env.go | 8 +++---- pkg/dsl/cst/evaluable.go | 8 +++---- pkg/dsl/cst/filter.go | 6 ++--- pkg/dsl/cst/for.go | 10 ++++---- pkg/dsl/cst/functions.go | 4 ++-- pkg/dsl/cst/hofs.go | 8 +++---- pkg/dsl/cst/if.go | 10 ++++---- pkg/dsl/cst/keyword_usage.go | 4 ++-- pkg/dsl/cst/leaves.go | 8 +++---- pkg/dsl/cst/lvalues.go | 8 +++---- pkg/dsl/cst/print.go | 10 ++++---- pkg/dsl/cst/root.go | 14 +++++------ pkg/dsl/cst/signature.go | 2 +- pkg/dsl/cst/statements.go | 2 +- pkg/dsl/cst/subroutines.go | 4 ++-- pkg/dsl/cst/tee.go | 12 +++++----- pkg/dsl/cst/types.go | 8 +++---- pkg/dsl/cst/udf.go | 10 ++++---- pkg/dsl/cst/uds.go | 10 ++++---- pkg/dsl/cst/validate.go | 4 ++-- pkg/dsl/cst/warn.go | 4 ++-- pkg/dsl/cst/while.go | 8 +++---- pkg/dsl/token.go | 2 +- pkg/entrypoint/entrypoint.go | 14 +++++------ pkg/input/line_reader.go | 2 +- pkg/input/pseudo_reader_gen.go | 8 +++---- pkg/input/record_reader.go | 2 +- pkg/input/record_reader_benchmark_test.go | 4 ++-- pkg/input/record_reader_csv.go | 10 ++++---- pkg/input/record_reader_csvlite.go | 8 +++---- pkg/input/record_reader_dkvp_nidx.go | 8 +++---- pkg/input/record_reader_dkvp_test.go | 2 +- pkg/input/record_reader_factory.go | 2 +- pkg/input/record_reader_json.go | 8 +++---- pkg/input/record_reader_markdown.go | 2 +- pkg/input/record_reader_pprint.go | 8 +++---- pkg/input/record_reader_tsv.go | 8 +++---- pkg/input/record_reader_xtab.go | 8 +++---- pkg/input/splitters.go | 4 ++-- pkg/lib/halfpipe.go | 2 +- pkg/lib/readfiles.go | 2 +- pkg/mlrval/mlrmap_accessors.go | 2 +- 
pkg/mlrval/mlrmap_flatten_unflatten.go | 2 +- pkg/mlrval/mlrmap_json.go | 4 ++-- pkg/mlrval/mlrval_accessors.go | 2 +- pkg/mlrval/mlrval_benchmark_test.go | 2 +- pkg/mlrval/mlrval_cmp.go | 2 +- pkg/mlrval/mlrval_collections.go | 2 +- pkg/mlrval/mlrval_get.go | 2 +- pkg/mlrval/mlrval_infer.go | 2 +- pkg/mlrval/mlrval_is.go | 2 +- pkg/mlrval/mlrval_json.go | 4 ++-- pkg/mlrval/mlrval_new.go | 4 ++-- pkg/output/channel_writer.go | 4 ++-- pkg/output/file_output_handlers.go | 6 ++--- pkg/output/record_writer.go | 4 ++-- pkg/output/record_writer_csv.go | 8 +++---- pkg/output/record_writer_csv_colorizer.go | 2 +- pkg/output/record_writer_csvlite.go | 8 +++---- pkg/output/record_writer_dkvp.go | 8 +++---- pkg/output/record_writer_factory.go | 2 +- pkg/output/record_writer_json.go | 6 ++--- pkg/output/record_writer_markdown.go | 8 +++---- pkg/output/record_writer_nidx.go | 6 ++--- pkg/output/record_writer_pprint.go | 8 +++---- pkg/output/record_writer_tsv.go | 10 ++++---- pkg/output/record_writer_xtab.go | 8 +++---- pkg/parsing/errors.go.template | 2 +- pkg/parsing/errors/errors.go | 2 +- pkg/parsing/lexer/acttab.go | 2 +- pkg/parsing/lexer/lexer.go | 2 +- pkg/parsing/mlr.bnf | 2 +- pkg/parsing/parser/parser.go | 4 ++-- pkg/parsing/parser/productionstable.go | 2 +- pkg/runtime/stack.go | 6 ++--- pkg/runtime/state.go | 8 +++---- pkg/scan/find_benchmark_test.go | 2 +- pkg/stream/stream.go | 10 ++++---- pkg/terminals/help/entry.go | 16 ++++++------- pkg/terminals/regtest/invoker.go | 4 ++-- pkg/terminals/regtest/regtester.go | 4 ++-- pkg/terminals/repl/dsl.go | 6 ++--- pkg/terminals/repl/entry.go | 2 +- pkg/terminals/repl/prompt.go | 6 ++--- pkg/terminals/repl/session.go | 16 ++++++------- pkg/terminals/repl/types.go | 10 ++++---- pkg/terminals/repl/verbs.go | 12 +++++----- pkg/terminals/terminals.go | 8 +++---- pkg/transformers/aaa_chain_transformer.go | 4 ++-- pkg/transformers/aaa_record_transformer.go | 4 ++-- pkg/transformers/aaa_transformer_table.go | 4 ++-- 
pkg/transformers/altkv.go | 6 ++--- pkg/transformers/bar.go | 6 ++--- pkg/transformers/bootstrap.go | 6 ++--- pkg/transformers/case.go | 8 +++---- pkg/transformers/cat.go | 6 ++--- pkg/transformers/check.go | 4 ++-- pkg/transformers/clean_whitespace.go | 8 +++---- pkg/transformers/count.go | 8 +++---- pkg/transformers/count_similar.go | 8 +++---- pkg/transformers/cut.go | 8 +++---- pkg/transformers/decimate.go | 4 ++-- pkg/transformers/fill_down.go | 6 ++--- pkg/transformers/fill_empty.go | 6 ++--- pkg/transformers/flatten.go | 6 ++--- pkg/transformers/format_values.go | 6 ++--- pkg/transformers/fraction.go | 10 ++++---- pkg/transformers/gap.go | 6 ++--- pkg/transformers/grep.go | 4 ++-- pkg/transformers/group_by.go | 6 ++--- pkg/transformers/group_like.go | 6 ++--- pkg/transformers/having_fields.go | 6 ++--- pkg/transformers/head.go | 4 ++-- pkg/transformers/histogram.go | 8 +++---- pkg/transformers/join.go | 12 +++++----- pkg/transformers/json_parse.go | 6 ++--- pkg/transformers/json_stringify.go | 8 +++---- pkg/transformers/label.go | 6 ++--- pkg/transformers/latin1_to_utf8.go | 8 +++---- pkg/transformers/merge_fields.go | 8 +++---- pkg/transformers/most_or_least_frequent.go | 8 +++---- pkg/transformers/nest.go | 8 +++---- pkg/transformers/nothing.go | 4 ++-- pkg/transformers/put_or_filter.go | 14 +++++------ pkg/transformers/regularize.go | 8 +++---- pkg/transformers/remove_empty_columns.go | 6 ++--- pkg/transformers/rename.go | 6 ++--- pkg/transformers/reorder.go | 8 +++---- pkg/transformers/repeat.go | 4 ++-- pkg/transformers/reshape.go | 8 +++---- pkg/transformers/sample.go | 6 ++--- pkg/transformers/sec2gmt.go | 8 +++---- pkg/transformers/sec2gmtdate.go | 8 +++---- pkg/transformers/seqgen.go | 8 +++---- pkg/transformers/shuffle.go | 6 ++--- pkg/transformers/skip_trivial_records.go | 4 ++-- pkg/transformers/sort.go | 8 +++---- pkg/transformers/sort_within_records.go | 4 ++-- pkg/transformers/sparsify.go | 8 +++---- pkg/transformers/split.go | 8 +++---- 
pkg/transformers/stats1.go | 10 ++++---- pkg/transformers/stats2.go | 10 ++++---- pkg/transformers/step.go | 12 +++++----- pkg/transformers/subs.go | 10 ++++---- pkg/transformers/summary.go | 10 ++++---- pkg/transformers/tac.go | 4 ++-- pkg/transformers/tail.go | 6 ++--- pkg/transformers/tee.go | 6 ++--- pkg/transformers/template.go | 8 +++---- pkg/transformers/top.go | 10 ++++---- pkg/transformers/unflatten.go | 6 ++--- pkg/transformers/uniq.go | 8 +++---- pkg/transformers/unspace.go | 6 ++--- pkg/transformers/unsparsify.go | 8 +++---- pkg/transformers/utf8_to_latin1.go | 8 +++---- pkg/transformers/utils/join_bucket.go | 2 +- pkg/transformers/utils/join_bucket_keeper.go | 10 ++++---- pkg/transformers/utils/percentile_keeper.go | 4 ++-- pkg/transformers/utils/stats1_accumulators.go | 6 ++--- pkg/transformers/utils/stats2_accumulators.go | 4 ++-- pkg/transformers/utils/top_keeper.go | 4 ++-- pkg/transformers/utils/window_keeper.go | 2 +- pkg/types/context.go | 2 +- pkg/types/mlrval_typing.go | 2 +- regression_test.go | 2 +- scripts/compiler-versions-build | 4 ++-- 221 files changed, 668 insertions(+), 668 deletions(-) diff --git a/Makefile b/Makefile index fe27b8dd3..ec2b817cb 100644 --- a/Makefile +++ b/Makefile @@ -7,12 +7,12 @@ INSTALLDIR=$(PREFIX)/bin # This must remain the first target in this file, which is what 'make' with no # arguments will run. build: - go build github.com/johnkerl/miller/cmd/mlr + go build github.com/johnkerl/miller/v6/cmd/mlr @echo "Build complete. The Miller executable is ./mlr (or .\mlr.exe on Windows)." @echo "You can use 'make check' to run tests". quiet: - @go build github.com/johnkerl/miller/cmd/mlr + @go build github.com/johnkerl/miller/v6/cmd/mlr # For interactive use, 'mlr regtest' offers more options and transparency. 
check: unit-test regression-test @@ -33,25 +33,25 @@ install: build # ---------------------------------------------------------------- # Unit tests (small number) unit-test ut: build - go test github.com/johnkerl/miller/pkg/... + go test github.com/johnkerl/miller/v6/pkg/... ut-lib:build - go test github.com/johnkerl/miller/pkg/lib... + go test github.com/johnkerl/miller/v6/pkg/lib... ut-scan:build - go test github.com/johnkerl/miller/pkg/scan/... + go test github.com/johnkerl/miller/v6/pkg/scan/... ut-mlv:build - go test github.com/johnkerl/miller/pkg/mlrval/... + go test github.com/johnkerl/miller/v6/pkg/mlrval/... ut-bifs:build - go test github.com/johnkerl/miller/pkg/bifs/... + go test github.com/johnkerl/miller/v6/pkg/bifs/... ut-input:build - go test github.com/johnkerl/miller/pkg/input/... + go test github.com/johnkerl/miller/v6/pkg/input/... bench:build - go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/... + go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/... bench-mlv:build - go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/mlrval/... + go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/mlrval/... bench-input:build - go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/input/... + go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/input/... # ---------------------------------------------------------------- # Regression tests (large number) @@ -114,7 +114,7 @@ it: build check so: install mlr: - go build github.com/johnkerl/miller/cmd/mlr + go build github.com/johnkerl/miller/v6/cmd/mlr # ---------------------------------------------------------------- # Please see comments in ./create-release-tarball as well as diff --git a/README.md b/README.md index be095ed66..0819f272e 100644 --- a/README.md +++ b/README.md @@ -110,9 +110,9 @@ See also [building from source](https://miller.readthedocs.io/en/latest/build.ht * To install: `make install`. 
This installs the executable `/usr/local/bin/mlr` and manual page `/usr/local/share/man/man1/mlr.1` (so you can do `man mlr`). * You can do `./configure --prefix=/some/install/path` before `make install` if you want to install somewhere other than `/usr/local`. * Without `make`: - * To build: `go build github.com/johnkerl/miller/cmd/mlr`. - * To run tests: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest`. - * To install: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr`. + * To build: `go build github.com/johnkerl/miller/v6/cmd/mlr`. + * To run tests: `go test github.com/johnkerl/miller/v6/pkg/...` and `mlr regtest`. + * To install: `go install github.com/johnkerl/miller/v6/cmd/mlr` will install to _GOPATH_`/bin/mlr`. * See also the doc page on [building from source](https://miller.readthedocs.io/en/latest/build). * For more developer information please see [README-dev.md](./README-dev.md). diff --git a/cmd/experiments/colors/main.go b/cmd/experiments/colors/main.go index 5f5093eee..3539d3c1e 100644 --- a/cmd/experiments/colors/main.go +++ b/cmd/experiments/colors/main.go @@ -3,7 +3,7 @@ package main import ( "fmt" - "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/colorizer" ) const boldString = "\u001b[1m" diff --git a/cmd/mlr/main.go b/cmd/mlr/main.go index 3e37bdca1..dc2b1f8b4 100644 --- a/cmd/mlr/main.go +++ b/cmd/mlr/main.go @@ -11,7 +11,7 @@ import ( "strings" "time" - "github.com/johnkerl/miller/pkg/entrypoint" + "github.com/johnkerl/miller/v6/pkg/entrypoint" "github.com/pkg/profile" // for trace.out ) diff --git a/cmd/scan/main.go b/cmd/scan/main.go index f93e0226e..d42b08115 100644 --- a/cmd/scan/main.go +++ b/cmd/scan/main.go @@ -8,7 +8,7 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/scan" + "github.com/johnkerl/miller/v6/pkg/scan" ) func main() { diff --git a/cmd/sizes/main.go b/cmd/sizes/main.go index 5ae6209cc..8e06398fe 100644 --- a/cmd/sizes/main.go +++ 
b/cmd/sizes/main.go @@ -3,7 +3,7 @@ // ================================================================ /* -go build github.com/johnkerl/miller/cmd/sizes +go build github.com/johnkerl/miller/v6/cmd/sizes */ package main @@ -11,7 +11,7 @@ package main import ( "fmt" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func main() { diff --git a/docs/src/build.md b/docs/src/build.md index 0b6787898..0bceafb0d 100644 --- a/docs/src/build.md +++ b/docs/src/build.md @@ -31,16 +31,16 @@ Two-clause BSD license [https://github.com/johnkerl/miller/blob/master/LICENSE.t * `cd mlr-i.j.k` * `cd go` * `make` creates the `./mlr` (or `.\mlr.exe` on Windows) executable - * Without `make`: `go build github.com/johnkerl/miller/cmd/mlr` + * Without `make`: `go build github.com/johnkerl/miller/v6/cmd/mlr` * `make check` runs tests - * Without `make`: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest` + * Without `make`: `go test github.com/johnkerl/miller/v6/pkg/...` and `mlr regtest` * `make install` installs the `mlr` executable and the `mlr` manpage - * Without make: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr` + * Without make: `go install github.com/johnkerl/miller/v6/cmd/mlr` will install to _GOPATH_`/bin/mlr` ## From git clone * `git clone https://github.com/johnkerl/miller` -* `make`/`go build github.com/johnkerl/miller/cmd/mlr` as above +* `make`/`go build github.com/johnkerl/miller/v6/cmd/mlr` as above ## In case of problems diff --git a/docs/src/build.md.in b/docs/src/build.md.in index 5138c9b8f..ef3e4aa7d 100644 --- a/docs/src/build.md.in +++ b/docs/src/build.md.in @@ -15,16 +15,16 @@ Two-clause BSD license [https://github.com/johnkerl/miller/blob/master/LICENSE.t * `cd mlr-i.j.k` * `cd go` * `make` creates the `./mlr` (or `.\mlr.exe` on Windows) executable - * Without `make`: `go build github.com/johnkerl/miller/cmd/mlr` + * Without `make`: `go build 
github.com/johnkerl/miller/v6/cmd/mlr` * `make check` runs tests - * Without `make`: `go test github.com/johnkerl/miller/pkg/...` and `mlr regtest` + * Without `make`: `go test github.com/johnkerl/miller/v6/pkg/...` and `mlr regtest` * `make install` installs the `mlr` executable and the `mlr` manpage - * Without make: `go install github.com/johnkerl/miller/cmd/mlr` will install to _GOPATH_`/bin/mlr` + * Without make: `go install github.com/johnkerl/miller/v6/cmd/mlr` will install to _GOPATH_`/bin/mlr` ## From git clone * `git clone https://github.com/johnkerl/miller` -* `make`/`go build github.com/johnkerl/miller/cmd/mlr` as above +* `make`/`go build github.com/johnkerl/miller/v6/cmd/mlr` as above ## In case of problems diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md index c17872ba0..219b1f653 100644 --- a/docs/src/miller-as-library.md +++ b/docs/src/miller-as-library.md @@ -50,8 +50,8 @@ package main import ( "fmt" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func main() { @@ -86,11 +86,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/types" ) // Put your record-processing logic here. 
diff --git a/docs/src/miller-as-library/main1.go b/docs/src/miller-as-library/main1.go index c56f5a0db..68823b9f2 100644 --- a/docs/src/miller-as-library/main1.go +++ b/docs/src/miller-as-library/main1.go @@ -3,8 +3,8 @@ package main import ( "fmt" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func main() { diff --git a/docs/src/miller-as-library/main2.go b/docs/src/miller-as-library/main2.go index c460a174a..8434f14bd 100644 --- a/docs/src/miller-as-library/main2.go +++ b/docs/src/miller-as-library/main2.go @@ -7,11 +7,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/types" ) // Put your record-processing logic here. 
@@ -49,7 +49,7 @@ func custom_options() *cli.TOptions { func run_custom_processor( fileNames []string, options *cli.TOptions, - record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error), + record_processor func(irac *types.RecordAndContext) (*types.RecordAndContext, error), ) error { outputStream := os.Stdout outputIsStdout := true diff --git a/docs/src/miller-as-library/main3.go b/docs/src/miller-as-library/main3.go index 07d4be50e..23a400453 100644 --- a/docs/src/miller-as-library/main3.go +++ b/docs/src/miller-as-library/main3.go @@ -7,11 +7,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/transformers" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/transformers" + "github.com/johnkerl/miller/v6/pkg/types" ) func convert_csv_to_json(fileNames []string) error { diff --git a/go.mod b/go.mod index f9f4db916..358903723 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/johnkerl/miller +module github.com/johnkerl/miller/v6 // The repo is 'miller' and the executable is 'mlr', going back many years and // predating the Go port. @@ -7,8 +7,8 @@ module github.com/johnkerl/miller // executable would be 'miller' not 'mlr'. 
// // So we have cmd/mlr/main.go: -// * go build github.com/johnkerl/miller/cmd/mlr -// * go install github.com/johnkerl/miller/cmd/mlr +// * go build github.com/johnkerl/miller/v6/cmd/mlr +// * go install github.com/johnkerl/miller/v6/cmd/mlr // go get github.com/johnkerl/lumin@v1.0.0 // Local development: diff --git a/pkg/bifs/arithmetic.go b/pkg/bifs/arithmetic.go index f5a2b853e..1edf7cee4 100644 --- a/pkg/bifs/arithmetic.go +++ b/pkg/bifs/arithmetic.go @@ -4,8 +4,8 @@ import ( "fmt" "math" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ================================================================ diff --git a/pkg/bifs/arithmetic_test.go b/pkg/bifs/arithmetic_test.go index 76efd45ea..393a3a968 100644 --- a/pkg/bifs/arithmetic_test.go +++ b/pkg/bifs/arithmetic_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func TestBIF_plus_unary(t *testing.T) { diff --git a/pkg/bifs/base.go b/pkg/bifs/base.go index 28aa0d6bf..0f299048f 100644 --- a/pkg/bifs/base.go +++ b/pkg/bifs/base.go @@ -50,9 +50,9 @@ package bifs import ( "fmt" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // Function-pointer type for zary functions. 
diff --git a/pkg/bifs/bits.go b/pkg/bifs/bits.go index 5ed8cc20e..c9001c431 100644 --- a/pkg/bifs/bits.go +++ b/pkg/bifs/bits.go @@ -1,7 +1,7 @@ package bifs import ( - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ================================================================ diff --git a/pkg/bifs/bits_test.go b/pkg/bifs/bits_test.go index 96718e00d..9239d58a4 100644 --- a/pkg/bifs/bits_test.go +++ b/pkg/bifs/bits_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func TestBIF_bitcount(t *testing.T) { diff --git a/pkg/bifs/booleans.go b/pkg/bifs/booleans.go index c0b3bc3db..181e5cbc6 100644 --- a/pkg/bifs/booleans.go +++ b/pkg/bifs/booleans.go @@ -5,7 +5,7 @@ package bifs import ( - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func BIF_logical_NOT(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/pkg/bifs/cmp.go b/pkg/bifs/cmp.go index 832feab57..b4603d2a8 100644 --- a/pkg/bifs/cmp.go +++ b/pkg/bifs/cmp.go @@ -5,8 +5,8 @@ package bifs import ( - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/pkg/bifs/collections.go b/pkg/bifs/collections.go index 247622740..cd3f87da2 100644 --- a/pkg/bifs/collections.go +++ b/pkg/bifs/collections.go @@ -5,8 +5,8 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ================================================================ diff --git a/pkg/bifs/collections_test.go b/pkg/bifs/collections_test.go index 16ffba8c6..595e8c670 100644 --- a/pkg/bifs/collections_test.go +++ 
b/pkg/bifs/collections_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func TestBIF_length(t *testing.T) { diff --git a/pkg/bifs/datetime.go b/pkg/bifs/datetime.go index 9fa11e6b0..84bd48fd1 100644 --- a/pkg/bifs/datetime.go +++ b/pkg/bifs/datetime.go @@ -5,11 +5,11 @@ import ( "regexp" "time" - strptime "github.com/johnkerl/miller/pkg/pbnjay-strptime" + strptime "github.com/johnkerl/miller/v6/pkg/pbnjay-strptime" "github.com/lestrrat-go/strftime" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) const ISO8601_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" diff --git a/pkg/bifs/hashing.go b/pkg/bifs/hashing.go index e2d09d1e4..829c67601 100644 --- a/pkg/bifs/hashing.go +++ b/pkg/bifs/hashing.go @@ -7,7 +7,7 @@ import ( "crypto/sha512" "fmt" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func BIF_md5(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/pkg/bifs/hashing_test.go b/pkg/bifs/hashing_test.go index 6b44028be..5e3c177f1 100644 --- a/pkg/bifs/hashing_test.go +++ b/pkg/bifs/hashing_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func TestBIF_md5(t *testing.T) { diff --git a/pkg/bifs/mathlib.go b/pkg/bifs/mathlib.go index b415cb809..1c5395f69 100644 --- a/pkg/bifs/mathlib.go +++ b/pkg/bifs/mathlib.go @@ -7,8 +7,8 @@ package bifs import ( "math" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/pkg/bifs/percentiles.go b/pkg/bifs/percentiles.go index cecb98aec..ef1083879 100644 --- 
a/pkg/bifs/percentiles.go +++ b/pkg/bifs/percentiles.go @@ -3,7 +3,7 @@ package bifs import ( "math" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func GetPercentileLinearlyInterpolated( diff --git a/pkg/bifs/random.go b/pkg/bifs/random.go index c85509da6..007174db9 100644 --- a/pkg/bifs/random.go +++ b/pkg/bifs/random.go @@ -3,8 +3,8 @@ package bifs import ( "math" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func BIF_urand() *mlrval.Mlrval { diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go index 25e0fe5c9..011c21ac2 100644 --- a/pkg/bifs/regex.go +++ b/pkg/bifs/regex.go @@ -3,8 +3,8 @@ package bifs import ( "strings" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // BIF_ssub implements the ssub function -- no-frills string-replace, no diff --git a/pkg/bifs/relative_time.go b/pkg/bifs/relative_time.go index f36258ffe..d05ce3900 100644 --- a/pkg/bifs/relative_time.go +++ b/pkg/bifs/relative_time.go @@ -5,7 +5,7 @@ import ( "math" "strings" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func BIF_dhms2sec(input1 *mlrval.Mlrval) *mlrval.Mlrval { diff --git a/pkg/bifs/stats.go b/pkg/bifs/stats.go index ff3531a31..d7bd3f106 100644 --- a/pkg/bifs/stats.go +++ b/pkg/bifs/stats.go @@ -4,8 +4,8 @@ import ( "math" "sort" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/pkg/bifs/stats_test.go b/pkg/bifs/stats_test.go index 735ceab83..a8e846897 100644 --- a/pkg/bifs/stats_test.go +++ b/pkg/bifs/stats_test.go @@ -6,7 +6,7 @@ import ( 
"github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) func stats_test_array(n int) *mlrval.Mlrval { diff --git a/pkg/bifs/strings.go b/pkg/bifs/strings.go index e77de7c68..73aef62bf 100644 --- a/pkg/bifs/strings.go +++ b/pkg/bifs/strings.go @@ -7,8 +7,8 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ================================================================ diff --git a/pkg/bifs/system.go b/pkg/bifs/system.go index e2044b4b8..e734f2998 100644 --- a/pkg/bifs/system.go +++ b/pkg/bifs/system.go @@ -6,9 +6,9 @@ import ( "runtime" "strings" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/platform" - "github.com/johnkerl/miller/pkg/version" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/platform" + "github.com/johnkerl/miller/v6/pkg/version" ) func BIF_version() *mlrval.Mlrval { diff --git a/pkg/bifs/types.go b/pkg/bifs/types.go index 87ee80448..a4eb1f230 100644 --- a/pkg/bifs/types.go +++ b/pkg/bifs/types.go @@ -5,9 +5,9 @@ import ( "math" "os" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/cli/flag_types.go b/pkg/cli/flag_types.go index 590487d43..f1eef4772 100644 --- a/pkg/cli/flag_types.go +++ b/pkg/cli/flag_types.go @@ -42,8 +42,8 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) // 
---------------------------------------------------------------- diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index a17d4658f..34db19a77 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -16,9 +16,9 @@ import ( "github.com/mattn/go-isatty" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // FinalizeReaderOptions does a few things. diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go index 7cbece965..19227fd73 100644 --- a/pkg/cli/option_types.go +++ b/pkg/cli/option_types.go @@ -9,7 +9,7 @@ package cli import ( "regexp" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) type TCommentHandling int diff --git a/pkg/cli/verb_utils.go b/pkg/cli/verb_utils.go index 421af9af8..d20901998 100644 --- a/pkg/cli/verb_utils.go +++ b/pkg/cli/verb_utils.go @@ -9,7 +9,7 @@ import ( "os" "strconv" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // For flags with values, e.g. ["-n" "10"], while we're looking at the "-n" this let us see if the "10" slot exists. diff --git a/pkg/climain/mlrcli_mlrrc.go b/pkg/climain/mlrcli_mlrrc.go index d3c5c1401..ce0a85789 100644 --- a/pkg/climain/mlrcli_mlrrc.go +++ b/pkg/climain/mlrcli_mlrrc.go @@ -8,7 +8,7 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) // loadMlrrcOrDie rule: If $MLRRC is set, use it and only it. 
Otherwise try diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index 52e772d4b..364195824 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -74,13 +74,13 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/terminals" - "github.com/johnkerl/miller/pkg/terminals/help" - "github.com/johnkerl/miller/pkg/transformers" - "github.com/johnkerl/miller/pkg/version" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/terminals" + "github.com/johnkerl/miller/v6/pkg/terminals/help" + "github.com/johnkerl/miller/v6/pkg/transformers" + "github.com/johnkerl/miller/v6/pkg/version" ) // ParseCommandLine is the entrypoint for handling the Miller command line: diff --git a/pkg/climain/mlrcli_shebang.go b/pkg/climain/mlrcli_shebang.go index e0e2f91c8..686c9f2d2 100644 --- a/pkg/climain/mlrcli_shebang.go +++ b/pkg/climain/mlrcli_shebang.go @@ -6,7 +6,7 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" shellquote "github.com/kballard/go-shellquote" ) diff --git a/pkg/dsl/ast_build.go b/pkg/dsl/ast_build.go index f417998f6..bba98e1e3 100644 --- a/pkg/dsl/ast_build.go +++ b/pkg/dsl/ast_build.go @@ -8,8 +8,8 @@ package dsl import ( "fmt" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/ast_types.go b/pkg/dsl/ast_types.go index 6856ce830..80fd7dcc5 100644 --- a/pkg/dsl/ast_types.go +++ b/pkg/dsl/ast_types.go @@ -5,7 +5,7 @@ package dsl import ( - "github.com/johnkerl/miller/pkg/parsing/token" + 
"github.com/johnkerl/miller/v6/pkg/parsing/token" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/assignments.go b/pkg/dsl/cst/assignments.go index 129ec850c..81bcac85f 100644 --- a/pkg/dsl/cst/assignments.go +++ b/pkg/dsl/cst/assignments.go @@ -5,9 +5,9 @@ package cst import ( - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ================================================================ diff --git a/pkg/dsl/cst/block_exit.go b/pkg/dsl/cst/block_exit.go index b52b363cb..3ba730705 100644 --- a/pkg/dsl/cst/block_exit.go +++ b/pkg/dsl/cst/block_exit.go @@ -8,9 +8,9 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/blocks.go b/pkg/dsl/cst/blocks.go index d51c70d75..4b47bf574 100644 --- a/pkg/dsl/cst/blocks.go +++ b/pkg/dsl/cst/blocks.go @@ -6,9 +6,9 @@ package cst import ( - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index ec4bfa980..e35ee8b0b 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -19,9 +19,9 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/pkg/bifs" - 
"github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) type TFunctionClass string diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index ef5a6fb98..495cea6b3 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/collections.go b/pkg/dsl/cst/collections.go index 1dcee4daf..85866f7d7 100644 --- a/pkg/dsl/cst/collections.go +++ b/pkg/dsl/cst/collections.go @@ -8,11 +8,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/cond.go b/pkg/dsl/cst/cond.go index f7f0063e3..aca452f43 100644 --- a/pkg/dsl/cst/cond.go +++ b/pkg/dsl/cst/cond.go @@ -8,11 +8,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - 
"github.com/johnkerl/miller/pkg/parsing/token" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/runtime" ) type CondBlockNode struct { diff --git a/pkg/dsl/cst/dump.go b/pkg/dsl/cst/dump.go index 14070527d..ba41ce16a 100644 --- a/pkg/dsl/cst/dump.go +++ b/pkg/dsl/cst/dump.go @@ -21,11 +21,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/dsl/cst/emit1.go b/pkg/dsl/cst/emit1.go index a4996e312..5ea14c8f4 100644 --- a/pkg/dsl/cst/emit1.go +++ b/pkg/dsl/cst/emit1.go @@ -22,10 +22,10 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) type Emit1StatementNode struct { diff --git a/pkg/dsl/cst/emit_emitp.go b/pkg/dsl/cst/emit_emitp.go index 3552f023d..323c7495a 100644 --- a/pkg/dsl/cst/emit_emitp.go +++ b/pkg/dsl/cst/emit_emitp.go @@ -41,13 +41,13 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - 
"github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/dsl/cst/emitf.go b/pkg/dsl/cst/emitf.go index 97aebfe98..bb211f64a 100644 --- a/pkg/dsl/cst/emitf.go +++ b/pkg/dsl/cst/emitf.go @@ -8,12 +8,12 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/dsl/cst/env.go b/pkg/dsl/cst/env.go index c2f038f2d..25e70a511 100644 --- a/pkg/dsl/cst/env.go +++ b/pkg/dsl/cst/env.go @@ -10,10 +10,10 @@ package cst import ( "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) type EnvironmentVariableNode struct { diff --git a/pkg/dsl/cst/evaluable.go b/pkg/dsl/cst/evaluable.go index 9440537ba..ea5673be9 100644 --- a/pkg/dsl/cst/evaluable.go +++ b/pkg/dsl/cst/evaluable.go @@ 
-10,10 +10,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/filter.go b/pkg/dsl/cst/filter.go index 4a4d3984e..dbcbb2252 100644 --- a/pkg/dsl/cst/filter.go +++ b/pkg/dsl/cst/filter.go @@ -19,9 +19,9 @@ package cst import ( - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/for.go b/pkg/dsl/cst/for.go index 22c490c47..75e7cf258 100644 --- a/pkg/dsl/cst/for.go +++ b/pkg/dsl/cst/for.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/parsing/token" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/functions.go b/pkg/dsl/cst/functions.go index 5aca6d397..c214cd349 100644 --- a/pkg/dsl/cst/functions.go +++ b/pkg/dsl/cst/functions.go @@ -9,8 +9,8 @@ package cst import ( - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/dsl" + 
"github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/hofs.go b/pkg/dsl/cst/hofs.go index a7d94a7c2..67ab64b4a 100644 --- a/pkg/dsl/cst/hofs.go +++ b/pkg/dsl/cst/hofs.go @@ -14,12 +14,12 @@ import ( "github.com/facette/natsort" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) -// Most function types are in the github.com/johnkerl/miller/pkg/types package. These types, though, +// Most function types are in the github.com/johnkerl/miller/v6/pkg/types package. These types, though, // include functions which need to access CST state in order to call back to // user-defined functions. To avoid a package-cycle dependency, they are // defined here. diff --git a/pkg/dsl/cst/if.go b/pkg/dsl/cst/if.go index b947c7f6e..a25f60eb8 100644 --- a/pkg/dsl/cst/if.go +++ b/pkg/dsl/cst/if.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/parsing/token" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/keyword_usage.go b/pkg/dsl/cst/keyword_usage.go index c5bec8f7a..0f5341980 100644 --- a/pkg/dsl/cst/keyword_usage.go +++ b/pkg/dsl/cst/keyword_usage.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + 
"github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go index 0e3621d7d..81612d85f 100644 --- a/pkg/dsl/cst/leaves.go +++ b/pkg/dsl/cst/leaves.go @@ -8,10 +8,10 @@ import ( "fmt" "math" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/lvalues.go b/pkg/dsl/cst/lvalues.go index 073c5d991..cb18d7832 100644 --- a/pkg/dsl/cst/lvalues.go +++ b/pkg/dsl/cst/lvalues.go @@ -9,10 +9,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/print.go b/pkg/dsl/cst/print.go index 8c68593be..f00c712a8 100644 --- a/pkg/dsl/cst/print.go +++ b/pkg/dsl/cst/print.go @@ -9,11 +9,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git 
a/pkg/dsl/cst/root.go b/pkg/dsl/cst/root.go index f48ebc330..a5b2f4848 100644 --- a/pkg/dsl/cst/root.go +++ b/pkg/dsl/cst/root.go @@ -11,13 +11,13 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/parsing/lexer" - "github.com/johnkerl/miller/pkg/parsing/parser" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/parsing/lexer" + "github.com/johnkerl/miller/v6/pkg/parsing/parser" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // NewEmptyRoot sets up an empty CST, before ingesting any DSL strings. For diff --git a/pkg/dsl/cst/signature.go b/pkg/dsl/cst/signature.go index 1ee554763..210ac4a4e 100644 --- a/pkg/dsl/cst/signature.go +++ b/pkg/dsl/cst/signature.go @@ -6,7 +6,7 @@ package cst import ( - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/statements.go b/pkg/dsl/cst/statements.go index ce42cb854..251618f19 100644 --- a/pkg/dsl/cst/statements.go +++ b/pkg/dsl/cst/statements.go @@ -8,7 +8,7 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/dsl" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/subroutines.go b/pkg/dsl/cst/subroutines.go index 6c1b76dae..3f04de745 100644 --- a/pkg/dsl/cst/subroutines.go +++ b/pkg/dsl/cst/subroutines.go @@ -9,8 +9,8 @@ package cst import ( - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" ) // 
---------------------------------------------------------------- diff --git a/pkg/dsl/cst/tee.go b/pkg/dsl/cst/tee.go index df9b8ff0b..7a9542eea 100644 --- a/pkg/dsl/cst/tee.go +++ b/pkg/dsl/cst/tee.go @@ -7,12 +7,12 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/types.go b/pkg/dsl/cst/types.go index f490ca02b..bc1b2768a 100644 --- a/pkg/dsl/cst/types.go +++ b/pkg/dsl/cst/types.go @@ -7,10 +7,10 @@ package cst import ( "container/list" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go index 042366afc..4add00e88 100644 --- a/pkg/dsl/cst/udf.go +++ b/pkg/dsl/cst/udf.go @@ -8,11 +8,11 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" + 
"github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/uds.go b/pkg/dsl/cst/uds.go index 2ed14fa56..c9f888ca2 100644 --- a/pkg/dsl/cst/uds.go +++ b/pkg/dsl/cst/uds.go @@ -7,11 +7,11 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/validate.go b/pkg/dsl/cst/validate.go index 305c1bf7f..989b3d0e7 100644 --- a/pkg/dsl/cst/validate.go +++ b/pkg/dsl/cst/validate.go @@ -9,8 +9,8 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/warn.go b/pkg/dsl/cst/warn.go index 55850c8b1..75c5d0436 100644 --- a/pkg/dsl/cst/warn.go +++ b/pkg/dsl/cst/warn.go @@ -11,8 +11,8 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/dsl/cst/while.go b/pkg/dsl/cst/while.go index 4e088df6c..46ea57e32 100644 --- a/pkg/dsl/cst/while.go +++ b/pkg/dsl/cst/while.go @@ -7,10 +7,10 @@ package cst import ( "fmt" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/parsing/token" - "github.com/johnkerl/miller/pkg/runtime" + 
"github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ================================================================ diff --git a/pkg/dsl/token.go b/pkg/dsl/token.go index ff79d26e0..6808941d2 100644 --- a/pkg/dsl/token.go +++ b/pkg/dsl/token.go @@ -3,7 +3,7 @@ package dsl import ( "fmt" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) // TokenToLocationInfo is used to track runtime errors back to source-code locations in DSL diff --git a/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go index 962fc59fc..7426c726d 100644 --- a/pkg/entrypoint/entrypoint.go +++ b/pkg/entrypoint/entrypoint.go @@ -10,13 +10,13 @@ import ( "os" "path" - "github.com/johnkerl/miller/pkg/auxents" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/climain" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/platform" - "github.com/johnkerl/miller/pkg/stream" - "github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/v6/pkg/auxents" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/climain" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/platform" + "github.com/johnkerl/miller/v6/pkg/stream" + "github.com/johnkerl/miller/v6/pkg/transformers" ) type MainReturn struct { diff --git a/pkg/input/line_reader.go b/pkg/input/line_reader.go index 6779b65db..663178c5d 100644 --- a/pkg/input/line_reader.go +++ b/pkg/input/line_reader.go @@ -9,7 +9,7 @@ import ( "io" "strings" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) type ILineReader interface { diff --git a/pkg/input/pseudo_reader_gen.go b/pkg/input/pseudo_reader_gen.go index 6479cb4d7..fa949e01a 100644 --- a/pkg/input/pseudo_reader_gen.go +++ b/pkg/input/pseudo_reader_gen.go @@ -4,10 +4,10 @@ 
import ( "container/list" "fmt" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type PseudoReaderGen struct { diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go index 62a411f22..3ad932f2f 100644 --- a/pkg/input/record_reader.go +++ b/pkg/input/record_reader.go @@ -6,7 +6,7 @@ package input import ( "container/list" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/types" ) // Since Go is concurrent, the context struct (AWK-like variables such as diff --git a/pkg/input/record_reader_benchmark_test.go b/pkg/input/record_reader_benchmark_test.go index 9d2352983..7b79c32ad 100644 --- a/pkg/input/record_reader_benchmark_test.go +++ b/pkg/input/record_reader_benchmark_test.go @@ -5,10 +5,10 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/input/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/input/... 
func BenchmarkDKVPParse(b *testing.B) { readerOptions := &cli.TReaderOptions{ diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index 68949b25a..976f6ed1d 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -8,12 +8,12 @@ import ( "strconv" "strings" - csv "github.com/johnkerl/miller/pkg/go-csv" + csv "github.com/johnkerl/miller/v6/pkg/go-csv" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go index 5109770df..dd590da82 100644 --- a/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go @@ -25,10 +25,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // recordBatchGetterCSV points to either an explicit-CSV-header or diff --git a/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go index a5509a23d..efc0ae385 100644 --- a/pkg/input/record_reader_dkvp_nidx.go +++ b/pkg/input/record_reader_dkvp_nidx.go @@ -8,10 +8,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + 
"github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // splitter_DKVP_NIDX is a function type for the one bit of code differing diff --git a/pkg/input/record_reader_dkvp_test.go b/pkg/input/record_reader_dkvp_test.go index 77e0e557f..b73b97103 100644 --- a/pkg/input/record_reader_dkvp_test.go +++ b/pkg/input/record_reader_dkvp_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) func TestRecordFromDKVPLine(t *testing.T) { diff --git a/pkg/input/record_reader_factory.go b/pkg/input/record_reader_factory.go index 26d2f81ed..c4fd13934 100644 --- a/pkg/input/record_reader_factory.go +++ b/pkg/input/record_reader_factory.go @@ -3,7 +3,7 @@ package input import ( "fmt" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) func Create(readerOptions *cli.TReaderOptions, recordsPerBatch int64) (IRecordReader, error) { diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index aaa49a178..094dca996 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -8,10 +8,10 @@ import ( "encoding/json" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordReaderJSON struct { diff --git a/pkg/input/record_reader_markdown.go b/pkg/input/record_reader_markdown.go index 22cc3a078..1766967fc 100644 --- a/pkg/input/record_reader_markdown.go +++ b/pkg/input/record_reader_markdown.go @@ -3,7 +3,7 @@ package input import ( "regexp" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) func NewRecordReaderMarkdown( diff --git a/pkg/input/record_reader_pprint.go 
b/pkg/input/record_reader_pprint.go index 5cb4bfbad..d3ed2c228 100644 --- a/pkg/input/record_reader_pprint.go +++ b/pkg/input/record_reader_pprint.go @@ -8,10 +8,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) func NewRecordReaderPPRINT( diff --git a/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go index 02a3c4f6e..4db48f669 100644 --- a/pkg/input/record_reader_tsv.go +++ b/pkg/input/record_reader_tsv.go @@ -7,10 +7,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // recordBatchGetterTSV points to either an explicit-TSV-header or diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go index 31294012c..b108f771d 100644 --- a/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -8,10 +8,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type iXTABPairSplitter interface { diff --git a/pkg/input/splitters.go b/pkg/input/splitters.go index aa3e43b59..5e24e0b73 100644 --- a/pkg/input/splitters.go +++ b/pkg/input/splitters.go @@ -7,8 +7,8 @@ import ( "regexp" "strings" 
- "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" ) // IPairSplitter splits a string into left and right, e.g. for IPS. diff --git a/pkg/lib/halfpipe.go b/pkg/lib/halfpipe.go index 276b2893b..040896a00 100644 --- a/pkg/lib/halfpipe.go +++ b/pkg/lib/halfpipe.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/platform" + "github.com/johnkerl/miller/v6/pkg/platform" ) // OpenOutboundHalfPipe returns a handle to a process. Writing to that handle diff --git a/pkg/lib/readfiles.go b/pkg/lib/readfiles.go index 305f8a2b5..6eaaa0d17 100644 --- a/pkg/lib/readfiles.go +++ b/pkg/lib/readfiles.go @@ -9,7 +9,7 @@ import ( "os" "strings" - csv "github.com/johnkerl/miller/pkg/go-csv" + csv "github.com/johnkerl/miller/v6/pkg/go-csv" ) // LoadStringsFromFileOrDir calls LoadStringFromFile if path exists and is a diff --git a/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go index 0ba61fda5..caea8e2ab 100644 --- a/pkg/mlrval/mlrmap_accessors.go +++ b/pkg/mlrval/mlrmap_accessors.go @@ -5,7 +5,7 @@ import ( "fmt" "strconv" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // IsEmpty determines if a map is empty. 
diff --git a/pkg/mlrval/mlrmap_flatten_unflatten.go b/pkg/mlrval/mlrmap_flatten_unflatten.go index 8e48ba515..579522f22 100644 --- a/pkg/mlrval/mlrmap_flatten_unflatten.go +++ b/pkg/mlrval/mlrmap_flatten_unflatten.go @@ -25,7 +25,7 @@ package mlrval import ( "strings" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/mlrval/mlrmap_json.go b/pkg/mlrval/mlrmap_json.go index a985098eb..2db38dd9d 100644 --- a/pkg/mlrval/mlrmap_json.go +++ b/pkg/mlrval/mlrmap_json.go @@ -7,8 +7,8 @@ package mlrval import ( "bytes" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/mlrval/mlrval_accessors.go b/pkg/mlrval/mlrval_accessors.go index f788cc35b..e6d8a44e3 100644 --- a/pkg/mlrval/mlrval_accessors.go +++ b/pkg/mlrval/mlrval_accessors.go @@ -3,7 +3,7 @@ package mlrval import ( "strconv" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) func (mv *Mlrval) GetArrayLength() (int, bool) { diff --git a/pkg/mlrval/mlrval_benchmark_test.go b/pkg/mlrval/mlrval_benchmark_test.go index 8d7c576b2..9cb79f349 100644 --- a/pkg/mlrval/mlrval_benchmark_test.go +++ b/pkg/mlrval/mlrval_benchmark_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/mlrval/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/mlrval/... 
func BenchmarkFromDeferredType(b *testing.B) { for i := 0; i < b.N; i++ { diff --git a/pkg/mlrval/mlrval_cmp.go b/pkg/mlrval/mlrval_cmp.go index cebd3af25..f631420a2 100644 --- a/pkg/mlrval/mlrval_cmp.go +++ b/pkg/mlrval/mlrval_cmp.go @@ -14,7 +14,7 @@ package mlrval import ( - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) type CmpFuncBool func(input1, input2 *Mlrval) bool diff --git a/pkg/mlrval/mlrval_collections.go b/pkg/mlrval/mlrval_collections.go index 5f4e305a6..46e2d3718 100644 --- a/pkg/mlrval/mlrval_collections.go +++ b/pkg/mlrval/mlrval_collections.go @@ -74,7 +74,7 @@ import ( "os" "strconv" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ================================================================ diff --git a/pkg/mlrval/mlrval_get.go b/pkg/mlrval/mlrval_get.go index 2eb6bfb66..9c681229d 100644 --- a/pkg/mlrval/mlrval_get.go +++ b/pkg/mlrval/mlrval_get.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // It's essential that we use mv.Type() not mv.mvtype, or use an Is...() diff --git a/pkg/mlrval/mlrval_infer.go b/pkg/mlrval/mlrval_infer.go index 5be0abef2..2c9a20064 100644 --- a/pkg/mlrval/mlrval_infer.go +++ b/pkg/mlrval/mlrval_infer.go @@ -3,7 +3,7 @@ package mlrval import ( "strconv" - "github.com/johnkerl/miller/pkg/scan" + "github.com/johnkerl/miller/v6/pkg/scan" ) // TODO: comment no infer-bool from data files. Always false in this path. 
diff --git a/pkg/mlrval/mlrval_is.go b/pkg/mlrval/mlrval_is.go index 5b3fcd9e8..7f438871f 100644 --- a/pkg/mlrval/mlrval_is.go +++ b/pkg/mlrval/mlrval_is.go @@ -1,7 +1,7 @@ package mlrval import ( - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // It's essential that we use mv.Type() not mv.mvtype since types are diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go index 5dfd99624..fd7d6711e 100644 --- a/pkg/mlrval/mlrval_json.go +++ b/pkg/mlrval/mlrval_json.go @@ -14,8 +14,8 @@ import ( "fmt" "io" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) const JSON_INDENT_STRING string = " " diff --git a/pkg/mlrval/mlrval_new.go b/pkg/mlrval/mlrval_new.go index c4109af26..bcad5590b 100644 --- a/pkg/mlrval/mlrval_new.go +++ b/pkg/mlrval/mlrval_new.go @@ -8,7 +8,7 @@ import ( "errors" "fmt" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // TODO: comment for JSON-scanner context. @@ -318,7 +318,7 @@ func (mv *Mlrval) SetFromPrevalidatedBoolString(input string, boolval bool) *Mlr // The user-defined function is of type 'interface{}' here to avoid what would // otherwise be a package-dependency cycle between this package and -// github.com/johnkerl/miller/pkg/dsl/cst. +// github.com/johnkerl/miller/v6/pkg/dsl/cst. // // Nominally the name argument is the user-specified name if `func f(a, b) { // ... }`, or some autogenerated UUID like `fl0052` if `func (a, b) { ... }`. 
diff --git a/pkg/output/channel_writer.go b/pkg/output/channel_writer.go index 3eb8b1338..86be3324a 100644 --- a/pkg/output/channel_writer.go +++ b/pkg/output/channel_writer.go @@ -6,8 +6,8 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) func ChannelWriter( diff --git a/pkg/output/file_output_handlers.go b/pkg/output/file_output_handlers.go index d21cfb812..a5b3824ee 100644 --- a/pkg/output/file_output_handlers.go +++ b/pkg/output/file_output_handlers.go @@ -20,9 +20,9 @@ import ( "io" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/output/record_writer.go b/pkg/output/record_writer.go index ceb7522d2..e3c224667 100644 --- a/pkg/output/record_writer.go +++ b/pkg/output/record_writer.go @@ -3,8 +3,8 @@ package output import ( "bufio" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // IRecordWriter is the abstract interface for all record-writers. 
They are diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go index efedd64bf..ffd76f2ec 100644 --- a/pkg/output/record_writer_csv.go +++ b/pkg/output/record_writer_csv.go @@ -5,11 +5,11 @@ import ( "fmt" "strings" - csv "github.com/johnkerl/miller/pkg/go-csv" + csv "github.com/johnkerl/miller/v6/pkg/go-csv" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterCSV struct { diff --git a/pkg/output/record_writer_csv_colorizer.go b/pkg/output/record_writer_csv_colorizer.go index 9cb103b6a..a9efd0b30 100644 --- a/pkg/output/record_writer_csv_colorizer.go +++ b/pkg/output/record_writer_csv_colorizer.go @@ -47,7 +47,7 @@ import ( "strings" "unicode/utf8" - "github.com/johnkerl/miller/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/colorizer" ) var errInvalidDelim = errors.New("csv: invalid field or comment delimiter") diff --git a/pkg/output/record_writer_csvlite.go b/pkg/output/record_writer_csvlite.go index 280abf38d..ac36a8270 100644 --- a/pkg/output/record_writer_csvlite.go +++ b/pkg/output/record_writer_csvlite.go @@ -4,10 +4,10 @@ import ( "bufio" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterCSVLite struct { diff --git a/pkg/output/record_writer_dkvp.go b/pkg/output/record_writer_dkvp.go index 79ea8de05..692fa9480 100644 --- a/pkg/output/record_writer_dkvp.go +++ b/pkg/output/record_writer_dkvp.go @@ -3,10 +3,10 @@ package output import ( "bufio" - "github.com/johnkerl/miller/pkg/cli" - 
"github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterDKVP struct { diff --git a/pkg/output/record_writer_factory.go b/pkg/output/record_writer_factory.go index ae7941490..84ff64cfe 100644 --- a/pkg/output/record_writer_factory.go +++ b/pkg/output/record_writer_factory.go @@ -3,7 +3,7 @@ package output import ( "fmt" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) func Create(writerOptions *cli.TWriterOptions) (IRecordWriter, error) { diff --git a/pkg/output/record_writer_json.go b/pkg/output/record_writer_json.go index 1a1e7ed58..d0be01461 100644 --- a/pkg/output/record_writer_json.go +++ b/pkg/output/record_writer_json.go @@ -5,9 +5,9 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/output/record_writer_markdown.go b/pkg/output/record_writer_markdown.go index 94137822d..64bc8bb97 100644 --- a/pkg/output/record_writer_markdown.go +++ b/pkg/output/record_writer_markdown.go @@ -4,10 +4,10 @@ import ( "bufio" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterMarkdown struct { diff --git a/pkg/output/record_writer_nidx.go 
b/pkg/output/record_writer_nidx.go index ac599e3a7..45d01c45b 100644 --- a/pkg/output/record_writer_nidx.go +++ b/pkg/output/record_writer_nidx.go @@ -3,9 +3,9 @@ package output import ( "bufio" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterNIDX struct { diff --git a/pkg/output/record_writer_pprint.go b/pkg/output/record_writer_pprint.go index 6b2f92f1f..acb3366e2 100644 --- a/pkg/output/record_writer_pprint.go +++ b/pkg/output/record_writer_pprint.go @@ -7,10 +7,10 @@ import ( "strings" "unicode/utf8" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterPPRINT struct { diff --git a/pkg/output/record_writer_tsv.go b/pkg/output/record_writer_tsv.go index 0e845be79..17f1ce563 100644 --- a/pkg/output/record_writer_tsv.go +++ b/pkg/output/record_writer_tsv.go @@ -5,11 +5,11 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type RecordWriterTSV struct { diff --git a/pkg/output/record_writer_xtab.go b/pkg/output/record_writer_xtab.go index bfacdde95..5d1b52fa0 100644 --- a/pkg/output/record_writer_xtab.go +++ b/pkg/output/record_writer_xtab.go 
@@ -5,10 +5,10 @@ import ( "fmt" "unicode/utf8" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/parsing/errors.go.template b/pkg/parsing/errors.go.template index 5c39ede1d..11d8a4539 100644 --- a/pkg/parsing/errors.go.template +++ b/pkg/parsing/errors.go.template @@ -13,7 +13,7 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) type ErrorSymbol interface { diff --git a/pkg/parsing/errors/errors.go b/pkg/parsing/errors/errors.go index 5c39ede1d..11d8a4539 100644 --- a/pkg/parsing/errors/errors.go +++ b/pkg/parsing/errors/errors.go @@ -13,7 +13,7 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) type ErrorSymbol interface { diff --git a/pkg/parsing/lexer/acttab.go b/pkg/parsing/lexer/acttab.go index 18917879f..b8150ad23 100644 --- a/pkg/parsing/lexer/acttab.go +++ b/pkg/parsing/lexer/acttab.go @@ -5,7 +5,7 @@ package lexer import ( "fmt" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) type ActionTable [NumStates]ActionRow diff --git a/pkg/parsing/lexer/lexer.go b/pkg/parsing/lexer/lexer.go index d106a08a3..74ac942b1 100644 --- a/pkg/parsing/lexer/lexer.go +++ b/pkg/parsing/lexer/lexer.go @@ -6,7 +6,7 @@ import ( "os" "unicode/utf8" - "github.com/johnkerl/miller/pkg/parsing/token" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) const ( diff --git a/pkg/parsing/mlr.bnf b/pkg/parsing/mlr.bnf index 39d6c0c3b..5903cf419 100644 --- a/pkg/parsing/mlr.bnf +++ 
b/pkg/parsing/mlr.bnf @@ -347,7 +347,7 @@ panic : '%' '%' '%' 'p' 'a' 'n' 'i' 'c' '%' '%' '%' ; // ================================================================ // Import the AST/ASTNode types and functions -<< import "github.com/johnkerl/miller/pkg/dsl" >> +<< import "github.com/johnkerl/miller/v6/pkg/dsl" >> // ================================================================ // TOP-LEVEL PRODUCTION RULE FOR THE MILLER DSL diff --git a/pkg/parsing/parser/parser.go b/pkg/parsing/parser/parser.go index 444e9f495..b984087af 100644 --- a/pkg/parsing/parser/parser.go +++ b/pkg/parsing/parser/parser.go @@ -6,8 +6,8 @@ import ( "fmt" "strings" - parseError "github.com/johnkerl/miller/pkg/parsing/errors" - "github.com/johnkerl/miller/pkg/parsing/token" + parseError "github.com/johnkerl/miller/v6/pkg/parsing/errors" + "github.com/johnkerl/miller/v6/pkg/parsing/token" ) const ( diff --git a/pkg/parsing/parser/productionstable.go b/pkg/parsing/parser/productionstable.go index f4b61fd50..93ea03996 100644 --- a/pkg/parsing/parser/productionstable.go +++ b/pkg/parsing/parser/productionstable.go @@ -2,7 +2,7 @@ package parser -import "github.com/johnkerl/miller/pkg/dsl" +import "github.com/johnkerl/miller/v6/pkg/dsl" type ( ProdTab [numProductions]ProdTabEntry diff --git a/pkg/runtime/stack.go b/pkg/runtime/stack.go index b32cd06dd..263e5cee1 100644 --- a/pkg/runtime/stack.go +++ b/pkg/runtime/stack.go @@ -29,9 +29,9 @@ import ( "container/list" "fmt" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ================================================================ diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go index cfd9e11a7..6b8b5d29c 100644 --- a/pkg/runtime/state.go +++ b/pkg/runtime/state.go @@ -9,10 +9,10 @@ package runtime import ( "container/list" - 
"github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) type State struct { diff --git a/pkg/scan/find_benchmark_test.go b/pkg/scan/find_benchmark_test.go index e905bb735..ee3d2b744 100644 --- a/pkg/scan/find_benchmark_test.go +++ b/pkg/scan/find_benchmark_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -// go test -run=nonesuch -bench=. github.com/johnkerl/miller/pkg/scan/... +// go test -run=nonesuch -bench=. github.com/johnkerl/miller/v6/pkg/scan/... func BenchmarkFromNormalCases(b *testing.B) { diff --git a/pkg/stream/stream.go b/pkg/stream/stream.go index 9f2cbe805..84f096faf 100644 --- a/pkg/stream/stream.go +++ b/pkg/stream/stream.go @@ -6,11 +6,11 @@ import ( "errors" "io" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/transformers" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/transformers" + "github.com/johnkerl/miller/v6/pkg/types" ) // Since Go is concurrent, the context struct (AWK-like variables such as diff --git a/pkg/terminals/help/entry.go b/pkg/terminals/help/entry.go index 4666adfa9..47d3f6e9d 100644 --- a/pkg/terminals/help/entry.go +++ b/pkg/terminals/help/entry.go @@ -10,14 +10,14 @@ import ( "github.com/mattn/go-isatty" - "github.com/johnkerl/miller/pkg/auxents" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" - 
"github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/v6/pkg/auxents" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/transformers" ) // ================================================================ diff --git a/pkg/terminals/regtest/invoker.go b/pkg/terminals/regtest/invoker.go index febbbbfa3..7f58d7d9e 100644 --- a/pkg/terminals/regtest/invoker.go +++ b/pkg/terminals/regtest/invoker.go @@ -6,8 +6,8 @@ import ( "os/exec" "strings" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/platform" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/platform" ) // RunMillerCommand runs a string like 'mlr cat foo.dat', with specified mlr diff --git a/pkg/terminals/regtest/regtester.go b/pkg/terminals/regtest/regtester.go index 8b5231c8e..1df98f874 100644 --- a/pkg/terminals/regtest/regtester.go +++ b/pkg/terminals/regtest/regtester.go @@ -63,8 +63,8 @@ import ( "runtime" "strings" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) const CmdName = "cmd" diff --git a/pkg/terminals/repl/dsl.go b/pkg/terminals/repl/dsl.go index 8f3a2a046..78f3b98bb 100644 --- a/pkg/terminals/repl/dsl.go +++ b/pkg/terminals/repl/dsl.go @@ -23,9 +23,9 @@ import ( "fmt" "strings" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/pkg/terminals/repl/entry.go 
b/pkg/terminals/repl/entry.go index 23a86941b..95d01f385 100644 --- a/pkg/terminals/repl/entry.go +++ b/pkg/terminals/repl/entry.go @@ -27,7 +27,7 @@ import ( "path" "strings" - "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/cli" ) // ================================================================ diff --git a/pkg/terminals/repl/prompt.go b/pkg/terminals/repl/prompt.go index bfcb46d2c..be2de0e10 100644 --- a/pkg/terminals/repl/prompt.go +++ b/pkg/terminals/repl/prompt.go @@ -11,9 +11,9 @@ import ( "golang.org/x/term" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/version" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/version" ) const ENV_PRIMARY_PROMPT = "MLR_REPL_PS1" diff --git a/pkg/terminals/repl/session.go b/pkg/terminals/repl/session.go index 03ef0f6b4..33fcac149 100644 --- a/pkg/terminals/repl/session.go +++ b/pkg/terminals/repl/session.go @@ -25,14 +25,14 @@ import ( "strings" "syscall" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/terminals/repl/types.go b/pkg/terminals/repl/types.go index b0da1b9d3..76f8507f0 100644 --- a/pkg/terminals/repl/types.go +++ 
b/pkg/terminals/repl/types.go @@ -9,11 +9,11 @@ import ( "container/list" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/runtime" ) // ================================================================ diff --git a/pkg/terminals/repl/verbs.go b/pkg/terminals/repl/verbs.go index ac5440ffd..bbb1a36b2 100644 --- a/pkg/terminals/repl/verbs.go +++ b/pkg/terminals/repl/verbs.go @@ -10,12 +10,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/terminals/terminals.go b/pkg/terminals/terminals.go index 78ec0dd3f..9e11e1e71 100644 --- a/pkg/terminals/terminals.go +++ b/pkg/terminals/terminals.go @@ -10,10 +10,10 @@ import ( "os" "runtime" - "github.com/johnkerl/miller/pkg/terminals/help" - "github.com/johnkerl/miller/pkg/terminals/regtest" - "github.com/johnkerl/miller/pkg/terminals/repl" - "github.com/johnkerl/miller/pkg/version" + "github.com/johnkerl/miller/v6/pkg/terminals/help" + "github.com/johnkerl/miller/v6/pkg/terminals/regtest" + "github.com/johnkerl/miller/v6/pkg/terminals/repl" + "github.com/johnkerl/miller/v6/pkg/version" 
) // tTerminalMain is a function-pointer type for the entrypoint handler for a given terminal, diff --git a/pkg/transformers/aaa_chain_transformer.go b/pkg/transformers/aaa_chain_transformer.go index e367ab883..6b67b03b7 100644 --- a/pkg/transformers/aaa_chain_transformer.go +++ b/pkg/transformers/aaa_chain_transformer.go @@ -3,8 +3,8 @@ package transformers import ( "container/list" "fmt" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" "os" ) diff --git a/pkg/transformers/aaa_record_transformer.go b/pkg/transformers/aaa_record_transformer.go index 1be4fc917..516a11a31 100644 --- a/pkg/transformers/aaa_record_transformer.go +++ b/pkg/transformers/aaa_record_transformer.go @@ -4,8 +4,8 @@ import ( "container/list" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // IRecordTransformer is the interface satisfied by all transformers, i.e., diff --git a/pkg/transformers/aaa_transformer_table.go b/pkg/transformers/aaa_transformer_table.go index 34a5b6ea8..1f201fa29 100644 --- a/pkg/transformers/aaa_transformer_table.go +++ b/pkg/transformers/aaa_transformer_table.go @@ -5,8 +5,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/colorizer" - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/colorizer" + "github.com/johnkerl/miller/v6/pkg/lib" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/altkv.go b/pkg/transformers/altkv.go index a97c3127e..cb1d5c8d2 100644 --- a/pkg/transformers/altkv.go +++ b/pkg/transformers/altkv.go @@ -7,9 +7,9 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + 
"github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/bar.go b/pkg/transformers/bar.go index 0aaafd8f1..09713c272 100644 --- a/pkg/transformers/bar.go +++ b/pkg/transformers/bar.go @@ -7,9 +7,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) const barDefaultFillString = "*" diff --git a/pkg/transformers/bootstrap.go b/pkg/transformers/bootstrap.go index 9450a425e..47a200499 100644 --- a/pkg/transformers/bootstrap.go +++ b/pkg/transformers/bootstrap.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/case.go b/pkg/transformers/case.go index 4d02617c4..e630c8e15 100644 --- a/pkg/transformers/case.go +++ b/pkg/transformers/case.go @@ -9,10 +9,10 @@ import ( "golang.org/x/text/cases" "golang.org/x/text/language" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/cat.go b/pkg/transformers/cat.go index c065aa536..74df80eca 100644 --- a/pkg/transformers/cat.go +++ b/pkg/transformers/cat.go @@ -6,9 +6,9 @@ 
import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/check.go b/pkg/transformers/check.go index 9f3600190..ed68d0afa 100644 --- a/pkg/transformers/check.go +++ b/pkg/transformers/check.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/clean_whitespace.go b/pkg/transformers/clean_whitespace.go index 3ffdd3862..bdb032f66 100644 --- a/pkg/transformers/clean_whitespace.go +++ b/pkg/transformers/clean_whitespace.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/count.go b/pkg/transformers/count.go index 623855ac9..107dbec6d 100644 --- a/pkg/transformers/count.go +++ b/pkg/transformers/count.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // 
---------------------------------------------------------------- diff --git a/pkg/transformers/count_similar.go b/pkg/transformers/count_similar.go index 2fafe2d70..b8c0a1819 100644 --- a/pkg/transformers/count_similar.go +++ b/pkg/transformers/count_similar.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/cut.go b/pkg/transformers/cut.go index f3039f65a..f8933d586 100644 --- a/pkg/transformers/cut.go +++ b/pkg/transformers/cut.go @@ -7,10 +7,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/decimate.go b/pkg/transformers/decimate.go index 8535c1d4b..112d9395e 100644 --- a/pkg/transformers/decimate.go +++ b/pkg/transformers/decimate.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/fill_down.go b/pkg/transformers/fill_down.go index b9ab079dc..55acbb3c3 100644 --- a/pkg/transformers/fill_down.go +++ b/pkg/transformers/fill_down.go @@ -6,9 +6,9 @@ import ( "os" "strings" - 
"github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/fill_empty.go b/pkg/transformers/fill_empty.go index e893ef334..833ed5b12 100644 --- a/pkg/transformers/fill_empty.go +++ b/pkg/transformers/fill_empty.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/flatten.go b/pkg/transformers/flatten.go index a2f4d1a97..86c4c0fd1 100644 --- a/pkg/transformers/flatten.go +++ b/pkg/transformers/flatten.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/format_values.go b/pkg/transformers/format_values.go index d91041c27..edaf0389f 100644 --- a/pkg/transformers/format_values.go +++ b/pkg/transformers/format_values.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git 
a/pkg/transformers/fraction.go b/pkg/transformers/fraction.go index 2216342fb..432d600cf 100644 --- a/pkg/transformers/fraction.go +++ b/pkg/transformers/fraction.go @@ -7,11 +7,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/gap.go b/pkg/transformers/gap.go index 345780f4e..9bec0b2ab 100644 --- a/pkg/transformers/gap.go +++ b/pkg/transformers/gap.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/grep.go b/pkg/transformers/grep.go index 46d01244c..38942d650 100644 --- a/pkg/transformers/grep.go +++ b/pkg/transformers/grep.go @@ -7,8 +7,8 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/group_by.go b/pkg/transformers/group_by.go index f210969ad..bf5441cb5 100644 --- a/pkg/transformers/group_by.go +++ b/pkg/transformers/group_by.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + 
"github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/group_like.go b/pkg/transformers/group_like.go index a7ede9a59..73a7dc347 100644 --- a/pkg/transformers/group_like.go +++ b/pkg/transformers/group_like.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/having_fields.go b/pkg/transformers/having_fields.go index b17e6cecc..467c0a7d3 100644 --- a/pkg/transformers/having_fields.go +++ b/pkg/transformers/having_fields.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) type tHavingFieldsCriterion int diff --git a/pkg/transformers/head.go b/pkg/transformers/head.go index 4626ddead..6f7ff5a19 100644 --- a/pkg/transformers/head.go +++ b/pkg/transformers/head.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/histogram.go b/pkg/transformers/histogram.go index a7d6241fb..32f581107 100644 --- a/pkg/transformers/histogram.go +++ b/pkg/transformers/histogram.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - 
"github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/join.go b/pkg/transformers/join.go index deeea1623..3d987bc07 100644 --- a/pkg/transformers/join.go +++ b/pkg/transformers/join.go @@ -6,12 +6,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/json_parse.go b/pkg/transformers/json_parse.go index 1a00ccf08..0690b410e 100644 --- a/pkg/transformers/json_parse.go +++ b/pkg/transformers/json_parse.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/json_stringify.go b/pkg/transformers/json_stringify.go index ca515f0a0..c6b5642a8 100644 --- a/pkg/transformers/json_stringify.go +++ b/pkg/transformers/json_stringify.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - 
"github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/label.go b/pkg/transformers/label.go index b14be1b6d..033908093 100644 --- a/pkg/transformers/label.go +++ b/pkg/transformers/label.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/latin1_to_utf8.go b/pkg/transformers/latin1_to_utf8.go index b3dca48b5..0dd509f39 100644 --- a/pkg/transformers/latin1_to_utf8.go +++ b/pkg/transformers/latin1_to_utf8.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go index 7ee2d9fad..56fe0bd0d 100644 --- a/pkg/transformers/merge_fields.go +++ b/pkg/transformers/merge_fields.go @@ -7,10 +7,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + 
"github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/most_or_least_frequent.go b/pkg/transformers/most_or_least_frequent.go index 1381e81d9..a47ba80dc 100644 --- a/pkg/transformers/most_or_least_frequent.go +++ b/pkg/transformers/most_or_least_frequent.go @@ -7,10 +7,10 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/nest.go b/pkg/transformers/nest.go index bced869df..31d58bf3e 100644 --- a/pkg/transformers/nest.go +++ b/pkg/transformers/nest.go @@ -9,10 +9,10 @@ import ( "strconv" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/nothing.go b/pkg/transformers/nothing.go index 9c5b72f1a..b64688352 100644 --- a/pkg/transformers/nothing.go +++ b/pkg/transformers/nothing.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go index 
36fed42e5..1f19e85d0 100644 --- a/pkg/transformers/put_or_filter.go +++ b/pkg/transformers/put_or_filter.go @@ -6,13 +6,13 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/dsl" - "github.com/johnkerl/miller/pkg/dsl/cst" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/runtime" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/dsl" + "github.com/johnkerl/miller/v6/pkg/dsl/cst" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/runtime" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/regularize.go b/pkg/transformers/regularize.go index c75d261f0..d39ffbad8 100644 --- a/pkg/transformers/regularize.go +++ b/pkg/transformers/regularize.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/remove_empty_columns.go b/pkg/transformers/remove_empty_columns.go index ce9b5a5dc..3b6b74263 100644 --- a/pkg/transformers/remove_empty_columns.go +++ b/pkg/transformers/remove_empty_columns.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // 
---------------------------------------------------------------- diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go index 7880b6ead..d7adef13f 100644 --- a/pkg/transformers/rename.go +++ b/pkg/transformers/rename.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go index 141b55c19..44e915a80 100644 --- a/pkg/transformers/reorder.go +++ b/pkg/transformers/reorder.go @@ -7,10 +7,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/repeat.go b/pkg/transformers/repeat.go index eab1725f4..a9761415a 100644 --- a/pkg/transformers/repeat.go +++ b/pkg/transformers/repeat.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) type tRepeatCountSource int diff --git a/pkg/transformers/reshape.go b/pkg/transformers/reshape.go index 4c0cffc07..1cc96f64f 100644 --- a/pkg/transformers/reshape.go +++ b/pkg/transformers/reshape.go @@ -34,10 +34,10 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - 
"github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sample.go b/pkg/transformers/sample.go index b1eef576b..b8e798c65 100644 --- a/pkg/transformers/sample.go +++ b/pkg/transformers/sample.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sec2gmt.go b/pkg/transformers/sec2gmt.go index 0dfbbe5df..33cbad387 100644 --- a/pkg/transformers/sec2gmt.go +++ b/pkg/transformers/sec2gmt.go @@ -5,10 +5,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sec2gmtdate.go b/pkg/transformers/sec2gmtdate.go index d09defffa..ee440f607 100644 --- a/pkg/transformers/sec2gmtdate.go +++ b/pkg/transformers/sec2gmtdate.go @@ -5,10 +5,10 @@ import ( "fmt" "os" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // 
---------------------------------------------------------------- diff --git a/pkg/transformers/seqgen.go b/pkg/transformers/seqgen.go index 2d1c3b188..5700a4ea5 100644 --- a/pkg/transformers/seqgen.go +++ b/pkg/transformers/seqgen.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/shuffle.go b/pkg/transformers/shuffle.go index 7aad23641..77659e5a8 100644 --- a/pkg/transformers/shuffle.go +++ b/pkg/transformers/shuffle.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/skip_trivial_records.go b/pkg/transformers/skip_trivial_records.go index 91c8bd242..4a0245edb 100644 --- a/pkg/transformers/skip_trivial_records.go +++ b/pkg/transformers/skip_trivial_records.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sort.go b/pkg/transformers/sort.go index d28009ae4..945c6e581 100644 --- a/pkg/transformers/sort.go +++ b/pkg/transformers/sort.go @@ -48,10 +48,10 @@ import ( "sort" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - 
"github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sort_within_records.go b/pkg/transformers/sort_within_records.go index 398ca5372..50ce51bc2 100644 --- a/pkg/transformers/sort_within_records.go +++ b/pkg/transformers/sort_within_records.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/sparsify.go b/pkg/transformers/sparsify.go index b6ae40c51..6d6212a33 100644 --- a/pkg/transformers/sparsify.go +++ b/pkg/transformers/sparsify.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/split.go b/pkg/transformers/split.go index 7834931e9..7295fa174 100644 --- a/pkg/transformers/split.go +++ b/pkg/transformers/split.go @@ -7,10 +7,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/types" ) // 
---------------------------------------------------------------- diff --git a/pkg/transformers/stats1.go b/pkg/transformers/stats1.go index b58129691..2f06540e1 100644 --- a/pkg/transformers/stats1.go +++ b/pkg/transformers/stats1.go @@ -8,11 +8,11 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/stats2.go b/pkg/transformers/stats2.go index a30141021..47dc60387 100644 --- a/pkg/transformers/stats2.go +++ b/pkg/transformers/stats2.go @@ -6,11 +6,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/step.go b/pkg/transformers/step.go index 56539cb90..7ecafaf70 100644 --- a/pkg/transformers/step.go +++ b/pkg/transformers/step.go @@ -73,12 +73,12 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + 
"github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // For EWMA diff --git a/pkg/transformers/subs.go b/pkg/transformers/subs.go index 578def1c0..10468a280 100644 --- a/pkg/transformers/subs.go +++ b/pkg/transformers/subs.go @@ -7,11 +7,11 @@ import ( "regexp" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/summary.go b/pkg/transformers/summary.go index ea5f123bc..6f386ab62 100644 --- a/pkg/transformers/summary.go +++ b/pkg/transformers/summary.go @@ -6,11 +6,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/tac.go b/pkg/transformers/tac.go index 83ccd6876..ba25195cb 100644 --- a/pkg/transformers/tac.go +++ b/pkg/transformers/tac.go @@ -6,8 +6,8 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/types" + 
"github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/tail.go b/pkg/transformers/tail.go index dcba44b67..345d09d53 100644 --- a/pkg/transformers/tail.go +++ b/pkg/transformers/tail.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/tee.go b/pkg/transformers/tee.go index 9e944df17..bff0065ea 100644 --- a/pkg/transformers/tee.go +++ b/pkg/transformers/tee.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/output" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/output" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/template.go b/pkg/transformers/template.go index 392f96377..a30bf7dae 100644 --- a/pkg/transformers/template.go +++ b/pkg/transformers/template.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/top.go b/pkg/transformers/top.go index 70119731b..9bc62fe69 100644 --- a/pkg/transformers/top.go +++ b/pkg/transformers/top.go @@ -6,11 +6,11 @@ import ( 
"os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/transformers/utils" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/transformers/utils" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/unflatten.go b/pkg/transformers/unflatten.go index d1e02a52c..dcf8014af 100644 --- a/pkg/transformers/unflatten.go +++ b/pkg/transformers/unflatten.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/uniq.go b/pkg/transformers/uniq.go index 5893b689a..ecd89a1c6 100644 --- a/pkg/transformers/uniq.go +++ b/pkg/transformers/uniq.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/unspace.go b/pkg/transformers/unspace.go index eb6253025..274f28fbd 100644 --- a/pkg/transformers/unspace.go +++ b/pkg/transformers/unspace.go @@ -6,9 +6,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/mlrval" - 
"github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/unsparsify.go b/pkg/transformers/unsparsify.go index 467b83dac..30ac1c3ee 100644 --- a/pkg/transformers/unsparsify.go +++ b/pkg/transformers/unsparsify.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utf8_to_latin1.go b/pkg/transformers/utf8_to_latin1.go index fb658562a..bc744c8fa 100644 --- a/pkg/transformers/utf8_to_latin1.go +++ b/pkg/transformers/utf8_to_latin1.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utils/join_bucket.go b/pkg/transformers/utils/join_bucket.go index 4e26e2538..f4390906f 100644 --- a/pkg/transformers/utils/join_bucket.go +++ b/pkg/transformers/utils/join_bucket.go @@ -7,7 +7,7 @@ package utils import ( "container/list" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git 
a/pkg/transformers/utils/join_bucket_keeper.go b/pkg/transformers/utils/join_bucket_keeper.go index df218cc3d..237f2648b 100644 --- a/pkg/transformers/utils/join_bucket_keeper.go +++ b/pkg/transformers/utils/join_bucket_keeper.go @@ -113,11 +113,11 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/cli" - "github.com/johnkerl/miller/pkg/input" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/input" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utils/percentile_keeper.go b/pkg/transformers/utils/percentile_keeper.go index 0aebdc709..30701a7c3 100644 --- a/pkg/transformers/utils/percentile_keeper.go +++ b/pkg/transformers/utils/percentile_keeper.go @@ -8,8 +8,8 @@ import ( "fmt" "sort" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) type PercentileKeeper struct { diff --git a/pkg/transformers/utils/stats1_accumulators.go b/pkg/transformers/utils/stats1_accumulators.go index 3e8aadd94..1e5267a8a 100644 --- a/pkg/transformers/utils/stats1_accumulators.go +++ b/pkg/transformers/utils/stats1_accumulators.go @@ -9,9 +9,9 @@ import ( "os" "strings" - "github.com/johnkerl/miller/pkg/bifs" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/bifs" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utils/stats2_accumulators.go b/pkg/transformers/utils/stats2_accumulators.go index 84fcdc030..e825af0aa 100644 --- 
a/pkg/transformers/utils/stats2_accumulators.go +++ b/pkg/transformers/utils/stats2_accumulators.go @@ -9,8 +9,8 @@ import ( "math" "os" - "github.com/johnkerl/miller/pkg/lib" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utils/top_keeper.go b/pkg/transformers/utils/top_keeper.go index 3a3ce9e4e..4e59783d0 100644 --- a/pkg/transformers/utils/top_keeper.go +++ b/pkg/transformers/utils/top_keeper.go @@ -5,8 +5,8 @@ package utils import ( - "github.com/johnkerl/miller/pkg/mlrval" - "github.com/johnkerl/miller/pkg/types" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" ) // ---------------------------------------------------------------- diff --git a/pkg/transformers/utils/window_keeper.go b/pkg/transformers/utils/window_keeper.go index 2de875020..c19b33a2a 100644 --- a/pkg/transformers/utils/window_keeper.go +++ b/pkg/transformers/utils/window_keeper.go @@ -1,7 +1,7 @@ package utils import ( - "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/v6/pkg/lib" ) // WindowKeeper is a sliding-window container, nominally for use by mlr step, diff --git a/pkg/types/context.go b/pkg/types/context.go index a3da4f71a..6f82bc527 100644 --- a/pkg/types/context.go +++ b/pkg/types/context.go @@ -5,7 +5,7 @@ import ( "container/list" "strconv" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // Since Go is concurrent, the context struct (AWK-like variables such as diff --git a/pkg/types/mlrval_typing.go b/pkg/types/mlrval_typing.go index e3c68b5f8..8adc5b010 100644 --- a/pkg/types/mlrval_typing.go +++ b/pkg/types/mlrval_typing.go @@ -8,7 +8,7 @@ package types import ( "fmt" - "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/mlrval" ) // 
---------------------------------------------------------------- diff --git a/regression_test.go b/regression_test.go index 5657d17bc..0ccaa3a15 100644 --- a/regression_test.go +++ b/regression_test.go @@ -5,7 +5,7 @@ import ( "os" "testing" - "github.com/johnkerl/miller/pkg/terminals/regtest" + "github.com/johnkerl/miller/v6/pkg/terminals/regtest" ) // TestRegression is a familiar entry point for regression testing. Miller diff --git a/scripts/compiler-versions-build b/scripts/compiler-versions-build index 49e2c2ad2..e575959e7 100755 --- a/scripts/compiler-versions-build +++ b/scripts/compiler-versions-build @@ -1,7 +1,7 @@ #!/bin/sh for go in go1.15.15 go1.16.12 go1.17.5 go1.18beta1; do - $go clean github.com/johnkerl/miller/cmd/mlr - $go build github.com/johnkerl/miller/cmd/mlr + $go clean github.com/johnkerl/miller/v6/cmd/mlr + $go build github.com/johnkerl/miller/v6/cmd/mlr mv mlr mlr-$go done From 7afa99dec4a0e21c26d0b858b9d287fb6aaafcf7 Mon Sep 17 00:00:00 2001 From: Adam Lesperance Date: Fri, 20 Sep 2024 11:52:15 -0500 Subject: [PATCH 230/456] Compiling on newer go versions doesn't work (#1655) For whatever reason when compiling with go `1.23` it complains about needing to `go tidy` and running that bumps the go version to `1.21` and adds the `toolchain` directive, while also updating go.sum. Compiling on go `1.20` works just fine without this update. Not sure if you want to go all the way to `1.23` or do the minimum of `1.21` so I just picked the latter and can change if you want to. 
--- go.mod | 2 +- go.sum | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 358903723..b1d767c23 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.19 +go 1.21 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb diff --git a/go.sum b/go.sum index da4f1028a..036117d8c 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,7 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= +github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= github.com/lestrrat-go/strftime v1.1.0/go.mod h1:uzeIB52CeUJenCo1syghlugshMysrqUT51HlxphXVeI= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= From 025ba0707cd3233db0370e4ae66f8ae8ac4d4be7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:38:16 -0400 Subject: [PATCH 231/456] Bump github.com/klauspost/compress from 1.17.9 to 1.17.10 (#1659) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.9 to 1.17.10. 
- [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.9...v1.17.10) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b1d767c23..20d9f376f 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.10 github.com/lestrrat-go/strftime v1.1.0 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 diff --git a/go.sum b/go.sum index 036117d8c..d90a3b194 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= +github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/lestrrat-go/envload 
v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= From fbd7ef446f272c8e0aad20131b36d332bfda8a24 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 07:47:09 -0400 Subject: [PATCH 232/456] Bump github/codeql-action from 3.26.8 to 3.26.9 (#1660) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.8 to 3.26.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/294a9d92911152fe08befb9ec03e240add280cb3...461ef6c76dfe95d5c364de2f431ddbd31a417628) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 5c13fbb23..7e71ba9f6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@294a9d92911152fe08befb9ec03e240add280cb3 + uses: github/codeql-action/init@461ef6c76dfe95d5c364de2f431ddbd31a417628 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@294a9d92911152fe08befb9ec03e240add280cb3 + uses: github/codeql-action/autobuild@461ef6c76dfe95d5c364de2f431ddbd31a417628 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@294a9d92911152fe08befb9ec03e240add280cb3 + uses: github/codeql-action/analyze@461ef6c76dfe95d5c364de2f431ddbd31a417628 From 563fd4b3d0795dfa746e9407dae5692a2ca27f31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:15:38 -0400 Subject: [PATCH 233/456] Bump actions/checkout from 4.1.7 to 4.2.0 (#1662) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.7 to 4.2.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/692973e3d937129bcbf40652eb9f2f61becf3332...d632683dd7b4114ad314bca15554477dd762a938) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7e71ba9f6..6578beedf 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 18cff3361..9742495c5 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 56bfde270..edd571f87 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - name: Set up Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d824fb219..e132de4c3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out 
code into the Go module directory - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 with: fetch-depth: 0 From 8b2290bd70d2c0d7ef1c731cdb557bacf18cbf0f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 07:32:02 -0400 Subject: [PATCH 234/456] Bump github/codeql-action from 3.26.9 to 3.26.10 (#1664) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.9 to 3.26.10. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/461ef6c76dfe95d5c364de2f431ddbd31a417628...e2b3eafc8d227b0241d48be5f425d47c2d750a13) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 6578beedf..11d48e470 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@461ef6c76dfe95d5c364de2f431ddbd31a417628 + uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@461ef6c76dfe95d5c364de2f431ddbd31a417628 + uses: github/codeql-action/autobuild@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@461ef6c76dfe95d5c364de2f431ddbd31a417628 + uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 From 56210b045b440fc9cc7d6f7833ce0f1e1981eb89 Mon Sep 17 00:00:00 2001 From: Andrea Borruso Date: Wed, 2 Oct 2024 14:08:49 +0200 Subject: [PATCH 235/456] Update reference-verbs.md (#1665) This should be a typo --- docs/src/reference-verbs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index f11994d1a..e7d1aa7d1 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3302,7 +3302,7 @@ Options: Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape -Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' +Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. 
From 4a2f349289b7943dd3ca9cfdcf81d860134d60e0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 2 Oct 2024 08:46:27 -0400 Subject: [PATCH 236/456] Update source material for #1665 (#1666) * Fix source info for #1665 * run `make dev` --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/miller-as-library.md | 2 +- man/manpage.txt | 4 ++-- man/mlr.1 | 6 +++--- pkg/transformers/stats1.go | 2 +- test/cases/cli-help/0001/expout | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 0783a57fa..a2023eb06 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1962,7 +1962,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape - Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' + Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. 
@@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-08-25 4mMILLER24m(1) + 2024-10-02 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 521ebb98e..ac120d642 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1941,7 +1941,7 @@ Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape - Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' + Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-08-25 4mMILLER24m(1) + 2024-10-02 4mMILLER24m(1) diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md index 219b1f653..3b09a4bbc 100644 --- a/docs/src/miller-as-library.md +++ b/docs/src/miller-as-library.md @@ -128,7 +128,7 @@ func custom_options() *cli.TOptions { func run_custom_processor( fileNames []string, options *cli.TOptions, - record_processor func (irac *types.RecordAndContext) (*types.RecordAndContext, error), + record_processor func(irac *types.RecordAndContext) (*types.RecordAndContext, error), ) error { outputStream := os.Stdout outputIsStdout := true diff --git a/man/manpage.txt b/man/manpage.txt index 521ebb98e..ac120d642 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1941,7 +1941,7 @@ Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape - Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' + Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr 
'^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-08-25 4mMILLER24m(1) + 2024-10-02 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 2ed7c3d2d..0c0be4c56 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-08-25 +.\" Date: 2024-10-02 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-08-25" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-10-02" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2444,7 +2444,7 @@ Options: Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape -Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' +Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. 
diff --git a/pkg/transformers/stats1.go b/pkg/transformers/stats1.go index 2f06540e1..3b26be564 100644 --- a/pkg/transformers/stats1.go +++ b/pkg/transformers/stats1.go @@ -69,7 +69,7 @@ Options: fmt.Fprintln(o, "Example: mlr stats1 -a count,mode -f size -g shape") fmt.Fprintln(o, - "Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$'") + "Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$'") fmt.Fprintln(o, ` This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k.`) diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 41079ee75..57a9746ee 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -1111,7 +1111,7 @@ Options: Example: mlr stats1 -a min,p10,p50,p90,max -f value -g size,shape Example: mlr stats1 -a count,mode -f size Example: mlr stats1 -a count,mode -f size -g shape -Example: mlr stats1 -a count,mode --fr '^[a-h].*$' -gr '^k.*$' +Example: mlr stats1 -a count,mode --fr '^[a-h].*$' --gr '^k.*$' This computes count and mode statistics on all field names beginning with a through h, grouped by all field names starting with k. From 5b3698402df859418d85f8aa77bfe3e6a1d69005 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 4 Oct 2024 08:53:13 -0400 Subject: [PATCH 237/456] Bump github/codeql-action from 3.26.10 to 3.26.11 (#1669) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.10 to 3.26.11. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/e2b3eafc8d227b0241d48be5f425d47c2d750a13...6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 11d48e470..0ec46b92d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 + uses: github/codeql-action/init@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@e2b3eafc8d227b0241d48be5f425d47c2d750a13 + uses: github/codeql-action/autobuild@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 + uses: github/codeql-action/analyze@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea From 26e55f2ec3cf31c3195cbd6f1f4cd71ef1fe2210 Mon Sep 17 00:00:00 2001 From: Andrea Borruso Date: Sat, 5 Oct 2024 14:49:53 +0200 Subject: [PATCH 238/456] Characters to be removed (#1668) It seems to me that they are to be removed --- docs/src/reference-verbs.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index e7d1aa7d1..e41f086ff 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -2463,9 +2463,9 @@ Options: first-match replacement. -h|--help Show this message. Examples: -mlr rename old_name,new_name' -mlr rename old_name_1,new_name_1,old_name_2,new_name_2' -mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" +mlr rename old_name,new_name +mlr rename old_name_1,new_name_1,old_name_2,new_name_2 +mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. 
to "Name" From 31d6164181710d28aa7571a769789c54ef8162cb Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 09:25:47 -0400 Subject: [PATCH 239/456] Fix 1668 error-source (#1672) * Fix 1668 error-source * run `make dev` --- docs/src/manpage.md | 8 ++++---- docs/src/manpage.txt | 8 ++++---- man/manpage.txt | 8 ++++---- man/mlr.1 | 10 +++++----- pkg/transformers/rename.go | 6 +++--- test/cases/cli-help/0001/expout | 6 +++--- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index a2023eb06..f3b547cc6 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1644,9 +1644,9 @@ This is simply a copy of what you should see on running `man mlr` at a command p first-match replacement. -h|--help Show this message. Examples: - mlr rename old_name,new_name' - mlr rename old_name_1,new_name_1,old_name_2,new_name_2' - mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" + mlr rename old_name,new_name + mlr rename old_name_1,new_name_1,old_name_2,new_name_2 + mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name" @@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-02 4mMILLER24m(1) + 2024-10-05 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index ac120d642..4c56ec513 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1623,9 +1623,9 @@ first-match replacement. -h|--help Show this message. 
Examples: - mlr rename old_name,new_name' - mlr rename old_name_1,new_name_1,old_name_2,new_name_2' - mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" + mlr rename old_name,new_name + mlr rename old_name_1,new_name_1,old_name_2,new_name_2 + mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name" @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-02 4mMILLER24m(1) + 2024-10-05 4mMILLER24m(1) diff --git a/man/manpage.txt b/man/manpage.txt index ac120d642..4c56ec513 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1623,9 +1623,9 @@ first-match replacement. -h|--help Show this message. Examples: - mlr rename old_name,new_name' - mlr rename old_name_1,new_name_1,old_name_2,new_name_2' - mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" + mlr rename old_name,new_name + mlr rename old_name_1,new_name_1,old_name_2,new_name_2 + mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. 
to "Name" @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-02 4mMILLER24m(1) + 2024-10-05 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 0c0be4c56..46b85a18d 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-10-02 +.\" Date: 2024-10-05 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-10-02" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-10-05" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2036,9 +2036,9 @@ Options: first-match replacement. -h|--help Show this message. Examples: -mlr rename old_name,new_name' -mlr rename old_name_1,new_name_1,old_name_2,new_name_2' -mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" +mlr rename old_name,new_name +mlr rename old_name_1,new_name_1,old_name_2,new_name_2 +mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\e1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. 
to "Name" diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go index d7adef13f..39f252827 100644 --- a/pkg/transformers/rename.go +++ b/pkg/transformers/rename.go @@ -43,9 +43,9 @@ func transformerRenameUsage( fmt.Fprintf(o, " first-match replacement.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") fmt.Fprintf(o, "Examples:\n") - fmt.Fprintf(o, "%s %s old_name,new_name'\n", exeName, verb) - fmt.Fprintf(o, "%s %s old_name_1,new_name_1,old_name_2,new_name_2'\n", exeName, verb) - fmt.Fprintf(o, "%s %s -r 'Date_[0-9]+,Date,' Rename all such fields to be \"Date\"\n", exeName, verb) + fmt.Fprintf(o, "%s %s old_name,new_name\n", exeName, verb) + fmt.Fprintf(o, "%s %s old_name_1,new_name_1,old_name_2,new_name_2\n", exeName, verb) + fmt.Fprintf(o, "%s %s -r 'Date_[0-9]+,Date' Rename all such fields to be \"Date\"\n", exeName, verb) fmt.Fprintf(o, "%s %s -r '\"Date_[0-9]+\",Date' Same\n", exeName, verb) fmt.Fprintf(o, "%s %s -r 'Date_([0-9]+).*,\\1' Rename all such fields to be of the form 20151015\n", exeName, verb) fmt.Fprintf(o, "%s %s -r '\"name\"i,Name' Rename \"name\", \"Name\", \"NAME\", etc. to \"Name\"\n", exeName, verb) diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 57a9746ee..411318a64 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -778,9 +778,9 @@ Options: first-match replacement. -h|--help Show this message. Examples: -mlr rename old_name,new_name' -mlr rename old_name_1,new_name_1,old_name_2,new_name_2' -mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date" +mlr rename old_name,new_name +mlr rename old_name_1,new_name_1,old_name_2,new_name_2 +mlr rename -r 'Date_[0-9]+,Date' Rename all such fields to be "Date" mlr rename -r '"Date_[0-9]+",Date' Same mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. 
to "Name" From 5cd457d5657050353919d496fef8d38f915331fa Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Sat, 5 Oct 2024 09:27:31 -0400 Subject: [PATCH 240/456] Fix minor typo (#1673) --- docs/src/miller-programming-language.md | 2 +- docs/src/miller-programming-language.md.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/miller-programming-language.md b/docs/src/miller-programming-language.md index f8d2f027b..e5da65233 100644 --- a/docs/src/miller-programming-language.md +++ b/docs/src/miller-programming-language.md @@ -209,7 +209,7 @@ So, take this sum/count example as an indication of the kinds of things you can Also inspired by [AWK](https://en.wikipedia.org/wiki/AWK), the Miller DSL has the following special [**context variables**](reference-dsl-variables.md#built-in-variables): * `FILENAME` -- the filename the current record came from. Especially useful in things like `mlr ... *.csv`. -* `FILENUM` -- similarly, but integer 1,2,3,... rather than filenam.e +* `FILENUM` -- similarly, but integer 1,2,3,... rather than filename. * `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value` then `NF` will increment. * `NR` -- starting from 1, counter of how many records processed so far. * `FNR` -- similar, but resets to 1 at the start of each file. diff --git a/docs/src/miller-programming-language.md.in b/docs/src/miller-programming-language.md.in index eb5702f3b..624a0dc9c 100644 --- a/docs/src/miller-programming-language.md.in +++ b/docs/src/miller-programming-language.md.in @@ -94,7 +94,7 @@ So, take this sum/count example as an indication of the kinds of things you can Also inspired by [AWK](https://en.wikipedia.org/wiki/AWK), the Miller DSL has the following special [**context variables**](reference-dsl-variables.md#built-in-variables): * `FILENAME` -- the filename the current record came from. Especially useful in things like `mlr ... *.csv`. 
-* `FILENUM` -- similarly, but integer 1,2,3,... rather than filenam.e +* `FILENUM` -- similarly, but integer 1,2,3,... rather than filename. * `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value` then `NF` will increment. * `NR` -- starting from 1, counter of how many records processed so far. * `FNR` -- similar, but resets to 1 at the start of each file. From c66094a1841038d0c0340e52ee560035e28ee8ca Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 09:32:15 -0400 Subject: [PATCH 241/456] miller 6.13.0 --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- miller.spec | 5 ++++- pkg/version/version.go | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index f3b547cc6..f26627432 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0-dev. + manpage documents mlr 6.13.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 4c56ec513..eabc96ef8 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0-dev. + manpage documents mlr 6.13.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/manpage.txt b/man/manpage.txt index 4c56ec513..eabc96ef8 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. 
This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.12.0-dev. + manpage documents mlr 6.13.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index 46b85a18d..f50755e5a 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.12.0-dev. +a special case.) This manpage documents mlr 6.13.0. .SH "EXAMPLES" .sp diff --git a/miller.spec b/miller.spec index 325dbe988..77b5d9012 100644 --- a/miller.spec +++ b/miller.spec @@ -1,6 +1,6 @@ Summary: Name-indexed data processing tool Name: miller -Version: 6.12.0 +Version: 6.13.0 Release: 1%{?dist} License: BSD Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz @@ -36,6 +36,9 @@ make install %{_mandir}/man1/mlr.1* %changelog +* Sat Oct 5 2024 John Kerl - 6.13.0-1 +- 6.13.0 release + * Sat Mar 16 2024 John Kerl - 6.12.0-1 - 6.12.0 release diff --git a/pkg/version/version.go b/pkg/version/version.go index 1d3f25e49..d0aec2ff9 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. 
-var STRING string = "6.12.0-dev" +var STRING string = "6.13.0" From 7ef83f3a235d29088c5b2f07087a020bbc4fd3f9 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 09:50:07 -0400 Subject: [PATCH 242/456] go mod tidy requires go 1.20 --- go.mod | 2 +- go.sum | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 20d9f376f..eaa5500e7 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.21 +go 1.20 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb diff --git a/go.sum b/go.sum index d90a3b194..9d77adc44 100644 --- a/go.sum +++ b/go.sum @@ -18,7 +18,6 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= github.com/lestrrat-go/strftime v1.1.0/go.mod h1:uzeIB52CeUJenCo1syghlugshMysrqUT51HlxphXVeI= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= From 52f930ba31b95cc48cebd0a919afb13885d1ac6f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 10:17:36 -0400 Subject: [PATCH 243/456] trying again with go version / go mod tidy --- .github/workflows/release.yml | 2 +- go.mod | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e132de4c3..3de58875e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ 
-6,7 +6,7 @@ on: workflow_dispatch: env: - GO_VERSION: 1.19.13 + GO_VERSION: 1.21.1 jobs: release: diff --git a/go.mod b/go.mod index eaa5500e7..20d9f376f 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.20 +go 1.21 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb From f751084013b628499ac05c32f16019df7d76ee83 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 10:46:36 -0400 Subject: [PATCH 244/456] 6.13 release docs --- docs/src/reference-main-regular-expressions.md | 4 +++- docs/src/release-docs.md | 1 + docs/src/release-docs.md.in | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index f679669b8..60126f5fa 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -251,7 +251,8 @@ package syntax // import "regexp/syntax" Package syntax parses regular expressions into parse trees and compiles parse trees into programs. Most clients of regular expressions will use the facilities -of package regexp (such as Compile and Match) instead of this package. +of package regexp (such as regexp.Compile and regexp.Match) instead of this +package. 
# Syntax @@ -301,6 +302,7 @@ Grouping: (re) numbered capturing group (submatch) (?P<name>re) named & numbered capturing group (submatch) + (?<name>re) named & numbered capturing group (submatch) (?:re) non-capturing group (?flags) set flags within current group; non-capturing (?flags:re) set flags during re; non-capturing diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index 235c48f57..0e9fcc1c8 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -24,6 +24,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.13.0 | [Miller 6.13.0](https://miller.readthedocs.io/en/6.13.0) | [File-stat DSL function, new stats accumulator, misc. bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.13.0) | 6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | 6.10.0 | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0) | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) | diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index ebbb10a02..7a840c6f0 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -8,6 +8,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| 
main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.13.0 | [Miller 6.13.0](https://miller.readthedocs.io/en/6.13.0) | [File-stat DSL function, new stats accumulator, misc. bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.13.0) | 6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | 6.10.0 | [Miller 6.10.0](https://miller.readthedocs.io/en/6.10.0) | [Add --files option; bugfixes; use Go 1.19](https://github.com/johnkerl/miller/releases/tag/v6.10.0) | From a0d65c3035972af27240730b5fc745500957126e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 10:47:43 -0400 Subject: [PATCH 245/456] 6.13.0-dev --- pkg/version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/version/version.go b/pkg/version/version.go index d0aec2ff9..1a6792614 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. 
-var STRING string = "6.13.0" +var STRING string = "6.13.0-dev" From 39c88041d6e58b2f0e571bc1747bb5e5baa435e2 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 5 Oct 2024 10:50:37 -0400 Subject: [PATCH 246/456] make dev for previous commit --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index f26627432..64c6f72f2 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.13.0. + manpage documents mlr 6.13.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index eabc96ef8..cca5dced4 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.13.0. + manpage documents mlr 6.13.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/manpage.txt b/man/manpage.txt index eabc96ef8..cca5dced4 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.13.0. + manpage documents mlr 6.13.0-dev. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index f50755e5a..6391b5483 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.13.0. +a special case.) This manpage documents mlr 6.13.0-dev. .SH "EXAMPLES" .sp From 7a0320fc2702517ca34a2154c2b88fd144bcce6f Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Sun, 6 Oct 2024 23:30:12 +0200 Subject: [PATCH 247/456] Typo fix: programmatically (#1679) Signed-off-by: Stephen Kitt --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/reference-verbs.md | 4 ++-- man/manpage.txt | 4 ++-- man/mlr.1 | 4 ++-- pkg/transformers/put_or_filter.go | 4 ++-- test/cases/cli-help/0001/expout | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 64c6f72f2..1d367ed72 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1077,7 +1077,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mfilter0m Usage: mlr filter [options] {DSL expression} - Lets you use a domain-specific language to programatically filter which + Lets you use a domain-specific language to programmatically filter which stream records will be output. See also: https://miller.readthedocs.io/en/latest/reference-verbs @@ -1535,7 +1535,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mput0m Usage: mlr put [options] {DSL expression} - Lets you use a domain-specific language to programatically alter stream records. + Lets you use a domain-specific language to programmatically alter stream records. 
See also: https://miller.readthedocs.io/en/latest/reference-verbs Options: diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index cca5dced4..cb369532a 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1056,7 +1056,7 @@ 1mfilter0m Usage: mlr filter [options] {DSL expression} - Lets you use a domain-specific language to programatically filter which + Lets you use a domain-specific language to programmatically filter which stream records will be output. See also: https://miller.readthedocs.io/en/latest/reference-verbs @@ -1514,7 +1514,7 @@ 1mput0m Usage: mlr put [options] {DSL expression} - Lets you use a domain-specific language to programatically alter stream records. + Lets you use a domain-specific language to programmatically alter stream records. See also: https://miller.readthedocs.io/en/latest/reference-verbs Options: diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index e41f086ff..0af85fb80 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -970,7 +970,7 @@ a,b,c
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to programatically filter which
+Lets you use a domain-specific language to programmatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -2306,7 +2306,7 @@ Options:
 
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to programatically alter stream records.
+Lets you use a domain-specific language to programmatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:
diff --git a/man/manpage.txt b/man/manpage.txt
index cca5dced4..cb369532a 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1056,7 +1056,7 @@
 
    1mfilter0m
        Usage: mlr filter [options] {DSL expression}
-       Lets you use a domain-specific language to programatically filter which
+       Lets you use a domain-specific language to programmatically filter which
        stream records will be output.
        See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -1514,7 +1514,7 @@
 
    1mput0m
        Usage: mlr put [options] {DSL expression}
-       Lets you use a domain-specific language to programatically alter stream records.
+       Lets you use a domain-specific language to programmatically alter stream records.
        See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
        Options:
diff --git a/man/mlr.1 b/man/mlr.1
index 6391b5483..75566da1d 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -1319,7 +1319,7 @@ Options:
 .\}
 .nf
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to programatically filter which
+Lets you use a domain-specific language to programmatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -1909,7 +1909,7 @@ Options:
 .\}
 .nf
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to programatically alter stream records.
+Lets you use a domain-specific language to programmatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:
diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go
index 1f19e85d0..34e2e12d9 100644
--- a/pkg/transformers/put_or_filter.go
+++ b/pkg/transformers/put_or_filter.go
@@ -53,9 +53,9 @@ func transformerPutOrFilterUsage(
 ) {
 	fmt.Fprintf(o, "Usage: %s %s [options] {DSL expression}\n", "mlr", verb)
 	if verb == "put" {
-		fmt.Fprintf(o, "Lets you use a domain-specific language to programatically alter stream records.\n")
+		fmt.Fprintf(o, "Lets you use a domain-specific language to programmatically alter stream records.\n")
 	} else if verb == "filter" {
-		fmt.Fprintf(o, "Lets you use a domain-specific language to programatically filter which\n")
+		fmt.Fprintf(o, "Lets you use a domain-specific language to programmatically filter which\n")
 		fmt.Fprintf(o, "stream records will be output.\n")
 	}
 	fmt.Fprintf(o, "See also: https://miller.readthedocs.io/en/latest/reference-verbs\n")
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 411318a64..5869278b8 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -186,7 +186,7 @@ Options:
 ================================================================
 filter
 Usage: mlr filter [options] {DSL expression}
-Lets you use a domain-specific language to programatically filter which
+Lets you use a domain-specific language to programmatically filter which
 stream records will be output.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
@@ -666,7 +666,7 @@ Options:
 ================================================================
 put
 Usage: mlr put [options] {DSL expression}
-Lets you use a domain-specific language to programatically alter stream records.
+Lets you use a domain-specific language to programmatically alter stream records.
 See also: https://miller.readthedocs.io/en/latest/reference-verbs
 
 Options:

From 6eb5721070cdc9b73bb276cd502b6dc01136a2e5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 08:24:00 -0400
Subject: [PATCH 248/456] Bump actions/cache from 4.0.2 to 4.1.0 (#1683)

Bumps [actions/cache](https://github.com/actions/cache) from 4.0.2 to 4.1.0.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/0c45773b623bea8c8e75f6c82b208c3cf94ea4f9...2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 3de58875e..9cc88bfe7 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9
+        uses: actions/cache@2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2
         with:
           path: |
             ~/.cache/go-build

From 8789f73d7b4297ec7318e9afdacda69b888b1325 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 08:24:13 -0400
Subject: [PATCH 249/456] Bump golang.org/x/sys from 0.25.0 to 0.26.0 (#1682)

Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.25.0 to 0.26.0.
- [Commits](https://github.com/golang/sys/compare/v0.25.0...v0.26.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 20d9f376f..7bb7abadd 100644
--- a/go.mod
+++ b/go.mod
@@ -26,7 +26,7 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
-	golang.org/x/sys v0.25.0
+	golang.org/x/sys v0.26.0
 	golang.org/x/term v0.24.0
 	golang.org/x/text v0.18.0
 )
diff --git a/go.sum b/go.sum
index 9d77adc44..45dd0a211 100644
--- a/go.sum
+++ b/go.sum
@@ -18,6 +18,7 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C
 github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0=
 github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
+github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg=
 github.com/lestrrat-go/strftime v1.1.0/go.mod h1:uzeIB52CeUJenCo1syghlugshMysrqUT51HlxphXVeI=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
@@ -36,8 +37,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
-golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
 golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
 golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=

From e18eac29db5e64915a45f864189e45c4eee92df8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 08:25:31 -0400
Subject: [PATCH 250/456] Bump golang.org/x/text from 0.18.0 to 0.19.0 (#1681)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.18.0 to 0.19.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.18.0...v0.19.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 7bb7abadd..377b5b96d 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.26.0
 	golang.org/x/term v0.24.0
-	golang.org/x/text v0.18.0
+	golang.org/x/text v0.19.0
 )
 
 require (
diff --git a/go.sum b/go.sum
index 45dd0a211..b0c48c9ce 100644
--- a/go.sum
+++ b/go.sum
@@ -41,8 +41,8 @@ golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
 golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
 golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
-golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
-golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
+golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From bfa1fd4b28b4d07baed459c9e9b0a62fbb6ff3c3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 08:30:19 -0400
Subject: [PATCH 251/456] Bump golang.org/x/term from 0.24.0 to 0.25.0 (#1680)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.24.0 to 0.25.0.
- [Commits](https://github.com/golang/term/compare/v0.24.0...v0.25.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 377b5b96d..2f6b5522b 100644
--- a/go.mod
+++ b/go.mod
@@ -27,7 +27,7 @@ require (
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.9.0
 	golang.org/x/sys v0.26.0
-	golang.org/x/term v0.24.0
+	golang.org/x/term v0.25.0
 	golang.org/x/text v0.19.0
 )
 
diff --git a/go.sum b/go.sum
index b0c48c9ce..0c51ab9d1 100644
--- a/go.sum
+++ b/go.sum
@@ -39,8 +39,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
 golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
-golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
+golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24=
+golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M=
 golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
 golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From fd3e0d8ffc9fc2eab9dcc73b1d79c773c4a14b1c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 8 Oct 2024 07:52:30 -0400
Subject: [PATCH 252/456] Bump github/codeql-action from 3.26.11 to 3.26.12
 (#1687)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.11 to 3.26.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea...c36620d31ac7c881962c3d9dd939c40ec9434f2b)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 0ec46b92d..92d72ffa6 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea
+      uses: github/codeql-action/init@c36620d31ac7c881962c3d9dd939c40ec9434f2b
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea
+      uses: github/codeql-action/autobuild@c36620d31ac7c881962c3d9dd939c40ec9434f2b
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea
+      uses: github/codeql-action/analyze@c36620d31ac7c881962c3d9dd939c40ec9434f2b

From 6ea8e238dbb9a7a6ace93ecf67c238b35fc40e4d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 8 Oct 2024 07:52:41 -0400
Subject: [PATCH 253/456] Bump actions/upload-artifact from 4.4.0 to 4.4.1
 (#1686)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.0 to 4.4.1.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/50769540e7f4bd5e21e526ee35c689e35e0d6874...604373da6381bf24206979c74d06a550515601b9)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index edd571f87..f4dac4015 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874
+    - uses: actions/upload-artifact@604373da6381bf24206979c74d06a550515601b9
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From e9fbd9f48dfeacaa10bd4a377be75b7c0c71083f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 8 Oct 2024 07:52:52 -0400
Subject: [PATCH 254/456] Bump actions/checkout from 4.2.0 to 4.2.1 (#1685)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4.2.0 to 4.2.1.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/d632683dd7b4114ad314bca15554477dd762a938...eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release.yml         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 92d72ffa6..a4f86c048 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
+      uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 9742495c5..35965e50b 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index f4dac4015..fab2fa8bb 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
+    - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
 
     - name: Set up Go
       uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9cc88bfe7..a8a8b9a64 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
         with:
           fetch-depth: 0
 

From acc8a490e83663a55ff5a2ea005a2231ad2027be Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 9 Oct 2024 08:14:56 -0400
Subject: [PATCH 255/456] Bump actions/cache from 4.1.0 to 4.1.1 (#1688)

Bumps [actions/cache](https://github.com/actions/cache) from 4.1.0 to 4.1.1.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2...3624ceb22c1c5a301c8db4169662070a689d9ea8)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a8a8b9a64..0ede4802f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2
+        uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8
         with:
           path: |
             ~/.cache/go-build

From 4e3b500f94df41e41076169054a6a5e739212002 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 9 Oct 2024 08:15:07 -0400
Subject: [PATCH 256/456] Bump actions/upload-artifact from 4.4.1 to 4.4.2
 (#1689)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.1 to 4.4.2.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/604373da6381bf24206979c74d06a550515601b9...84480863f228bb9747b473957fcc9e309aa96097)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index fab2fa8bb..2d8b0c05c 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@604373da6381bf24206979c74d06a550515601b9
+    - uses: actions/upload-artifact@84480863f228bb9747b473957fcc9e309aa96097
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 2b4a0c2ca87f1668ce20a9babe208fc51e67da46 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 10 Oct 2024 07:53:37 -0400
Subject: [PATCH 257/456] Bump actions/upload-artifact from 4.4.2 to 4.4.3
 (#1690)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.2 to 4.4.3.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/84480863f228bb9747b473957fcc9e309aa96097...b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 2d8b0c05c..e16239ff2 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,7 +41,7 @@ jobs:
       if: matrix.os == 'windows-latest'
       run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}}
 
-    - uses: actions/upload-artifact@84480863f228bb9747b473957fcc9e309aa96097
+    - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
       with:
         name: mlr-${{matrix.os}}
         path: bin/${{matrix.os}}/*

From 979addd3c392da36cacaeeab76f8eef632d217ef Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 14 Oct 2024 08:02:27 -0400
Subject: [PATCH 258/456] Bump github/codeql-action from 3.26.12 to 3.26.13
 (#1692)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.12 to 3.26.13.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/c36620d31ac7c881962c3d9dd939c40ec9434f2b...f779452ac5af1c261dce0346a8f964149f49322b)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index a4f86c048..9d0379be3 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@c36620d31ac7c881962c3d9dd939c40ec9434f2b
+      uses: github/codeql-action/init@f779452ac5af1c261dce0346a8f964149f49322b
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@c36620d31ac7c881962c3d9dd939c40ec9434f2b
+      uses: github/codeql-action/autobuild@f779452ac5af1c261dce0346a8f964149f49322b
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@c36620d31ac7c881962c3d9dd939c40ec9434f2b
+      uses: github/codeql-action/analyze@f779452ac5af1c261dce0346a8f964149f49322b

From 07c896833cc4da4a3d6f0d3eace8245a5bf507bf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 14 Oct 2024 08:02:39 -0400
Subject: [PATCH 259/456] Bump github.com/klauspost/compress from 1.17.10 to
 1.17.11 (#1691)

Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.10 to 1.17.11.
- [Release notes](https://github.com/klauspost/compress/releases)
- [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml)
- [Commits](https://github.com/klauspost/compress/compare/v1.17.10...v1.17.11)

---
updated-dependencies:
- dependency-name: github.com/klauspost/compress
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 2f6b5522b..1de88737e 100644
--- a/go.mod
+++ b/go.mod
@@ -20,7 +20,7 @@ require (
 	github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
 	github.com/johnkerl/lumin v1.0.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
-	github.com/klauspost/compress v1.17.10
+	github.com/klauspost/compress v1.17.11
 	github.com/lestrrat-go/strftime v1.1.0
 	github.com/mattn/go-isatty v0.0.20
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
diff --git a/go.sum b/go.sum
index 0c51ab9d1..9167f5c7e 100644
--- a/go.sum
+++ b/go.sum
@@ -15,8 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
-github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0=
-github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg=

From 05aa16cfcf08cdddf10090d8e04e15ee9fa8a906 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Thu, 17 Oct 2024 09:11:03 -0400
Subject: [PATCH 260/456] Join docs wrong link (#1695)

* Fix join-docs link in online help

* run `make dev` and commit the artifacts
---
 docs/src/manpage.md             | 4 ++--
 docs/src/manpage.txt            | 4 ++--
 docs/src/reference-verbs.md     | 2 +-
 man/manpage.txt                 | 4 ++--
 man/mlr.1                       | 6 +++---
 pkg/transformers/join.go        | 2 +-
 test/cases/cli-help/0001/expout | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 1d367ed72..5185360d3 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -1384,7 +1384,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be
        expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'.
        Please use "mlr --usage-separator-options" for information on specifying separators.
-       Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information
+       Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information
        including examples.
 
    1mlabel0m
@@ -3731,5 +3731,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-10-05                         4mMILLER24m(1)
+                                  2024-10-17                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index cb369532a..a341f6c94 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1363,7 +1363,7 @@ Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'. Please use "mlr --usage-separator-options" for information on specifying separators. - Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information + Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information including examples. 1mlabel0m @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-05 4mMILLER24m(1) + 2024-10-17 4mMILLER24m(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 0af85fb80..092f53969 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1711,7 +1711,7 @@ be specified CSV as well unless you override with 'mlr --csv ... join --ijson -l Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'. Please use "mlr --usage-separator-options" for information on specifying separators. -Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information +Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information including examples. diff --git a/man/manpage.txt b/man/manpage.txt index cb369532a..a341f6c94 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1363,7 +1363,7 @@ Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'. Please use "mlr --usage-separator-options" for information on specifying separators. 
- Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information + Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information including examples. 1mlabel0m @@ -3710,4 +3710,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-05 4mMILLER24m(1) + 2024-10-17 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 75566da1d..2e30cb59d 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-10-05 +.\" Date: 2024-10-17 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-10-05" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-10-17" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1710,7 +1710,7 @@ be specified CSV as well unless you override with 'mlr --csv ... join --ijson -l Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'. Please use "mlr --usage-separator-options" for information on specifying separators. -Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information +Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information including examples. 
.fi .if n \{\ diff --git a/pkg/transformers/join.go b/pkg/transformers/join.go index 3d987bc07..61d8a47a5 100644 --- a/pkg/transformers/join.go +++ b/pkg/transformers/join.go @@ -130,7 +130,7 @@ func transformerJoinUsage( fmt.Fprintf(o, "expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'.\n") fmt.Fprintf(o, "Please use \"%s --usage-separator-options\" for information on specifying separators.\n", "mlr") - fmt.Fprintf(o, "Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information\n") + fmt.Fprintf(o, "Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information\n") fmt.Fprintf(o, "including examples.\n") } diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 5869278b8..3265d1e0c 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -507,7 +507,7 @@ be specified CSV as well unless you override with 'mlr --csv ... join --ijson -l Likewise, if you have 'mlr --csv --implicit-csv-header ...' then the join-in file will be expected to be headerless as well unless you put '--no-implicit-csv-header' after 'join'. Please use "mlr --usage-separator-options" for information on specifying separators. -Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information +Please see https://miller.readthedocs.io/en/latest/reference-verbs#join for more information including examples. ================================================================ From bf320bcc997fb5dc6d8aee2b8a5a930a9f1ebce2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 07:58:02 -0400 Subject: [PATCH 261/456] Bump actions/cache from 4.1.1 to 4.1.2 (#1698) Bumps [actions/cache](https://github.com/actions/cache) from 4.1.1 to 4.1.2. 
- [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/3624ceb22c1c5a301c8db4169662070a689d9ea8...6849a6489940f00c2f30c0fb92c6274307ccb58a) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0ede4802f..73813e9a3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a with: path: | ~/.cache/go-build From 7225f2c094ab2790288938c6f63c87578c46ab34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 07:58:15 -0400 Subject: [PATCH 262/456] Bump github/codeql-action from 3.26.13 to 3.27.0 (#1697) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.13 to 3.27.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f779452ac5af1c261dce0346a8f964149f49322b...662472033e021d55d94146f66f6058822b0b39fd) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 9d0379be3..e692d44ee 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@f779452ac5af1c261dce0346a8f964149f49322b + uses: github/codeql-action/init@662472033e021d55d94146f66f6058822b0b39fd with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@f779452ac5af1c261dce0346a8f964149f49322b + uses: github/codeql-action/autobuild@662472033e021d55d94146f66f6058822b0b39fd # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f779452ac5af1c261dce0346a8f964149f49322b + uses: github/codeql-action/analyze@662472033e021d55d94146f66f6058822b0b39fd From 1f6432e26092cd219adfefeb0934a000a0830ce7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 08:16:21 -0400 Subject: [PATCH 263/456] Bump actions/checkout from 4.2.1 to 4.2.2 (#1699) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.2.1 to 4.2.2. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871...11bd71901bbe5b1630ceea73d27597364c9af683) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index e692d44ee..2da2b5891 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 35965e50b..fff2e3c40 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index e16239ff2..5d3e5a6ed 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 73813e9a3..0541e8888 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out code into the Go module directory - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 with: fetch-depth: 0 From d7a5997d70325399449975bdb62201ba5e89b1a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:02:54 -0400 Subject: [PATCH 264/456] Bump actions/setup-go from 5.0.2 to 5.1.0 (#1700) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.0.2 to 5.1.0. 
- [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32...41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 5d3e5a6ed..e06b0e8f7 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed with: go-version: 1.19 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0541e8888..e3a706295 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed with: go-version: ${{ env.GO_VERSION }} id: go From 047cb4bc28e75b5c151a501a6b51420b1d614645 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 11:42:43 -0400 Subject: [PATCH 265/456] Static-check fixes from @lespea #1657, batch 1/n (#1703) --- pkg/climain/mlrcli_shebang.go | 2 +- pkg/dsl/cst/block_exit.go | 2 +- pkg/dsl/cst/builtin_functions.go | 8 ++--- pkg/dsl/cst/cond.go | 2 +- pkg/dsl/cst/dump.go | 4 +-- pkg/dsl/cst/emit_emitp.go | 8 ++--- pkg/dsl/cst/emitf.go | 6 ++-- pkg/dsl/cst/for.go | 8 ++--- pkg/dsl/cst/if.go | 2 +- pkg/dsl/cst/lvalues.go | 28 +++++++-------- 
pkg/dsl/cst/print.go | 4 +-- pkg/dsl/cst/statements.go | 8 ++--- pkg/dsl/cst/tee.go | 6 ++-- pkg/dsl/cst/udf.go | 4 +-- pkg/dsl/cst/uds.go | 2 +- pkg/dsl/cst/validate.go | 34 +++++++++---------- pkg/dsl/cst/while.go | 4 +-- pkg/input/record_reader_csv.go | 3 +- pkg/input/record_reader_csvlite.go | 6 ++-- pkg/input/record_reader_json.go | 4 +-- pkg/input/record_reader_pprint.go | 6 ++-- pkg/input/record_reader_tsv.go | 6 ++-- pkg/lib/file_readers.go | 6 ++-- pkg/mlrval/mlrmap_accessors.go | 4 +-- pkg/mlrval/mlrval_collections.go | 18 +++++----- pkg/mlrval/mlrval_json.go | 14 ++++---- pkg/output/file_output_handlers.go | 2 +- pkg/parsing/token/token.go | 2 +- pkg/runtime/stack.go | 4 +-- pkg/terminals/regtest/regtester.go | 4 +-- pkg/terminals/repl/session.go | 2 +- pkg/transformers/case.go | 2 +- pkg/transformers/label.go | 2 +- pkg/transformers/merge_fields.go | 2 +- pkg/transformers/put_or_filter.go | 4 +-- pkg/transformers/seqgen.go | 2 +- pkg/transformers/stats1.go | 2 +- pkg/transformers/stats2.go | 2 +- pkg/transformers/step.go | 4 +-- pkg/types/mlrval_typing.go | 4 +-- test/cases/cli-help/0001/expout | 4 +-- test/cases/dsl-argpass-typedecl/0002/experr | 2 +- test/cases/dsl-argpass-typedecl/0003/experr | 2 +- test/cases/dsl-argpass-typedecl/0004/experr | 2 +- test/cases/dsl-argpass-typedecl/0005/experr | 2 +- test/cases/dsl-argpass-typedecl/0007/experr | 1 - test/cases/dsl-argpass-typedecl/0008/experr | 1 - test/cases/dsl-array-map-indexing/0005/experr | 2 +- test/cases/dsl-array-map-indexing/0006/experr | 2 +- test/cases/dsl-array-map-indexing/0007/experr | 2 +- test/cases/dsl-array-map-indexing/0035/experr | 2 +- test/cases/dsl-array-map-indexing/0036/experr | 2 +- test/cases/dsl-array-map-indexing/0068/experr | 2 +- test/cases/dsl-array-map-indexing/0069/experr | 2 +- .../0001/experr | 2 +- .../0002/experr | 2 +- .../0003/experr | 2 +- .../0004/experr | 2 +- .../0005/experr | 2 +- .../0006/experr | 2 +- .../0007/experr | 2 +- .../0008/experr | 2 +- 
.../0009/experr | 2 +- .../0010/experr | 2 +- .../0011/experr | 2 +- .../0012/experr | 2 +- .../0013/experr | 2 +- .../0014/experr | 2 +- .../0015/experr | 2 +- .../0016/experr | 2 +- .../0017/experr | 2 +- .../0018/experr | 2 +- .../0024/experr | 2 +- .../0025/experr | 2 +- test/cases/dsl-context-validation/0001/experr | 2 +- test/cases/dsl-context-validation/0002/experr | 2 +- test/cases/dsl-context-validation/0003/experr | 2 +- test/cases/dsl-context-validation/0004/experr | 2 +- test/cases/dsl-context-validation/0005/experr | 2 +- test/cases/dsl-context-validation/0006/experr | 2 +- test/cases/dsl-context-validation/0007/experr | 2 +- test/cases/dsl-context-validation/0008/experr | 2 +- test/cases/dsl-context-validation/0009/experr | 2 +- test/cases/dsl-context-validation/0010/experr | 2 +- test/cases/dsl-context-validation/0011/experr | 2 +- test/cases/dsl-context-validation/0012/experr | 2 +- test/cases/dsl-empty-statements/0002/experr | 2 +- test/cases/dsl-env/0008/experr | 2 +- .../sort-errors-04/experr | 3 +- .../sort-errors-05/experr | 3 +- test/cases/dsl-for-oosvar-loops/0005/experr | 2 +- test/cases/dsl-for-oosvar-loops/0006/experr | 2 +- test/cases/dsl-for-oosvar-loops/0007/experr | 2 +- test/cases/dsl-for-oosvar-loops/0008/experr | 2 +- test/cases/dsl-for-oosvar-loops/0009/experr | 2 +- test/cases/dsl-for-oosvar-loops/0010/experr | 2 +- test/cases/dsl-for-oosvar-loops/0011/experr | 2 +- test/cases/dsl-for-oosvar-loops/0012/experr | 2 +- test/cases/dsl-for-oosvar-loops/0013/experr | 2 +- test/cases/dsl-for-oosvar-loops/0014/experr | 2 +- test/cases/dsl-for-oosvar-loops/0015/experr | 2 +- test/cases/dsl-for-variants/0006/experr | 2 +- test/cases/dsl-for-variants/0008/experr | 2 +- test/cases/dsl-for-variants/0009/experr | 2 +- test/cases/dsl-for-variants/0010/experr | 2 +- test/cases/dsl-forbind-typedecl/0002/experr | 1 - test/cases/dsl-forbind-typedecl/0004/experr | 1 - test/cases/dsl-forbind-typedecl/0005/experr | 1 - 
.../dsl-lashed-emitp-singles/0075/experr | 2 +- .../dsl-line-number-column-number/cond/experr | 2 +- .../do-while/experr | 2 +- .../dsl-line-number-column-number/for/experr | 2 +- .../dsl-line-number-column-number/if/experr | 2 +- .../while/experr | 2 +- .../0003/experr | 1 - .../0004/experr | 1 - test/cases/dsl-localvar-typedecl/0002/experr | 1 - test/cases/dsl-localvar-typedecl/0003/experr | 1 - test/cases/dsl-map-funcs/0003/experr | 3 +- test/cases/dsl-map-funcs/0004/experr | 3 +- .../dsl-mapsum-mapdiff-mapexcept/0003/experr | 3 +- .../dsl-mapsum-mapdiff-mapexcept/0004/experr | 3 +- test/cases/dsl-mapvar-assignments/0050/experr | 1 - test/cases/dsl-mapvar-assignments/0056/experr | 1 - .../dsl-mapvars-udfs-subroutines/0006/experr | 2 +- .../dsl-mapvars-udfs-subroutines/0008/experr | 2 +- .../dsl-mapvars-udfs-subroutines/0010/experr | 2 +- .../dsl-mapvars-udfs-subroutines/0011/experr | 2 +- .../dsl-mapvars-udfs-subroutines/0012/experr | 1 - .../cases/dsl-no-filter-in-filter/0002/experr | 2 +- test/cases/dsl-parse/0112/experr | 2 +- test/cases/dsl-subr/0007/experr | 2 +- test/cases/dsl-subr/0015/experr | 2 +- test/cases/dsl-triple-for-loops/0015/experr | 2 +- test/cases/dsl-triple-for-loops/0016/experr | 2 +- test/cases/dsl-typedecl/0004/experr | 1 - test/cases/dsl-typedecl/0005/experr | 1 - test/cases/dsl-typedecl/0009/experr | 2 +- test/cases/dsl-typedecl/0010/experr | 2 +- test/cases/dsl-typedecl/0014/experr | 2 +- test/cases/dsl-typedecl/0015/experr | 2 +- .../0004/experr | 2 +- .../0005/experr | 2 +- .../0008/experr | 2 +- .../0010/experr | 2 +- .../0011/experr | 2 +- .../0012/experr | 2 +- .../0013/experr | 2 +- .../0014/experr | 2 +- .../0015/experr | 2 +- .../0016/experr | 2 +- .../0017/experr | 2 +- .../0018/experr | 2 +- .../0019/experr | 2 +- .../0020/experr | 2 +- .../0021/experr | 2 +- .../0022/experr | 2 +- .../0023/experr | 2 +- .../0024/experr | 2 +- .../0025/experr | 2 +- .../0026/experr | 2 +- .../0027/experr | 2 +- .../0028/experr | 2 +- 
.../0029/experr | 2 +- test/cases/io-spec-tsv/0004/experr | 3 +- test/cases/verb-label/0009/experr | 2 +- test/cases/verb-stats1/0018/experr | 2 +- 167 files changed, 229 insertions(+), 257 deletions(-) diff --git a/pkg/climain/mlrcli_shebang.go b/pkg/climain/mlrcli_shebang.go index 686c9f2d2..cfcab1b39 100644 --- a/pkg/climain/mlrcli_shebang.go +++ b/pkg/climain/mlrcli_shebang.go @@ -39,7 +39,7 @@ func maybeInterpolateDashS(args []string) ([]string, error) { return args, nil } if len(args) < 3 { - return nil, fmt.Errorf("mlr: -s flag requires a filename after it.") + return nil, fmt.Errorf("mlr: -s flag requires a filename after it") } // mlr -s scriptfile input1.csv input2.csv diff --git a/pkg/dsl/cst/block_exit.go b/pkg/dsl/cst/block_exit.go index 3ba730705..8542e88e7 100644 --- a/pkg/dsl/cst/block_exit.go +++ b/pkg/dsl/cst/block_exit.go @@ -70,7 +70,7 @@ func (root *RootNode) BuildReturnNode(astNode *dsl.ASTNode) (*ReturnNode, error) } else { lib.InternalCodingErrorIf(true) } - return nil, fmt.Errorf("internal coding error: statement should not be reached.") + return nil, fmt.Errorf("internal coding error: statement should not be reached") } func (node *ReturnNode) Execute(state *runtime.State) (*BlockExitPayload, error) { diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index 495cea6b3..3b9fa12f3 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -599,7 +599,7 @@ func (root *RootNode) BuildVariadicFunctionCallsiteNode( if callsiteArity < builtinFunctionInfo.minimumVariadicArity { return nil, fmt.Errorf( - "mlr: function %s takes minimum argument count %d; got %d.\n", + "mlr: function %s takes minimum argument count %d; got %d", builtinFunctionInfo.name, builtinFunctionInfo.minimumVariadicArity, callsiteArity, @@ -609,7 +609,7 @@ func (root *RootNode) BuildVariadicFunctionCallsiteNode( if builtinFunctionInfo.maximumVariadicArity != 0 { if callsiteArity > 
builtinFunctionInfo.maximumVariadicArity { return nil, fmt.Errorf( - "mlr: function %s takes maximum argument count %d; got %d.\n", + "mlr: function %s takes maximum argument count %d; got %d", builtinFunctionInfo.name, builtinFunctionInfo.maximumVariadicArity, callsiteArity, @@ -657,7 +657,7 @@ func (root *RootNode) BuildVariadicFunctionWithStateCallsiteNode( if callsiteArity < builtinFunctionInfo.minimumVariadicArity { return nil, fmt.Errorf( - "mlr: function %s takes minimum argument count %d; got %d.\n", + "mlr: function %s takes minimum argument count %d; got %d", builtinFunctionInfo.name, builtinFunctionInfo.minimumVariadicArity, callsiteArity, @@ -667,7 +667,7 @@ func (root *RootNode) BuildVariadicFunctionWithStateCallsiteNode( if builtinFunctionInfo.maximumVariadicArity != 0 { if callsiteArity > builtinFunctionInfo.maximumVariadicArity { return nil, fmt.Errorf( - "mlr: function %s takes maximum argument count %d; got %d.\n", + "mlr: function %s takes maximum argument count %d; got %d", builtinFunctionInfo.name, builtinFunctionInfo.maximumVariadicArity, callsiteArity, diff --git a/pkg/dsl/cst/cond.go b/pkg/dsl/cst/cond.go index aca452f43..53532d925 100644 --- a/pkg/dsl/cst/cond.go +++ b/pkg/dsl/cst/cond.go @@ -61,7 +61,7 @@ func (node *CondBlockNode) Execute( boolValue = false } else if !isBool { return nil, fmt.Errorf( - "mlr: conditional expression did not evaluate to boolean%s.", + "mlr: conditional expression did not evaluate to boolean%s", dsl.TokenToLocationInfo(node.conditionToken), ) } diff --git a/pkg/dsl/cst/dump.go b/pkg/dsl/cst/dump.go index ba41ce16a..c5e0d975e 100644 --- a/pkg/dsl/cst/dump.go +++ b/pkg/dsl/cst/dump.go @@ -137,7 +137,7 @@ func (root *RootNode) buildDumpxStatementNode( } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { retval.outputHandlerManager = output.NewPipeWriteHandlerManager(root.recordWriterOptions) } else { - return nil, fmt.Errorf("mlr: unhandled redirector node type %s.", string(redirectorNode.Type)) + return 
nil, fmt.Errorf("mlr: unhandled redirector node type %s", string(redirectorNode.Type)) } } } @@ -211,7 +211,7 @@ func (node *DumpStatementNode) dumpToFileOrPipe( redirectorTarget := node.redirectorTargetEvaluable.Evaluate(state) if !redirectorTarget.IsString() { return fmt.Errorf( - "mlr: output redirection yielded %s, not string.", + "mlr: output redirection yielded %s, not string", redirectorTarget.GetTypeName(), ) } diff --git a/pkg/dsl/cst/emit_emitp.go b/pkg/dsl/cst/emit_emitp.go index 323c7495a..85b9e374a 100644 --- a/pkg/dsl/cst/emit_emitp.go +++ b/pkg/dsl/cst/emit_emitp.go @@ -171,7 +171,7 @@ func (root *RootNode) buildEmitXStatementNode( } else { return nil, fmt.Errorf( - "mlr: unlashed-emit node types must be local variables, field names, oosvars, or maps; got %s.", + "mlr: unlashed-emit node types must be local variables, field names, oosvars, or maps; got %s", childNode.Type, ) } @@ -181,7 +181,7 @@ func (root *RootNode) buildEmitXStatementNode( for _, childNode := range emittablesNode.Children { if !EMITX_NAMED_NODE_TYPES[childNode.Type] { return nil, fmt.Errorf( - "mlr: lashed-emit node types must be local variables, field names, or oosvars; got %s.", + "mlr: lashed-emit node types must be local variables, field names, or oosvars; got %s", childNode.Type, ) } @@ -271,7 +271,7 @@ func (root *RootNode) buildEmitXStatementNode( } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { retval.outputHandlerManager = output.NewPipeWriteHandlerManager(root.recordWriterOptions) } else { - return nil, fmt.Errorf("mlr: unhandled redirector node type %s.", string(redirectorNode.Type)) + return nil, fmt.Errorf("mlr: unhandled redirector node type %s", string(redirectorNode.Type)) } } } @@ -989,7 +989,7 @@ func (node *EmitXStatementNode) emitRecordToFileOrPipe( ) error { redirectorTarget := node.redirectorTargetEvaluable.Evaluate(state) if !redirectorTarget.IsString() { - return fmt.Errorf("mlr: output redirection yielded %s, not string.", 
redirectorTarget.GetTypeName()) + return fmt.Errorf("mlr: output redirection yielded %s, not string", redirectorTarget.GetTypeName()) } outputFileName := redirectorTarget.String() diff --git a/pkg/dsl/cst/emitf.go b/pkg/dsl/cst/emitf.go index bb211f64a..eb6812ef7 100644 --- a/pkg/dsl/cst/emitf.go +++ b/pkg/dsl/cst/emitf.go @@ -119,7 +119,7 @@ func (root *RootNode) BuildEmitFStatementNode(astNode *dsl.ASTNode) (IExecutable } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { retval.outputHandlerManager = output.NewPipeWriteHandlerManager(root.recordWriterOptions) } else { - return nil, fmt.Errorf("mlr: unhandled redirector node type %s.", string(redirectorNode.Type)) + return nil, fmt.Errorf("mlr: unhandled redirector node type %s", string(redirectorNode.Type)) } } } @@ -163,7 +163,7 @@ func getNameFromNamedNode(astNode *dsl.ASTNode, description string) (string, err } else if astNode.Type == dsl.NodeTypeDirectFieldValue { return string(astNode.Token.Lit), nil } - return "", fmt.Errorf("mlr: can't get name of node type \"%s\" for %s.", string(astNode.Type), description) + return "", fmt.Errorf(`mlr: can't get name of node type "%s" for %s`, string(astNode.Type), description) } // ---------------------------------------------------------------- @@ -187,7 +187,7 @@ func (node *EmitFStatementNode) emitfToFileOrPipe( ) error { redirectorTarget := node.redirectorTargetEvaluable.Evaluate(state) if !redirectorTarget.IsString() { - return fmt.Errorf("mlr: output redirection yielded %s, not string.", redirectorTarget.GetTypeName()) + return fmt.Errorf("mlr: output redirection yielded %s, not string", redirectorTarget.GetTypeName()) } outputFileName := redirectorTarget.String() diff --git a/pkg/dsl/cst/for.go b/pkg/dsl/cst/for.go index 75e7cf258..204505aba 100644 --- a/pkg/dsl/cst/for.go +++ b/pkg/dsl/cst/for.go @@ -805,7 +805,7 @@ func (root *RootNode) BuildTripleForLoopNode(astNode *dsl.ASTNode) (*TripleForLo for i := 0; i < n-1; i++ { if 
continuationExpressionASTNode.Children[i].Type != dsl.NodeTypeAssignment { return nil, fmt.Errorf( - "mlr: the non-final triple-for continuation statements must be assignments.", + "mlr: the non-final triple-for continuation statements must be assignments", ) } precontinuationAssignment, err := root.BuildAssignmentNode( @@ -822,11 +822,11 @@ func (root *RootNode) BuildTripleForLoopNode(astNode *dsl.ASTNode) (*TripleForLo if bareBooleanASTNode.Type != dsl.NodeTypeBareBoolean { if n == 1 { return nil, fmt.Errorf( - "mlr: the triple-for continuation statement must be a bare boolean.", + "mlr: the triple-for continuation statement must be a bare boolean", ) } else { return nil, fmt.Errorf( - "mlr: the final triple-for continuation statement must be a bare boolean.", + "mlr: the final triple-for continuation statement must be a bare boolean", ) } } @@ -898,7 +898,7 @@ func (node *TripleForLoopNode) Execute(state *runtime.State) (*BlockExitPayload, boolValue, isBool := continuationValue.GetBoolValue() if !isBool { return nil, fmt.Errorf( - "mlr: for-loop continuation did not evaluate to boolean%s.", + "mlr: for-loop continuation did not evaluate to boolean%s", dsl.TokenToLocationInfo(node.continuationExpressionToken), ) } diff --git a/pkg/dsl/cst/if.go b/pkg/dsl/cst/if.go index a25f60eb8..3396ddd19 100644 --- a/pkg/dsl/cst/if.go +++ b/pkg/dsl/cst/if.go @@ -130,7 +130,7 @@ func (node *IfChainNode) Execute(state *runtime.State) (*BlockExitPayload, error boolValue, isBool := condition.GetBoolValue() if !isBool { return nil, fmt.Errorf( - "mlr: conditional expression did not evaluate to boolean%s.", + "mlr: conditional expression did not evaluate to boolean%s", dsl.TokenToLocationInfo(ifItem.conditionToken), ) } diff --git a/pkg/dsl/cst/lvalues.go b/pkg/dsl/cst/lvalues.go index cb18d7832..b680644cb 100644 --- a/pkg/dsl/cst/lvalues.go +++ b/pkg/dsl/cst/lvalues.go @@ -45,11 +45,11 @@ func (root *RootNode) BuildAssignableNode( case dsl.NodeTypeArrayOrMapPositionalNameAccess: 
return nil, fmt.Errorf( - "mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'.", + "mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'", ) case dsl.NodeTypeArrayOrMapPositionalValueAccess: return nil, fmt.Errorf( - "mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$'.", + "mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$'", ) case dsl.NodeTypeArrayOrMapIndexAccess: @@ -106,7 +106,7 @@ func (node *DirectFieldValueLvalueNode) AssignIndexed( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. if state.Inrec == nil { - return fmt.Errorf("there is no current record to assign to.") + return fmt.Errorf("there is no current record to assign to") } // AssignmentNode checks for absent, so we just assign whatever we get @@ -205,7 +205,7 @@ func (node *IndirectFieldValueLvalueNode) AssignIndexed( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. if state.Inrec == nil { - return fmt.Errorf("there is no current record to assign to.") + return fmt.Errorf("there is no current record to assign to") } lhsFieldName := node.lhsFieldNameExpression.Evaluate(state) @@ -298,7 +298,7 @@ func (node *PositionalFieldNameLvalueNode) Assign( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. 
if state.Inrec == nil { - return fmt.Errorf("there is no current record to assign to.") + return fmt.Errorf("there is no current record to assign to") } lhsFieldIndex := node.lhsFieldIndexExpression.Evaluate(state) @@ -310,7 +310,7 @@ func (node *PositionalFieldNameLvalueNode) Assign( return nil } else { return fmt.Errorf( - "mlr: positional index for $[[...]] assignment must be integer; got %s.", + "mlr: positional index for $[[...]] assignment must be integer; got %s", lhsFieldIndex.GetTypeName(), ) } @@ -324,7 +324,7 @@ func (node *PositionalFieldNameLvalueNode) AssignIndexed( // TODO: reconsider this if /when we decide to allow string-slice // assignments. return fmt.Errorf( - "mlr: $[[...]] = ... expressions are not indexable.", + "mlr: $[[...]] = ... expressions are not indexable", ) } @@ -416,7 +416,7 @@ func (node *PositionalFieldValueLvalueNode) AssignIndexed( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. if state.Inrec == nil { - return fmt.Errorf("there is no current record to assign to.") + return fmt.Errorf("there is no current record to assign to") } lhsFieldIndex := node.lhsFieldIndexExpression.Evaluate(state) @@ -434,7 +434,7 @@ func (node *PositionalFieldValueLvalueNode) AssignIndexed( return nil } else { return fmt.Errorf( - "mlr: positional index for $[[[...]]] assignment must be integer; got %s.", + "mlr: positional index for $[[[...]]] assignment must be integer; got %s", lhsFieldIndex.GetTypeName(), ) } @@ -517,7 +517,7 @@ func (node *FullSrecLvalueNode) AssignIndexed( // print inrec attributes. Also, a UDF/UDS invoked from begin/end could try // to access the inrec, and that would get past the validator. 
if state.Inrec == nil { - return fmt.Errorf("there is no current record to assign to.") + return fmt.Errorf("there is no current record to assign to") } // AssignmentNode checks for absentness of the rvalue, so we just assign @@ -787,7 +787,7 @@ func (root *RootNode) BuildLocalVariableLvalueNode(astNode *dsl.ASTNode) (IAssig if astNode.Children == nil { // untyped, like 'x = 3' if root.strictMode { return nil, fmt.Errorf( - "mlr: need typedecl such as \"var\", \"str\", \"num\", etc. for variable \"%s\" in strict mode", + `mlr: need typedecl such as "var", "str", "num", etc. for variable "%s" in strict mode`, variableName, ) } @@ -1086,7 +1086,7 @@ func (node *EnvironmentVariableLvalueNode) Assign( if !name.IsString() { return fmt.Errorf( - "assignments to ENV[...] must have string names; got %s \"%s\"\n", + `assignments to ENV[...] must have string names; got %s "%s"`, name.GetTypeName(), name.String(), ) @@ -1109,7 +1109,7 @@ func (node *EnvironmentVariableLvalueNode) AssignIndexed( indices []*mlrval.Mlrval, state *runtime.State, ) error { - return fmt.Errorf("mlr: ENV[...] cannot be indexed.") + return fmt.Errorf("mlr: ENV[...] cannot be indexed") } func (node *EnvironmentVariableLvalueNode) Unassign( @@ -1133,5 +1133,5 @@ func (node *EnvironmentVariableLvalueNode) UnassignIndexed( state *runtime.State, ) { // TODO: needs error return - //return errors.New("mlr: ENV[...] cannot be indexed.") + //return errors.New("mlr: ENV[...] 
cannot be indexed") } diff --git a/pkg/dsl/cst/print.go b/pkg/dsl/cst/print.go index f00c712a8..39c84f58d 100644 --- a/pkg/dsl/cst/print.go +++ b/pkg/dsl/cst/print.go @@ -280,7 +280,7 @@ func (root *RootNode) buildPrintxStatementNode( } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { retval.outputHandlerManager = output.NewPipeWriteHandlerManager(root.recordWriterOptions) } else { - return nil, fmt.Errorf("mlr: unhandled redirector node type %s.", string(redirectorNode.Type)) + return nil, fmt.Errorf("mlr: unhandled redirector node type %s", string(redirectorNode.Type)) } } } @@ -356,7 +356,7 @@ func (node *PrintStatementNode) printToFileOrPipe( ) error { redirectorTarget := node.redirectorTargetEvaluable.Evaluate(state) if !redirectorTarget.IsString() { - return fmt.Errorf("mlr: output redirection yielded %s, not string.", redirectorTarget.GetTypeName()) + return fmt.Errorf("mlr: output redirection yielded %s, not string", redirectorTarget.GetTypeName()) } outputFileName := redirectorTarget.String() diff --git a/pkg/dsl/cst/statements.go b/pkg/dsl/cst/statements.go index 251618f19..eec090606 100644 --- a/pkg/dsl/cst/statements.go +++ b/pkg/dsl/cst/statements.go @@ -67,9 +67,9 @@ func (root *RootNode) BuildStatementNode( return root.BuildEmitPStatementNode(astNode) case dsl.NodeTypeBeginBlock: - return nil, fmt.Errorf("mlr: begin blocks may only be declared at top level.") + return nil, fmt.Errorf("mlr: begin blocks may only be declared at top level") case dsl.NodeTypeEndBlock: - return nil, fmt.Errorf("mlr: end blocks may only be declared at top level.") + return nil, fmt.Errorf("mlr: end blocks may only be declared at top level") case dsl.NodeTypeIfChain: return root.BuildIfChainNode(astNode) @@ -89,9 +89,9 @@ func (root *RootNode) BuildStatementNode( return root.BuildTripleForLoopNode(astNode) case dsl.NodeTypeNamedFunctionDefinition: - return nil, fmt.Errorf("mlr: functions may only be declared at top level.") + return nil, fmt.Errorf("mlr: functions 
may only be declared at top level") case dsl.NodeTypeSubroutineDefinition: - return nil, fmt.Errorf("mlr: subroutines may only be declared at top level.") + return nil, fmt.Errorf("mlr: subroutines may only be declared at top level") case dsl.NodeTypeSubroutineCallsite: return root.BuildSubroutineCallsiteNode(astNode) diff --git a/pkg/dsl/cst/tee.go b/pkg/dsl/cst/tee.go index 7a9542eea..c0ef376f1 100644 --- a/pkg/dsl/cst/tee.go +++ b/pkg/dsl/cst/tee.go @@ -121,7 +121,7 @@ func (root *RootNode) BuildTeeStatementNode(astNode *dsl.ASTNode) (IExecutable, } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { retval.outputHandlerManager = output.NewPipeWriteHandlerManager(root.recordWriterOptions) } else { - return nil, fmt.Errorf("mlr: unhandled redirector node type %s.", string(redirectorNode.Type)) + return nil, fmt.Errorf("mlr: unhandled redirector node type %s", string(redirectorNode.Type)) } } @@ -138,7 +138,7 @@ func (root *RootNode) BuildTeeStatementNode(astNode *dsl.ASTNode) (IExecutable, func (node *TeeStatementNode) Execute(state *runtime.State) (*BlockExitPayload, error) { expression := node.expressionEvaluable.Evaluate(state) if !expression.IsMap() { - return nil, fmt.Errorf("mlr: tee-evaluaiton yielded %s, not map.", expression.GetTypeName()) + return nil, fmt.Errorf("mlr: tee-evaluaiton yielded %s, not map", expression.GetTypeName()) } err := node.teeToRedirectFunc(expression.GetMap(), state) return nil, err @@ -151,7 +151,7 @@ func (node *TeeStatementNode) teeToFileOrPipe( ) error { redirectorTarget := node.redirectorTargetEvaluable.Evaluate(state) if !redirectorTarget.IsString() { - return fmt.Errorf("mlr: output redirection yielded %s, not string.", redirectorTarget.GetTypeName()) + return fmt.Errorf("mlr: output redirection yielded %s, not string", redirectorTarget.GetTypeName()) } outputFileName := redirectorTarget.String() diff --git a/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go index 4add00e88..e1544d7b2 100644 --- a/pkg/dsl/cst/udf.go +++ 
b/pkg/dsl/cst/udf.go @@ -403,7 +403,7 @@ func (root *RootNode) BuildAndInstallUDF(astNode *dsl.ASTNode) error { if BuiltinFunctionManagerInstance.LookUp(functionName) != nil { return fmt.Errorf( - "mlr: function named \"%s\" must not override a built-in function of the same name.", + `mlr: function named "%s" must not override a built-in function of the same name`, functionName, ) } @@ -411,7 +411,7 @@ func (root *RootNode) BuildAndInstallUDF(astNode *dsl.ASTNode) error { if !root.allowUDFUDSRedefinitions { if root.udfManager.ExistsByName(functionName) { return fmt.Errorf( - "mlr: function named \"%s\" has already been defined.", + `mlr: function named "%s" has already been defined`, functionName, ) } diff --git a/pkg/dsl/cst/uds.go b/pkg/dsl/cst/uds.go index c9f888ca2..f42d5fc20 100644 --- a/pkg/dsl/cst/uds.go +++ b/pkg/dsl/cst/uds.go @@ -244,7 +244,7 @@ func (root *RootNode) BuildAndInstallUDS(astNode *dsl.ASTNode) error { if !root.allowUDFUDSRedefinitions { if root.udsManager.ExistsByName(subroutineName) { return fmt.Errorf( - "mlr: subroutine named \"%s\" has already been defined.", + `mlr: subroutine named "%s" has already been defined`, subroutineName, ) } diff --git a/pkg/dsl/cst/validate.go b/pkg/dsl/cst/validate.go index 989b3d0e7..39db06fdc 100644 --- a/pkg/dsl/cst/validate.go +++ b/pkg/dsl/cst/validate.go @@ -31,7 +31,7 @@ func ValidateAST( // But filter '' is an error. 
if ast.RootNode.Children == nil || len(ast.RootNode.Children) == 0 { if dslInstanceType == DSLInstanceTypeFilter { - return fmt.Errorf("mlr: filter statement must not be empty.") + return fmt.Errorf("mlr: filter statement must not be empty") } } @@ -80,7 +80,7 @@ func validateASTAux( if astNode.Type == dsl.NodeTypeFilterStatement { if dslInstanceType == DSLInstanceTypeFilter { return fmt.Errorf( - "mlr: filter expressions must not also contain the \"filter\" keyword.", + `mlr: filter expressions must not also contain the "filter" keyword`, ) } } @@ -89,21 +89,21 @@ func validateASTAux( if astNode.Type == dsl.NodeTypeBeginBlock { if !atTopLevel { return fmt.Errorf( - "mlr: begin blocks can only be at top level.", + "mlr: begin blocks can only be at top level", ) } nextLevelInBeginOrEnd = true } else if astNode.Type == dsl.NodeTypeEndBlock { if !atTopLevel { return fmt.Errorf( - "mlr: end blocks can only be at top level.", + "mlr: end blocks can only be at top level", ) } nextLevelInBeginOrEnd = true } else if astNode.Type == dsl.NodeTypeNamedFunctionDefinition { if !atTopLevel { return fmt.Errorf( - "mlr: func blocks can only be at top level.", + "mlr: func blocks can only be at top level", ) } nextLevelInUDF = true @@ -112,7 +112,7 @@ func validateASTAux( } else if astNode.Type == dsl.NodeTypeSubroutineDefinition { if !atTopLevel { return fmt.Errorf( - "mlr: subr blocks can only be at top level.", + "mlr: subr blocks can only be at top level", ) } nextLevelInUDS = true @@ -134,7 +134,7 @@ func validateASTAux( astNode.Type == dsl.NodeTypeIndirectFieldValue || astNode.Type == dsl.NodeTypeFullSrec { return fmt.Errorf( - "mlr: begin/end blocks cannot refer to records via $x, $*, etc.", + "mlr: begin/end blocks cannot refer to records via $x, $*, etc", ) } } @@ -143,7 +143,7 @@ func validateASTAux( if !inLoop { if astNode.Type == dsl.NodeTypeBreak { return fmt.Errorf( - "mlr: break statements are only valid within for/do/while loops.", + "mlr: break statements are only 
valid within for/do/while loops", ) } } @@ -151,7 +151,7 @@ func validateASTAux( if !inLoop { if astNode.Type == dsl.NodeTypeContinue { return fmt.Errorf( - "mlr: break statements are only valid within for/do/while loops.", + "mlr: break statements are only valid within for/do/while loops", ) } } @@ -169,7 +169,7 @@ func validateASTAux( if !inUDF && !inUDS { if astNode.Type == dsl.NodeTypeReturn { return fmt.Errorf( - "mlr: return statements are only valid within func/subr blocks.", + "mlr: return statements are only valid within func/subr blocks", ) } } @@ -179,14 +179,14 @@ func validateASTAux( if inUDF { if len(astNode.Children) != 1 { return fmt.Errorf( - "mlr: return statements in func blocks must return a value.", + "mlr: return statements in func blocks must return a value", ) } } if inUDS { if len(astNode.Children) != 0 { return fmt.Errorf( - "mlr: return statements in subr blocks must not return a value.", + "mlr: return statements in subr blocks must not return a value", ) } } @@ -197,7 +197,7 @@ func validateASTAux( ok := VALID_LHS_NODE_TYPES[astNode.Type] if !ok { return fmt.Errorf( - "mlr: %s is not valid on the left-hand side of an assignment.", + "mlr: %s is not valid on the left-hand side of an assignment", astNode.Type, ) } @@ -208,7 +208,7 @@ func validateASTAux( ok := VALID_LHS_NODE_TYPES[astNode.Type] if !ok { return fmt.Errorf( - "mlr: %s is not valid for unset statement.", + "mlr: %s is not valid for unset statement", astNode.Type, ) } @@ -259,7 +259,7 @@ func validateForLoopTwoVariableUniqueNames(astNode *dsl.ASTNode) error { keyVarName := string(keyVarNode.Token.Lit) valVarName := string(valVarNode.Token.Lit) if keyVarName == valVarName { - return fmt.Errorf("mlr: redefinition of variable %s in the same scope.", keyVarName) + return fmt.Errorf("mlr: redefinition of variable %s in the same scope", keyVarName) } else { return nil } @@ -289,14 +289,14 @@ func validateForLoopMultivariableUniqueNames(astNode *dsl.ASTNode) error { name := 
string(keyVarNode.Token.Lit) _, present := seen[name] if present { - return fmt.Errorf("mlr: redefinition of variable %s in the same scope.", name) + return fmt.Errorf("mlr: redefinition of variable %s in the same scope", name) } seen[name] = true } valVarName := string(valVarNode.Token.Lit) if seen[valVarName] { - return fmt.Errorf("mlr: redefinition of variable %s in the same scope.", valVarName) + return fmt.Errorf("mlr: redefinition of variable %s in the same scope", valVarName) } return nil diff --git a/pkg/dsl/cst/while.go b/pkg/dsl/cst/while.go index 46ea57e32..2e2fd5f12 100644 --- a/pkg/dsl/cst/while.go +++ b/pkg/dsl/cst/while.go @@ -60,7 +60,7 @@ func (node *WhileLoopNode) Execute(state *runtime.State) (*BlockExitPayload, err boolValue, isBool := condition.GetBoolValue() if !isBool { return nil, fmt.Errorf( - "mlr: conditional expression did not evaluate to boolean%s.", + "mlr: conditional expression did not evaluate to boolean%s", dsl.TokenToLocationInfo(node.conditionToken), ) } @@ -157,7 +157,7 @@ func (node *DoWhileLoopNode) Execute(state *runtime.State) (*BlockExitPayload, e boolValue, isBool := condition.GetBoolValue() if !isBool { return nil, fmt.Errorf( - "mlr: conditional expression did not evaluate to boolean%s.", + "mlr: conditional expression did not evaluate to boolean%s", dsl.TokenToLocationInfo(node.conditionToken), ) } diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index 976f6ed1d..20c1fd15d 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -244,8 +244,7 @@ func (reader *RecordReaderCSV) getRecordBatch( } else { if !reader.readerOptions.AllowRaggedCSVInput { err := fmt.Errorf( - "mlr: CSV header/data length mismatch %d != %d "+ - "at filename %s row %d.\n", + "mlr: CSV header/data length mismatch %d != %d at filename %s row %d", nh, nd, reader.filename, reader.rowNumber, ) errorChannel <- err diff --git a/pkg/input/record_reader_csvlite.go b/pkg/input/record_reader_csvlite.go 
index dd590da82..d658a4f99 100644 --- a/pkg/input/record_reader_csvlite.go +++ b/pkg/input/record_reader_csvlite.go @@ -217,8 +217,7 @@ func getRecordBatchExplicitCSVHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: CSV header/data length mismatch %d != %d "+ - "at filename %s line %d.\n", + "mlr: CSV header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err @@ -342,8 +341,7 @@ func getRecordBatchImplicitCSVHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: CSV header/data length mismatch %d != %d "+ - "at filename %s line %d.\n", + "mlr: CSV header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 094dca996..096e506fb 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -153,7 +153,7 @@ func (reader *RecordReaderJSON) processHandle( if !mlrval.IsMap() { // TODO: more context errorChannel <- fmt.Errorf( - "valid but unmillerable JSON. Expected map (JSON object); got %s.", + "valid but unmillerable JSON. Expected map (JSON object); got %s", mlrval.GetTypeName(), ) return @@ -174,7 +174,7 @@ func (reader *RecordReaderJSON) processHandle( } else { errorChannel <- fmt.Errorf( - "valid but unmillerable JSON. Expected map (JSON object); got %s.", + "valid but unmillerable JSON. 
Expected map (JSON object); got %s", mlrval.GetTypeName(), ) return diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go index d3ed2c228..b83710f7b 100644 --- a/pkg/input/record_reader_pprint.go +++ b/pkg/input/record_reader_pprint.go @@ -240,8 +240,7 @@ func getRecordBatchExplicitPprintHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: PPRINT-barred header/data length mismatch %d != %d "+ - "at filename %s line %d.\n", + "mlr: PPRINT-barred header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err @@ -378,8 +377,7 @@ func getRecordBatchImplicitPprintHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: CSV header/data length mismatch %d != %d "+ - "at filename %s line %d.\n", + "mlr: CSV header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err diff --git a/pkg/input/record_reader_tsv.go b/pkg/input/record_reader_tsv.go index 4db48f669..f70042bbe 100644 --- a/pkg/input/record_reader_tsv.go +++ b/pkg/input/record_reader_tsv.go @@ -186,8 +186,7 @@ func getRecordBatchExplicitTSVHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: TSV header/data length mismatch %d != %d "+ - "at filename %s line %d.\n", + "mlr: TSV header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err @@ -307,8 +306,7 @@ func getRecordBatchImplicitTSVHeader( } else { if !reader.readerOptions.AllowRaggedCSVInput && len(reader.headerStrings) != len(fields) { err := fmt.Errorf( - "mlr: TSV header/data length mismatch %d != %d "+ - 
"at filename %s line %d.\n", + "mlr: TSV header/data length mismatch %d != %d at filename %s line %d", len(reader.headerStrings), len(fields), filename, reader.inputLineNumber, ) errorChannel <- err diff --git a/pkg/lib/file_readers.go b/pkg/lib/file_readers.go index d4d3f60be..d370d616f 100644 --- a/pkg/lib/file_readers.go +++ b/pkg/lib/file_readers.go @@ -269,10 +269,10 @@ func IsUpdateableInPlace( if strings.HasPrefix(filename, "http://") || strings.HasPrefix(filename, "https://") || strings.HasPrefix(filename, "file://") { - return fmt.Errorf("http://, https://, and file:// URLs are not updateable in place.") + return fmt.Errorf("http://, https://, and file:// URLs are not updateable in place") } if prepipe != "" { - return fmt.Errorf("input with --prepipe or --prepipex is not updateable in place.") + return fmt.Errorf("input with --prepipe or --prepipex is not updateable in place") } return nil } @@ -314,7 +314,7 @@ func WrapOutputHandle( ) (io.WriteCloser, bool, error) { switch inputFileEncoding { case FileInputEncodingBzip2: - return fileWriteHandle, false, fmt.Errorf("bzip2 is not currently supported for in-place mode.") + return fileWriteHandle, false, fmt.Errorf("bzip2 is not currently supported for in-place mode") case FileInputEncodingGzip: return gzip.NewWriter(fileWriteHandle), true, nil case FileInputEncodingZlib: diff --git a/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go index caea8e2ab..9552efe6c 100644 --- a/pkg/mlrval/mlrmap_accessors.go +++ b/pkg/mlrval/mlrmap_accessors.go @@ -360,7 +360,7 @@ func (mlrmap *Mlrmap) getWithMlrvalArrayIndex(index *Mlrval) (*Mlrval, error) { } if i < n-1 { if !next.IsMap() { - return nil, fmt.Errorf("mlr: cannot multi-index non-map.") + return nil, fmt.Errorf("mlr: cannot multi-index non-map") } current = next.intf.(*Mlrmap) } else { @@ -378,7 +378,7 @@ func (mlrmap *Mlrmap) getWithMlrvalSingleIndex(index *Mlrval) (*Mlrval, error) { return mlrmap.Get(index.String()), nil } else { return nil, 
fmt.Errorf( - "Record/map indices must be string, int, or array thereof; got %s", index.GetTypeName(), + "record/map indices must be string, int, or array thereof; got %s", index.GetTypeName(), ) } } diff --git a/pkg/mlrval/mlrval_collections.go b/pkg/mlrval/mlrval_collections.go index 46e2d3718..2813fe099 100644 --- a/pkg/mlrval/mlrval_collections.go +++ b/pkg/mlrval/mlrval_collections.go @@ -396,9 +396,9 @@ func putIndexedOnArray( if inBounds { (*baseArray)[zindex] = rvalue.Copy() } else if mindex.intf.(int64) == 0 { - return errors.New("mlr: zero indices are not supported. Indices are 1-up.") + return errors.New("mlr: zero indices are not supported. Indices are 1-up") } else if mindex.intf.(int64) < 0 { - return errors.New("mlr: Cannot use negative indices to auto-lengthen arrays.") + return errors.New("mlr: Cannot use negative indices to auto-lengthen arrays") } else { // Array is [a,b,c] with mindices 1,2,3. Length is 3. Zindices are 0,1,2. // Given mindex is 4. @@ -431,9 +431,9 @@ func putIndexedOnArray( return (*baseArray)[zindex].PutIndexed(indices[1:], rvalue) } else if mindex.intf.(int64) == 0 { - return errors.New("mlr: zero indices are not supported. Indices are 1-up.") + return errors.New("mlr: zero indices are not supported. Indices are 1-up") } else if mindex.intf.(int64) < 0 { - return errors.New("mlr: Cannot use negative indices to auto-lengthen arrays.") + return errors.New("mlr: Cannot use negative indices to auto-lengthen arrays") } else { // Already allocated but needs to be longer LengthenMlrvalArray(baseArray, int(mindex.intf.(int64))) @@ -458,7 +458,7 @@ func (mv *Mlrval) RemoveIndexed(indices []*Mlrval) error { } else { return errors.New( - "mlr: cannot unset index variable which is neither map nor array.", + "mlr: cannot unset index variable which is neither map nor array", ) } } @@ -527,20 +527,20 @@ func removeIndexedOnArray( rightSlice := (*baseArray)[zindex+1 : len((*baseArray))] *baseArray = append(leftSlice, rightSlice...) 
} else if mindex.intf.(int64) == 0 { - return errors.New("mlr: zero indices are not supported. Indices are 1-up.") + return errors.New("mlr: zero indices are not supported. Indices are 1-up") } else { // TODO: improve wording - return errors.New("mlr: array index out of bounds for unset.") + return errors.New("mlr: array index out of bounds for unset") } } else { // More indices remain; recurse if inBounds { return (*baseArray)[zindex].RemoveIndexed(indices[1:]) } else if mindex.intf.(int64) == 0 { - return errors.New("mlr: zero indices are not supported. Indices are 1-up.") + return errors.New("mlr: zero indices are not supported. Indices are 1-up") } else { // TODO: improve wording - return errors.New("mlr: array index out of bounds for unset.") + return errors.New("mlr: array index out of bounds for unset") } } diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go index fd7d6711e..a5dc213a7 100644 --- a/pkg/mlrval/mlrval_json.go +++ b/pkg/mlrval/mlrval_json.go @@ -105,7 +105,7 @@ func (mv *Mlrval) UnmarshalJSON(inputBytes []byte) error { decoder := json.NewDecoder(bytes.NewReader(inputBytes)) pmv, eof, err := MlrvalDecodeFromJSON(decoder) if eof { - return fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + return fmt.Errorf("mlr: JSON parser: unexpected premature EOF") } if err != nil { return err @@ -119,7 +119,7 @@ func TryUnmarshalJSON(inputBytes []byte) (pmv *Mlrval, err error) { decoder := json.NewDecoder(bytes.NewReader(inputBytes)) pmv, eof, err := MlrvalDecodeFromJSON(decoder) if eof { - err = fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + err = fmt.Errorf("mlr: JSON parser: unexpected premature EOF") } return pmv, err } @@ -196,7 +196,7 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( element, eof, err := MlrvalDecodeFromJSON(decoder) if eof { // xxx constify - return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF") 
} if err != nil { return nil, false, err @@ -211,7 +211,7 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( key, eof, err := MlrvalDecodeFromJSON(decoder) if eof { // xxx constify - return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF") } if err != nil { return nil, false, err @@ -219,14 +219,14 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( if !key.IsString() { return nil, false, fmt.Errorf( // TODO: print out what was gotten - "mlr JSON reader: object keys must be string-valued.", + "mlr JSON reader: object keys must be string-valued", ) } value, eof, err := MlrvalDecodeFromJSON(decoder) if eof { // xxx constify - return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF") } if err != nil { return nil, false, err @@ -245,7 +245,7 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( endToken, err := decoder.Token() if err == io.EOF { - return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF.") + return nil, false, fmt.Errorf("mlr: JSON parser: unexpected premature EOF") } if err != nil { return nil, false, err diff --git a/pkg/output/file_output_handlers.go b/pkg/output/file_output_handlers.go index a5b3824ee..d47617cc0 100644 --- a/pkg/output/file_output_handlers.go +++ b/pkg/output/file_output_handlers.go @@ -300,7 +300,7 @@ func NewPipeWriteOutputHandler( ) (*FileOutputHandler, error) { writePipe, err := lib.OpenOutboundHalfPipe(commandString) if err != nil { - return nil, fmt.Errorf("could not launch command \"%s\" for pipe-to.", commandString) + return nil, fmt.Errorf(`could not launch command "%s" for pipe-to`, commandString) } return newOutputHandlerCommon( diff --git a/pkg/parsing/token/token.go b/pkg/parsing/token/token.go index 50282b2c3..69fe0404c 100644 --- a/pkg/parsing/token/token.go +++ b/pkg/parsing/token/token.go @@ -123,7 
+123,7 @@ func (t *Token) Int64Value() (int64, error) { func (t *Token) UTF8Rune() (rune, error) { r, _ := utf8.DecodeRune(t.Lit) if r == utf8.RuneError { - err := fmt.Errorf("Invalid rune") + err := fmt.Errorf("invalid rune") return r, err } return r, nil diff --git a/pkg/runtime/stack.go b/pkg/runtime/stack.go index 263e5cee1..6424ebf19 100644 --- a/pkg/runtime/stack.go +++ b/pkg/runtime/stack.go @@ -407,7 +407,7 @@ func (frame *StackFrame) defineTyped( return nil } else { return fmt.Errorf( - "%s: variable %s has already been defined in the same scope.", + "%s: variable %s has already been defined in the same scope", "mlr", stackVariable.name, ) } @@ -429,7 +429,7 @@ func (frame *StackFrame) setIndexed( return frame.set(stackVariable, newval) } else { return fmt.Errorf( - "%s: map indices must be int or string; got %s.\n", + "%s: map indices must be int or string; got %s", "mlr", leadingIndex.GetTypeName(), ) } diff --git a/pkg/terminals/regtest/regtester.go b/pkg/terminals/regtest/regtester.go index 1df98f874..029ace13b 100644 --- a/pkg/terminals/regtest/regtester.go +++ b/pkg/terminals/regtest/regtester.go @@ -855,7 +855,7 @@ func (regtester *RegTester) loadEnvFile( fields := strings.SplitN(line, "=", 2) if len(fields) != 2 { return nil, fmt.Errorf( - "mlr: could not parse line \"%s\" from file \"%s\".\n", + `mlr: could not parse line "%s" from file "%s"`, line, filename, ) } @@ -892,7 +892,7 @@ func (regtester *RegTester) loadStringPairFile( fields := strings.SplitN(line, " ", 2) // TODO: split on multi-space if len(fields) != 2 { return nil, fmt.Errorf( - "mlr: could not parse line \"%s\" from file \"%s\".\n", + `mlr: could not parse line "%s" from file "%s"`, line, filename, ) } diff --git a/pkg/terminals/repl/session.go b/pkg/terminals/repl/session.go index 33fcac149..27bd3896a 100644 --- a/pkg/terminals/repl/session.go +++ b/pkg/terminals/repl/session.go @@ -268,7 +268,7 @@ func (repl *Repl) closeBufferedOutputStream() error { if repl.recordOutputStream 
!= os.Stdout { err := repl.recordOutputStream.Close() if err != nil { - return fmt.Errorf("mlr repl: error on redirect close of %s: %v\n", + return fmt.Errorf("mlr repl: error on redirect close of %s: %v", repl.recordOutputFileName, err, ) } diff --git a/pkg/transformers/case.go b/pkg/transformers/case.go index e630c8e15..1020876e4 100644 --- a/pkg/transformers/case.go +++ b/pkg/transformers/case.go @@ -157,7 +157,7 @@ func NewTransformerCase( tr.caserFunc = cases.Title(language.Und).String default: return nil, fmt.Errorf( - "mlr %s: case option must be specified using one of -u, -l, -s, -t.", + "mlr %s: case option must be specified using one of -u, -l, -s, -t", verbNameCase, ) } diff --git a/pkg/transformers/label.go b/pkg/transformers/label.go index 033908093..3ad653595 100644 --- a/pkg/transformers/label.go +++ b/pkg/transformers/label.go @@ -104,7 +104,7 @@ func NewTransformerLabel( for _, newName := range newNames { _, ok := uniquenessChecker[newName] if ok { - return nil, fmt.Errorf("mlr label: labels must be unique; got duplicate \"%s\"\n", newName) + return nil, fmt.Errorf(`mlr label: labels must be unique; got duplicate "%s"`, newName) } uniquenessChecker[newName] = true } diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go index 56fe0bd0d..f16a9d31e 100644 --- a/pkg/transformers/merge_fields.go +++ b/pkg/transformers/merge_fields.go @@ -254,7 +254,7 @@ func NewTransformerMergeFields( for _, accumulatorName := range accumulatorNameList { if !utils.ValidateStats1AccumulatorName(accumulatorName) { return nil, fmt.Errorf( - "mlr %s: accumulator \"%s\" not found.\n", + `mlr %s: accumulator "%s" not found`, verbNameMergeFields, accumulatorName, ) } diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go index 34e2e12d9..974903345 100644 --- a/pkg/transformers/put_or_filter.go +++ b/pkg/transformers/put_or_filter.go @@ -73,7 +73,7 @@ func transformerPutOrFilterUsage( Since the expression pieces are simply 
concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. @@ -473,7 +473,7 @@ func NewTransformerPut( for _, preset := range presets { pair := strings.SplitN(preset, "=", 2) if len(pair) != 2 { - return nil, fmt.Errorf("missing \"=\" in preset expression \"%s\".", preset) + return nil, fmt.Errorf(`missing "=" in preset expression "%s"`, preset) } key := pair[0] svalue := pair[1] diff --git a/pkg/transformers/seqgen.go b/pkg/transformers/seqgen.go index 5700a4ea5..560df4ed3 100644 --- a/pkg/transformers/seqgen.go +++ b/pkg/transformers/seqgen.go @@ -156,7 +156,7 @@ func NewTransformerSeqgen( if fstart == fstop { doneComparator = bifs.BIF_equals } else { - return nil, fmt.Errorf("mlr seqgen: step must not be zero unless start == stop.") + return nil, fmt.Errorf("mlr seqgen: step must not be zero unless start == stop") } } diff --git a/pkg/transformers/stats1.go b/pkg/transformers/stats1.go index 3b26be564..1f924aa2d 100644 --- a/pkg/transformers/stats1.go +++ b/pkg/transformers/stats1.go @@ -312,7 +312,7 @@ func NewTransformerStats1( ) (*TransformerStats1, error) { for _, name := range accumulatorNameList { if !utils.ValidateStats1AccumulatorName(name) { - return nil, fmt.Errorf("mlr stats1: accumulator \"%s\" not found.", name) + return nil, fmt.Errorf(`mlr stats1: accumulator "%s" not found`, name) } } diff --git a/pkg/transformers/stats2.go b/pkg/transformers/stats2.go index 47dc60387..f1e9d94de 100644 --- a/pkg/transformers/stats2.go +++ b/pkg/transformers/stats2.go @@ -207,7 +207,7 @@ func NewTransformerStats2( ) (*TransformerStats2, error) { for _, name := range accumulatorNameList { if !utils.ValidateStats2AccumulatorName(name) { - return nil, fmt.Errorf("mlr stats2: 
accumulator \"%s\" not found.", name) + return nil, fmt.Errorf(`mlr stats2: accumulator "%s" not found`, name) } } diff --git a/pkg/transformers/step.go b/pkg/transformers/step.go index 7ecafaf70..fcf5d0eee 100644 --- a/pkg/transformers/step.go +++ b/pkg/transformers/step.go @@ -282,12 +282,12 @@ func NewTransformerStep( ) (*TransformerStep, error) { if len(stepperInputs) == 0 || len(valueFieldNames) == 0 { - return nil, fmt.Errorf("mlr %s: -a and -f are both required arguments.", verbNameStep) + return nil, fmt.Errorf("mlr %s: -a and -f are both required arguments", verbNameStep) } if len(stringAlphas) != 0 && len(ewmaSuffixes) != 0 { if len(ewmaSuffixes) != len(stringAlphas) { return nil, fmt.Errorf( - "mlr %s: If -d and -o are provided, their values must have the same length.", verbNameStep, + "mlr %s: If -d and -o are provided, their values must have the same length", verbNameStep, ) } } diff --git a/pkg/types/mlrval_typing.go b/pkg/types/mlrval_typing.go index 8adc5b010..4eeb60269 100644 --- a/pkg/types/mlrval_typing.go +++ b/pkg/types/mlrval_typing.go @@ -24,7 +24,7 @@ func NewTypeGatedMlrvalName( ) (*TypeGatedMlrvalName, error) { typeMask, ok := mlrval.TypeNameToMask(typeName) if !ok { - return nil, fmt.Errorf("mlr: couldn't resolve type name \"%s\".", typeName) + return nil, fmt.Errorf(`mlr: couldn't resolve type name "%s"`, typeName) } return &TypeGatedMlrvalName{ Name: name, @@ -39,7 +39,7 @@ func (tname *TypeGatedMlrvalName) Check(value *mlrval.Mlrval) error { return nil } else { return fmt.Errorf( - "mlr: couldn't assign variable %s %s from value %s %s\n", + "mlr: couldn't assign variable %s %s from value %s %s", tname.TypeName, tname.Name, value.GetTypeName(), value.String(), ) } diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 3265d1e0c..bc366781e 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -202,7 +202,7 @@ Options: Since the expression pieces are simply concatenated, please 
be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. @@ -681,7 +681,7 @@ Options: Since the expression pieces are simply concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. diff --git a/test/cases/dsl-argpass-typedecl/0002/experr b/test/cases/dsl-argpass-typedecl/0002/experr index 49cdce4a4..49d1b3f4b 100644 --- a/test/cases/dsl-argpass-typedecl/0002/experr +++ b/test/cases/dsl-argpass-typedecl/0002/experr @@ -1 +1 @@ -mlr: couldn't assign variable int i from value float 0.34679014 +mlr: couldn't assign variable int i from value float 0.34679014 \ No newline at end of file diff --git a/test/cases/dsl-argpass-typedecl/0003/experr b/test/cases/dsl-argpass-typedecl/0003/experr index 93b3d02d8..88075a591 100644 --- a/test/cases/dsl-argpass-typedecl/0003/experr +++ b/test/cases/dsl-argpass-typedecl/0003/experr @@ -1 +1 @@ -mlr: couldn't assign variable int function return value from value float 3.79679014 +mlr: couldn't assign variable int function return value from value float 3.79679014 \ No newline at end of file diff --git a/test/cases/dsl-argpass-typedecl/0004/experr b/test/cases/dsl-argpass-typedecl/0004/experr index 8289c8c80..e3de7b5d2 100644 --- a/test/cases/dsl-argpass-typedecl/0004/experr +++ b/test/cases/dsl-argpass-typedecl/0004/experr @@ -1 +1 @@ -mlr: couldn't assign variable int function return value from value float 4.45000000 +mlr: couldn't assign variable int function return value 
from value float 4.45000000 \ No newline at end of file diff --git a/test/cases/dsl-argpass-typedecl/0005/experr b/test/cases/dsl-argpass-typedecl/0005/experr index 323a86f3f..22ff28ed1 100644 --- a/test/cases/dsl-argpass-typedecl/0005/experr +++ b/test/cases/dsl-argpass-typedecl/0005/experr @@ -1 +1 @@ -mlr: couldn't assign variable int function return value from value error (error) +mlr: couldn't assign variable int function return value from value error (error) \ No newline at end of file diff --git a/test/cases/dsl-argpass-typedecl/0007/experr b/test/cases/dsl-argpass-typedecl/0007/experr index 85b8be44b..49cdce4a4 100644 --- a/test/cases/dsl-argpass-typedecl/0007/experr +++ b/test/cases/dsl-argpass-typedecl/0007/experr @@ -1,2 +1 @@ mlr: couldn't assign variable int i from value float 0.34679014 - diff --git a/test/cases/dsl-argpass-typedecl/0008/experr b/test/cases/dsl-argpass-typedecl/0008/experr index 1dc1c87a0..4b9795ee1 100644 --- a/test/cases/dsl-argpass-typedecl/0008/experr +++ b/test/cases/dsl-argpass-typedecl/0008/experr @@ -1,2 +1 @@ mlr: couldn't assign variable num i from value string a - diff --git a/test/cases/dsl-array-map-indexing/0005/experr b/test/cases/dsl-array-map-indexing/0005/experr index 3f242c2b7..d2d964201 100644 --- a/test/cases/dsl-array-map-indexing/0005/experr +++ b/test/cases/dsl-array-map-indexing/0005/experr @@ -1 +1 @@ -mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. +mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0006/experr b/test/cases/dsl-array-map-indexing/0006/experr index 40195c5a1..f6f1ef0d1 100644 --- a/test/cases/dsl-array-map-indexing/0006/experr +++ b/test/cases/dsl-array-map-indexing/0006/experr @@ -1 +1 @@ -mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. 
+mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0007/experr b/test/cases/dsl-array-map-indexing/0007/experr index 3f242c2b7..d2d964201 100644 --- a/test/cases/dsl-array-map-indexing/0007/experr +++ b/test/cases/dsl-array-map-indexing/0007/experr @@ -1 +1 @@ -mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. +mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0035/experr b/test/cases/dsl-array-map-indexing/0035/experr index 3f242c2b7..d2d964201 100644 --- a/test/cases/dsl-array-map-indexing/0035/experr +++ b/test/cases/dsl-array-map-indexing/0035/experr @@ -1 +1 @@ -mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. +mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0036/experr b/test/cases/dsl-array-map-indexing/0036/experr index 3f242c2b7..d2d964201 100644 --- a/test/cases/dsl-array-map-indexing/0036/experr +++ b/test/cases/dsl-array-map-indexing/0036/experr @@ -1 +1 @@ -mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. +mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0068/experr b/test/cases/dsl-array-map-indexing/0068/experr index 3f242c2b7..d2d964201 100644 --- a/test/cases/dsl-array-map-indexing/0068/experr +++ b/test/cases/dsl-array-map-indexing/0068/experr @@ -1 +1 @@ -mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. 
+mlr: '[[...]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-array-map-indexing/0069/experr b/test/cases/dsl-array-map-indexing/0069/experr index 40195c5a1..f6f1ef0d1 100644 --- a/test/cases/dsl-array-map-indexing/0069/experr +++ b/test/cases/dsl-array-map-indexing/0069/experr @@ -1 +1 @@ -mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$'. +mlr: '[[[...]]]' is allowed on assignment left-hand sides only when immediately preceded by '$' diff --git a/test/cases/dsl-context-specific-validation/0001/experr b/test/cases/dsl-context-specific-validation/0001/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-context-specific-validation/0001/experr +++ b/test/cases/dsl-context-specific-validation/0001/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-context-specific-validation/0002/experr b/test/cases/dsl-context-specific-validation/0002/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-context-specific-validation/0002/experr +++ b/test/cases/dsl-context-specific-validation/0002/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-context-specific-validation/0003/experr b/test/cases/dsl-context-specific-validation/0003/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-context-specific-validation/0003/experr +++ b/test/cases/dsl-context-specific-validation/0003/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. 
+mlr: end blocks can only be at top level diff --git a/test/cases/dsl-context-specific-validation/0004/experr b/test/cases/dsl-context-specific-validation/0004/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-context-specific-validation/0004/experr +++ b/test/cases/dsl-context-specific-validation/0004/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-context-specific-validation/0005/experr b/test/cases/dsl-context-specific-validation/0005/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0005/experr +++ b/test/cases/dsl-context-specific-validation/0005/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0006/experr b/test/cases/dsl-context-specific-validation/0006/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0006/experr +++ b/test/cases/dsl-context-specific-validation/0006/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0007/experr b/test/cases/dsl-context-specific-validation/0007/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0007/experr +++ b/test/cases/dsl-context-specific-validation/0007/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. 
+mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0008/experr b/test/cases/dsl-context-specific-validation/0008/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0008/experr +++ b/test/cases/dsl-context-specific-validation/0008/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0009/experr b/test/cases/dsl-context-specific-validation/0009/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0009/experr +++ b/test/cases/dsl-context-specific-validation/0009/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0010/experr b/test/cases/dsl-context-specific-validation/0010/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0010/experr +++ b/test/cases/dsl-context-specific-validation/0010/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0011/experr b/test/cases/dsl-context-specific-validation/0011/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0011/experr +++ b/test/cases/dsl-context-specific-validation/0011/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. 
+mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0012/experr b/test/cases/dsl-context-specific-validation/0012/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0012/experr +++ b/test/cases/dsl-context-specific-validation/0012/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0013/experr b/test/cases/dsl-context-specific-validation/0013/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0013/experr +++ b/test/cases/dsl-context-specific-validation/0013/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0014/experr b/test/cases/dsl-context-specific-validation/0014/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-specific-validation/0014/experr +++ b/test/cases/dsl-context-specific-validation/0014/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-specific-validation/0015/experr b/test/cases/dsl-context-specific-validation/0015/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-specific-validation/0015/experr +++ b/test/cases/dsl-context-specific-validation/0015/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. 
+mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-specific-validation/0016/experr b/test/cases/dsl-context-specific-validation/0016/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-specific-validation/0016/experr +++ b/test/cases/dsl-context-specific-validation/0016/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-specific-validation/0017/experr b/test/cases/dsl-context-specific-validation/0017/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-specific-validation/0017/experr +++ b/test/cases/dsl-context-specific-validation/0017/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-specific-validation/0018/experr b/test/cases/dsl-context-specific-validation/0018/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-specific-validation/0018/experr +++ b/test/cases/dsl-context-specific-validation/0018/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-specific-validation/0024/experr b/test/cases/dsl-context-specific-validation/0024/experr index 0991fd583..326c8c258 100644 --- a/test/cases/dsl-context-specific-validation/0024/experr +++ b/test/cases/dsl-context-specific-validation/0024/experr @@ -1 +1 @@ -mlr: filter expressions must not also contain the "filter" keyword. 
+mlr: filter expressions must not also contain the "filter" keyword diff --git a/test/cases/dsl-context-specific-validation/0025/experr b/test/cases/dsl-context-specific-validation/0025/experr index 0991fd583..326c8c258 100644 --- a/test/cases/dsl-context-specific-validation/0025/experr +++ b/test/cases/dsl-context-specific-validation/0025/experr @@ -1 +1 @@ -mlr: filter expressions must not also contain the "filter" keyword. +mlr: filter expressions must not also contain the "filter" keyword diff --git a/test/cases/dsl-context-validation/0001/experr b/test/cases/dsl-context-validation/0001/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-context-validation/0001/experr +++ b/test/cases/dsl-context-validation/0001/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0002/experr b/test/cases/dsl-context-validation/0002/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-context-validation/0002/experr +++ b/test/cases/dsl-context-validation/0002/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0003/experr b/test/cases/dsl-context-validation/0003/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-context-validation/0003/experr +++ b/test/cases/dsl-context-validation/0003/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0004/experr b/test/cases/dsl-context-validation/0004/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-context-validation/0004/experr +++ b/test/cases/dsl-context-validation/0004/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. 
+mlr: end blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0005/experr b/test/cases/dsl-context-validation/0005/experr index 28403d108..105a572c5 100644 --- a/test/cases/dsl-context-validation/0005/experr +++ b/test/cases/dsl-context-validation/0005/experr @@ -1 +1 @@ -mlr: func blocks can only be at top level. +mlr: func blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0006/experr b/test/cases/dsl-context-validation/0006/experr index aa4913898..bb100265c 100644 --- a/test/cases/dsl-context-validation/0006/experr +++ b/test/cases/dsl-context-validation/0006/experr @@ -1 +1 @@ -mlr: subr blocks can only be at top level. +mlr: subr blocks can only be at top level diff --git a/test/cases/dsl-context-validation/0007/experr b/test/cases/dsl-context-validation/0007/experr index e8c49427b..5bfdbce14 100644 --- a/test/cases/dsl-context-validation/0007/experr +++ b/test/cases/dsl-context-validation/0007/experr @@ -1 +1 @@ -mlr: begin/end blocks cannot refer to records via $x, $*, etc. +mlr: begin/end blocks cannot refer to records via $x, $*, etc diff --git a/test/cases/dsl-context-validation/0008/experr b/test/cases/dsl-context-validation/0008/experr index 9979f9d90..62e02c192 100644 --- a/test/cases/dsl-context-validation/0008/experr +++ b/test/cases/dsl-context-validation/0008/experr @@ -1 +1 @@ -mlr: return statements are only valid within func/subr blocks. +mlr: return statements are only valid within func/subr blocks diff --git a/test/cases/dsl-context-validation/0009/experr b/test/cases/dsl-context-validation/0009/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-validation/0009/experr +++ b/test/cases/dsl-context-validation/0009/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. 
+mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-validation/0010/experr b/test/cases/dsl-context-validation/0010/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-validation/0010/experr +++ b/test/cases/dsl-context-validation/0010/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-validation/0011/experr b/test/cases/dsl-context-validation/0011/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-validation/0011/experr +++ b/test/cases/dsl-context-validation/0011/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-context-validation/0012/experr b/test/cases/dsl-context-validation/0012/experr index f70d4eb66..88c27a41c 100644 --- a/test/cases/dsl-context-validation/0012/experr +++ b/test/cases/dsl-context-validation/0012/experr @@ -1 +1 @@ -mlr: break statements are only valid within for/do/while loops. +mlr: break statements are only valid within for/do/while loops diff --git a/test/cases/dsl-empty-statements/0002/experr b/test/cases/dsl-empty-statements/0002/experr index 7d441dec9..ade7a9c32 100644 --- a/test/cases/dsl-empty-statements/0002/experr +++ b/test/cases/dsl-empty-statements/0002/experr @@ -1 +1 @@ -mlr: filter statement must not be empty. +mlr: filter statement must not be empty diff --git a/test/cases/dsl-env/0008/experr b/test/cases/dsl-env/0008/experr index c58cbb290..624b07c03 100644 --- a/test/cases/dsl-env/0008/experr +++ b/test/cases/dsl-env/0008/experr @@ -1 +1 @@ -mlr: ENV[...] cannot be indexed. +mlr: ENV[...] 
cannot be indexed diff --git a/test/cases/dsl-first-class-functions/sort-errors-04/experr b/test/cases/dsl-first-class-functions/sort-errors-04/experr index 41f95ed46..4a9e7afda 100644 --- a/test/cases/dsl-first-class-functions/sort-errors-04/experr +++ b/test/cases/dsl-first-class-functions/sort-errors-04/experr @@ -1,2 +1 @@ -mlr: function sort takes maximum argument count 2; got 4. - +mlr: function sort takes maximum argument count 2; got 4 diff --git a/test/cases/dsl-first-class-functions/sort-errors-05/experr b/test/cases/dsl-first-class-functions/sort-errors-05/experr index 41f95ed46..4a9e7afda 100644 --- a/test/cases/dsl-first-class-functions/sort-errors-05/experr +++ b/test/cases/dsl-first-class-functions/sort-errors-05/experr @@ -1,2 +1 @@ -mlr: function sort takes maximum argument count 2; got 4. - +mlr: function sort takes maximum argument count 2; got 4 diff --git a/test/cases/dsl-for-oosvar-loops/0005/experr b/test/cases/dsl-for-oosvar-loops/0005/experr index 69c0cca99..31b1667e3 100644 --- a/test/cases/dsl-for-oosvar-loops/0005/experr +++ b/test/cases/dsl-for-oosvar-loops/0005/experr @@ -1 +1 @@ -mlr: redefinition of variable k in the same scope. +mlr: redefinition of variable k in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0006/experr b/test/cases/dsl-for-oosvar-loops/0006/experr index 69c0cca99..31b1667e3 100644 --- a/test/cases/dsl-for-oosvar-loops/0006/experr +++ b/test/cases/dsl-for-oosvar-loops/0006/experr @@ -1 +1 @@ -mlr: redefinition of variable k in the same scope. +mlr: redefinition of variable k in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0007/experr b/test/cases/dsl-for-oosvar-loops/0007/experr index 5226846af..e26fb2c94 100644 --- a/test/cases/dsl-for-oosvar-loops/0007/experr +++ b/test/cases/dsl-for-oosvar-loops/0007/experr @@ -1 +1 @@ -mlr: redefinition of variable a in the same scope. 
+mlr: redefinition of variable a in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0008/experr b/test/cases/dsl-for-oosvar-loops/0008/experr index 5226846af..e26fb2c94 100644 --- a/test/cases/dsl-for-oosvar-loops/0008/experr +++ b/test/cases/dsl-for-oosvar-loops/0008/experr @@ -1 +1 @@ -mlr: redefinition of variable a in the same scope. +mlr: redefinition of variable a in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0009/experr b/test/cases/dsl-for-oosvar-loops/0009/experr index 014bc942f..0ec7e995f 100644 --- a/test/cases/dsl-for-oosvar-loops/0009/experr +++ b/test/cases/dsl-for-oosvar-loops/0009/experr @@ -1 +1 @@ -mlr: redefinition of variable b in the same scope. +mlr: redefinition of variable b in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0010/experr b/test/cases/dsl-for-oosvar-loops/0010/experr index 5226846af..e26fb2c94 100644 --- a/test/cases/dsl-for-oosvar-loops/0010/experr +++ b/test/cases/dsl-for-oosvar-loops/0010/experr @@ -1 +1 @@ -mlr: redefinition of variable a in the same scope. +mlr: redefinition of variable a in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0011/experr b/test/cases/dsl-for-oosvar-loops/0011/experr index 5226846af..e26fb2c94 100644 --- a/test/cases/dsl-for-oosvar-loops/0011/experr +++ b/test/cases/dsl-for-oosvar-loops/0011/experr @@ -1 +1 @@ -mlr: redefinition of variable a in the same scope. +mlr: redefinition of variable a in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0012/experr b/test/cases/dsl-for-oosvar-loops/0012/experr index 5226846af..e26fb2c94 100644 --- a/test/cases/dsl-for-oosvar-loops/0012/experr +++ b/test/cases/dsl-for-oosvar-loops/0012/experr @@ -1 +1 @@ -mlr: redefinition of variable a in the same scope. 
+mlr: redefinition of variable a in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0013/experr b/test/cases/dsl-for-oosvar-loops/0013/experr index 014bc942f..0ec7e995f 100644 --- a/test/cases/dsl-for-oosvar-loops/0013/experr +++ b/test/cases/dsl-for-oosvar-loops/0013/experr @@ -1 +1 @@ -mlr: redefinition of variable b in the same scope. +mlr: redefinition of variable b in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0014/experr b/test/cases/dsl-for-oosvar-loops/0014/experr index 014bc942f..0ec7e995f 100644 --- a/test/cases/dsl-for-oosvar-loops/0014/experr +++ b/test/cases/dsl-for-oosvar-loops/0014/experr @@ -1 +1 @@ -mlr: redefinition of variable b in the same scope. +mlr: redefinition of variable b in the same scope diff --git a/test/cases/dsl-for-oosvar-loops/0015/experr b/test/cases/dsl-for-oosvar-loops/0015/experr index 91d20ead9..b5ded9d15 100644 --- a/test/cases/dsl-for-oosvar-loops/0015/experr +++ b/test/cases/dsl-for-oosvar-loops/0015/experr @@ -1 +1 @@ -mlr: redefinition of variable c in the same scope. +mlr: redefinition of variable c in the same scope diff --git a/test/cases/dsl-for-variants/0006/experr b/test/cases/dsl-for-variants/0006/experr index 3a9ad9c69..0c3735274 100644 --- a/test/cases/dsl-for-variants/0006/experr +++ b/test/cases/dsl-for-variants/0006/experr @@ -1 +1 @@ -mlr: the triple-for continuation statement must be a bare boolean. +mlr: the triple-for continuation statement must be a bare boolean diff --git a/test/cases/dsl-for-variants/0008/experr b/test/cases/dsl-for-variants/0008/experr index 8c456cbf6..8e4a46c2e 100644 --- a/test/cases/dsl-for-variants/0008/experr +++ b/test/cases/dsl-for-variants/0008/experr @@ -1 +1 @@ -mlr: the final triple-for continuation statement must be a bare boolean. 
+mlr: the final triple-for continuation statement must be a bare boolean diff --git a/test/cases/dsl-for-variants/0009/experr b/test/cases/dsl-for-variants/0009/experr index aeff07c4a..f6e32effe 100644 --- a/test/cases/dsl-for-variants/0009/experr +++ b/test/cases/dsl-for-variants/0009/experr @@ -1 +1 @@ -mlr: the non-final triple-for continuation statements must be assignments. +mlr: the non-final triple-for continuation statements must be assignments diff --git a/test/cases/dsl-for-variants/0010/experr b/test/cases/dsl-for-variants/0010/experr index aeff07c4a..f6e32effe 100644 --- a/test/cases/dsl-for-variants/0010/experr +++ b/test/cases/dsl-for-variants/0010/experr @@ -1 +1 @@ -mlr: the non-final triple-for continuation statements must be assignments. +mlr: the non-final triple-for continuation statements must be assignments diff --git a/test/cases/dsl-forbind-typedecl/0002/experr b/test/cases/dsl-forbind-typedecl/0002/experr index 870536d9d..2d3ce42b4 100644 --- a/test/cases/dsl-forbind-typedecl/0002/experr +++ b/test/cases/dsl-forbind-typedecl/0002/experr @@ -1,2 +1 @@ mlr: couldn't assign variable float i from value int 0 - diff --git a/test/cases/dsl-forbind-typedecl/0004/experr b/test/cases/dsl-forbind-typedecl/0004/experr index f19d3e91b..350883003 100644 --- a/test/cases/dsl-forbind-typedecl/0004/experr +++ b/test/cases/dsl-forbind-typedecl/0004/experr @@ -1,2 +1 @@ mlr: couldn't assign variable int i from value float 1.50000000 - diff --git a/test/cases/dsl-forbind-typedecl/0005/experr b/test/cases/dsl-forbind-typedecl/0005/experr index 02ec7d367..bc277f19f 100644 --- a/test/cases/dsl-forbind-typedecl/0005/experr +++ b/test/cases/dsl-forbind-typedecl/0005/experr @@ -1,2 +1 @@ mlr: couldn't assign variable int i from value float 1.00000000 - diff --git a/test/cases/dsl-lashed-emitp-singles/0075/experr b/test/cases/dsl-lashed-emitp-singles/0075/experr index abe43d98a..c90eb2309 100644 --- a/test/cases/dsl-lashed-emitp-singles/0075/experr +++ 
b/test/cases/dsl-lashed-emitp-singles/0075/experr @@ -1 +1 @@ -mlr: lashed-emit node types must be local variables, field names, or oosvars; got map literal. +mlr: lashed-emit node types must be local variables, field names, or oosvars; got map literal diff --git a/test/cases/dsl-line-number-column-number/cond/experr b/test/cases/dsl-line-number-column-number/cond/experr index 7d9b1ed3f..0993a5d51 100644 --- a/test/cases/dsl-line-number-column-number/cond/experr +++ b/test/cases/dsl-line-number-column-number/cond/experr @@ -1 +1 @@ -mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 3. +mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 3 diff --git a/test/cases/dsl-line-number-column-number/do-while/experr b/test/cases/dsl-line-number-column-number/do-while/experr index 2ae50c49a..4b2d5dfa8 100644 --- a/test/cases/dsl-line-number-column-number/do-while/experr +++ b/test/cases/dsl-line-number-column-number/do-while/experr @@ -1 +1 @@ -mlr: conditional expression did not evaluate to boolean at DSL expression line 6 column 12. +mlr: conditional expression did not evaluate to boolean at DSL expression line 6 column 12 diff --git a/test/cases/dsl-line-number-column-number/for/experr b/test/cases/dsl-line-number-column-number/for/experr index a99b7edd3..0c385cf26 100644 --- a/test/cases/dsl-line-number-column-number/for/experr +++ b/test/cases/dsl-line-number-column-number/for/experr @@ -1 +1 @@ -mlr: for-loop continuation did not evaluate to boolean at DSL expression line 5 column 9. 
+mlr: for-loop continuation did not evaluate to boolean at DSL expression line 5 column 9 diff --git a/test/cases/dsl-line-number-column-number/if/experr b/test/cases/dsl-line-number-column-number/if/experr index 2c953ec07..8b4f058c6 100644 --- a/test/cases/dsl-line-number-column-number/if/experr +++ b/test/cases/dsl-line-number-column-number/if/experr @@ -1 +1 @@ -mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 7. +mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 7 diff --git a/test/cases/dsl-line-number-column-number/while/experr b/test/cases/dsl-line-number-column-number/while/experr index 977d70e73..eb2268d9f 100644 --- a/test/cases/dsl-line-number-column-number/while/experr +++ b/test/cases/dsl-line-number-column-number/while/experr @@ -1 +1 @@ -mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 10. +mlr: conditional expression did not evaluate to boolean at DSL expression line 5 column 10 diff --git a/test/cases/dsl-local-map-variable-typedecl/0003/experr b/test/cases/dsl-local-map-variable-typedecl/0003/experr index 74d6d8036..35f4a78af 100644 --- a/test/cases/dsl-local-map-variable-typedecl/0003/experr +++ b/test/cases/dsl-local-map-variable-typedecl/0003/experr @@ -1,2 +1 @@ mlr: couldn't assign variable map a from value int 2 - diff --git a/test/cases/dsl-local-map-variable-typedecl/0004/experr b/test/cases/dsl-local-map-variable-typedecl/0004/experr index 74d6d8036..35f4a78af 100644 --- a/test/cases/dsl-local-map-variable-typedecl/0004/experr +++ b/test/cases/dsl-local-map-variable-typedecl/0004/experr @@ -1,2 +1 @@ mlr: couldn't assign variable map a from value int 2 - diff --git a/test/cases/dsl-localvar-typedecl/0002/experr b/test/cases/dsl-localvar-typedecl/0002/experr index 7bf2edfba..80caea3e4 100644 --- a/test/cases/dsl-localvar-typedecl/0002/experr +++ b/test/cases/dsl-localvar-typedecl/0002/experr @@ -1,2 +1 @@ mlr: couldn't assign 
variable str a from value int 1 - diff --git a/test/cases/dsl-localvar-typedecl/0003/experr b/test/cases/dsl-localvar-typedecl/0003/experr index f2baa162e..89b17ca3e 100644 --- a/test/cases/dsl-localvar-typedecl/0003/experr +++ b/test/cases/dsl-localvar-typedecl/0003/experr @@ -1,2 +1 @@ mlr: couldn't assign variable int a from value string pan - diff --git a/test/cases/dsl-map-funcs/0003/experr b/test/cases/dsl-map-funcs/0003/experr index b46352b1b..aebcb76c3 100644 --- a/test/cases/dsl-map-funcs/0003/experr +++ b/test/cases/dsl-map-funcs/0003/experr @@ -1,2 +1 @@ -mlr: function mapexcept takes minimum argument count 1; got 0. - +mlr: function mapexcept takes minimum argument count 1; got 0 diff --git a/test/cases/dsl-map-funcs/0004/experr b/test/cases/dsl-map-funcs/0004/experr index cdbfc0f29..a177b4adc 100644 --- a/test/cases/dsl-map-funcs/0004/experr +++ b/test/cases/dsl-map-funcs/0004/experr @@ -1,2 +1 @@ -mlr: function mapselect takes minimum argument count 1; got 0. - +mlr: function mapselect takes minimum argument count 1; got 0 diff --git a/test/cases/dsl-mapsum-mapdiff-mapexcept/0003/experr b/test/cases/dsl-mapsum-mapdiff-mapexcept/0003/experr index b46352b1b..aebcb76c3 100644 --- a/test/cases/dsl-mapsum-mapdiff-mapexcept/0003/experr +++ b/test/cases/dsl-mapsum-mapdiff-mapexcept/0003/experr @@ -1,2 +1 @@ -mlr: function mapexcept takes minimum argument count 1; got 0. - +mlr: function mapexcept takes minimum argument count 1; got 0 diff --git a/test/cases/dsl-mapsum-mapdiff-mapexcept/0004/experr b/test/cases/dsl-mapsum-mapdiff-mapexcept/0004/experr index cdbfc0f29..a177b4adc 100644 --- a/test/cases/dsl-mapsum-mapdiff-mapexcept/0004/experr +++ b/test/cases/dsl-mapsum-mapdiff-mapexcept/0004/experr @@ -1,2 +1 @@ -mlr: function mapselect takes minimum argument count 1; got 0. 
- +mlr: function mapselect takes minimum argument count 1; got 0 diff --git a/test/cases/dsl-mapvar-assignments/0050/experr b/test/cases/dsl-mapvar-assignments/0050/experr index 7da47746e..c99edb2b9 100644 --- a/test/cases/dsl-mapvar-assignments/0050/experr +++ b/test/cases/dsl-mapvar-assignments/0050/experr @@ -1,2 +1 @@ mlr: couldn't assign variable map o from value int 1 - diff --git a/test/cases/dsl-mapvar-assignments/0056/experr b/test/cases/dsl-mapvar-assignments/0056/experr index 7da47746e..c99edb2b9 100644 --- a/test/cases/dsl-mapvar-assignments/0056/experr +++ b/test/cases/dsl-mapvar-assignments/0056/experr @@ -1,2 +1 @@ mlr: couldn't assign variable map o from value int 1 - diff --git a/test/cases/dsl-mapvars-udfs-subroutines/0006/experr b/test/cases/dsl-mapvars-udfs-subroutines/0006/experr index d99ee6563..e82e26c86 100644 --- a/test/cases/dsl-mapvars-udfs-subroutines/0006/experr +++ b/test/cases/dsl-mapvars-udfs-subroutines/0006/experr @@ -1 +1 @@ -mlr: couldn't assign variable int x from value float 0.34679014 +mlr: couldn't assign variable int x from value float 0.34679014 \ No newline at end of file diff --git a/test/cases/dsl-mapvars-udfs-subroutines/0008/experr b/test/cases/dsl-mapvars-udfs-subroutines/0008/experr index d99ee6563..e82e26c86 100644 --- a/test/cases/dsl-mapvars-udfs-subroutines/0008/experr +++ b/test/cases/dsl-mapvars-udfs-subroutines/0008/experr @@ -1 +1 @@ -mlr: couldn't assign variable int x from value float 0.34679014 +mlr: couldn't assign variable int x from value float 0.34679014 \ No newline at end of file diff --git a/test/cases/dsl-mapvars-udfs-subroutines/0010/experr b/test/cases/dsl-mapvars-udfs-subroutines/0010/experr index d99ee6563..e82e26c86 100644 --- a/test/cases/dsl-mapvars-udfs-subroutines/0010/experr +++ b/test/cases/dsl-mapvars-udfs-subroutines/0010/experr @@ -1 +1 @@ -mlr: couldn't assign variable int x from value float 0.34679014 +mlr: couldn't assign variable int x from value float 0.34679014 \ No newline at 
end of file diff --git a/test/cases/dsl-mapvars-udfs-subroutines/0011/experr b/test/cases/dsl-mapvars-udfs-subroutines/0011/experr index 5ee09d0dc..23c61d240 100644 --- a/test/cases/dsl-mapvars-udfs-subroutines/0011/experr +++ b/test/cases/dsl-mapvars-udfs-subroutines/0011/experr @@ -1 +1 @@ -mlr: couldn't assign variable int function return value from value absent (absent) +mlr: couldn't assign variable int function return value from value absent (absent) \ No newline at end of file diff --git a/test/cases/dsl-mapvars-udfs-subroutines/0012/experr b/test/cases/dsl-mapvars-udfs-subroutines/0012/experr index 75f9941ea..b52b23036 100644 --- a/test/cases/dsl-mapvars-udfs-subroutines/0012/experr +++ b/test/cases/dsl-mapvars-udfs-subroutines/0012/experr @@ -1,2 +1 @@ mlr: couldn't assign variable var b from value error (error) - diff --git a/test/cases/dsl-no-filter-in-filter/0002/experr b/test/cases/dsl-no-filter-in-filter/0002/experr index 0991fd583..326c8c258 100644 --- a/test/cases/dsl-no-filter-in-filter/0002/experr +++ b/test/cases/dsl-no-filter-in-filter/0002/experr @@ -1 +1 @@ -mlr: filter expressions must not also contain the "filter" keyword. +mlr: filter expressions must not also contain the "filter" keyword diff --git a/test/cases/dsl-parse/0112/experr b/test/cases/dsl-parse/0112/experr index c83d31e2e..5e6a6bb14 100644 --- a/test/cases/dsl-parse/0112/experr +++ b/test/cases/dsl-parse/0112/experr @@ -1 +1 @@ -mlr: int literal is not valid for unset statement. +mlr: int literal is not valid for unset statement diff --git a/test/cases/dsl-subr/0007/experr b/test/cases/dsl-subr/0007/experr index 57f65d06e..54423defa 100644 --- a/test/cases/dsl-subr/0007/experr +++ b/test/cases/dsl-subr/0007/experr @@ -1 +1 @@ -mlr: return statements in subr blocks must not return a value. 
+mlr: return statements in subr blocks must not return a value diff --git a/test/cases/dsl-subr/0015/experr b/test/cases/dsl-subr/0015/experr index 96372b783..374f3bf05 100644 --- a/test/cases/dsl-subr/0015/experr +++ b/test/cases/dsl-subr/0015/experr @@ -1 +1 @@ -mlr: subroutine named "s" has already been defined. +mlr: subroutine named "s" has already been defined diff --git a/test/cases/dsl-triple-for-loops/0015/experr b/test/cases/dsl-triple-for-loops/0015/experr index 3a9ad9c69..0c3735274 100644 --- a/test/cases/dsl-triple-for-loops/0015/experr +++ b/test/cases/dsl-triple-for-loops/0015/experr @@ -1 +1 @@ -mlr: the triple-for continuation statement must be a bare boolean. +mlr: the triple-for continuation statement must be a bare boolean diff --git a/test/cases/dsl-triple-for-loops/0016/experr b/test/cases/dsl-triple-for-loops/0016/experr index aeff07c4a..f6e32effe 100644 --- a/test/cases/dsl-triple-for-loops/0016/experr +++ b/test/cases/dsl-triple-for-loops/0016/experr @@ -1 +1 @@ -mlr: the non-final triple-for continuation statements must be assignments. 
+mlr: the non-final triple-for continuation statements must be assignments diff --git a/test/cases/dsl-typedecl/0004/experr b/test/cases/dsl-typedecl/0004/experr index 698f98f19..d773f96b4 100644 --- a/test/cases/dsl-typedecl/0004/experr +++ b/test/cases/dsl-typedecl/0004/experr @@ -1,2 +1 @@ mlr: couldn't assign variable str x from value int 3 - diff --git a/test/cases/dsl-typedecl/0005/experr b/test/cases/dsl-typedecl/0005/experr index d1a782622..a9603769f 100644 --- a/test/cases/dsl-typedecl/0005/experr +++ b/test/cases/dsl-typedecl/0005/experr @@ -1,2 +1 @@ mlr: couldn't assign variable arr x from value int 3 - diff --git a/test/cases/dsl-typedecl/0009/experr b/test/cases/dsl-typedecl/0009/experr index d773f96b4..4294ddc2e 100644 --- a/test/cases/dsl-typedecl/0009/experr +++ b/test/cases/dsl-typedecl/0009/experr @@ -1 +1 @@ -mlr: couldn't assign variable str x from value int 3 +mlr: couldn't assign variable str x from value int 3 \ No newline at end of file diff --git a/test/cases/dsl-typedecl/0010/experr b/test/cases/dsl-typedecl/0010/experr index a9603769f..55d3b8e4d 100644 --- a/test/cases/dsl-typedecl/0010/experr +++ b/test/cases/dsl-typedecl/0010/experr @@ -1 +1 @@ -mlr: couldn't assign variable arr x from value int 3 +mlr: couldn't assign variable arr x from value int 3 \ No newline at end of file diff --git a/test/cases/dsl-typedecl/0014/experr b/test/cases/dsl-typedecl/0014/experr index 83d5d73ae..84cc0ab08 100644 --- a/test/cases/dsl-typedecl/0014/experr +++ b/test/cases/dsl-typedecl/0014/experr @@ -1 +1 @@ -mlr: couldn't assign variable str function return value from value int 6 +mlr: couldn't assign variable str function return value from value int 6 \ No newline at end of file diff --git a/test/cases/dsl-typedecl/0015/experr b/test/cases/dsl-typedecl/0015/experr index 16aa9509e..2e3bd9694 100644 --- a/test/cases/dsl-typedecl/0015/experr +++ b/test/cases/dsl-typedecl/0015/experr @@ -1 +1 @@ -mlr: couldn't assign variable arr function return value 
from value int 6 +mlr: couldn't assign variable arr function return value from value int 6 \ No newline at end of file diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0004/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0004/experr index 57f65d06e..54423defa 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0004/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0004/experr @@ -1 +1 @@ -mlr: return statements in subr blocks must not return a value. +mlr: return statements in subr blocks must not return a value diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0005/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0005/experr index 6ffaf460f..8a639f9e1 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0005/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0005/experr @@ -1 +1 @@ -mlr: return statements in func blocks must return a value. +mlr: return statements in func blocks must return a value diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0008/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0008/experr index 7c7da7e52..17bba472c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0008/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0008/experr @@ -1 +1 @@ -mlr: function named "log" must not override a built-in function of the same name. +mlr: function named "log" must not override a built-in function of the same name diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0010/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0010/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0010/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0010/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. 
+mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0011/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0011/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0011/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0011/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0012/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0012/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0012/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0012/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0013/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0013/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0013/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0013/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0014/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0014/experr index 28403d108..105a572c5 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0014/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0014/experr @@ -1 +1 @@ -mlr: func blocks can only be at top level. 
+mlr: func blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0015/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0015/experr index aa4913898..bb100265c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0015/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0015/experr @@ -1 +1 @@ -mlr: subr blocks can only be at top level. +mlr: subr blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0016/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0016/experr index 28403d108..105a572c5 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0016/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0016/experr @@ -1 +1 @@ -mlr: func blocks can only be at top level. +mlr: func blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0017/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0017/experr index aa4913898..bb100265c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0017/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0017/experr @@ -1 +1 @@ -mlr: subr blocks can only be at top level. +mlr: subr blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0018/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0018/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0018/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0018/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. 
+mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0019/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0019/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0019/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0019/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0020/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0020/experr index 153ac97b2..9ccf96101 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0020/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0020/experr @@ -1 +1 @@ -mlr: begin blocks can only be at top level. +mlr: begin blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0021/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0021/experr index 1bf2e1cd8..2e5c850a0 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0021/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0021/experr @@ -1 +1 @@ -mlr: end blocks can only be at top level. +mlr: end blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0022/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0022/experr index 28403d108..105a572c5 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0022/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0022/experr @@ -1 +1 @@ -mlr: func blocks can only be at top level. 
+mlr: func blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0023/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0023/experr index aa4913898..bb100265c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0023/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0023/experr @@ -1 +1 @@ -mlr: subr blocks can only be at top level. +mlr: subr blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0024/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0024/experr index 28403d108..105a572c5 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0024/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0024/experr @@ -1 +1 @@ -mlr: func blocks can only be at top level. +mlr: func blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0025/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0025/experr index aa4913898..bb100265c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0025/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0025/experr @@ -1 +1 @@ -mlr: subr blocks can only be at top level. +mlr: subr blocks can only be at top level diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0026/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0026/experr index 7c7da7e52..17bba472c 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0026/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0026/experr @@ -1 +1 @@ -mlr: function named "log" must not override a built-in function of the same name. 
+mlr: function named "log" must not override a built-in function of the same name diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0027/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0027/experr index c1c50ef4d..e7f7b7522 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0027/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0027/experr @@ -1 +1 @@ -mlr: function named "f" has already been defined. +mlr: function named "f" has already been defined diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0028/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0028/experr index 96372b783..374f3bf05 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0028/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0028/experr @@ -1 +1 @@ -mlr: subroutine named "s" has already been defined. +mlr: subroutine named "s" has already been defined diff --git a/test/cases/dsl-user-defined-functions-and-subroutines/0029/experr b/test/cases/dsl-user-defined-functions-and-subroutines/0029/experr index 96372b783..374f3bf05 100644 --- a/test/cases/dsl-user-defined-functions-and-subroutines/0029/experr +++ b/test/cases/dsl-user-defined-functions-and-subroutines/0029/experr @@ -1 +1 @@ -mlr: subroutine named "s" has already been defined. +mlr: subroutine named "s" has already been defined diff --git a/test/cases/io-spec-tsv/0004/experr b/test/cases/io-spec-tsv/0004/experr index 77ead78b2..a996791ac 100644 --- a/test/cases/io-spec-tsv/0004/experr +++ b/test/cases/io-spec-tsv/0004/experr @@ -1,2 +1 @@ -mlr: mlr: TSV header/data length mismatch 1 != 0 at filename test/cases/io-spec-tsv/0004/single-column-with-blank.tsv line 4. -. +mlr: mlr: TSV header/data length mismatch 1 != 0 at filename test/cases/io-spec-tsv/0004/single-column-with-blank.tsv line 4. 
diff --git a/test/cases/verb-label/0009/experr b/test/cases/verb-label/0009/experr index 615cfe2bf..0933a079d 100644 --- a/test/cases/verb-label/0009/experr +++ b/test/cases/verb-label/0009/experr @@ -1 +1 @@ -mlr label: labels must be unique; got duplicate "d" +mlr label: labels must be unique; got duplicate "d" \ No newline at end of file diff --git a/test/cases/verb-stats1/0018/experr b/test/cases/verb-stats1/0018/experr index 03f44410a..43dafc363 100644 --- a/test/cases/verb-stats1/0018/experr +++ b/test/cases/verb-stats1/0018/experr @@ -1 +1 @@ -mlr stats1: accumulator "nonesuch" not found. +mlr stats1: accumulator "nonesuch" not found From cc8a3c4b4e5813b9df50229733bded0ff7bf0f87 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 11:50:15 -0400 Subject: [PATCH 266/456] Static-check fixes from @lespea #1657, batch 2/n (#1704) --- pkg/bifs/arithmetic.go | 4 ++-- pkg/bifs/cmp.go | 2 +- pkg/bifs/types.go | 6 +++--- pkg/climain/mlrcli_parse.go | 2 +- pkg/dsl/cst/builtin_function_manager.go | 2 +- pkg/dsl/cst/builtin_functions.go | 2 +- pkg/dsl/cst/cond.go | 2 +- pkg/dsl/cst/for.go | 2 +- pkg/dsl/cst/if.go | 2 +- pkg/dsl/cst/while.go | 4 ++-- pkg/lib/util.go | 2 +- pkg/mlrval/mlrval_is.go | 4 ++-- pkg/mlrval/mlrval_new.go | 2 +- pkg/mlrval/mlrval_output.go | 2 +- pkg/terminals/repl/verbs.go | 2 +- pkg/transformers/aaa_chain_transformer.go | 2 +- pkg/transformers/put_or_filter.go | 2 +- 17 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pkg/bifs/arithmetic.go b/pkg/bifs/arithmetic.go index 1edf7cee4..1ce24544a 100644 --- a/pkg/bifs/arithmetic.go +++ b/pkg/bifs/arithmetic.go @@ -834,7 +834,7 @@ func min_i_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // a=F | min=a min=a // a=T | min=b min=b func min_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - if input1.AcquireBoolValue() == false { + if !input1.AcquireBoolValue() { return input1 } else { return input2 @@ -1004,7 +1004,7 @@ func max_i_ii(input1, input2 *mlrval.Mlrval) 
*mlrval.Mlrval { // a=F | max=a max=b // a=T | max=a max=b func max_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - if input2.AcquireBoolValue() == false { + if !input2.AcquireBoolValue() { return input1 } else { return input2 diff --git a/pkg/bifs/cmp.go b/pkg/bifs/cmp.go index b4603d2a8..3be517990 100644 --- a/pkg/bifs/cmp.go +++ b/pkg/bifs/cmp.go @@ -246,7 +246,7 @@ func eq_b_aa(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { for i := range a { eq := BIF_equals(a[i], b[i]) lib.InternalCodingErrorIf(eq.Type() != mlrval.MT_BOOL) - if eq.AcquireBoolValue() == false { + if !eq.AcquireBoolValue() { return mlrval.FALSE } } diff --git a/pkg/bifs/types.go b/pkg/bifs/types.go index a4eb1f230..d3e8d61ef 100644 --- a/pkg/bifs/types.go +++ b/pkg/bifs/types.go @@ -37,7 +37,7 @@ func float_to_int(input1 *mlrval.Mlrval) *mlrval.Mlrval { } func bool_to_int(input1 *mlrval.Mlrval) *mlrval.Mlrval { - if input1.AcquireBoolValue() == true { + if input1.AcquireBoolValue() { return mlrval.FromInt(1) } else { return mlrval.FromInt(0) @@ -92,7 +92,7 @@ func float_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } func bool_to_int_with_base(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - if input1.AcquireBoolValue() == true { + if input1.AcquireBoolValue() { return mlrval.FromInt(1) } else { return mlrval.FromInt(0) @@ -146,7 +146,7 @@ func int_to_float(input1 *mlrval.Mlrval) *mlrval.Mlrval { } func bool_to_float(input1 *mlrval.Mlrval) *mlrval.Mlrval { - if input1.AcquireBoolValue() == true { + if input1.AcquireBoolValue() { return mlrval.FromFloat(1.0) } else { return mlrval.FromFloat(0.0) diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index 364195824..f582cd51c 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -295,7 +295,7 @@ func parseCommandLinePassTwo( rc := cli.FLAG_TABLE.Parse(args, argc, &argi, options) // Should have been parsed OK in pass one. 
- lib.InternalCodingErrorIf(rc != true) + lib.InternalCodingErrorIf(!rc) // Make sure we consumed the entire flag sequence as parsed by pass one. lib.InternalCodingErrorIf(argi != argc) } diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index e35ee8b0b..4a1d83bfa 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -2595,7 +2595,7 @@ func (manager *BuiltinFunctionManager) getBuiltinFunctionClasses() []string { classesList := make([]string, 0) for _, builtinFunctionInfo := range *manager.lookupTable { class := string(builtinFunctionInfo.class) - if classesSeen[class] == false { + if !classesSeen[class] { classesList = append(classesList, class) classesSeen[class] = true } diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index 3b9fa12f3..e3558d1cd 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -945,7 +945,7 @@ func (node *StandardTernaryOperatorNode) Evaluate( } // Short-circuit: defer evaluation unless needed - if boolValue == true { + if boolValue { return node.b.Evaluate(state) } else { return node.c.Evaluate(state) diff --git a/pkg/dsl/cst/cond.go b/pkg/dsl/cst/cond.go index 53532d925..52dd30dea 100644 --- a/pkg/dsl/cst/cond.go +++ b/pkg/dsl/cst/cond.go @@ -66,7 +66,7 @@ func (node *CondBlockNode) Execute( ) } - if boolValue == true { + if boolValue { blockExitPayload, err := node.statementBlockNode.Execute(state) if err != nil { return nil, err diff --git a/pkg/dsl/cst/for.go b/pkg/dsl/cst/for.go index 204505aba..074f3b500 100644 --- a/pkg/dsl/cst/for.go +++ b/pkg/dsl/cst/for.go @@ -902,7 +902,7 @@ func (node *TripleForLoopNode) Execute(state *runtime.State) (*BlockExitPayload, dsl.TokenToLocationInfo(node.continuationExpressionToken), ) } - if boolValue == false { + if !boolValue { break } } diff --git a/pkg/dsl/cst/if.go b/pkg/dsl/cst/if.go index 3396ddd19..b85f68119 100644 --- 
a/pkg/dsl/cst/if.go +++ b/pkg/dsl/cst/if.go @@ -134,7 +134,7 @@ func (node *IfChainNode) Execute(state *runtime.State) (*BlockExitPayload, error dsl.TokenToLocationInfo(ifItem.conditionToken), ) } - if boolValue == true { + if boolValue { blockExitPayload, err := ifItem.statementBlockNode.Execute(state) if err != nil { return nil, err diff --git a/pkg/dsl/cst/while.go b/pkg/dsl/cst/while.go index 2e2fd5f12..ccdde0052 100644 --- a/pkg/dsl/cst/while.go +++ b/pkg/dsl/cst/while.go @@ -64,7 +64,7 @@ func (node *WhileLoopNode) Execute(state *runtime.State) (*BlockExitPayload, err dsl.TokenToLocationInfo(node.conditionToken), ) } - if boolValue != true { + if !boolValue { break } blockExitPayload, err := node.statementBlockNode.Execute(state) @@ -161,7 +161,7 @@ func (node *DoWhileLoopNode) Execute(state *runtime.State) (*BlockExitPayload, e dsl.TokenToLocationInfo(node.conditionToken), ) } - if boolValue == false { + if !boolValue { break } } diff --git a/pkg/lib/util.go b/pkg/lib/util.go index 37e90ce46..f37194ccb 100644 --- a/pkg/lib/util.go +++ b/pkg/lib/util.go @@ -14,7 +14,7 @@ func BooleanXOR(a, b bool) bool { } func BoolToInt(b bool) int64 { - if b == false { + if !b { return 0 } else { return 1 diff --git a/pkg/mlrval/mlrval_is.go b/pkg/mlrval/mlrval_is.go index 7f438871f..0cdfdadde 100644 --- a/pkg/mlrval/mlrval_is.go +++ b/pkg/mlrval/mlrval_is.go @@ -112,10 +112,10 @@ func (mv *Mlrval) IsBool() bool { } func (mv *Mlrval) IsTrue() bool { - return mv.Type() == MT_BOOL && mv.intf.(bool) == true + return mv.Type() == MT_BOOL && mv.intf.(bool) } func (mv *Mlrval) IsFalse() bool { - return mv.Type() == MT_BOOL && mv.intf.(bool) == false + return mv.Type() == MT_BOOL && !mv.intf.(bool) } func (mv *Mlrval) IsArray() bool { diff --git a/pkg/mlrval/mlrval_new.go b/pkg/mlrval/mlrval_new.go index bcad5590b..9d63989b1 100644 --- a/pkg/mlrval/mlrval_new.go +++ b/pkg/mlrval/mlrval_new.go @@ -289,7 +289,7 @@ func FromPrevalidatedFloatString(input string, floatval float64) 
*Mlrval { } func FromBool(input bool) *Mlrval { - if input == true { + if input { return TRUE } else { return FALSE diff --git a/pkg/mlrval/mlrval_output.go b/pkg/mlrval/mlrval_output.go index d864806b3..a72760998 100644 --- a/pkg/mlrval/mlrval_output.go +++ b/pkg/mlrval/mlrval_output.go @@ -87,7 +87,7 @@ func (mv *Mlrval) setPrintRep() { mv.printrep = strconv.FormatFloat(mv.intf.(float64), 'f', -1, 64) case MT_BOOL: - if mv.intf.(bool) == true { + if mv.intf.(bool) { mv.printrep = "true" } else { mv.printrep = "false" diff --git a/pkg/terminals/repl/verbs.go b/pkg/terminals/repl/verbs.go index bbb1a36b2..3b901b78b 100644 --- a/pkg/terminals/repl/verbs.go +++ b/pkg/terminals/repl/verbs.go @@ -566,7 +566,7 @@ func skipOrProcessRecord( repl.runtimeState.Update(recordAndContext.Record, &recordAndContext.Context) // End-of-stream marker - if recordAndContext.EndOfStream == true { + if recordAndContext.EndOfStream { fmt.Println("End of record stream") repl.readerChannel = nil repl.errorChannel = nil diff --git a/pkg/transformers/aaa_chain_transformer.go b/pkg/transformers/aaa_chain_transformer.go index 6b67b03b7..a4137b763 100644 --- a/pkg/transformers/aaa_chain_transformer.go +++ b/pkg/transformers/aaa_chain_transformer.go @@ -265,7 +265,7 @@ func runSingleTransformerBatch( // the output channel without involving the record-transformer, since // there is no record to be transformed. - if inputRecordAndContext.EndOfStream == true || inputRecordAndContext.Record != nil { + if inputRecordAndContext.EndOfStream || inputRecordAndContext.Record != nil { recordTransformer.Transform( inputRecordAndContext, outputRecordsAndContexts, diff --git a/pkg/transformers/put_or_filter.go b/pkg/transformers/put_or_filter.go index 974903345..648595ce2 100644 --- a/pkg/transformers/put_or_filter.go +++ b/pkg/transformers/put_or_filter.go @@ -542,7 +542,7 @@ func (tr *TransformerPut) Transform( // If there were no input records then we never executed the // begin-blocks. Do so now. 
- if tr.executedBeginBlocks == false { + if !tr.executedBeginBlocks { err := tr.cstRootNode.ExecuteBeginBlocks(tr.runtimeState) if err != nil { fmt.Fprintln(os.Stderr, err) From 04a9b9decdd9dd0fa9e5243f1159fcc44f03222d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 11:55:38 -0400 Subject: [PATCH 267/456] Static-check fixes from @lespea #1657, batch 3/n (#1705) * Static-check fixes from @lespea #1657, batch 2/n * Static-check fixes from @lespea #1657, batch 3/n --- pkg/output/file_output_handlers.go | 6 ++---- pkg/stream/stream.go | 7 ++----- pkg/terminals/repl/verbs.go | 11 +++-------- pkg/transformers/seqgen.go | 2 -- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/pkg/output/file_output_handlers.go b/pkg/output/file_output_handlers.go index d47617cc0..31f6b89a0 100644 --- a/pkg/output/file_output_handlers.go +++ b/pkg/output/file_output_handlers.go @@ -399,13 +399,11 @@ func (handler *FileOutputHandler) Close() (retval error) { done := false for !done { select { - case _ = <-handler.recordErroredChannel: + case <-handler.recordErroredChannel: done = true retval = errors.New("exiting due to data error") // details already printed - break - case _ = <-handler.recordDoneChannel: + case <-handler.recordDoneChannel: done = true - break } } } diff --git a/pkg/stream/stream.go b/pkg/stream/stream.go index 84f096faf..1aafe95c9 100644 --- a/pkg/stream/stream.go +++ b/pkg/stream/stream.go @@ -95,13 +95,10 @@ func Stream( select { case ierr := <-inputErrorChannel: retval = ierr - break - case _ = <-dataProcessingErrorChannel: + case <-dataProcessingErrorChannel: retval = errors.New("exiting due to data error") // details already printed - break - case _ = <-doneWritingChannel: + case <-doneWritingChannel: done = true - break } } diff --git a/pkg/terminals/repl/verbs.go b/pkg/terminals/repl/verbs.go index 3b901b78b..c2b992746 100644 --- a/pkg/terminals/repl/verbs.go +++ b/pkg/terminals/repl/verbs.go @@ -442,11 +442,8 @@ func 
handleSkipOrProcessN(repl *Repl, n int64, processingNotSkipping bool) { for i := int64(1); i <= n; i++ { select { case recordsAndContexts = <-repl.readerChannel: - break case err = <-repl.errorChannel: - break - case _ = <-repl.appSignalNotificationChannel: // user typed control-C - break + case <-repl.appSignalNotificationChannel: // user typed control-C } if err != nil { @@ -505,13 +502,11 @@ func handleSkipOrProcessUntil(repl *Repl, dslString string, processingNotSkippin doubleBreak := false select { case recordsAndContexts = <-repl.readerChannel: - break case err = <-repl.errorChannel: - break - case _ = <-repl.appSignalNotificationChannel: // user typed control-C + case <-repl.appSignalNotificationChannel: // user typed control-C doubleBreak = true - break } + if doubleBreak { break } diff --git a/pkg/transformers/seqgen.go b/pkg/transformers/seqgen.go index 560df4ed3..9d77e7173 100644 --- a/pkg/transformers/seqgen.go +++ b/pkg/transformers/seqgen.go @@ -192,9 +192,7 @@ func (tr *TransformerSeqgen) Transform( case b := <-inputDownstreamDoneChannel: outputDownstreamDoneChannel <- b keepGoing = false - break default: - break } if !keepGoing { break From 8c791f5466384692c12fe4dd2e00b304ae8a4b9a Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 12:00:25 -0400 Subject: [PATCH 268/456] Static-check fixes from @lespea #1657, batch 4/n (#1706) * Static-check fixes from @lespea #1657, batch 2/n * Static-check fixes from @lespea #1657, batch 3/n * Static-check fixes from @lespea #1657, batch 4/n --- docs/src/miller-as-library/main3.go | 4 ++-- pkg/bifs/arithmetic.go | 4 ++-- pkg/bifs/regex.go | 6 +++--- pkg/bifs/stats.go | 4 ++-- pkg/input/line_reader.go | 2 +- pkg/input/pseudo_reader_gen.go | 2 +- pkg/input/record_reader_csv.go | 2 +- pkg/input/record_reader_json.go | 2 +- pkg/input/record_reader_pprint.go | 4 ++-- pkg/input/record_reader_xtab.go | 2 +- pkg/mlrval/mlrval_output.go | 2 +- pkg/platform/getargs_windows.go | 2 +- pkg/terminals/repl/session.go 
| 2 +- pkg/transformers/tee.go | 2 +- 14 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/src/miller-as-library/main3.go b/docs/src/miller-as-library/main3.go index 23a400453..617488c33 100644 --- a/docs/src/miller-as-library/main3.go +++ b/docs/src/miller-as-library/main3.go @@ -89,10 +89,10 @@ func convert_csv_to_json(fileNames []string) error { case ierr := <-inputErrorChannel: retval = ierr break - case _ = <-dataProcessingErrorChannel: + case <-dataProcessingErrorChannel: retval = errors.New("exiting due to data error") // details already printed break - case _ = <-doneWritingChannel: + case <-doneWritingChannel: done = true break } diff --git a/pkg/bifs/arithmetic.go b/pkg/bifs/arithmetic.go index 1ce24544a..bffcc5309 100644 --- a/pkg/bifs/arithmetic.go +++ b/pkg/bifs/arithmetic.go @@ -946,7 +946,7 @@ func BIF_minlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { } // Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation. retval := lib.UTF8Strlen(mlrvals[0].OriginalString()) - for i, _ := range mlrvals { + for i := range mlrvals { clen := lib.UTF8Strlen(mlrvals[i].OriginalString()) if clen < retval { retval = clen @@ -1116,7 +1116,7 @@ func BIF_maxlen_variadic(mlrvals []*mlrval.Mlrval) *mlrval.Mlrval { } // Do the bulk arithmetic on native ints not Mlrvals, to avoid unnecessary allocation. 
retval := lib.UTF8Strlen(mlrvals[0].OriginalString()) - for i, _ := range mlrvals { + for i := range mlrvals { clen := lib.UTF8Strlen(mlrvals[i].OriginalString()) if clen > retval { retval = clen diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go index 011c21ac2..2095003a6 100644 --- a/pkg/bifs/regex.go +++ b/pkg/bifs/regex.go @@ -147,7 +147,7 @@ func BIF_strmatchx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { captures_array := make([]*mlrval.Mlrval, len(captures)) if len(captures) > 0 { - for i, _ := range captures { + for i := range captures { if i == 0 { results.PutReference("full_capture", mlrval.FromString(captures[i])) } else { @@ -156,7 +156,7 @@ func BIF_strmatchx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } starts_array := make([]*mlrval.Mlrval, len(starts)) - for i, _ := range starts { + for i := range starts { if i == 0 { results.PutReference("full_start", mlrval.FromInt(int64(starts[i]))) } else { @@ -165,7 +165,7 @@ func BIF_strmatchx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { } ends_array := make([]*mlrval.Mlrval, len(ends)) - for i, _ := range ends { + for i := range ends { if i == 0 { results.PutReference("full_end", mlrval.FromInt(int64(ends[i]))) } else { diff --git a/pkg/bifs/stats.go b/pkg/bifs/stats.go index d7bd3f106..bc9bc6e43 100644 --- a/pkg/bifs/stats.go +++ b/pkg/bifs/stats.go @@ -636,7 +636,7 @@ func bif_percentiles_impl( outputs := make([]*mlrval.Mlrval, len(ps)) - for i, _ := range ps { + for i := range ps { p, ok := ps[i].GetNumericToFloatValue() if !ok { outputs[i] = type_error_named_argument(funcname, "numeric", "percentile", ps[i]) @@ -655,7 +655,7 @@ func bif_percentiles_impl( return mlrval.FromArray(outputs) } else { m := mlrval.NewMlrmap() - for i, _ := range ps { + for i := range ps { sp := ps[i].String() m.PutCopy(sp, outputs[i]) } diff --git a/pkg/input/line_reader.go b/pkg/input/line_reader.go index 663178c5d..b1f965307 100644 --- a/pkg/input/line_reader.go +++ b/pkg/input/line_reader.go @@ -201,7 +201,7 @@ func 
channelizedLineReader( // quickly, as it should. if i%recordsPerBatch == 0 { select { - case _ = <-downstreamDoneChannel: + case <-downstreamDoneChannel: done = true break default: diff --git a/pkg/input/pseudo_reader_gen.go b/pkg/input/pseudo_reader_gen.go index fa949e01a..495a6dab7 100644 --- a/pkg/input/pseudo_reader_gen.go +++ b/pkg/input/pseudo_reader_gen.go @@ -96,7 +96,7 @@ func (reader *PseudoReaderGen) process( // avoid goroutine-scheduler thrash. eof := false select { - case _ = <-downstreamDoneChannel: + case <-downstreamDoneChannel: eof = true break default: diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index 20c1fd15d..6ed07250d 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -158,7 +158,7 @@ func channelizedCSVRecordScanner( // quickly, as it should. if i%recordsPerBatch == 0 { select { - case _ = <-downstreamDoneChannel: + case <-downstreamDoneChannel: done = true break default: diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 096e506fb..52844025e 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -99,7 +99,7 @@ func (reader *RecordReaderJSON) processHandle( i++ if i%recordsPerBatch == 0 { select { - case _ = <-downstreamDoneChannel: + case <-downstreamDoneChannel: eof = true break default: diff --git a/pkg/input/record_reader_pprint.go b/pkg/input/record_reader_pprint.go index b83710f7b..aad87769c 100644 --- a/pkg/input/record_reader_pprint.go +++ b/pkg/input/record_reader_pprint.go @@ -227,7 +227,7 @@ func getRecordBatchExplicitPprintHeader( continue } fields := make([]string, npad-2) - for i, _ := range paddedFields { + for i := range paddedFields { if i == 0 || i == npad-1 { continue } @@ -361,7 +361,7 @@ func getRecordBatchImplicitPprintHeader( paddedFields := reader.fieldSplitter.Split(line) npad := len(paddedFields) fields := make([]string, npad-2) - for i, _ := range paddedFields { + for i := range 
paddedFields { if i == 0 || i == npad-1 { continue } diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go index b108f771d..5d1530007 100644 --- a/pkg/input/record_reader_xtab.go +++ b/pkg/input/record_reader_xtab.go @@ -202,7 +202,7 @@ func channelizedStanzaScanner( // quickly, as it should. if numStanzasSeen%recordsPerBatch == 0 { select { - case _ = <-downstreamDoneChannel: + case <-downstreamDoneChannel: done = true break default: diff --git a/pkg/mlrval/mlrval_output.go b/pkg/mlrval/mlrval_output.go index a72760998..b6fd84e0f 100644 --- a/pkg/mlrval/mlrval_output.go +++ b/pkg/mlrval/mlrval_output.go @@ -120,7 +120,7 @@ func (mv *Mlrval) StringifyValuesRecursively() { switch mv.mvtype { case MT_ARRAY: - for i, _ := range mv.intf.([]*Mlrval) { + for i := range mv.intf.([]*Mlrval) { mv.intf.([]*Mlrval)[i].StringifyValuesRecursively() } diff --git a/pkg/platform/getargs_windows.go b/pkg/platform/getargs_windows.go index 4349e4346..7a2f1af3d 100644 --- a/pkg/platform/getargs_windows.go +++ b/pkg/platform/getargs_windows.go @@ -79,7 +79,7 @@ func GetArgs() []string { //printArgs(retargs, "NEW") globbed := make([]string, 0) - for i, _ := range retargs { + for i := range retargs { // Expand things like *.csv matches, err := filepath.Glob(retargs[i]) if matches != nil && err == nil { diff --git a/pkg/terminals/repl/session.go b/pkg/terminals/repl/session.go index 27bd3896a..2f25beb31 100644 --- a/pkg/terminals/repl/session.go +++ b/pkg/terminals/repl/session.go @@ -176,7 +176,7 @@ func (repl *Repl) handleSession(istream *os.File) error { doneDraining := false for { select { - case _ = <-repl.appSignalNotificationChannel: + case <-repl.appSignalNotificationChannel: line = "" // Ignore any partially-entered line -- a ^C should do that default: doneDraining = true diff --git a/pkg/transformers/tee.go b/pkg/transformers/tee.go index bff0065ea..e5f5413ca 100644 --- a/pkg/transformers/tee.go +++ b/pkg/transformers/tee.go @@ -183,7 +183,7 @@ func (tr 
*TransformerTee) Transform( // But 'mlr cut -f foo then tee bar.txt then head -n 10' -- one does expect // bar.txt to have all the output from cut. select { - case _ = <-inputDownstreamDoneChannel: + case <-inputDownstreamDoneChannel: // Do not write this to the coutputDownstreamDoneChannel, as other transformers do break default: From 02bd5344b9002a83979edf2183dcbf659d0f79f4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 12:05:48 -0400 Subject: [PATCH 269/456] Static-check fixes from @lespea #1657, batch 5/n (#1707) * Static-check fixes from @lespea #1657, batch 2/n * Static-check fixes from @lespea #1657, batch 3/n * Static-check fixes from @lespea #1657, batch 4/n * Static-check fixes from @lespea #1657, batch 5/n --- pkg/dsl/cst/udf.go | 3 +++ pkg/input/pseudo_reader_gen.go | 1 - pkg/lib/halfpipe.go | 6 +++++ pkg/mlrval/mlrval_get_test.go | 44 +++++++++++++++++----------------- pkg/mlrval/mlrval_json.go | 5 ++-- 5 files changed, 34 insertions(+), 25 deletions(-) diff --git a/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go index e1544d7b2..f3550669e 100644 --- a/pkg/dsl/cst/udf.go +++ b/pkg/dsl/cst/udf.go @@ -495,6 +495,9 @@ func (root *RootNode) BuildUDF( "function return value", returnValueTypeName, ) + if err != nil { + return nil, err + } lib.InternalCodingErrorIf(parameterListASTNode.Type != dsl.NodeTypeParameterList) lib.InternalCodingErrorIf(parameterListASTNode.Children == nil) diff --git a/pkg/input/pseudo_reader_gen.go b/pkg/input/pseudo_reader_gen.go index 495a6dab7..e847b59ab 100644 --- a/pkg/input/pseudo_reader_gen.go +++ b/pkg/input/pseudo_reader_gen.go @@ -113,7 +113,6 @@ func (reader *PseudoReaderGen) process( if recordsAndContexts.Len() > 0 { readerChannel <- recordsAndContexts - recordsAndContexts = list.New() } } diff --git a/pkg/lib/halfpipe.go b/pkg/lib/halfpipe.go index 040896a00..3b926ef3f 100644 --- a/pkg/lib/halfpipe.go +++ b/pkg/lib/halfpipe.go @@ -21,6 +21,9 @@ import ( func OpenOutboundHalfPipe(commandString string) 
(*os.File, error) { readPipe, writePipe, err := os.Pipe() + if err != nil { + return nil, err + } var procAttr os.ProcAttr procAttr.Files = []*os.File{ @@ -56,6 +59,9 @@ func OpenOutboundHalfPipe(commandString string) (*os.File, error) { func OpenInboundHalfPipe(commandString string) (*os.File, error) { readPipe, writePipe, err := os.Pipe() + if err != nil { + return nil, err + } var procAttr os.ProcAttr procAttr.Files = []*os.File{ diff --git a/pkg/mlrval/mlrval_get_test.go b/pkg/mlrval/mlrval_get_test.go index 9107fbdc9..90abc8598 100644 --- a/pkg/mlrval/mlrval_get_test.go +++ b/pkg/mlrval/mlrval_get_test.go @@ -12,23 +12,23 @@ import ( func TestGetString(t *testing.T) { mv := FromInferredType("234") - stringval, ok := mv.GetStringValue() + _, ok := mv.GetStringValue() assert.False(t, ok) mv = FromDeferredType("234") - stringval, ok = mv.GetStringValue() + _, ok = mv.GetStringValue() assert.False(t, ok) mv = FromInferredType("234.5") - stringval, ok = mv.GetStringValue() + _, ok = mv.GetStringValue() assert.False(t, ok) mv = FromDeferredType("234.5") - stringval, ok = mv.GetStringValue() + _, ok = mv.GetStringValue() assert.False(t, ok) mv = FromInferredType("abc") - stringval, ok = mv.GetStringValue() + stringval, ok := mv.GetStringValue() assert.Equal(t, "abc", stringval) assert.True(t, ok) @@ -60,33 +60,33 @@ func TestGetIntValue(t *testing.T) { assert.True(t, ok) mv = FromInferredType("123.4") - intval, ok = mv.GetIntValue() + _, ok = mv.GetIntValue() assert.False(t, ok) mv = FromDeferredType("123.4") - intval, ok = mv.GetIntValue() + _, ok = mv.GetIntValue() assert.False(t, ok) mv = FromInferredType("abc") - intval, ok = mv.GetIntValue() + _, ok = mv.GetIntValue() assert.False(t, ok) mv = FromDeferredType("abc") - intval, ok = mv.GetIntValue() + _, ok = mv.GetIntValue() assert.False(t, ok) } func TestGetFloatValue(t *testing.T) { mv := FromInferredType("234") - floatval, ok := mv.GetFloatValue() + _, ok := mv.GetFloatValue() assert.False(t, ok) mv = 
FromDeferredType("234") - floatval, ok = mv.GetFloatValue() + _, ok = mv.GetFloatValue() assert.False(t, ok) mv = FromInferredType("234.5") - floatval, ok = mv.GetFloatValue() + floatval, ok := mv.GetFloatValue() assert.Equal(t, 234.5, floatval) assert.True(t, ok) @@ -96,11 +96,11 @@ func TestGetFloatValue(t *testing.T) { assert.True(t, ok) mv = FromInferredType("abc") - floatval, ok = mv.GetFloatValue() + _, ok = mv.GetFloatValue() assert.False(t, ok) mv = FromDeferredType("abc") - floatval, ok = mv.GetFloatValue() + _, ok = mv.GetFloatValue() assert.False(t, ok) } @@ -126,38 +126,38 @@ func TestGetNumericToFloatValue(t *testing.T) { assert.True(t, ok) mv = FromInferredType("abc") - floatval, ok = mv.GetNumericToFloatValue() + _, ok = mv.GetNumericToFloatValue() assert.False(t, ok) mv = FromDeferredType("abc") - floatval, ok = mv.GetNumericToFloatValue() + _, ok = mv.GetNumericToFloatValue() assert.False(t, ok) } func TestGetBoolValue(t *testing.T) { mv := FromInferredType("234") - boolval, ok := mv.GetBoolValue() + _, ok := mv.GetBoolValue() assert.False(t, ok) mv = FromDeferredType("234") - boolval, ok = mv.GetBoolValue() + _, ok = mv.GetBoolValue() assert.False(t, ok) mv = FromInferredType("abc") - boolval, ok = mv.GetBoolValue() + _, ok = mv.GetBoolValue() assert.False(t, ok) mv = FromDeferredType("abc") - boolval, ok = mv.GetBoolValue() + _, ok = mv.GetBoolValue() assert.False(t, ok) mv = FromInferredType("true") - boolval, ok = mv.GetBoolValue() + boolval, ok := mv.GetBoolValue() assert.True(t, boolval) assert.True(t, ok) mv = FromDeferredType("false") - boolval, ok = mv.GetBoolValue() + _, ok = mv.GetBoolValue() assert.False(t, ok, "from-data-file \"false\" should infer to string") } diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go index a5dc213a7..d1fb880ed 100644 --- a/pkg/mlrval/mlrval_json.go +++ b/pkg/mlrval/mlrval_json.go @@ -188,7 +188,9 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( ) } - mv := FromPending() + // Will be 
assigned as an array or a map + var mv *Mlrval + if isArray { mv = FromEmptyArray() @@ -203,7 +205,6 @@ func MlrvalDecodeFromJSON(decoder *json.Decoder) ( } mv.ArrayAppend(element) } - } else { mv = FromEmptyMap() From b4ff26a7d0826da90ee34fb8036cc727eaa85d16 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 12:06:17 -0400 Subject: [PATCH 270/456] Static-check fixes from @lespea #1657, batch 6/n (#1708) * Static-check fixes from @lespea #1657, batch 2/n * Static-check fixes from @lespea #1657, batch 3/n * Static-check fixes from @lespea #1657, batch 4/n * Static-check fixes from @lespea #1657, batch 5/n * Static-check fixes from @lespea #1657, batch 6/n --- pkg/climain/mlrcli_parse.go | 2 +- pkg/dsl/ast_print.go | 2 +- pkg/dsl/cst/root.go | 2 +- pkg/dsl/cst/validate.go | 2 +- pkg/lib/regex.go | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index f582cd51c..939f10e36 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -394,7 +394,7 @@ func parseCommandLinePassTwo( options.FileNames = nil } - if options.DoInPlace && (options.FileNames == nil || len(options.FileNames) == 0) { + if options.DoInPlace && len(options.FileNames) == 0 { fmt.Fprintf(os.Stderr, "%s: -I option (in-place operation) requires input files.\n", "mlr") os.Exit(1) } diff --git a/pkg/dsl/ast_print.go b/pkg/dsl/ast_print.go index 4210e5b17..c735ef84c 100644 --- a/pkg/dsl/ast_print.go +++ b/pkg/dsl/ast_print.go @@ -157,7 +157,7 @@ func (node *ASTNode) printParexOneLineAux() { // IsLeaf determines if an AST node is a leaf node. func (node *ASTNode) IsLeaf() bool { - return node.Children == nil || len(node.Children) == 0 + return len(node.Children) == 0 } // ChildrenAreAllLeaves determines if an AST node's children are all leaf nodes. 
diff --git a/pkg/dsl/cst/root.go b/pkg/dsl/cst/root.go index a5b2f4848..a1c5b0b99 100644 --- a/pkg/dsl/cst/root.go +++ b/pkg/dsl/cst/root.go @@ -251,7 +251,7 @@ func (root *RootNode) regexProtectPrePass(ast *dsl.AST) { func (root *RootNode) regexProtectPrePassAux(astNode *dsl.ASTNode) { - if astNode.Children == nil || len(astNode.Children) == 0 { + if len(astNode.Children) == 0 { return } diff --git a/pkg/dsl/cst/validate.go b/pkg/dsl/cst/validate.go index 39db06fdc..8783c6003 100644 --- a/pkg/dsl/cst/validate.go +++ b/pkg/dsl/cst/validate.go @@ -29,7 +29,7 @@ func ValidateAST( // They can do mlr put '': there are simply zero statements. // But filter '' is an error. - if ast.RootNode.Children == nil || len(ast.RootNode.Children) == 0 { + if len(ast.RootNode.Children) == 0 { if dslInstanceType == DSLInstanceTypeFilter { return fmt.Errorf("mlr: filter statement must not be empty") } diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go index d8d551167..b810a4b01 100644 --- a/pkg/lib/regex.go +++ b/pkg/lib/regex.go @@ -203,7 +203,7 @@ func regexCompiledSubOrGsub( breakOnFirst bool, ) string { matrix := regex.FindAllStringSubmatchIndex(input, -1) - if matrix == nil || len(matrix) == 0 { + if len(matrix) == 0 { return input } @@ -321,7 +321,7 @@ func RegexCompiledMatchWithMapResults( ends := make([]int, 0, 10) matrix := regex.FindAllStringSubmatchIndex(input, -1) - if matrix == nil || len(matrix) == 0 { + if len(matrix) == 0 { return false, captures, starts, ends } @@ -407,7 +407,7 @@ func RegexCompiledMatchWithCaptures( regex *regexp.Regexp, ) (bool, []string) { matrix := regex.FindAllStringSubmatchIndex(input, -1) - if matrix == nil || len(matrix) == 0 { + if len(matrix) == 0 { // Set all captures to "" return false, make([]string, 10) } From 41649bf4f9d7a9b45604a940941fb9df413d1b2b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 12:11:28 -0400 Subject: [PATCH 271/456] Static-check fixes from @lespea #1657, batch 7/n (#1709) --- 
cmd/experiments/colors/main.go | 15 +++++++++------ pkg/bifs/base.go | 2 ++ pkg/dsl/cst/dump.go | 2 +- pkg/dsl/cst/statements.go | 1 - pkg/entrypoint/entrypoint.go | 4 +--- pkg/input/record_reader_json.go | 4 +--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cmd/experiments/colors/main.go b/cmd/experiments/colors/main.go index 3539d3c1e..2e41124d9 100644 --- a/cmd/experiments/colors/main.go +++ b/cmd/experiments/colors/main.go @@ -6,12 +6,15 @@ import ( "github.com/johnkerl/miller/v6/pkg/colorizer" ) -const boldString = "\u001b[1m" -const underlineString = "\u001b[4m" -const reversedString = "\u001b[7m" -const redString = "\u001b[1;31m" -const blueString = "\u001b[1;34m" -const defaultString = "\u001b[0m" +const ( + boldString = "\u001b[1m" + reversedString = "\u001b[7m" + redString = "\u001b[1;31m" + blueString = "\u001b[1;34m" + defaultString = "\u001b[0m" + + // underlineString = "\u001b[4m" +) func main() { fmt.Printf("Hello, world!\n") diff --git a/pkg/bifs/base.go b/pkg/bifs/base.go index 0f299048f..c259cc7fe 100644 --- a/pkg/bifs/base.go +++ b/pkg/bifs/base.go @@ -104,6 +104,8 @@ func _zero1(input1 *mlrval.Mlrval) *mlrval.Mlrval { } // Return one (unary) +// +//lint:ignore U1000 util function might be used later func __one1(input1 *mlrval.Mlrval) *mlrval.Mlrval { return mlrval.FromInt(1) } diff --git a/pkg/dsl/cst/dump.go b/pkg/dsl/cst/dump.go index c5e0d975e..1114043ff 100644 --- a/pkg/dsl/cst/dump.go +++ b/pkg/dsl/cst/dump.go @@ -199,7 +199,7 @@ func (node *DumpStatementNode) dumpToStderr( outputString string, state *runtime.State, ) error { - fmt.Fprintf(os.Stderr, outputString) + fmt.Fprint(os.Stderr, outputString) return nil } diff --git a/pkg/dsl/cst/statements.go b/pkg/dsl/cst/statements.go index eec090606..8e8edba72 100644 --- a/pkg/dsl/cst/statements.go +++ b/pkg/dsl/cst/statements.go @@ -104,7 +104,6 @@ func (root *RootNode) BuildStatementNode( default: return nil, fmt.Errorf("at CST BuildStatementNode: unhandled AST node %s", 
string(astNode.Type)) - break } return statement, nil } diff --git a/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go index 7426c726d..c8293c041 100644 --- a/pkg/entrypoint/entrypoint.go +++ b/pkg/entrypoint/entrypoint.go @@ -104,9 +104,7 @@ func processInPlace( // Save off the file names from the command line. fileNames := make([]string, len(originalOptions.FileNames)) - for i, fileName := range originalOptions.FileNames { - fileNames[i] = fileName - } + copy(fileNames, originalOptions.FileNames) for _, fileName := range fileNames { diff --git a/pkg/input/record_reader_json.go b/pkg/input/record_reader_json.go index 52844025e..63d9f7368 100644 --- a/pkg/input/record_reader_json.go +++ b/pkg/input/record_reader_json.go @@ -281,9 +281,7 @@ func (bsr *JSONCommentEnabledReader) Read(p []byte) (n int, err error) { func (bsr *JSONCommentEnabledReader) populateFromLine(p []byte) int { numBytesWritten := 0 if len(bsr.lineBytes) < len(p) { - for i := 0; i < len(bsr.lineBytes); i++ { - p[i] = bsr.lineBytes[i] - } + copy(p, bsr.lineBytes) numBytesWritten = len(bsr.lineBytes) bsr.lineBytes = nil } else { From 5424e753a4ad630233cbcb27b854dbacb4940fb5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 27 Oct 2024 12:16:49 -0400 Subject: [PATCH 272/456] Static-check fixes from @lespea #1657, batch 8/n (#1710) --- pkg/output/record_writer_csv.go | 1 - pkg/output/record_writer_markdown.go | 1 - pkg/transformers/fill_down.go | 1 - pkg/transformers/step.go | 1 - 4 files changed, 4 deletions(-) diff --git a/pkg/output/record_writer_csv.go b/pkg/output/record_writer_csv.go index ffd76f2ec..ca51cd325 100644 --- a/pkg/output/record_writer_csv.go +++ b/pkg/output/record_writer_csv.go @@ -14,7 +14,6 @@ import ( type RecordWriterCSV struct { writerOptions *cli.TWriterOptions - ofs0 byte // Go's CSV library only lets its 'Comma' be a single character csvWriter *csv.Writer needToPrintHeader bool firstRecordKeys []string diff --git a/pkg/output/record_writer_markdown.go 
b/pkg/output/record_writer_markdown.go index 64bc8bb97..b3b96089a 100644 --- a/pkg/output/record_writer_markdown.go +++ b/pkg/output/record_writer_markdown.go @@ -12,7 +12,6 @@ import ( type RecordWriterMarkdown struct { writerOptions *cli.TWriterOptions - ors string numHeaderLinesOutput int lastJoinedHeader string diff --git a/pkg/transformers/fill_down.go b/pkg/transformers/fill_down.go index 55acbb3c3..cf779e336 100644 --- a/pkg/transformers/fill_down.go +++ b/pkg/transformers/fill_down.go @@ -116,7 +116,6 @@ func transformerFillDownParseCLI( type TransformerFillDown struct { // input fillDownFieldNames []string - doAll bool onlyIfAbsent bool // state diff --git a/pkg/transformers/step.go b/pkg/transformers/step.go index fcf5d0eee..e003aaf3f 100644 --- a/pkg/transformers/step.go +++ b/pkg/transformers/step.go @@ -260,7 +260,6 @@ type TransformerStep struct { // STATE // Scratch space used per-record - valueFieldValues []mlrval.Mlrval // Map from group-by field names to value-field names to stepper name to stepper object. See // the Transform method below for more details. groups map[string]map[string]map[string]tStepper From 296430fe41682c0a569fe92db5bd1556cd4a1fd5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Nov 2024 09:00:52 -0500 Subject: [PATCH 273/456] Bump golang.org/x/term from 0.25.0 to 0.26.0 (#1712) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.25.0 to 0.26.0. - [Commits](https://github.com/golang/term/compare/v0.25.0...v0.26.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 1de88737e..a1f304124 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.9.0 - golang.org/x/sys v0.26.0 - golang.org/x/term v0.25.0 + golang.org/x/sys v0.27.0 + golang.org/x/term v0.26.0 golang.org/x/text v0.19.0 ) diff --git a/go.sum b/go.sum index 9167f5c7e..44b7e60c1 100644 --- a/go.sum +++ b/go.sum @@ -37,10 +37,10 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 193a2ee37b807d49960c1262052fd7e87a5065ff Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Nov 2024 09:01:14 -0500 Subject: [PATCH 274/456] Bump goreleaser/goreleaser-action from 6.0.0 to 6.1.0 (#1711) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 6.0.0 to 6.1.0. - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/286f3b13b1b49da4ac219696163fb8c1c93e1200...9ed2f89a662bf1735a48bc8557fd212fa902bebf) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e3a706295..515564be0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@286f3b13b1b49da4ac219696163fb8c1c93e1200 + uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From 214129a95ecdedea52216cc007938a7c1ab3a3ee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Nov 2024 20:39:42 -0700 Subject: [PATCH 275/456] Bump golang.org/x/text from 0.19.0 to 0.20.0 (#1714) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.19.0 to 0.20.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.19.0...v0.20.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a1f304124..292414dc6 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.9.0 golang.org/x/sys v0.27.0 golang.org/x/term v0.26.0 - golang.org/x/text v0.19.0 + golang.org/x/text v0.20.0 ) require ( diff --git a/go.sum b/go.sum index 44b7e60c1..bf3b2cb5c 100644 --- a/go.sum +++ b/go.sum @@ -41,8 +41,8 @@ golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From cd3b0a62abab3f1f67bee2fbe54bfe372f7d79d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 08:35:09 -0700 Subject: [PATCH 276/456] 
Bump github/codeql-action from 3.27.0 to 3.27.1 (#1715) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.0 to 3.27.1. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/662472033e021d55d94146f66f6058822b0b39fd...4f3212b61783c3c68e8309a0f18a699764811cda) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2da2b5891..d174d278c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@662472033e021d55d94146f66f6058822b0b39fd + uses: github/codeql-action/init@4f3212b61783c3c68e8309a0f18a699764811cda with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@662472033e021d55d94146f66f6058822b0b39fd + uses: github/codeql-action/autobuild@4f3212b61783c3c68e8309a0f18a699764811cda # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@662472033e021d55d94146f66f6058822b0b39fd + uses: github/codeql-action/analyze@4f3212b61783c3c68e8309a0f18a699764811cda From 3d17ca117c55fd6387d8a8627a627902b8c6dbf8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 09:05:02 -0500 Subject: [PATCH 277/456] Bump github/codeql-action from 3.27.1 to 3.27.2 (#1716) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.1 to 3.27.2. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/4f3212b61783c3c68e8309a0f18a699764811cda...9278e421667d5d90a2839487a482448c4ec7df4d) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d174d278c..56cb781b5 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4f3212b61783c3c68e8309a0f18a699764811cda + uses: github/codeql-action/init@9278e421667d5d90a2839487a482448c4ec7df4d with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4f3212b61783c3c68e8309a0f18a699764811cda + uses: github/codeql-action/autobuild@9278e421667d5d90a2839487a482448c4ec7df4d # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4f3212b61783c3c68e8309a0f18a699764811cda + uses: github/codeql-action/analyze@9278e421667d5d90a2839487a482448c4ec7df4d From b0f9e03609f51b23f4bd9048ebf9d26e7276bc59 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 08:45:13 -0500 Subject: [PATCH 278/456] Bump github/codeql-action from 3.27.2 to 3.27.3 (#1717) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.2 to 3.27.3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/9278e421667d5d90a2839487a482448c4ec7df4d...396bb3e45325a47dd9ef434068033c6d5bb0d11a) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 56cb781b5..18a0235ca 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@9278e421667d5d90a2839487a482448c4ec7df4d + uses: github/codeql-action/init@396bb3e45325a47dd9ef434068033c6d5bb0d11a with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@9278e421667d5d90a2839487a482448c4ec7df4d + uses: github/codeql-action/autobuild@396bb3e45325a47dd9ef434068033c6d5bb0d11a # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@9278e421667d5d90a2839487a482448c4ec7df4d + uses: github/codeql-action/analyze@396bb3e45325a47dd9ef434068033c6d5bb0d11a From c189b6a2d81f5621cdf822332b3b8af0ca9754b4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Nov 2024 07:59:04 -0500 Subject: [PATCH 279/456] Bump github/codeql-action from 3.27.3 to 3.27.4 (#1718) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.3 to 3.27.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/396bb3e45325a47dd9ef434068033c6d5bb0d11a...ea9e4e37992a54ee68a9622e985e60c8e8f12d9f) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 18a0235ca..d4f56775f 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@396bb3e45325a47dd9ef434068033c6d5bb0d11a + uses: github/codeql-action/init@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@396bb3e45325a47dd9ef434068033c6d5bb0d11a + uses: github/codeql-action/autobuild@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@396bb3e45325a47dd9ef434068033c6d5bb0d11a + uses: github/codeql-action/analyze@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f From 2868fb6e7e0deb259e2d589aa1c8f0b3e5ddb48f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 19 Nov 2024 18:54:48 -0500 Subject: [PATCH 280/456] rm xtodo.txt --- xtodo.txt | 70 ------------------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 xtodo.txt diff --git a/xtodo.txt b/xtodo.txt deleted file mode 100644 index e3dab2ea5..000000000 --- a/xtodo.txt +++ /dev/null @@ -1,70 +0,0 @@ ----------------------------------------------------------------- -* look at: mr -vvv test/cases/io-spec-tsv/0004/cmd - ----------------------------------------------------------------- - -func (keeper *PercentileKeeper) EmitNamed(name string) *mlrval.Mlrval { - if name == "min" { - return keeper.EmitNonInterpolated(0.0) - } else if name == "p25" { - return keeper.EmitNonInterpolated(25.0) - } else if name == "median" { - return keeper.EmitNonInterpolated(50.0) - } else if name == "p75" { - return keeper.EmitNonInterpolated(75.0) - } else if name == "max" { - return keeper.EmitNonInterpolated(100.0) - - } else if name == "iqr" { - p25 := keeper.EmitNonInterpolated(25.0) - p75 := keeper.EmitNonInterpolated(75.0) - if p25.IsNumeric() && p75.IsNumeric() { - return bifs.BIF_minus_binary(p75, p25) - } else { - return mlrval.VOID - } - - } else if name == "lof" { - p25 := keeper.EmitNonInterpolated(25.0) - iqr := keeper.EmitNamed("iqr") - if p25.IsNumeric() && iqr.IsNumeric() { - return bifs.BIF_minus_binary(p25, bifs.BIF_times(fenceOuterK, iqr)) - } else { - return mlrval.VOID - } - - } else if name == "lif" { - p25 := keeper.EmitNonInterpolated(25.0) - iqr := keeper.EmitNamed("iqr") - if p25.IsNumeric() && iqr.IsNumeric() { - return bifs.BIF_minus_binary(p25, bifs.BIF_times(fenceInnerK, iqr)) - 
} else { - return mlrval.VOID - } - - } else if name == "uif" { - p75 := keeper.EmitNonInterpolated(75.0) - iqr := keeper.EmitNamed("iqr") - if p75.IsNumeric() && iqr.IsNumeric() { - return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceInnerK, iqr)) - } else { - return mlrval.VOID - } - - } else if name == "uof" { - p75 := keeper.EmitNonInterpolated(75.0) - iqr := keeper.EmitNamed("iqr") - if p75.IsNumeric() && iqr.IsNumeric() { - return bifs.BIF_plus_binary(p75, bifs.BIF_times(fenceOuterK, iqr)) - } else { - return mlrval.VOID - } - - } else { - return mlrval.FromError( - errors.New( - "stats1: unrecognized - ), - ) - } -} From 3050e0aeea553e4d2d287791ffdcbe1af0335d9e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 08:00:11 -0500 Subject: [PATCH 281/456] Bump github/codeql-action from 3.27.4 to 3.27.5 (#1719) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.4 to 3.27.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/ea9e4e37992a54ee68a9622e985e60c8e8f12d9f...f09c1c0a94de965c15400f5634aa42fac8fb8f88) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d4f56775f..016e37154 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f + uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f + uses: github/codeql-action/autobuild@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f + uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 From 019b15a310ee8ab61ab9edc72795de9e347d3f50 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 23 Nov 2024 10:12:13 -0500 Subject: [PATCH 282/456] delve.txt --- delve.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 delve.txt diff --git a/delve.txt b/delve.txt new file mode 100644 index 000000000..a34052ee1 --- /dev/null +++ b/delve.txt @@ -0,0 +1,5 @@ +dlv exec ./mlr -- --csv --from x.csv sub -a def ghi +break main.main + # or wherever +restart +continue From 9f77bbe09625d59a9a5d7c025722d8cc8e802f15 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 23 Nov 2024 10:13:36 -0500 Subject: [PATCH 283/456] Add help strings for `-a`/`-r` in `sub`/`gsub`/`ssub` (#1721) * Help strings for `-a`/`-r` in `sub`/`gsub`/`ssub` * `mlr regtest -p test/cases/cli-help` to update expected outputs * artifacts from `make dev` --- docs/src/manpage.md | 8 +++++++- docs/src/manpage.txt | 8 +++++++- docs/src/record-heterogeneity.md | 1 - docs/src/reference-verbs.md | 10 ++++++++-- man/manpage.txt | 8 +++++++- 
man/mlr.1 | 14 ++++++++++---- pkg/transformers/subs.go | 6 ++++++ test/cases/cli-help/0001/expout | 6 ++++++ 8 files changed, 51 insertions(+), 10 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 5185360d3..2092d8abf 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1275,6 +1275,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1mhaving-fields0m @@ -1910,6 +1912,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1mstats10m @@ -2057,6 +2061,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1msummary0m @@ -3731,5 +3737,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-17 4mMILLER24m(1) + 2024-11-23 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a341f6c94..35680317d 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1254,6 +1254,8 @@ See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 
1mhaving-fields0m @@ -1889,6 +1891,8 @@ the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1mstats10m @@ -2036,6 +2040,8 @@ See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1msummary0m @@ -3710,4 +3716,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-17 4mMILLER24m(1) + 2024-11-23 4mMILLER24m(1) diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index de96ae69c..57e929826 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -130,7 +130,6 @@ If you `mlr --csv cat` this, you'll get an error message: a,b,c 1,2,3 mlr: mlr: CSV header/data length mismatch 3 != 2 at filename data/het/ragged.csv row 3. -. There are two kinds of raggedness here. Since CSVs form records by zipping the diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 092f53969..3eb176dde 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -986,7 +986,7 @@ Options: Since the expression pieces are simply concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. @@ -1464,6 +1464,8 @@ for the old string and handling multiple matches, like the `gsub` DSL function. See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. 
+-a Convert all field names. -h|--help Show this message. @@ -2321,7 +2323,7 @@ Options: Since the expression pieces are simply concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. @@ -3210,6 +3212,8 @@ Replaces old string with new string in specified field(s), without regex support the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. @@ -3714,6 +3718,8 @@ for the old string and not handling multiple matches, like the `sub` DSL functio See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. diff --git a/man/manpage.txt b/man/manpage.txt index a341f6c94..35680317d 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1254,6 +1254,8 @@ See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1mhaving-fields0m @@ -1889,6 +1891,8 @@ the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 1mstats10m @@ -2036,6 +2040,8 @@ See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. + -r {regex} Regular expression for field names to convert. + -a Convert all field names. -h|--help Show this message. 
1msummary0m @@ -3710,4 +3716,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-10-17 4mMILLER24m(1) + 2024-11-23 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 2e30cb59d..67cc66732 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-10-17 +.\" Date: 2024-11-23 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-10-17" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-11-23" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1335,7 +1335,7 @@ Options: Since the expression pieces are simply concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. @@ -1565,6 +1565,8 @@ for the old string and handling multiple matches, like the `gsub` DSL function. See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. .fi .if n \{\ @@ -1924,7 +1926,7 @@ Options: Since the expression pieces are simply concatenated, please be sure to use intervening semicolons to separate expressions.) --s name=value: Predefines out-of-stream variable @name to have +-s name=value: Predefines out-of-stream variable @name to have Thus mlr put -s foo=97 '$column += @foo' is like mlr put 'begin {@foo = 97} $column += @foo'. The value part is subject to type-inferencing. 
@@ -2386,6 +2388,8 @@ Replaces old string with new string in specified field(s), without regex support the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. .fi .if n \{\ @@ -2557,6 +2561,8 @@ for the old string and not handling multiple matches, like the `sub` DSL functio See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. .fi .if n \{\ diff --git a/pkg/transformers/subs.go b/pkg/transformers/subs.go index 10468a280..59a8e92de 100644 --- a/pkg/transformers/subs.go +++ b/pkg/transformers/subs.go @@ -49,6 +49,8 @@ func transformerSubUsage( fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n") fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") + fmt.Fprintf(o, "-a Convert all field names.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -61,6 +63,8 @@ func transformerGsubUsage( fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n") fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") + fmt.Fprintf(o, "-a Convert all field names.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -72,6 +76,8 @@ func transformerSsubUsage( fmt.Fprintf(o, "the old string, like the `ssub` DSL function. 
See also the `gsub` and `sub` verbs.\n") fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") + fmt.Fprintf(o, "-a Convert all field names.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index bc366781e..c8d0af1bc 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -392,6 +392,8 @@ for the old string and handling multiple matches, like the `gsub` DSL function. See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. ================================================================ @@ -1058,6 +1060,8 @@ Replaces old string with new string in specified field(s), without regex support the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. ================================================================ @@ -1209,6 +1213,8 @@ for the old string and not handling multiple matches, like the `sub` DSL functio See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. +-r {regex} Regular expression for field names to convert. +-a Convert all field names. -h|--help Show this message. 
================================================================ From a728524bf3d4a7fe5cd95574e76d0e22819a70e3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:19:57 -0500 Subject: [PATCH 284/456] Bump github.com/stretchr/testify from 1.9.0 to 1.10.0 (#1723) Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.9.0 to 1.10.0. - [Release notes](https://github.com/stretchr/testify/releases) - [Commits](https://github.com/stretchr/testify/compare/v1.9.0...v1.10.0) --- updated-dependencies: - dependency-name: github.com/stretchr/testify dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 292414dc6..c658134d4 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.27.0 golang.org/x/term v0.26.0 golang.org/x/text v0.20.0 diff --git a/go.sum b/go.sum index bf3b2cb5c..9e9c18b19 100644 --- a/go.sum +++ b/go.sum @@ -33,8 +33,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 
h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= From 0614b37dfa6a8e061fb520fc99fd08eacf3787b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 08:51:27 -0500 Subject: [PATCH 285/456] Bump github/codeql-action from 3.27.5 to 3.27.6 (#1724) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.5 to 3.27.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f09c1c0a94de965c15400f5634aa42fac8fb8f88...aa578102511db1f4524ed59b8cc2bae4f6e88195) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 016e37154..32060b52d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 + uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@f09c1c0a94de965c15400f5634aa42fac8fb8f88 + uses: github/codeql-action/autobuild@aa578102511db1f4524ed59b8cc2bae4f6e88195 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 + uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 From e62a0b4b2054cb3513661ec143dc4bbbd9d9b720 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 07:01:46 -0500 Subject: [PATCH 286/456] Bump golang.org/x/text from 0.20.0 to 0.21.0 (#1727) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.20.0 to 0.21.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.20.0...v0.21.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c658134d4..71477976d 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.27.0 golang.org/x/term v0.26.0 - golang.org/x/text v0.20.0 + golang.org/x/text v0.21.0 ) require ( diff --git a/go.sum b/go.sum index 9e9c18b19..583a8c58e 100644 --- a/go.sum +++ b/go.sum @@ -41,8 +41,8 @@ golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From c01fe78fbd5e5a47f356988fb25dff64c0b2a717 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 07:33:02 -0500 Subject: [PATCH 287/456] Bump golang.org/x/term from 0.26.0 to 0.27.0 (#1726) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.26.0 to 0.27.0. 
- [Commits](https://github.com/golang/term/compare/v0.26.0...v0.27.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 71477976d..64c05c434 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.27.0 - golang.org/x/term v0.26.0 + golang.org/x/sys v0.28.0 + golang.org/x/term v0.27.0 golang.org/x/text v0.21.0 ) diff --git a/go.sum b/go.sum index 583a8c58e..8132f7c0b 100644 --- a/go.sum +++ b/go.sum @@ -37,10 +37,10 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= -golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text 
v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 63654683f0e40443b7b46502c4afef6bdf526c96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:42:15 -0500 Subject: [PATCH 288/456] Bump actions/cache from 4.1.2 to 4.2.0 (#1728) Bumps [actions/cache](https://github.com/actions/cache) from 4.1.2 to 4.2.0. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/6849a6489940f00c2f30c0fb92c6274307ccb58a...1bd1e32a3bdc45362d1e726936510720a7c30a57) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 515564be0..2e97d4aa3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 with: path: | ~/.cache/go-build From 8bc3c5f645bb56e33e44054c0f93cf018579ea2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 08:07:28 -0500 Subject: [PATCH 289/456] Bump github/codeql-action from 3.27.6 to 3.27.7 (#1730) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.6 to 3.27.7. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/aa578102511db1f4524ed59b8cc2bae4f6e88195...babb554ede22fd5605947329c4d04d8e7a0b8155) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 32060b52d..11e3df105 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 + uses: github/codeql-action/init@babb554ede22fd5605947329c4d04d8e7a0b8155 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@aa578102511db1f4524ed59b8cc2bae4f6e88195 + uses: github/codeql-action/autobuild@babb554ede22fd5605947329c4d04d8e7a0b8155 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 + uses: github/codeql-action/analyze@babb554ede22fd5605947329c4d04d8e7a0b8155 From dde2cd20a70ddbf9a584209329e7e25d1bca040d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 08:09:33 -0500 Subject: [PATCH 290/456] Bump actions/setup-go from 5.1.0 to 5.2.0 (#1729) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.1.0 to 5.2.0. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed...3041bf56c941b39c61721a86cd11f3bb1338122a) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index e06b0e8f7..ec18c710a 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go - uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a with: go-version: 1.19 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2e97d4aa3..0f2f27cf1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a 
with: go-version: ${{ env.GO_VERSION }} id: go From 929a2357d04e702a4def8ea7b1597dd566acfda6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 08:07:35 -0500 Subject: [PATCH 291/456] Bump github/codeql-action from 3.27.7 to 3.27.9 (#1731) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.7 to 3.27.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/babb554ede22fd5605947329c4d04d8e7a0b8155...df409f7d9260372bd5f19e5b04e83cb3c43714ae) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 11e3df105..21e02a0b6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@babb554ede22fd5605947329c4d04d8e7a0b8155 + uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@babb554ede22fd5605947329c4d04d8e7a0b8155 + uses: github/codeql-action/autobuild@df409f7d9260372bd5f19e5b04e83cb3c43714ae # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@babb554ede22fd5605947329c4d04d8e7a0b8155 + uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae From 06e33c0f82b8872dc82ed21216e32175e83d01d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 08:34:21 -0500 Subject: [PATCH 292/456] Bump actions/upload-artifact from 4.4.3 to 4.5.0 (#1732) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.3 to 4.5.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882...6f51ac03b9356f520e9adb1b1b7802705f340c2b) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index ec18c710a..e546a3410 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 + - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 8088850505767794e2e9cd6f3b442e4b20ddba1e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 09:26:34 -0500 Subject: [PATCH 293/456] Bump github/codeql-action from 3.27.9 to 3.28.0 (#1734) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.9 to 3.28.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/df409f7d9260372bd5f19e5b04e83cb3c43714ae...48ab28a6f5dbc2a99bf1e0131198dd8f1df78169) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 21e02a0b6..1fbaf629d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae + uses: github/codeql-action/init@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@df409f7d9260372bd5f19e5b04e83cb3c43714ae + uses: github/codeql-action/autobuild@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae + uses: github/codeql-action/analyze@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 From cc1cd954eac65f4421bb81d6b6a7a110481b1f61 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 23 Dec 2024 12:27:08 -0500 Subject: [PATCH 294/456] Fix unflatten with field names like `.` `.x` or `x..y` (#1735) * Fix unflatten with field name like `.` `.x` or `x..y` * docs & test data --- docs/src/data/flatten-dots.csv | 2 + docs/src/flatten-unflatten.md | 53 +++++++++++++++++ docs/src/flatten-unflatten.md.in | 27 +++++++++ docs/src/manpage.md | 10 ++-- docs/src/manpage.txt | 10 ++-- docs/src/reference-main-flag-list.md | 6 +- man/manpage.txt | 10 ++-- man/mlr.1 | 12 ++-- pkg/cli/option_parse.go | 6 +- pkg/mlrval/mlrmap_flatten_unflatten.go | 57 ++++++++++++++----- test/cases/verb-flatten-unflatten/0011/expout | 9 ++- test/input/unflatten-input-2.xtab | 5 ++ 12 files changed, 164 insertions(+), 43 deletions(-) create mode 100644 docs/src/data/flatten-dots.csv diff --git a/docs/src/data/flatten-dots.csv b/docs/src/data/flatten-dots.csv new file mode 100644 index 000000000..6a7947149 --- /dev/null +++ 
b/docs/src/data/flatten-dots.csv @@ -0,0 +1,2 @@ +a,b.,.c,.,d..e,f.g +1,2,3,4,5,6 diff --git a/docs/src/flatten-unflatten.md b/docs/src/flatten-unflatten.md index 7a3c138d2..ff428ca39 100644 --- a/docs/src/flatten-unflatten.md +++ b/docs/src/flatten-unflatten.md @@ -348,6 +348,59 @@ a.1,a.3,a.5 ] +## Non-inferencing cases + +An additional heuristic is that if a field name starts with a `.`, ends with +a `.`, or has two or more consecutive `.` characters, no attempt is made +to unflatten it on conversion from non-JSON to JSON. + +
+cat data/flatten-dots.csv
+
+
+a,b.,.c,.,d..e,f.g
+1,2,3,4,5,6
+
+ +
+mlr --icsv --oxtab cat data/flatten-dots.csv
+
+
+a    1
+b.   2
+.c   3
+.    4
+d..e 5
+f.g  6
+
+ +
+mlr --icsv --ojson cat data/flatten-dots.csv
+
+
+[
+{
+  "a": 1,
+  "b.": 2,
+  ".c": 3,
+  ".": 4,
+  "d..e": 5,
+  "f": {
+    "g": 6
+  }
+}
+]
+
+ +## Non-inferencing cases + +An additional heuristic is that if a field name starts with a `.`, ends with +a `.`, or has two or more consecutive `.` characters, no attempt is made +to unflatten it on conversion from non-JSON to JSON. + +## Manual control + + ## Manual control To see what our options are for manually controlling flattening and diff --git a/docs/src/flatten-unflatten.md.in b/docs/src/flatten-unflatten.md.in index 68033d594..152efadba 100644 --- a/docs/src/flatten-unflatten.md.in +++ b/docs/src/flatten-unflatten.md.in @@ -156,6 +156,33 @@ GENMD-RUN-COMMAND mlr --c2j cat data/non-consecutive.csv GENMD-EOF +## Non-inferencing cases + +An additional heuristic is that if a field name starts with a `.`, ends with +a `.`, or has two or more consecutive `.` characters, no attempt is made +to unflatten it on conversion from non-JSON to JSON. + +GENMD-RUN-COMMAND +cat data/flatten-dots.csv +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --oxtab cat data/flatten-dots.csv +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --ojson cat data/flatten-dots.csv +GENMD-EOF + +## Non-inferencing cases + +An additional heuristic is that if a field name starts with a `.`, ends with +a `.`, or has two or more consecutive `.` characters, no attempt is made +to unflatten it on conversion from non-JSON to JSON. + +## Manual control + + ## Manual control To see what our options are for manually controlling flattening and diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 2092d8abf..b9af6e51c 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -424,7 +424,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p 1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). - See the Flatten/unflatten doc page for more information. 
+ See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information. --flatsep or --jflatsep {string} Separator for flattening multi-level JSON keys, e.g. @@ -435,10 +435,10 @@ This is simply a copy of what you should see on running `man mlr` at a command p then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. - --no-auto-unflatten When input non-JSON and output is JSON, suppress the - default auto-unflatten behavior. Default: if the + --no-auto-unflatten When input is non-JSON and output is JSON, suppress + the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. With + `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. @@ -3737,5 +3737,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-11-23 4mMILLER24m(1) + 2024-12-23 4mMILLER24m(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 35680317d..aa0b21b9b 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -403,7 +403,7 @@ 1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). - See the Flatten/unflatten doc page for more information. + See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information. --flatsep or --jflatsep {string} Separator for flattening multi-level JSON keys, e.g. @@ -414,10 +414,10 @@ then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. 
- --no-auto-unflatten When input non-JSON and output is JSON, suppress the - default auto-unflatten behavior. Default: if the + --no-auto-unflatten When input is non-JSON and output is JSON, suppress + the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. With + `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. @@ -3716,4 +3716,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-11-23 4mMILLER24m(1) + 2024-12-23 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index fdea7b253..7258cce08 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -195,14 +195,14 @@ are overridden in all cases by setting output format to `format2`. These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). -See the Flatten/unflatten doc page for more information. +See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information. **Flags:** * `--flatsep or --jflatsep {string}`: Separator for flattening multi-level JSON keys, e.g. `{"a":{"b":3}}` becomes `a:b => 3` for non-JSON formats. Defaults to `.`. -* `--no-auto-flatten`: When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. -* `--no-auto-unflatten`: When input non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. 
With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. +* `--no-auto-flatten`: When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` then this flattens to `y.1=7,y.2=8,y.3=9`, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. +* `--no-auto-unflatten`: When input is non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to `$y=[7,8,9]`. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. ## Format-conversion keystroke-saver flags diff --git a/man/manpage.txt b/man/manpage.txt index 35680317d..aa0b21b9b 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -403,7 +403,7 @@ 1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). - See the Flatten/unflatten doc page for more information. + See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information. --flatsep or --jflatsep {string} Separator for flattening multi-level JSON keys, e.g. @@ -414,10 +414,10 @@ then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. - --no-auto-unflatten When input non-JSON and output is JSON, suppress the - default auto-unflatten behavior. Default: if the + --no-auto-unflatten When input is non-JSON and output is JSON, suppress + the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. With + `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. 
@@ -3716,4 +3716,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-11-23 4mMILLER24m(1) + 2024-12-23 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 67cc66732..967d53e31 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-11-23 +.\" Date: 2024-12-23 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-11-23" "\ \&" "\ \&" +.TH "MILLER" "1" "2024-12-23" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -492,7 +492,7 @@ are overridden in all cases by setting output format to `format2`. .nf These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). -See the Flatten/unflatten doc page for more information. +See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information. --flatsep or --jflatsep {string} Separator for flattening multi-level JSON keys, e.g. @@ -503,10 +503,10 @@ See the Flatten/unflatten doc page for more information. then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. ---no-auto-unflatten When input non-JSON and output is JSON, suppress the - default auto-unflatten behavior. Default: if the +--no-auto-unflatten When input is non-JSON and output is JSON, suppress + the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. With + `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. 
.fi diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 34db19a77..41be332b6 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -2877,7 +2877,7 @@ var OutputColorizationFlagSection = FlagSection{ func FlattenUnflattenPrintInfo() { fmt.Println("These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening).") fmt.Println() - fmt.Println("See the Flatten/unflatten doc page for more information.") + fmt.Println("See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatten-unflatten for more information.") } func init() { FlattenUnflattenFlagSection.Sort() } @@ -2901,7 +2901,7 @@ var FlattenUnflattenFlagSection = FlagSection{ { name: "--no-auto-flatten", - help: "When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` then this flattens to `y.1=7,y.2=8,y.3=9, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`.", + help: "When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` then this flattens to `y.1=7,y.2=8,y.3=9`, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`.", parser: func(args []string, argc int, pargi *int, options *TOptions) { options.WriterOptions.AutoFlatten = false *pargi += 1 @@ -2910,7 +2910,7 @@ var FlattenUnflattenFlagSection = FlagSection{ { name: "--no-auto-unflatten", - help: "When input non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=9. With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`.", + help: "When input is non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to `$y=[7,8,9]`. 
With `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`.", parser: func(args []string, argc int, pargi *int, options *TOptions) { options.WriterOptions.AutoUnflatten = false *pargi += 1 diff --git a/pkg/mlrval/mlrmap_flatten_unflatten.go b/pkg/mlrval/mlrmap_flatten_unflatten.go index 579522f22..4e5d117d2 100644 --- a/pkg/mlrval/mlrmap_flatten_unflatten.go +++ b/pkg/mlrval/mlrmap_flatten_unflatten.go @@ -106,7 +106,18 @@ func (mlrmap *Mlrmap) isFlattenable() bool { // For mlr unflatten without -f. This undoes Unflatten. This is for conversion // from non-JSON to JSON. If there are fields x.a, x.b, x.c, etc. they're put // into a single field x with map-valued value keyed by "a", "b", "c". - +// +// There is a heurtistic here though. Miller is (wildly) multi-format and needs +// to accommodate all manner of data. In the JSON world, "." is the default +// delimiter for nested data, and we're here to handle that. But in the R world, +// "." is just like "_" in other languages: witness "data.frame" rather than +// "data_frame". If the "." was intended as punctuation, in a say a field named +// "a.b" with value 3, then unflatten-to-JSON will make `{"a": {"b": 3}}`. This +// is just our default behavior; users can use --no-auto-unflatten. Weirder +// are field names like ".", ".x", "x.", "x..y", etc. The heuristic here +// is that when we split on "." and any of the pieces around/between the dots +// are empty string, we don't try to unflatten that field. +// // Special case: if the resulting string keys are string representations of 1, // 2, 3, etc -- without gaps -- then the map is converted to an array. // @@ -134,22 +145,38 @@ func (mlrmap *Mlrmap) CopyUnflattened( // We'll come through this loop once for x.a, another for x.b, etc. for pe := mlrmap.Head; pe != nil; pe = pe.Next { - // Is the field name something dot something? 
- if strings.Contains(pe.Key, separator) { - arrayOfIndices := SplitAXHelper(pe.Key, separator) - arrayval := arrayOfIndices.intf.([]*Mlrval) - lib.InternalCodingErrorIf(len(arrayval) < 1) - // If the input field name was "x.a" then remember the "x". - baseIndex := arrayval[0].String() - affectedBaseIndices[baseIndex] = true - // Use PutIndexed to assign $x["a"] = 7, or $x["b"] = 8, etc. - other.PutIndexed( - CopyMlrvalArray(arrayval), - unflattenTerminal(pe.Value).Copy(), - ) - } else { + // If there are no dots in the field name, treat it as a terminal. + if !strings.Contains(pe.Key, separator) { other.PutReference(pe.Key, unflattenTerminal(pe.Value)) + continue } + + arrayOfIndices := SplitAXHelper(pe.Key, separator) + arrayval := arrayOfIndices.intf.([]*Mlrval) + lib.InternalCodingErrorIf(len(arrayval) < 1) + + // Check for "" in any of the split pieces; treat the field as terminal if so. + legitDots := true + for i, _ := range arrayval { + piece := arrayval[i].String() + if piece == "" { + legitDots = false + break + } + } + if !legitDots { + other.PutReference(pe.Key, unflattenTerminal(pe.Value)) + continue + } + + // If the input field name was "x.a" then remember the "x". + baseIndex := arrayval[0].String() + affectedBaseIndices[baseIndex] = true + // Use PutIndexed to assign $x["a"] = 7, or $x["b"] = 8, etc. + other.PutIndexed( + CopyMlrvalArray(arrayval), + unflattenTerminal(pe.Value).Copy(), + ) } // Go through all the field names which were turned into maps -- e.g. 
"x" diff --git a/test/cases/verb-flatten-unflatten/0011/expout b/test/cases/verb-flatten-unflatten/0011/expout index 9a45bc186..18f737223 100644 --- a/test/cases/verb-flatten-unflatten/0011/expout +++ b/test/cases/verb-flatten-unflatten/0011/expout @@ -24,6 +24,13 @@ "wrapper": { "empty3": {}, "emtpy4": [] - } + }, + "x": { + "y": 1 + }, + "@": 2, + "x@": 3, + "@y": 4, + "x@@y": 5 } ] diff --git a/test/input/unflatten-input-2.xtab b/test/input/unflatten-input-2.xtab index 97b1941e1..21ea4bd2b 100644 --- a/test/input/unflatten-input-2.xtab +++ b/test/input/unflatten-input-2.xtab @@ -13,3 +13,8 @@ empty1 {} empty2 [] wrapper@empty3 {} wrapper@emtpy4 [] +x@y 1 +@ 2 +x@ 3 +@y 4 +x@@y 5 From 0060cceafc57fefe7673eff9670e5684adee2de5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 23 Dec 2024 14:19:51 -0500 Subject: [PATCH 295/456] Fix section-title typos for docs in #1735 (#1736) * fix typo in flatten/unflatten doc section titles * run `make docs` --- docs/src/flatten-unflatten.md | 9 --------- docs/src/flatten-unflatten.md.in | 9 --------- 2 files changed, 18 deletions(-) diff --git a/docs/src/flatten-unflatten.md b/docs/src/flatten-unflatten.md index ff428ca39..da0e817db 100644 --- a/docs/src/flatten-unflatten.md +++ b/docs/src/flatten-unflatten.md @@ -392,15 +392,6 @@ f.g 6 ] -## Non-inferencing cases - -An additional heuristic is that if a field name starts with a `.`, ends with -a `.`, or has two or more consecutive `.` characters, no attempt is made -to unflatten it on conversion from non-JSON to JSON. 
- -## Manual control - - ## Manual control To see what our options are for manually controlling flattening and diff --git a/docs/src/flatten-unflatten.md.in b/docs/src/flatten-unflatten.md.in index 152efadba..951ea1f58 100644 --- a/docs/src/flatten-unflatten.md.in +++ b/docs/src/flatten-unflatten.md.in @@ -174,15 +174,6 @@ GENMD-RUN-COMMAND mlr --icsv --ojson cat data/flatten-dots.csv GENMD-EOF -## Non-inferencing cases - -An additional heuristic is that if a field name starts with a `.`, ends with -a `.`, or has two or more consecutive `.` characters, no attempt is made -to unflatten it on conversion from non-JSON to JSON. - -## Manual control - - ## Manual control To see what our options are for manually controlling flattening and From 6287b04fa8737677071777529bc3319be9e13166 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:03:18 -0500 Subject: [PATCH 296/456] Bump golang.org/x/sys from 0.28.0 to 0.29.0 (#1738) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.28.0 to 0.29.0. - [Commits](https://github.com/golang/sys/compare/v0.28.0...v0.29.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 64c05c434..535059313 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.28.0 + golang.org/x/sys v0.29.0 golang.org/x/term v0.27.0 golang.org/x/text v0.21.0 ) diff --git a/go.sum b/go.sum index 8132f7c0b..df32bd92f 100644 --- a/go.sum +++ b/go.sum @@ -37,8 +37,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= From a83470d16c3caa295eeba2d04aaf1d71a70a3f18 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:13:45 -0500 Subject: [PATCH 297/456] Bump golang.org/x/term from 0.27.0 to 0.28.0 (#1737) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.27.0 to 0.28.0. 
- [Commits](https://github.com/golang/term/compare/v0.27.0...v0.28.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 535059313..6b3c64ec1 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.29.0 - golang.org/x/term v0.27.0 + golang.org/x/term v0.28.0 golang.org/x/text v0.21.0 ) diff --git a/go.sum b/go.sum index df32bd92f..5a052c3a8 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= +golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 9bf883233eec95c3ca5d91be86b444c2473bb83f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 09:42:51 -0500 Subject: [PATCH 298/456] Bump actions/upload-artifact from 4.5.0 to 4.6.0 (#1739) Bumps 
[actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.5.0 to 4.6.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/6f51ac03b9356f520e9adb1b1b7802705f340c2b...65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index e546a3410..c63b0b0b0 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b + - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 2b6fa35388443721300bb639cf5ae3a4fe7fd39b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 08:53:46 -0500 Subject: [PATCH 299/456] Bump github/codeql-action from 3.28.0 to 3.28.1 (#1740) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.0 to 3.28.1. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/48ab28a6f5dbc2a99bf1e0131198dd8f1df78169...b6a472f63d85b9c78a3ac5e89422239fc15e9b3c) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 1fbaf629d..0e4de9f83 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 + uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 + uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 + uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c From e3a1e833f09e7f886726f54198b33416f02d0121 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 08:34:39 -0500 Subject: [PATCH 300/456] Bump actions/setup-go from 5.2.0 to 5.3.0 (#1741) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.2.0 to 5.3.0. 
- [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/3041bf56c941b39c61721a86cd11f3bb1338122a...f111f3307d8850f501ac008e886eec1fd1932a34) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index c63b0b0b0..0d7707755 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a + uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 with: go-version: 1.19 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0f2f27cf1..88a639b7a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a + uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 with: go-version: ${{ env.GO_VERSION }} id: go From ce3123b3fae65c14748915bb5499431767a0f1ae Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Jan 2025 09:11:00 -0500 Subject: [PATCH 301/456] Bump github/codeql-action from 3.28.1 to 3.28.2 (#1742) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.1 to 3.28.2. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/b6a472f63d85b9c78a3ac5e89422239fc15e9b3c...d68b2d4edb4189fd2a5366ac14e72027bd4b37dd) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 0e4de9f83..e0d78cf80 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c + uses: github/codeql-action/init@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c + uses: github/codeql-action/autobuild@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c + uses: github/codeql-action/analyze@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd From 3738b617aeaf4e4c170b2dc5a07f7e9cc66708a7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 Jan 2025 09:27:59 -0500 Subject: [PATCH 302/456] Bump github/codeql-action from 3.28.2 to 3.28.3 (#1743) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.2 to 3.28.3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/d68b2d4edb4189fd2a5366ac14e72027bd4b37dd...dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index e0d78cf80..2184829f9 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd + uses: github/codeql-action/init@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd + uses: github/codeql-action/autobuild@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@d68b2d4edb4189fd2a5366ac14e72027bd4b37dd + uses: github/codeql-action/analyze@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 From cf458f0230787d9eae77eb881ce6d47ca8ecaf0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Jan 2025 09:47:39 -0500 Subject: [PATCH 303/456] Bump github/codeql-action from 3.28.3 to 3.28.4 (#1744) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.3 to 3.28.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7...ee117c905ab18f32fa0f66c2fe40ecc8013f3e04) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2184829f9..48433311c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 + uses: github/codeql-action/init@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 + uses: github/codeql-action/autobuild@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@dd196fa9ce80b6bacc74ca1c32bd5b0ba22efca7 + uses: github/codeql-action/analyze@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 From 107e57e3e4a053b674dd9ec3fdccd0aec9bc3a17 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 09:26:00 -0500 Subject: [PATCH 304/456] Bump github/codeql-action from 3.28.4 to 3.28.5 (#1746) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.4 to 3.28.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/ee117c905ab18f32fa0f66c2fe40ecc8013f3e04...f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 48433311c..12ae5be5e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 + uses: github/codeql-action/init@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 + uses: github/codeql-action/autobuild@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@ee117c905ab18f32fa0f66c2fe40ecc8013f3e04 + uses: github/codeql-action/analyze@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 From 70c485695c8846fd00f8bd6e94a4154d943363fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 09:11:58 -0500 Subject: [PATCH 305/456] Bump github/codeql-action from 3.28.5 to 3.28.6 (#1747) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.5 to 3.28.6. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4...17a820bf2e43b47be2c72b39cc905417bc1ab6d0) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 12ae5be5e..06b3dd29a 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 + uses: github/codeql-action/init@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 + uses: github/codeql-action/autobuild@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 + uses: github/codeql-action/analyze@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 From 813a5204dc08e7d672b804c2d0b63b4614e98719 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:17:09 -0500 Subject: [PATCH 306/456] Bump github/codeql-action from 3.28.6 to 3.28.8 (#1748) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.6 to 3.28.8. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/17a820bf2e43b47be2c72b39cc905417bc1ab6d0...dd746615b3b9d728a6a37ca2045b68ca76d4841a) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 06b3dd29a..a578a7da7 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 + uses: github/codeql-action/init@dd746615b3b9d728a6a37ca2045b68ca76d4841a with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 + uses: github/codeql-action/autobuild@dd746615b3b9d728a6a37ca2045b68ca76d4841a # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 + uses: github/codeql-action/analyze@dd746615b3b9d728a6a37ca2045b68ca76d4841a From 6bed7bb5606b7fb5929ecb83594d704d5a0f61bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 07:58:10 -0500 Subject: [PATCH 307/456] Bump golang.org/x/text from 0.21.0 to 0.22.0 (#1752) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.21.0 to 0.22.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.21.0...v0.22.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6b3c64ec1..2bbf54643 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.29.0 golang.org/x/term v0.28.0 - golang.org/x/text v0.21.0 + golang.org/x/text v0.22.0 ) require ( diff --git a/go.sum b/go.sum index 5a052c3a8..2dbf6538f 100644 --- a/go.sum +++ b/go.sum @@ -41,8 +41,8 @@ golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 225072384a195d178e897f32ed258e7513dfb509 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 08:30:24 -0500 Subject: [PATCH 308/456] Bump golang.org/x/term from 0.28.0 to 0.29.0 (#1751) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.28.0 to 0.29.0. 
- [Commits](https://github.com/golang/term/compare/v0.28.0...v0.29.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 2bbf54643..8d843cc61 100644 --- a/go.mod +++ b/go.mod @@ -26,8 +26,8 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.29.0 - golang.org/x/term v0.28.0 + golang.org/x/sys v0.30.0 + golang.org/x/term v0.29.0 golang.org/x/text v0.22.0 ) diff --git a/go.sum b/go.sum index 2dbf6538f..32aae654a 100644 --- a/go.sum +++ b/go.sum @@ -37,10 +37,10 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= -golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= golang.org/x/text 
v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From bd2497a2854de5aa87928b669ef65839b972dde7 Mon Sep 17 00:00:00 2001 From: Michel Lind Date: Wed, 5 Feb 2025 14:32:23 +0100 Subject: [PATCH 309/456] Fix non-constant format string errors with Go 1.24 (#1745) Use `errors.New` instead of `fmt.Errorf` and `fmt.Fprint` instead of `fmt.Fprintf` if a non-constant string is used Signed-off-by: Michel Lind --- pkg/dsl/cst/builtin_functions.go | 3 ++- pkg/dsl/cst/lvalues.go | 3 ++- pkg/dsl/cst/root.go | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index e3558d1cd..12c16ea45 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -5,6 +5,7 @@ package cst import ( + "errors" "fmt" "github.com/johnkerl/miller/v6/pkg/bifs" @@ -78,7 +79,7 @@ func (root *RootNode) BuildMultipleArityFunctionCallsiteNode( return root.BuildTernaryFunctionCallsiteNode(astNode, builtinFunctionInfo) } - return nil, fmt.Errorf( + return nil, errors.New( "at CST BuildMultipleArityFunctionCallsiteNode: function name not found: " + builtinFunctionInfo.name, ) diff --git a/pkg/dsl/cst/lvalues.go b/pkg/dsl/cst/lvalues.go index b680644cb..799d8801e 100644 --- a/pkg/dsl/cst/lvalues.go +++ b/pkg/dsl/cst/lvalues.go @@ -6,6 +6,7 @@ package cst import ( + "errors" "fmt" "os" @@ -62,7 +63,7 @@ func (root *RootNode) BuildAssignableNode( return root.BuildEnvironmentVariableLvalueNode(astNode) } - return nil, fmt.Errorf( + return nil, errors.New( "at CST BuildAssignableNode: unhandled AST node " + string(astNode.Type), ) } diff --git a/pkg/dsl/cst/root.go b/pkg/dsl/cst/root.go index a1c5b0b99..e6d2de59a 100644 --- a/pkg/dsl/cst/root.go +++ b/pkg/dsl/cst/root.go @@ -7,6 +7,7 @@ package cst import ( "container/list" + "errors" "fmt" "os" "strings" @@ -163,7 +164,7 @@ func (root 
*RootNode) IngestAST( err = nil if ast.RootNode == nil { - return hadWarnings, fmt.Errorf("cannot build CST from nil AST root") + return hadWarnings, errors.New("cannot build CST from nil AST root") } // Check for things that are syntax errors but not done in the AST for @@ -417,7 +418,7 @@ func (root *RootNode) resolveSubroutineCallsites() error { return err } if uds == nil { - return fmt.Errorf("mlr: subroutine name not found: " + subroutineName) + return errors.New("mlr: subroutine name not found: " + subroutineName) } unresolvedSubroutineCallsite.uds = uds From 20e1c8780140605cd2f6226934b1aa69333fdb42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Feb 2025 07:57:56 -0500 Subject: [PATCH 310/456] Bump github/codeql-action from 3.28.8 to 3.28.9 (#1753) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.8 to 3.28.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/dd746615b3b9d728a6a37ca2045b68ca76d4841a...9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index a578a7da7..1e90da053 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@dd746615b3b9d728a6a37ca2045b68ca76d4841a + uses: github/codeql-action/init@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@dd746615b3b9d728a6a37ca2045b68ca76d4841a + uses: github/codeql-action/autobuild@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@dd746615b3b9d728a6a37ca2045b68ca76d4841a + uses: github/codeql-action/analyze@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 From a9a25490749d10b635bb6cfdf5b07546632a6cb8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Feb 2025 08:11:12 -0500 Subject: [PATCH 311/456] Bump goreleaser/goreleaser-action from 6.1.0 to 6.2.1 (#1755) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 6.1.0 to 6.2.1. - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/9ed2f89a662bf1735a48bc8557fd212fa902bebf...90a3faa9d0182683851fbfa97ca1a2cb983bfca3) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 88a639b7a..eea331461 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf + uses: goreleaser/goreleaser-action@90a3faa9d0182683851fbfa97ca1a2cb983bfca3 #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From ea0550b09beb20c8e5faa6b454dd813f974eea9f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:04:29 -0500 Subject: [PATCH 312/456] Bump actions/cache from 4.2.0 to 4.2.1 (#1756) Bumps [actions/cache](https://github.com/actions/cache) from 4.2.0 to 4.2.1. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/1bd1e32a3bdc45362d1e726936510720a7c30a57...0c907a75c2c80ebcb7f088228285e798b750cf8f) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eea331461..67538813e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 + uses: actions/cache@0c907a75c2c80ebcb7f088228285e798b750cf8f with: path: | ~/.cache/go-build From 4fe7051c1ef50d39dcbc4f7ed78503b067c26b2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 09:01:06 -0500 Subject: [PATCH 313/456] Bump actions/upload-artifact from 4.6.0 to 4.6.1 (#1760) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.6.0 to 4.6.1. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08...4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0d7707755..b812b1183 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 + - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 8e11fd36d5aa5a8c2e745372283ab029ec94fbd7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 09:12:48 -0500 Subject: [PATCH 314/456] Bump github/codeql-action from 3.28.9 to 3.28.10 (#1759) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.9 to 3.28.10. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0...b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 1e90da053..b6dac853d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 + uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 + uses: github/codeql-action/autobuild@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 + uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d From 7d51030b88dab37fae3fac76d808b28341d28c01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 28 Feb 2025 09:15:59 -0500 Subject: [PATCH 315/456] Bump actions/cache from 4.2.1 to 4.2.2 (#1762) Bumps [actions/cache](https://github.com/actions/cache) from 4.2.1 to 4.2.2. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/0c907a75c2c80ebcb7f088228285e798b750cf8f...d4323d4df104b026a6aa633fdb11d772146be0bf) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 67538813e..9aa29d525 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@0c907a75c2c80ebcb7f088228285e798b750cf8f + uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf with: path: | ~/.cache/go-build From 9963df409089b14d6730b6a59c7c4182a8d0e8c3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 5 Mar 2025 08:19:15 -0500 Subject: [PATCH 316/456] Switch to generics (#1763) * gradually replace list.List with slices * gradually replace list.List with slices * more * more * more --- pkg/dsl/cst/root.go | 34 +++++++++------------ pkg/dsl/cst/types.go | 9 +++--- pkg/runtime/stack.go | 18 +++++------ pkg/runtime/state.go | 17 +++++------ pkg/terminals/regtest/regtester.go | 49 ++++++++++++++---------------- 5 files changed, 57 insertions(+), 70 deletions(-) diff --git a/pkg/dsl/cst/root.go b/pkg/dsl/cst/root.go index e6d2de59a..099301bac 100644 --- a/pkg/dsl/cst/root.go +++ b/pkg/dsl/cst/root.go @@ -6,7 +6,6 @@ package cst import ( - "container/list" "errors" "fmt" "os" @@ -37,9 +36,9 @@ func NewEmptyRoot( udfManager: NewUDFManager(), udsManager: NewUDSManager(), allowUDFUDSRedefinitions: false, - unresolvedFunctionCallsites: list.New(), - unresolvedSubroutineCallsites: list.New(), - outputHandlerManagers: list.New(), + unresolvedFunctionCallsites: make([]*UDFCallsite, 0), + unresolvedSubroutineCallsites: make([]*UDSCallsite, 0), + outputHandlerManagers: make([]output.OutputHandlerManager, 0), recordWriterOptions: recordWriterOptions, dslInstanceType: dslInstanceType, } @@ -364,11 +363,11 @@ func (root *RootNode) buildMainPass(ast *dsl.AST, 
isReplImmediate bool) error { // This is invoked within the buildMainPass call tree whenever a function is // called before it's defined. func (root *RootNode) rememberUnresolvedFunctionCallsite(udfCallsite *UDFCallsite) { - root.unresolvedFunctionCallsites.PushBack(udfCallsite) + root.unresolvedFunctionCallsites = append(root.unresolvedFunctionCallsites, udfCallsite) } func (root *RootNode) rememberUnresolvedSubroutineCallsite(udsCallsite *UDSCallsite) { - root.unresolvedSubroutineCallsites.PushBack(udsCallsite) + root.unresolvedSubroutineCallsites = append(root.unresolvedSubroutineCallsites, udsCallsite) } // After-pass after buildMainPass returns, in case a function was called before @@ -381,10 +380,9 @@ func (root *RootNode) rememberUnresolvedSubroutineCallsite(udsCallsite *UDSCalls // So, our error message should reflect all those options. func (root *RootNode) resolveFunctionCallsites() error { - for root.unresolvedFunctionCallsites.Len() > 0 { - unresolvedFunctionCallsite := root.unresolvedFunctionCallsites.Remove( - root.unresolvedFunctionCallsites.Front(), - ).(*UDFCallsite) + for len(root.unresolvedFunctionCallsites) > 0 { + unresolvedFunctionCallsite := root.unresolvedFunctionCallsites[0] + root.unresolvedFunctionCallsites = root.unresolvedFunctionCallsites[1:] functionName := unresolvedFunctionCallsite.udf.signature.funcOrSubrName callsiteArity := unresolvedFunctionCallsite.udf.signature.arity @@ -405,10 +403,9 @@ func (root *RootNode) resolveFunctionCallsites() error { } func (root *RootNode) resolveSubroutineCallsites() error { - for root.unresolvedSubroutineCallsites.Len() > 0 { - unresolvedSubroutineCallsite := root.unresolvedSubroutineCallsites.Remove( - root.unresolvedSubroutineCallsites.Front(), - ).(*UDSCallsite) + for len(root.unresolvedSubroutineCallsites) > 0 { + unresolvedSubroutineCallsite := root.unresolvedSubroutineCallsites[0] + root.unresolvedSubroutineCallsites = root.unresolvedSubroutineCallsites[1:] subroutineName := 
unresolvedSubroutineCallsite.uds.signature.funcOrSubrName callsiteArity := unresolvedSubroutineCallsite.uds.signature.arity @@ -438,12 +435,11 @@ func (root *RootNode) resolveSubroutineCallsites() error { func (root *RootNode) RegisterOutputHandlerManager( outputHandlerManager output.OutputHandlerManager, ) { - root.outputHandlerManagers.PushBack(outputHandlerManager) + root.outputHandlerManagers = append(root.outputHandlerManagers, outputHandlerManager) } func (root *RootNode) ProcessEndOfStream() { - for entry := root.outputHandlerManagers.Front(); entry != nil; entry = entry.Next() { - outputHandlerManager := entry.Value.(output.OutputHandlerManager) + for _, outputHandlerManager := range root.outputHandlerManagers { errs := outputHandlerManager.Close() if len(errs) != 0 { for _, err := range errs { @@ -501,8 +497,8 @@ func (root *RootNode) ExecuteREPLImmediate(state *runtime.State) (outrec *mlrval // This is the 'and then discarded' part of that. func (root *RootNode) ResetForREPL() { root.replImmediateBlock = NewStatementBlockNode() - root.unresolvedFunctionCallsites = list.New() - root.unresolvedSubroutineCallsites = list.New() + root.unresolvedFunctionCallsites = make([]*UDFCallsite, 0) + root.unresolvedSubroutineCallsites = make([]*UDSCallsite, 0) } // This is for the REPL's context-printer command. 
diff --git a/pkg/dsl/cst/types.go b/pkg/dsl/cst/types.go index bc1b2768a..dea4861a6 100644 --- a/pkg/dsl/cst/types.go +++ b/pkg/dsl/cst/types.go @@ -5,11 +5,10 @@ package cst import ( - "container/list" - "github.com/johnkerl/miller/v6/pkg/cli" "github.com/johnkerl/miller/v6/pkg/dsl" "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/output" "github.com/johnkerl/miller/v6/pkg/runtime" ) @@ -44,9 +43,9 @@ type RootNode struct { udfManager *UDFManager udsManager *UDSManager allowUDFUDSRedefinitions bool - unresolvedFunctionCallsites *list.List - unresolvedSubroutineCallsites *list.List - outputHandlerManagers *list.List + unresolvedFunctionCallsites []*UDFCallsite + unresolvedSubroutineCallsites []*UDSCallsite + outputHandlerManagers []output.OutputHandlerManager recordWriterOptions *cli.TWriterOptions dslInstanceType DSLInstanceType // put, filter, repl strictMode bool diff --git a/pkg/runtime/stack.go b/pkg/runtime/stack.go index 6424ebf19..a71f83379 100644 --- a/pkg/runtime/stack.go +++ b/pkg/runtime/stack.go @@ -26,7 +26,6 @@ package runtime import ( - "container/list" "fmt" "github.com/johnkerl/miller/v6/pkg/lib" @@ -68,7 +67,7 @@ func (sv *StackVariable) GetName() string { type Stack struct { // list of *StackFrameSet - stackFrameSets *list.List + stackFrameSets []*StackFrameSet // Invariant: equal to the head of the stackFrameSets list. 
This is cached // since all sets/gets in between frameset-push and frameset-pop will all @@ -77,9 +76,9 @@ type Stack struct { } func NewStack() *Stack { - stackFrameSets := list.New() + stackFrameSets := make([]*StackFrameSet, 1) head := newStackFrameSet() - stackFrameSets.PushFront(head) + stackFrameSets[0] = head return &Stack{ stackFrameSets: stackFrameSets, head: head, @@ -89,13 +88,13 @@ func NewStack() *Stack { // For when a user-defined function/subroutine is being entered func (stack *Stack) PushStackFrameSet() { stack.head = newStackFrameSet() - stack.stackFrameSets.PushFront(stack.head) + stack.stackFrameSets = append([]*StackFrameSet{stack.head}, stack.stackFrameSets...) } // For when a user-defined function/subroutine is being exited func (stack *Stack) PopStackFrameSet() { - stack.stackFrameSets.Remove(stack.stackFrameSets.Front()) - stack.head = stack.stackFrameSets.Front().Value.(*StackFrameSet) + stack.stackFrameSets = stack.stackFrameSets[1:] + stack.head = stack.stackFrameSets[0] } // ---------------------------------------------------------------- @@ -180,9 +179,8 @@ func (stack *Stack) UnsetIndexed( } func (stack *Stack) Dump() { - fmt.Printf("STACK FRAMESETS (count %d):\n", stack.stackFrameSets.Len()) - for entry := stack.stackFrameSets.Front(); entry != nil; entry = entry.Next() { - stackFrameSet := entry.Value.(*StackFrameSet) + fmt.Printf("STACK FRAMESETS (count %d):\n", len(stack.stackFrameSets)) + for _, stackFrameSet := range stack.stackFrameSets { stackFrameSet.dump() } } diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go index 6b8b5d29c..3fe93aa18 100644 --- a/pkg/runtime/state.go +++ b/pkg/runtime/state.go @@ -35,7 +35,7 @@ type State struct { // This is necessary for the stateful semantics of `=~` and "\1", "\2", etc. // Those are avoided when the user calls `matchx`, which is newer, and // stateless. However, `=~` exists in the Miller DSL and we must support it. 
- regexCapturesByFrame *list.List // list of []string + regexCapturesByFrame [][]string Options *cli.TOptions @@ -46,8 +46,8 @@ type State struct { func NewEmptyState(options *cli.TOptions, strictMode bool) *State { // See lib.MakeEmptyCaptures for context. - regexCapturesByFrame := list.New() - regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures()) + regexCapturesByFrame := make([][]string, 1) + regexCapturesByFrame[0] = lib.MakeEmptyCaptures() oosvars := mlrval.NewMlrmap() return &State{ @@ -72,25 +72,24 @@ func (state *State) Update( ) { state.Inrec = inrec state.Context = context - state.regexCapturesByFrame.Front().Value = lib.MakeEmptyCaptures() + state.regexCapturesByFrame[0] = lib.MakeEmptyCaptures() } func (state *State) SetRegexCaptures( captures []string, ) { - state.regexCapturesByFrame.Front().Value = lib.CopyStringArray(captures) + state.regexCapturesByFrame[0] = lib.CopyStringArray(captures) } func (state *State) GetRegexCaptures() []string { - regexCaptures := state.regexCapturesByFrame.Front().Value.([]string) + regexCaptures := state.regexCapturesByFrame[0] return lib.CopyStringArray(regexCaptures) } func (state *State) PushRegexCapturesFrame() { - state.regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures()) + state.regexCapturesByFrame = append([][]string{lib.MakeEmptyCaptures()}, state.regexCapturesByFrame...) 
} func (state *State) PopRegexCapturesFrame() { - // There is no PopFront - state.regexCapturesByFrame.Remove(state.regexCapturesByFrame.Front()) + state.regexCapturesByFrame = state.regexCapturesByFrame[1:] } diff --git a/pkg/terminals/regtest/regtester.go b/pkg/terminals/regtest/regtester.go index 029ace13b..749002b89 100644 --- a/pkg/terminals/regtest/regtester.go +++ b/pkg/terminals/regtest/regtester.go @@ -56,7 +56,6 @@ package regtest import ( - "container/list" "fmt" "os" "path/filepath" @@ -110,8 +109,8 @@ type RegTester struct { casePassCount int caseFailCount int - failDirNames *list.List - failCaseNames *list.List + failDirNames []string + failCaseNames []string firstNFailsToShow int } @@ -132,8 +131,8 @@ func NewRegTester( directoryFailCount: 0, casePassCount: 0, caseFailCount: 0, - failDirNames: list.New(), - failCaseNames: list.New(), + failDirNames: make([]string, 0), + failCaseNames: make([]string, 0), firstNFailsToShow: firstNFailsToShow, } } @@ -182,13 +181,13 @@ func (regtester *RegTester) Execute( regtester.executeSinglePath(path) } - if regtester.failCaseNames.Len() > 0 && regtester.firstNFailsToShow > 0 { + if len(regtester.failCaseNames) > 0 && regtester.firstNFailsToShow > 0 { fmt.Println() fmt.Println("RERUNS OF FIRST FAILED CASE FILES:") verbosityLevel := 3 i := 0 - for e := regtester.failCaseNames.Front(); e != nil; e = e.Next() { - regtester.executeSingleCmdFile(e.Value.(string), verbosityLevel) + for _, e := range regtester.failCaseNames { + regtester.executeSingleCmdFile(e, verbosityLevel) i++ if i >= regtester.firstNFailsToShow { break @@ -196,11 +195,11 @@ func (regtester *RegTester) Execute( } } - if !regtester.plainMode && regtester.failDirNames.Len() > 0 { + if !regtester.plainMode && len(regtester.failDirNames) > 0 { fmt.Println() fmt.Println("FAILED CASE DIRECTORIES:") - for e := regtester.failDirNames.Front(); e != nil; e = e.Next() { - fmt.Printf(" %s/\n", e.Value.(string)) + for _, e := range regtester.failDirNames { + 
fmt.Printf(" %s/\n", e) } } @@ -248,7 +247,7 @@ func (regtester *RegTester) executeSinglePath( regtester.directoryPassCount++ } else { regtester.directoryFailCount++ - regtester.failDirNames.PushBack(path) + regtester.failDirNames = append(regtester.failDirNames, path) } } return passed @@ -260,7 +259,7 @@ func (regtester *RegTester) executeSinglePath( regtester.casePassCount++ } else { regtester.caseFailCount++ - regtester.failCaseNames.PushBack(path) + regtester.failCaseNames = append(regtester.failCaseNames, path) } return passed } @@ -478,8 +477,7 @@ func (regtester *RegTester) executeSingleCmdFile( // Copy any files requested by the test. (Most don't; some do, e.g. those // which test the write-in-place logic of mlr -I.) - for pe := preCopySrcDestPairs.Front(); pe != nil; pe = pe.Next() { - pair := pe.Value.(stringPair) + for _, pair := range preCopySrcDestPairs { src := pair.first dst := pair.second if verbosityLevel >= 3 { @@ -564,8 +562,7 @@ func (regtester *RegTester) executeSingleCmdFile( } } - for pe := postCompareExpectedActualPairs.Front(); pe != nil; pe = pe.Next() { - pair := pe.Value.(stringPair) + for _, pair := range postCompareExpectedActualPairs { expectedFileName := pair.first actualFileName := pair.second @@ -686,8 +683,7 @@ func (regtester *RegTester) executeSingleCmdFile( // Compare any additional output files. Most test cases don't have // these (just stdout/stderr), but some do: for example, those which // test the tee verb/function. - for pe := postCompareExpectedActualPairs.Front(); pe != nil; pe = pe.Next() { - pair := pe.Value.(stringPair) + for _, pair := range postCompareExpectedActualPairs { expectedFileName := pair.first actualFileName := pair.second ok, expectedContents, actualContents, err := regtester.compareFiles(expectedFileName, actualFileName, caseDir) @@ -725,8 +721,7 @@ func (regtester *RegTester) executeSingleCmdFile( } // Clean up any requested file-copies so that we're git-clean after the regression-test run. 
- for pe := preCopySrcDestPairs.Front(); pe != nil; pe = pe.Next() { - pair := pe.Value.(stringPair) + for _, pair := range preCopySrcDestPairs { dst := pair.second os.Remove(dst) if verbosityLevel >= 3 { @@ -735,8 +730,7 @@ func (regtester *RegTester) executeSingleCmdFile( } // Clean up any extra output files so that we're git-clean after the regression-test run. - for pe := postCompareExpectedActualPairs.Front(); pe != nil; pe = pe.Next() { - pair := pe.Value.(stringPair) + for _, pair := range postCompareExpectedActualPairs { actualFileName := pair.second os.Remove(actualFileName) if verbosityLevel >= 3 { @@ -868,12 +862,13 @@ func (regtester *RegTester) loadEnvFile( func (regtester *RegTester) loadStringPairFile( filename string, caseDir string, -) (*list.List, error) { +) ([]stringPair, error) { + pairs := make([]stringPair, 0) // If the file doesn't exist that's the normal case -- most cases do not // have a .precopy or .postcmp file. _, err := os.Stat(filename) if os.IsNotExist(err) { - return list.New(), nil + return pairs, nil } // If the file does exist and isn't loadable, that's an error. 
@@ -882,7 +877,7 @@ func (regtester *RegTester) loadStringPairFile( return nil, err } - pairs := list.New() + pairs = make([]stringPair, 0) lines := strings.Split(contents, "\n") for _, line := range lines { line = strings.TrimSuffix(line, "\r") @@ -897,7 +892,7 @@ func (regtester *RegTester) loadStringPairFile( ) } pair := stringPair{first: fields[0], second: fields[1]} - pairs.PushBack(pair) + pairs = append(pairs, pair) } return pairs, nil } From d08ee477328feb5f929465a6fd0e5b231b424ae9 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 6 Mar 2025 08:32:03 -0500 Subject: [PATCH 317/456] Use Go 1.21 in CI (#1768) --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index b812b1183..d4c7756c1 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -20,7 +20,7 @@ jobs: - name: Set up Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 with: - go-version: 1.19 + go-version: 1.21 - name: Build run: make build From d45e7b06a62aad9eb8b9dc53fa3f625b096ee51e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 08:43:38 -0400 Subject: [PATCH 318/456] Bump github/codeql-action from 3.28.10 to 3.28.11 (#1769) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.10 to 3.28.11. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d...6bb031afdd8eb862ea3fc1848194185e076637e5) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index b6dac853d..c8ee976fd 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d + uses: github/codeql-action/init@6bb031afdd8eb862ea3fc1848194185e076637e5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d + uses: github/codeql-action/autobuild@6bb031afdd8eb862ea3fc1848194185e076637e5 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d + uses: github/codeql-action/analyze@6bb031afdd8eb862ea3fc1848194185e076637e5 From b0addbe4f7efd2aa4fb909b2446218503e012a6e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:23:55 -0400 Subject: [PATCH 319/456] Bump actions/setup-go from 5.3.0 to 5.4.0 (#1771) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.3.0 to 5.4.0. 
- [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/f111f3307d8850f501ac008e886eec1fd1932a34...0aaccfd150d50ccaeb58ebd88d36e91967a5f35b) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index d4c7756c1..f04cb8999 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b with: go-version: 1.21 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9aa29d525..684c717eb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b with: go-version: ${{ env.GO_VERSION }} id: go From 1bfb8b0cc446fa2ccd7105efe02b5d95519af26d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 08:47:44 -0400 Subject: [PATCH 320/456] Bump actions/cache from 4.2.2 to 4.2.3 (#1774) Bumps [actions/cache](https://github.com/actions/cache) from 4.2.2 to 4.2.3. 
- [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/d4323d4df104b026a6aa633fdb11d772146be0bf...5a3ec84eff668545956fd18022155c47e93e2684) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 684c717eb..167ddd5e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 with: path: | ~/.cache/go-build From 48eba537aade391b049ba9be2e066430444cb2d1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 08:49:17 -0400 Subject: [PATCH 321/456] Bump actions/upload-artifact from 4.6.1 to 4.6.2 (#1773) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.6.1 to 4.6.2. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1...ea165f8d65b6e75b540449e92b4886f43607fa02) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index f04cb8999..189bafd82 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From f13a2467543de311528c19199d855135836f4f3f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 08:49:25 -0400 Subject: [PATCH 322/456] Bump github/codeql-action from 3.28.11 to 3.28.12 (#1772) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.11 to 3.28.12. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/6bb031afdd8eb862ea3fc1848194185e076637e5...5f8171a638ada777af81d42b55959a643bb29017) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c8ee976fd..b5a811be6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@6bb031afdd8eb862ea3fc1848194185e076637e5 + uses: github/codeql-action/init@5f8171a638ada777af81d42b55959a643bb29017 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@6bb031afdd8eb862ea3fc1848194185e076637e5 + uses: github/codeql-action/autobuild@5f8171a638ada777af81d42b55959a643bb29017 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@6bb031afdd8eb862ea3fc1848194185e076637e5 + uses: github/codeql-action/analyze@5f8171a638ada777af81d42b55959a643bb29017 From 6e6e893bdaa1d26acbffd6109b809b0ff66c7651 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Mar 2025 09:10:26 -0400 Subject: [PATCH 323/456] Bump github/codeql-action from 3.28.12 to 3.28.13 (#1776) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.12 to 3.28.13. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/5f8171a638ada777af81d42b55959a643bb29017...1b549b9259bda1cb5ddde3b41741a82a2d15a841) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index b5a811be6..7877fb952 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@5f8171a638ada777af81d42b55959a643bb29017 + uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@5f8171a638ada777af81d42b55959a643bb29017 + uses: github/codeql-action/autobuild@1b549b9259bda1cb5ddde3b41741a82a2d15a841 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@5f8171a638ada777af81d42b55959a643bb29017 + uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 From b6ee2eb2024d4d2613521d8533c6d83169725219 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 08:42:13 -0400 Subject: [PATCH 324/456] Bump goreleaser/goreleaser-action from 6.2.1 to 6.3.0 (#1778) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 6.2.1 to 6.3.0. 
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/90a3faa9d0182683851fbfa97ca1a2cb983bfca3...9c156ee8a17a598857849441385a2041ef570552) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 167ddd5e6..b9a60380c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@90a3faa9d0182683851fbfa97ca1a2cb983bfca3 + uses: goreleaser/goreleaser-action@9c156ee8a17a598857849441385a2041ef570552 #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From 07130d8d653e181ba32dfd902547d12e3c14ff0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Apr 2025 08:54:34 -0400 Subject: [PATCH 325/456] Bump github/codeql-action from 3.28.13 to 3.28.14 (#1779) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.13 to 3.28.14. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/1b549b9259bda1cb5ddde3b41741a82a2d15a841...fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.28.14 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7877fb952..d9072f899 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + uses: github/codeql-action/init@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + uses: github/codeql-action/autobuild@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + uses: github/codeql-action/analyze@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 From 121dd9425fc3579e53e6e077280fe4b456005ac1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 08:57:58 -0400 Subject: [PATCH 326/456] Bump github/codeql-action from 3.28.14 to 3.28.15 (#1783) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.14 to 3.28.15. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2...45775bd8235c68ba998cffa5171334d58593da47) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.28.15 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d9072f899..41b1c2c08 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 + uses: github/codeql-action/init@45775bd8235c68ba998cffa5171334d58593da47 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 + uses: github/codeql-action/autobuild@45775bd8235c68ba998cffa5171334d58593da47 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@fc7e4a0fa01c3cca5fd6a1fddec5c0740c977aa2 + uses: github/codeql-action/analyze@45775bd8235c68ba998cffa5171334d58593da47 From 629aebb9892eb12f774d4e3ab6156c3e8bd8c07e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 24 Apr 2025 08:37:41 -0400 Subject: [PATCH 327/456] Bump github/codeql-action from 3.28.15 to 3.28.16 (#1790) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.15 to 3.28.16. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/45775bd8235c68ba998cffa5171334d58593da47...28deaeda66b76a05916b6923827895f2b14ab387) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.28.16 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 41b1c2c08..b36c244b9 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@45775bd8235c68ba998cffa5171334d58593da47 + uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@45775bd8235c68ba998cffa5171334d58593da47 + uses: github/codeql-action/autobuild@28deaeda66b76a05916b6923827895f2b14ab387 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@45775bd8235c68ba998cffa5171334d58593da47 + uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 From 100166532c5fe72b9f05de516c3e6fcfa068b879 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 1 May 2025 17:08:55 -0400 Subject: [PATCH 328/456] Fix `joinv` with `""` separator (#1794) * codemod per se * unit-test coverage --- pkg/bifs/collections.go | 2 +- test/cases/dsl-split-join/0028/cmd | 1 + test/cases/dsl-split-join/0028/experr | 0 test/cases/dsl-split-join/0028/expout | 1 + test/cases/dsl-split-join/0028/mlr | 1 + 5 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 test/cases/dsl-split-join/0028/cmd create mode 100644 test/cases/dsl-split-join/0028/experr create mode 100644 test/cases/dsl-split-join/0028/expout create mode 100644 test/cases/dsl-split-join/0028/mlr diff --git a/pkg/bifs/collections.go b/pkg/bifs/collections.go index cd3f87da2..a47730702 100644 --- a/pkg/bifs/collections.go +++ b/pkg/bifs/collections.go @@ -373,7 +373,7 @@ func BIF_joink(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { // joinv([3,4,5], ",") -> "3,4,5" // joinv({"a":3,"b":4,"c":5}, ",") -> "3,4,5" func BIF_joinv(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval { - if !input2.IsString() { + if !input2.IsStringOrVoid() { return mlrval.FromNotStringError("joinv", input2) } fieldSeparator := input2.AcquireStringValue() diff --git a/test/cases/dsl-split-join/0028/cmd b/test/cases/dsl-split-join/0028/cmd new file mode 100644 index 000000000..6add080d4 --- /dev/null +++ 
b/test/cases/dsl-split-join/0028/cmd @@ -0,0 +1 @@ +mlr -n put -f ${CASEDIR}/mlr diff --git a/test/cases/dsl-split-join/0028/experr b/test/cases/dsl-split-join/0028/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-split-join/0028/expout b/test/cases/dsl-split-join/0028/expout new file mode 100644 index 000000000..51b400812 --- /dev/null +++ b/test/cases/dsl-split-join/0028/expout @@ -0,0 +1 @@ +345 diff --git a/test/cases/dsl-split-join/0028/mlr b/test/cases/dsl-split-join/0028/mlr new file mode 100644 index 000000000..08e79d8dc --- /dev/null +++ b/test/cases/dsl-split-join/0028/mlr @@ -0,0 +1 @@ +end {print joinv([3,4,5], "")} From 34bc8a1c3d23d8b0eed6fb6070286ffb67f1b631 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 1 May 2025 17:18:17 -0400 Subject: [PATCH 329/456] Fix `print` within `begin{}`/`end{}` (#1795) * codemod per se * unit-test coverage * lint --- pkg/output/channel_writer.go | 3 ++- test/cases/dsl-begin-end/0010/cmd | 1 + test/cases/dsl-begin-end/0010/experr | 0 test/cases/dsl-begin-end/0010/expout | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 test/cases/dsl-begin-end/0010/cmd create mode 100644 test/cases/dsl-begin-end/0010/experr create mode 100644 test/cases/dsl-begin-end/0010/expout diff --git a/pkg/output/channel_writer.go b/pkg/output/channel_writer.go index 86be3324a..3a8cab8e4 100644 --- a/pkg/output/channel_writer.go +++ b/pkg/output/channel_writer.go @@ -70,8 +70,9 @@ func channelWriterHandleBatch( // XXX more // XXX also make sure this results in exit 1 & goroutine cleanup - if writerOptions.FailOnDataError { + if writerOptions.FailOnDataError && record != nil { ok := true + fmt.Printf("AAA %#v", record) for pe := record.Head; pe != nil; pe = pe.Next { if pe.Value.IsError() { context := recordAndContext.Context diff --git a/test/cases/dsl-begin-end/0010/cmd b/test/cases/dsl-begin-end/0010/cmd new file mode 100644 index 000000000..11bc26154 --- /dev/null +++ 
b/test/cases/dsl-begin-end/0010/cmd @@ -0,0 +1 @@ +mlr --from test/input/s.dkvp put -q 'begin{print 8}; end{print 9}' diff --git a/test/cases/dsl-begin-end/0010/experr b/test/cases/dsl-begin-end/0010/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/dsl-begin-end/0010/expout b/test/cases/dsl-begin-end/0010/expout new file mode 100644 index 000000000..512858e60 --- /dev/null +++ b/test/cases/dsl-begin-end/0010/expout @@ -0,0 +1,2 @@ +8 +9 From bbcf903647b69df58399c54ed0a9a30a267e17bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 2 May 2025 09:31:18 -0400 Subject: [PATCH 330/456] Bump github/codeql-action from 3.28.16 to 3.28.17 (#1796) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.16 to 3.28.17. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/28deaeda66b76a05916b6923827895f2b14ab387...60168efe1c415ce0f5521ea06d5c2062adbeed1b) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.28.17 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index b36c244b9..ec295bdb7 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 + uses: github/codeql-action/init@60168efe1c415ce0f5521ea06d5c2062adbeed1b with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@28deaeda66b76a05916b6923827895f2b14ab387 + uses: github/codeql-action/autobuild@60168efe1c415ce0f5521ea06d5c2062adbeed1b # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 + uses: github/codeql-action/analyze@60168efe1c415ce0f5521ea06d5c2062adbeed1b From ca7d47454d21a86d01e61bc982303c514a03be1e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 5 May 2025 09:33:03 -0400 Subject: [PATCH 331/456] Improve help message on non-existent verb (#1798) --- pkg/climain/mlrcli_parse.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index 939f10e36..0400df5ed 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -192,8 +192,8 @@ func parseCommandLinePassOne( transformerSetup := transformers.LookUp(verb) if transformerSetup == nil { fmt.Fprintf(os.Stderr, - "%s: verb \"%s\" not found. Please use \"%s --help\" for a list.\n", - "mlr", verb, "mlr") + "mlr: verb \"%s\" not found. 
Please use \"mlr -l\" for a list.\n", + verb) os.Exit(1) } From 35c7eeb97713dc1e9c1f6ef42ba45942bc69ad54 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 May 2025 08:15:02 -0400 Subject: [PATCH 332/456] Bump actions/setup-go from 5.4.0 to 5.5.0 (#1802) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.4.0 to 5.5.0. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/0aaccfd150d50ccaeb58ebd88d36e91967a5f35b...d35c59abb061a4a6fb18e82ac0862c26744d6ab5) --- updated-dependencies: - dependency-name: actions/setup-go dependency-version: 5.5.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 189bafd82..402382e47 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Set up Go - uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 with: go-version: 1.21 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b9a60380c..a0f0a5178 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 with: go-version: ${{ env.GO_VERSION }} id: go From df73ad8ec05cbda0dde84b98fcd083c0d7f0aaac Mon Sep 17 00:00:00 2001 From: "Christian G. 
Warden" Date: Thu, 15 May 2025 17:17:08 -0500 Subject: [PATCH 333/456] Add surv Verb to Estimate a Survival Curve (#1788) Add a surv verb to estimate a survival curve using Kaplan-Meier. It requires duration and status (event or censored) columns, and outputs each distinct duration and corresponding probability of survival. --- go.mod | 11 +- go.sum | 14 +- pkg/transformers/aaa_transformer_table.go | 1 + pkg/transformers/surv.go | 173 ++++++++++++++++++++++ test/cases/cli-help/0001/expout | 10 ++ test/cases/verb-surv/0001/cmd | 1 + test/cases/verb-surv/0001/experr | 0 test/cases/verb-surv/0001/expout | 4 + test/input/surv.csv | 6 + 9 files changed, 216 insertions(+), 4 deletions(-) create mode 100644 pkg/transformers/surv.go create mode 100644 test/cases/verb-surv/0001/cmd create mode 100644 test/cases/verb-surv/0001/experr create mode 100644 test/cases/verb-surv/0001/expout create mode 100644 test/input/surv.csv diff --git a/go.mod b/go.mod index 8d843cc61..1868fb964 100644 --- a/go.mod +++ b/go.mod @@ -14,13 +14,16 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.21 +go 1.23.0 + +toolchain go1.24.2 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/klauspost/compress v1.17.11 + github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 github.com/lestrrat-go/strftime v1.1.0 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 @@ -28,13 +31,17 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.30.0 golang.org/x/term v0.29.0 - golang.org/x/text v0.22.0 + golang.org/x/text v0.23.0 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/fgprof v0.9.3 // indirect + github.com/golang/snappy v1.0.0 // indirect github.com/google/pprof 
v0.0.0-20211214055906-6f57359322fd // indirect + github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/tools v0.26.0 // indirect + gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 32aae654a..49ded388f 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1S github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc= github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= @@ -17,6 +19,10 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNU github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48= +github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU= +github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 
h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY= +github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1/go.mod h1:uvVFnikBpVz7S1pdsyUI+BBRlz64vmU6Q+kviiB+fpU= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= @@ -41,8 +47,12 @@ golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= -golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= -golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/transformers/aaa_transformer_table.go b/pkg/transformers/aaa_transformer_table.go index 1f201fa29..b1a41ae17 100644 --- a/pkg/transformers/aaa_transformer_table.go +++ b/pkg/transformers/aaa_transformer_table.go @@ 
-70,6 +70,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ StepSetup, SubSetup, SummarySetup, + SurvSetup, TacSetup, TailSetup, TeeSetup, diff --git a/pkg/transformers/surv.go b/pkg/transformers/surv.go new file mode 100644 index 000000000..6d4b38f09 --- /dev/null +++ b/pkg/transformers/surv.go @@ -0,0 +1,173 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/v6/pkg/cli" + "github.com/johnkerl/miller/v6/pkg/mlrval" + "github.com/johnkerl/miller/v6/pkg/types" + "github.com/kshedden/statmodel/duration" + "github.com/kshedden/statmodel/statmodel" +) + +// ---------------------------------------------------------------- +const verbNameSurv = "surv" + +// SurvSetup defines the surv verb: Kaplan-Meier survival curve. +var SurvSetup = TransformerSetup{ + Verb: verbNameSurv, + UsageFunc: transformerSurvUsage, + ParseCLIFunc: transformerSurvParseCLI, + IgnoresInput: false, +} + +func transformerSurvUsage(o *os.File) { + fmt.Fprintf(o, "Usage: %s %s -d {duration-field} -s {status-field}\n", "mlr", verbNameSurv) + fmt.Fprint(o, ` +Estimate Kaplan-Meier survival curve (right-censored). +Options: + -d {field} Name of duration field (time-to-event or censoring). + -s {field} Name of status field (0=censored, 1=event). + -h, --help Show this message. 
+`) +} + +func transformerSurvParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, +) IRecordTransformer { + argi := *pargi + verb := args[argi] + argi++ + + var durationField, statusField string + + for argi < argc { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break + } + if opt == "-h" || opt == "--help" { + transformerSurvUsage(os.Stdout) + os.Exit(0) + } else if opt == "-d" { + if argi+1 >= argc { + fmt.Fprintf(os.Stderr, "mlr %s: %s requires an argument\n", verb, opt) + os.Exit(1) + } + argi++ + durationField = args[argi] + argi++ + } else if opt == "-s" { + if argi+1 >= argc { + fmt.Fprintf(os.Stderr, "mlr %s: %s requires an argument\n", verb, opt) + os.Exit(1) + } + argi++ + statusField = args[argi] + argi++ + } else { + break + } + } + *pargi = argi + if !doConstruct { + return nil + } + if durationField == "" { + fmt.Fprintf(os.Stderr, "mlr %s: -d option is required.\n", verbNameSurv) + fmt.Fprintf(os.Stderr, "Please see 'mlr %s --help' for more information.\n", verbNameSurv) + os.Exit(1) + } + if statusField == "" { + fmt.Fprintf(os.Stderr, "mlr %s: -s option is required.\n", verbNameSurv) + fmt.Fprintf(os.Stderr, "Please see 'mlr %s --help' for more information.\n", verbNameSurv) + os.Exit(1) + } + return NewTransformerSurv(durationField, statusField) +} + +// TransformerSurv holds fields for surv verb. +type TransformerSurv struct { + durationField string + statusField string + times []float64 + events []bool +} + +// NewTransformerSurv constructs a new surv transformer. +func NewTransformerSurv(durationField, statusField string) IRecordTransformer { + return &TransformerSurv{ + durationField: durationField, + statusField: statusField, + times: make([]float64, 0), + events: make([]bool, 0), + } +} + +// Transform processes each record or emits results at end-of-stream. 
+func (tr *TransformerSurv) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + if !inrecAndContext.EndOfStream { + rec := inrecAndContext.Record + mvDur := rec.Get(tr.durationField) + if mvDur == nil { + fmt.Fprintf(os.Stderr, "mlr surv: duration field '%s' not found\n", tr.durationField) + os.Exit(1) + } + duration := mvDur.GetNumericToFloatValueOrDie() + mvStat := rec.Get(tr.statusField) + if mvStat == nil { + fmt.Fprintf(os.Stderr, "mlr surv: status field '%s' not found\n", tr.statusField) + os.Exit(1) + } + status := mvStat.GetNumericToFloatValueOrDie() != 0 + tr.times = append(tr.times, duration) + tr.events = append(tr.events, status) + } else { + // Compute survival using kshedden/statmodel + n := len(tr.times) + if n == 0 { + outputRecordsAndContexts.PushBack(inrecAndContext) + return + } + durations := tr.times + statuses := make([]float64, n) + for i, ev := range tr.events { + if ev { + statuses[i] = 1.0 + } else { + statuses[i] = 0.0 + } + } + dataCols := [][]float64{durations, statuses} + names := []string{tr.durationField, tr.statusField} + ds := statmodel.NewDataset(dataCols, names) + sf, err := duration.NewSurvfuncRight(ds, tr.durationField, tr.statusField, &duration.SurvfuncRightConfig{}) + if err != nil { + fmt.Fprintf(os.Stderr, "mlr surv: %v\n", err) + os.Exit(1) + } + sf.Fit() + times := sf.Time() + survProbs := sf.SurvProb() + for i, t := range times { + newrec := mlrval.NewMlrmapAsRecord() + newrec.PutCopy("time", mlrval.FromFloat(t)) + newrec.PutCopy("survival", mlrval.FromFloat(survProbs[i])) + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) + } + outputRecordsAndContexts.PushBack(inrecAndContext) + } +} diff --git a/test/cases/cli-help/0001/expout 
b/test/cases/cli-help/0001/expout index c8d0af1bc..e7eed5d74 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -1261,6 +1261,16 @@ Options: --transpose Show output with field names as column names.. -h|--help Show this message. +================================================================ +surv +Usage: mlr surv -d {duration-field} -s {status-field} + +Estimate Kaplan-Meier survival curve (right-censored). +Options: + -d {field} Name of duration field (time-to-event or censoring). + -s {field} Name of status field (0=censored, 1=event). + -h, --help Show this message. + ================================================================ tac Usage: mlr tac [options] diff --git a/test/cases/verb-surv/0001/cmd b/test/cases/verb-surv/0001/cmd new file mode 100644 index 000000000..d50e07397 --- /dev/null +++ b/test/cases/verb-surv/0001/cmd @@ -0,0 +1 @@ +mlr --csv --from test/input/surv.csv surv -d duration -s status \ No newline at end of file diff --git a/test/cases/verb-surv/0001/experr b/test/cases/verb-surv/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-surv/0001/expout b/test/cases/verb-surv/0001/expout new file mode 100644 index 000000000..9b23a50f5 --- /dev/null +++ b/test/cases/verb-surv/0001/expout @@ -0,0 +1,4 @@ +time,survival +1.00000000,0.80000000 +3.00000000,0.53333333 +5.00000000,0.00000000 diff --git a/test/input/surv.csv b/test/input/surv.csv new file mode 100644 index 000000000..f025f6a5c --- /dev/null +++ b/test/input/surv.csv @@ -0,0 +1,6 @@ +duration,status +1,1 +2,0 +3,1 +4,0 +5,1 \ No newline at end of file From e9637bba9d1cb11be728c67f48d51178e2cc7fbe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:23:33 -0400 Subject: [PATCH 334/456] Bump golang.org/x/sys from 0.30.0 to 0.33.0 (#1801) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.30.0 to 0.33.0. 
- [Commits](https://github.com/golang/sys/compare/v0.30.0...v0.33.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.33.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 1868fb964..a92de4386 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.30.0 + golang.org/x/sys v0.33.0 golang.org/x/term v0.29.0 golang.org/x/text v0.23.0 ) diff --git a/go.sum b/go.sum index 49ded388f..70bfe5013 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= From 230b348a718c2d1f50c7f6269dcfa5d08b5d48fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:23:42 -0400 Subject: [PATCH 
335/456] Bump golang.org/x/text from 0.22.0 to 0.25.0 (#1800) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.22.0 to 0.25.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.22.0...v0.25.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.25.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a92de4386..fca3c7540 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.33.0 golang.org/x/term v0.29.0 - golang.org/x/text v0.23.0 + golang.org/x/text v0.25.0 ) require ( diff --git a/go.sum b/go.sum index 70bfe5013..38b61b658 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,8 @@ golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= From d14dc76318917ea4a727a0f9ebe1947b54b1a45c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:29:17 -0400 Subject: [PATCH 336/456] Bump golang.org/x/term from 0.29.0 to 0.32.0 (#1799) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.29.0 to 0.32.0. - [Commits](https://github.com/golang/term/compare/v0.29.0...v0.32.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.32.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index fca3c7540..540858390 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.33.0 - golang.org/x/term v0.29.0 + golang.org/x/term v0.32.0 golang.org/x/text v0.25.0 ) diff --git a/go.sum b/go.sum index 38b61b658..3177d44de 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= -golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= From ea242a242abe865b6fd09acdcb163feb6c5050cd Mon Sep 17 
00:00:00 2001 From: John Kerl Date: Thu, 15 May 2025 19:41:58 -0400 Subject: [PATCH 337/456] Docs for new `surv` verb (#1807) --- docs/src/reference-verbs.md | 15 +++++++++++++++ docs/src/reference-verbs.md.in | 6 ++++++ pkg/output/channel_writer.go | 1 - 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 3eb176dde..c3e121644 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3857,6 +3857,21 @@ mean - - 5000.5 0.49860196816795804 0.5062057444929905 median pan pan 5001 0.5011592202840128 0.5060212582772865 +## surv + +
+mlr surv --help
+
+
+Usage: mlr surv -d {duration-field} -s {status-field}
+
+Estimate Kaplan-Meier survival curve (right-censored).
+Options:
+  -d {field}   Name of duration field (time-to-event or censoring).
+  -s {field}   Name of status field (0=censored, 1=event).
+  -h, --help   Show this message.
+
+ ## tac
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index 8959ebf6b..5f6f31097 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -1161,6 +1161,12 @@ GENMD-RUN-COMMAND
 mlr --from data/medium --opprint summary --transpose -a mean,median,mode
 GENMD-EOF
 
+## surv
+
+GENMD-RUN-COMMAND
+mlr surv --help
+GENMD-EOF
+
 ## tac
 
 GENMD-RUN-COMMAND
diff --git a/pkg/output/channel_writer.go b/pkg/output/channel_writer.go
index 3a8cab8e4..ac025398b 100644
--- a/pkg/output/channel_writer.go
+++ b/pkg/output/channel_writer.go
@@ -72,7 +72,6 @@ func channelWriterHandleBatch(
 			// XXX also make sure this results in exit 1 & goroutine cleanup
 			if writerOptions.FailOnDataError && record != nil {
 				ok := true
-				fmt.Printf("AAA %#v", record)
 				for pe := record.Head; pe != nil; pe = pe.Next {
 					if pe.Value.IsError() {
 						context := recordAndContext.Context

From 68f28455788d657bd8f1e2f494e233ef67298566 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 16 May 2025 08:32:28 -0400
Subject: [PATCH 338/456] Bump github/codeql-action from 3.28.17 to 3.28.18
 (#1808)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.17 to 3.28.18.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/60168efe1c415ce0f5521ea06d5c2062adbeed1b...ff0a06e83cb2de871e5a09832bc6a81e7276941f)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.28.18
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index ec295bdb7..f441ca6fd 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@60168efe1c415ce0f5521ea06d5c2062adbeed1b
+      uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@60168efe1c415ce0f5521ea06d5c2062adbeed1b
+      uses: github/codeql-action/autobuild@ff0a06e83cb2de871e5a09832bc6a81e7276941f
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@60168efe1c415ce0f5521ea06d5c2062adbeed1b
+      uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f

From cc7f72b741e7026405f7cfd96093ea414d87c470 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 4 Jun 2025 08:11:40 -0400
Subject: [PATCH 339/456] Bump github/codeql-action from 3.28.18 to 3.28.19
 (#1812)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.18 to 3.28.19.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/ff0a06e83cb2de871e5a09832bc6a81e7276941f...fca7ace96b7d713c7035871441bd52efbe39e27e)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.28.19
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index f441ca6fd..b90a29d95 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f
+      uses: github/codeql-action/init@fca7ace96b7d713c7035871441bd52efbe39e27e
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@ff0a06e83cb2de871e5a09832bc6a81e7276941f
+      uses: github/codeql-action/autobuild@fca7ace96b7d713c7035871441bd52efbe39e27e
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f
+      uses: github/codeql-action/analyze@fca7ace96b7d713c7035871441bd52efbe39e27e

From 8e07a2f78d5fd7c7c88701c882176de7c31502af Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 6 Jun 2025 09:30:38 -0400
Subject: [PATCH 340/456] Bump golang.org/x/text from 0.25.0 to 0.26.0 (#1813)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.25.0 to 0.26.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.25.0...v0.26.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-version: 0.26.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 4 ++--
 go.sum | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/go.mod b/go.mod
index 540858390..526d27725 100644
--- a/go.mod
+++ b/go.mod
@@ -31,7 +31,7 @@ require (
 	github.com/stretchr/testify v1.10.0
 	golang.org/x/sys v0.33.0
 	golang.org/x/term v0.32.0
-	golang.org/x/text v0.25.0
+	golang.org/x/text v0.26.0
 )
 
 require (
@@ -41,7 +41,7 @@ require (
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
 	github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	golang.org/x/tools v0.26.0 // indirect
+	golang.org/x/tools v0.33.0 // indirect
 	gonum.org/v1/gonum v0.16.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index 3177d44de..a902b4c81 100644
--- a/go.sum
+++ b/go.sum
@@ -47,10 +47,10 @@ golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
 golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
 golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
-golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
-golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
-golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
-golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
+golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=
+golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=
+golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc=
+golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI=
 gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
 gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From 34c9d764d87832237e4df38eac9997a8fd407b4d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 12 Jun 2025 17:08:14 -0400
Subject: [PATCH 341/456] Bump github/codeql-action from 3.28.19 to 3.29.0
 (#1814)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.28.19 to 3.29.0.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/fca7ace96b7d713c7035871441bd52efbe39e27e...ce28f5bb42b7a9f2c824e633a3f6ee835bab6858)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.29.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index b90a29d95..987876d1b 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@fca7ace96b7d713c7035871441bd52efbe39e27e
+      uses: github/codeql-action/init@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@fca7ace96b7d713c7035871441bd52efbe39e27e
+      uses: github/codeql-action/autobuild@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@fca7ace96b7d713c7035871441bd52efbe39e27e
+      uses: github/codeql-action/analyze@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858

From d30501a69b503ca7c1fa5c0564c9eb1f670ae6cb Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Fri, 13 Jun 2025 13:54:34 -0400
Subject: [PATCH 342/456] Argument parsing is different in `mlr -s` scripts
 (#1817)

---
 docs/src/scripting.md        |  2 +-
 docs/src/scripting.md.in     |  2 +-
 pkg/climain/mlrcli_parse.go  | 10 ++++++++++
 pkg/entrypoint/entrypoint.go |  9 ---------
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/docs/src/scripting.md b/docs/src/scripting.md
index 71c6b22a0..4766dcb50 100644
--- a/docs/src/scripting.md
+++ b/docs/src/scripting.md
@@ -234,7 +234,7 @@ then fraction -f count
 
 Points:
 
-* Same as above, where the `#!` line isn't needed. (But you can include a `#!` line; `mlr -s` will simply see it as a comment line.).
+* Same as above, where the `#!` line isn't needed. (But you can include a `#!` line; `mlr -s` will simply see it as a comment line.)
 * As above, you don't need all the backslashing for line-continuations.
 * As above, you don't need the explicit `--` or `"$@"`.
 
diff --git a/docs/src/scripting.md.in b/docs/src/scripting.md.in
index 3234c9398..f29fe8b63 100644
--- a/docs/src/scripting.md.in
+++ b/docs/src/scripting.md.in
@@ -101,7 +101,7 @@ GENMD-EOF
 
 Points:
 
-* Same as above, where the `#!` line isn't needed. (But you can include a `#!` line; `mlr -s` will simply see it as a comment line.).
+* Same as above, where the `#!` line isn't needed. (But you can include a `#!` line; `mlr -s` will simply see it as a comment line.)
 * As above, you don't need all the backslashing for line-continuations.
 * As above, you don't need the explicit `--` or `"$@"`.
 
diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go
index 0400df5ed..f83a93373 100644
--- a/pkg/climain/mlrcli_parse.go
+++ b/pkg/climain/mlrcli_parse.go
@@ -94,11 +94,21 @@ func ParseCommandLine(
 ) {
 	// mlr -s scriptfile {data-file names ...} means take the contents of
 	// scriptfile as if it were command-line items.
+
 	args, err = maybeInterpolateDashS(args)
 	if err != nil {
 		return nil, nil, err
 	}
 
+	// Expand "-xyz" into "-x -y -z" while leaving "--xyz" intact. This is a
+	// keystroke-saver for the user.
+	//
+	// This is OK to do globally here since Miller is quite consistent (in
+	// main, verbs, and auxents) that multi-character options start with two
+	// dashes, e.g. "--csv". (The sole exception is the sort verb's -nf/-nr
+	// which are handled specially there.)
+    args = lib.Getoptify(args)
+
 	// Pass one as described at the top of this file.
 	flagSequences, terminalSequence, verbSequences, dataFileNames := parseCommandLinePassOne(args)
 
diff --git a/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go
index c8293c041..0d2b8d3a9 100644
--- a/pkg/entrypoint/entrypoint.go
+++ b/pkg/entrypoint/entrypoint.go
@@ -36,15 +36,6 @@ func Main() MainReturn {
 	// otherwise, we only raw ANSI escape sequences like ←[0;30m  0←[0m ←[0;31m  1
 	platform.EnableAnsiEscapeSequences()
 
-	// Expand "-xyz" into "-x -y -z" while leaving "--xyz" intact. This is a
-	// keystroke-saver for the user.
-	//
-	// This is OK to do globally here since Miller is quite consistent (in
-	// main, verbs, and auxents) that multi-character options start with two
-	// dashes, e.g. "--csv". (The sole exception is the sort verb's -nf/-nr
-	// which are handled specially there.)
-	os.Args = lib.Getoptify(os.Args)
-
 	// 'mlr repl' or 'mlr lecat' or any other non-miller-per-se toolery which
 	// is delivered (for convenience) within the mlr executable. If argv[1] is
 	// found then this function will not return.

From de05d9665b0626de4d4baac10a6bf543037c53aa Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 27 Jun 2025 10:36:05 -0400
Subject: [PATCH 343/456] Bump github/codeql-action from 3.29.0 to 3.29.1
 (#1822)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.0 to 3.29.1.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/ce28f5bb42b7a9f2c824e633a3f6ee835bab6858...39edc492dbe16b1465b0cafca41432d857bdb31a)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.29.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 987876d1b..a5f9f9d83 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858
+      uses: github/codeql-action/init@39edc492dbe16b1465b0cafca41432d857bdb31a
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858
+      uses: github/codeql-action/autobuild@39edc492dbe16b1465b0cafca41432d857bdb31a
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858
+      uses: github/codeql-action/analyze@39edc492dbe16b1465b0cafca41432d857bdb31a

From 4d84f99120931083c9f40f2ac445a1b2593719a7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 1 Jul 2025 09:32:07 -0400
Subject: [PATCH 344/456] Bump github/codeql-action from 3.29.1 to 3.29.2
 (#1825)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.1 to 3.29.2.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/39edc492dbe16b1465b0cafca41432d857bdb31a...181d5eefc20863364f96762470ba6f862bdef56b)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.29.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index a5f9f9d83..b3dd4bcec 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@39edc492dbe16b1465b0cafca41432d857bdb31a
+      uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@39edc492dbe16b1465b0cafca41432d857bdb31a
+      uses: github/codeql-action/autobuild@181d5eefc20863364f96762470ba6f862bdef56b
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@39edc492dbe16b1465b0cafca41432d857bdb31a
+      uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b

From e67bdef98eeecb6fdfdcb9c33a9500c07758b4bc Mon Sep 17 00:00:00 2001
From: Balki 
Date: Thu, 3 Jul 2025 22:54:09 +0000
Subject: [PATCH 345/456] cut: Consider `-o` flag even when using regexes with
 `-r` (#1823)

* cut: Consider `-o` flag even when using regexes with `-r`

* update doc for cut -r flag
---
 docs/src/manpage.md             |  2 +-
 docs/src/manpage.txt            |  2 +-
 docs/src/mlr.1                  |  2 +-
 docs/src/reference-verbs.md     |  2 +-
 man/manpage.txt                 |  2 +-
 man/mlr.1                       |  2 +-
 pkg/transformers/cut.go         | 31 ++++++++++++++++++++++++++++---
 test/cases/cli-help/0001/expout |  2 +-
 8 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index b9af6e51c..d17c5de45 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -1033,7 +1033,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
         -r Treat field names as regular expressions. "ab", "a.*b" will
           match any field name containing the substring "ab" or matching
           "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-          be used. The -o flag is ignored when -r is present.
+          be used.
        -h|--help Show this message.
        Examples:
          mlr cut -f hostname,status
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index aa0b21b9b..c19bbc49d 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -1012,7 +1012,7 @@
         -r Treat field names as regular expressions. "ab", "a.*b" will
           match any field name containing the substring "ab" or matching
           "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-          be used. The -o flag is ignored when -r is present.
+          be used.
        -h|--help Show this message.
        Examples:
          mlr cut -f hostname,status
diff --git a/docs/src/mlr.1 b/docs/src/mlr.1
index 0a026dd4f..5aa5bd16f 100644
--- a/docs/src/mlr.1
+++ b/docs/src/mlr.1
@@ -722,7 +722,7 @@ Passes through input records with specified fields included/excluded.
 -r               Treat field names as regular expressions. "ab", "a.*b" will
                  match any field name containing the substring "ab" or matching
                  "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-                 be used. The -o flag is ignored when -r is present.
+                 be used.
 Examples:
   mlr cut -f hostname,status
   mlr cut -x -f hostname,status
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index c3e121644..ab7a599fa 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -804,7 +804,7 @@ Options:
  -r Treat field names as regular expressions. "ab", "a.*b" will
    match any field name containing the substring "ab" or matching
    "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-   be used. The -o flag is ignored when -r is present.
+   be used.
 -h|--help Show this message.
 Examples:
   mlr cut -f hostname,status
diff --git a/man/manpage.txt b/man/manpage.txt
index aa0b21b9b..c19bbc49d 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1012,7 +1012,7 @@
         -r Treat field names as regular expressions. "ab", "a.*b" will
           match any field name containing the substring "ab" or matching
           "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-          be used. The -o flag is ignored when -r is present.
+          be used.
        -h|--help Show this message.
        Examples:
          mlr cut -f hostname,status
diff --git a/man/mlr.1 b/man/mlr.1
index 967d53e31..f862890cf 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -1251,7 +1251,7 @@ Options:
  -r Treat field names as regular expressions. "ab", "a.*b" will
    match any field name containing the substring "ab" or matching
    "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-   be used. The -o flag is ignored when -r is present.
+   be used.
 -h|--help Show this message.
 Examples:
   mlr cut -f hostname,status
diff --git a/pkg/transformers/cut.go b/pkg/transformers/cut.go
index f8933d586..e9e57d438 100644
--- a/pkg/transformers/cut.go
+++ b/pkg/transformers/cut.go
@@ -1,10 +1,12 @@
 package transformers
 
 import (
+	"cmp"
 	"container/list"
 	"fmt"
 	"os"
 	"regexp"
+	"slices"
 	"strings"
 
 	"github.com/johnkerl/miller/v6/pkg/cli"
@@ -36,7 +38,7 @@ func transformerCutUsage(
 	fmt.Fprintf(o, " -r Treat field names as regular expressions. \"ab\", \"a.*b\" will\n")
 	fmt.Fprintf(o, "   match any field name containing the substring \"ab\" or matching\n")
 	fmt.Fprintf(o, "   \"a.*b\", respectively; anchors of the form \"^ab$\", \"^a.*b$\" may\n")
-	fmt.Fprintf(o, "   be used. The -o flag is ignored when -r is present.\n")
+	fmt.Fprintf(o, "   be used.\n")
 	fmt.Fprintf(o, "-h|--help Show this message.\n")
 	fmt.Fprintf(o, "Examples:\n")
 	fmt.Fprintf(o, "  %s %s -f hostname,status\n", "mlr", verbNameCut)
@@ -129,6 +131,7 @@ type TransformerCut struct {
 	fieldNameSet  map[string]bool
 
 	doComplement bool
+	doArgOrder   bool
 	regexes      []*regexp.Regexp
 
 	recordTransformerFunc RecordTransformerFunc
@@ -143,6 +146,8 @@ func NewTransformerCut(
 
 	tr := &TransformerCut{}
 
+	tr.doArgOrder = doArgOrder
+
 	if !doRegexes {
 		tr.fieldNameList = fieldNames
 		tr.fieldNameSet = lib.StringListToSet(fieldNames)
@@ -257,6 +262,11 @@ func (tr *TransformerCut) exclude(
 	outputRecordsAndContexts.PushBack(inrecAndContext)
 }
 
+type entryIndex struct {
+	index int
+	entry *mlrval.MlrmapEntry
+}
+
 // ----------------------------------------------------------------
 func (tr *TransformerCut) processWithRegexes(
 	inrecAndContext *types.RecordAndContext,
@@ -267,11 +277,14 @@ func (tr *TransformerCut) processWithRegexes(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 		newrec := mlrval.NewMlrmapAsRecord()
+		var entries []entryIndex
 		for pe := inrec.Head; pe != nil; pe = pe.Next {
 			matchesAny := false
-			for _, regex := range tr.regexes {
+			var index int
+			for i, regex := range tr.regexes {
 				if regex.MatchString(pe.Key) {
 					matchesAny = true
+					index = i
 					break
 				}
 			}
@@ -279,7 +292,19 @@ func (tr *TransformerCut) processWithRegexes(
 			if matchesAny != tr.doComplement {
 				// Pointer-motion is OK since the inrec is being hereby discarded.
 				// We're simply transferring ownership to the newrec.
-				newrec.PutReference(pe.Key, pe.Value)
+				if tr.doArgOrder {
+					entries = append(entries, entryIndex{index, pe})
+				} else {
+					newrec.PutReference(pe.Key, pe.Value)
+				}
+			}
+		}
+		if tr.doArgOrder {
+			slices.SortStableFunc(entries, func(a, b entryIndex) int {
+				return cmp.Compare(a.index, b.index)
+			})
+			for _, ei := range entries {
+				newrec.PutReference(ei.entry.Key, ei.entry.Value)
 			}
 		}
 		outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context))
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index e7eed5d74..a451ac0c5 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -138,7 +138,7 @@ Options:
  -r Treat field names as regular expressions. "ab", "a.*b" will
    match any field name containing the substring "ab" or matching
    "a.*b", respectively; anchors of the form "^ab$", "^a.*b$" may
-   be used. The -o flag is ignored when -r is present.
+   be used.
 -h|--help Show this message.
 Examples:
   mlr cut -f hostname,status

From d6cd981c87be9aaebf7382690ce40096281f554d Mon Sep 17 00:00:00 2001
From: Balki 
Date: Thu, 3 Jul 2025 23:01:17 +0000
Subject: [PATCH 346/456] Add Keystroke savers for same format (#1824)

---
 docs/src/manpage.md                  | 16 +++++-----
 docs/src/manpage.txt                 | 16 +++++-----
 docs/src/reference-main-flag-list.md | 20 ++++++------
 man/manpage.txt                      | 16 +++++-----
 man/mlr.1                            | 16 +++++-----
 pkg/cli/option_parse.go              | 47 +++++++++++++++-------------
 6 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index d17c5de45..2a0656948 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -449,14 +449,14 @@ This is simply a copy of what you should see on running `man mlr` at a command p
 
        | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
        +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
        | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index c19bbc49d..7bd2f017a 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -428,14 +428,14 @@
 
        | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
        +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
        | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index 7258cce08..0294d4223 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -210,15 +210,17 @@ The letters `c`, `t`, `j`, `d`, `n`, `x`, `p`, and `m` refer to formats CSV, TSV
 PPRINT, and markdown, respectively. Note that markdown format is available for
 output only.
 
-| In  out   | **CSV** | **TSV** | **JSON** | **DKVP** | **NIDX** | **XTAB** | **PPRINT** | **Markdown** |
-|------------|---------|---------|----------|----------|----------|----------|------------|--------------|
-| **CSV**    |         | `--c2t` | `--c2j`  | `--c2d`  | `--c2n`  | `--c2x`  | `--c2p`    | `--c2m`      |
-| **TSV**    | `--t2c` |         | `--t2j`  | `--t2d`  | `--t2n`  | `--t2x`  | `--t2p`    | `--t2m`      |
-| **JSON**   | `--j2c` | `--j2t` |          | `--j2d`  | `--j2n`  | `--j2x`  | `--j2p`    | `--j2m`      |
-| **DKVP**   | `--d2c` | `--d2t` | `--d2j`  |          | `--d2n`  | `--d2x`  | `--d2p`    | `--d2m`      |
-| **NIDX**   | `--n2c` | `--n2t` | `--n2j`  | `--n2d`  |          | `--n2x`  | `--n2p`    | `--n2m`      |
-| **XTAB**   | `--x2c` | `--x2t` | `--x2j`  | `--x2d`  | `--x2n`  |          | `--x2p`    | `--x2m`      |
-| **PPRINT** | `--p2c` | `--p2t` | `--p2j`  | `--p2d`  | `--p2n`  | `--p2x`  |            | `--p2m`      |
+| In  out      | **CSV** | **TSV** | **JSON** | **JSONL** | **DKVP** | **NIDX** | **XTAB** | **PPRINT** | **Markdown** |
+|--------------|---------|---------|----------|-----------|----------|----------|----------|------------|--------------|
+| **CSV**      | --c2c   | --c2t   | --c2j    | --c2l     | --c2d    | --c2n    | --c2x    | --c2p      | --c2m        |
+| **TSV**      | --t2c   | --t2t   | --t2j    | --t2l     | --t2d    | --t2n    | --t2x    | --t2p      | --t2m        |
+| **JSON**     | --j2c   | --j2t   | --j2j    | --j2l     | --j2d    | --j2n    | --j2x    | --j2p      | --j2m        |
+| **JSONL**    | --l2c   | --l2t   | --l2j    | --l2l     | --l2d    | --l2n    | --l2x    | --l2p      | --l2m        |
+| **DKVP**     | --d2c   | --d2t   | --d2j    | --d2l     | --d2d    | --d2n    | --d2x    | --d2p      | --d2m        |
+| **NIDX**     | --n2c   | --n2t   | --n2j    | --n2l     | --n2d    | --n2n    | --n2x    | --n2p      | --n2m        |
+| **XTAB**     | --x2c   | --x2t   | --x2j    | --x2l     | --x2d    | --x2n    | --x2x    | --x2p      | --x2m        |
+| **PPRINT**   | --p2c   | --p2t   | --p2j    | --p2l     | --p2d    | --p2n    | --p2x    | --p2p      | --p2m        |
+| **Markdown** | --m2c   | --m2t   | --m2j    | --m2l     | --m2d    | --m2n    | --m2x    | --m2p      |              |
 
 Additionally:
 
diff --git a/man/manpage.txt b/man/manpage.txt
index c19bbc49d..7bd2f017a 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -428,14 +428,14 @@
 
        | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
        +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-       | CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+       | CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+       | TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+       | JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+       | JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+       | DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
+       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
+       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
+       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
        | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
diff --git a/man/mlr.1 b/man/mlr.1
index f862890cf..ecf229958 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -525,14 +525,14 @@ JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
 | In\eout   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
 +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+| TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+| JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+| JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+| DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
+| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
+| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
+| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
 | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
 
 -p                       Keystroke-saver for `--nidx --fs space --repifs`.
diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index 41be332b6..134accc24 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -1009,7 +1009,7 @@ var FileFormatFlagSection = FlagSection{
 		{
 			name:     "--csv",
 			help:     "Use CSV format for input and output data.",
-			altNames: []string{"-c"},
+			altNames: []string{"-c", "--c2c"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "csv"
 				options.WriterOptions.OutputFileFormat = "csv"
@@ -1030,7 +1030,7 @@ var FileFormatFlagSection = FlagSection{
 		{
 			name:     "--tsv",
 			help:     "Use TSV format for input and output data.",
-			altNames: []string{"-t"},
+			altNames: []string{"-t", "--t2t"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "tsv"
 				options.WriterOptions.OutputFileFormat = "tsv"
@@ -1093,8 +1093,9 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--dkvp",
-			help: "Use DKVP format for input and output data.",
+			name:     "--dkvp",
+			help:     "Use DKVP format for input and output data.",
+			altNames: []string{"--d2d"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "dkvp"
 				options.WriterOptions.OutputFileFormat = "dkvp"
@@ -1105,7 +1106,7 @@ var FileFormatFlagSection = FlagSection{
 		{
 			name:     "--json",
 			help:     "Use JSON format for input and output data.",
-			altNames: []string{"-j"},
+			altNames: []string{"-j", "--j2j"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "json"
 				options.WriterOptions.OutputFileFormat = "json"
@@ -1116,8 +1117,9 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--jsonl",
-			help: "Use JSON Lines format for input and output data.",
+			name:     "--jsonl",
+			help:     "Use JSON Lines format for input and output data.",
+			altNames: []string{"--l2l"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "json"
 				options.WriterOptions.OutputFileFormat = "json"
@@ -1128,8 +1130,9 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--nidx",
-			help: "Use NIDX format for input and output data.",
+			name:     "--nidx",
+			help:     "Use NIDX format for input and output data.",
+			altNames: []string{"--n2n"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "nidx"
 				options.WriterOptions.OutputFileFormat = "nidx"
@@ -1138,8 +1141,9 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--xtab",
-			help: "Use XTAB format for input and output data.",
+			name:     "--xtab",
+			help:     "Use XTAB format for input and output data.",
+			altNames: []string{"--x2x"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "xtab"
 				options.WriterOptions.OutputFileFormat = "xtab"
@@ -1157,8 +1161,9 @@ var FileFormatFlagSection = FlagSection{
 		},
 
 		{
-			name: "--pprint",
-			help: "Use PPRINT format for input and output data.",
+			name:     "--pprint",
+			help:     "Use PPRINT format for input and output data.",
+			altNames: []string{"--p2p"},
 			parser: func(args []string, argc int, pargi *int, options *TOptions) {
 				options.ReaderOptions.InputFileFormat = "pprint"
 				options.ReaderOptions.IFS = " "
@@ -1180,14 +1185,14 @@ JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
 | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
 +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
+| CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
+| TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
+| JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
+| JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
+| DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
+| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
+| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
+| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
 | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |`)
 }
 

From 99a98b0dc785aee8e0c5bb09e99af17f19b8af1c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Thu, 3 Jul 2025 19:23:38 -0400
Subject: [PATCH 347/456] Add `-c`, `-t`, `-j` to doc matrix in PR 1824 (#1826)

* Add `-c`, `-t`, `-j` to doc matrix in PR 1824

* Run `make dev`
---
 docs/src/file-formats.md             | 22 ++++-----
 docs/src/manpage.md                  | 60 +++++++++++++-----------
 docs/src/manpage.txt                 | 60 +++++++++++++-----------
 docs/src/reference-main-flag-list.md | 36 +++++++--------
 man/manpage.txt                      | 60 +++++++++++++-----------
 man/mlr.1                            | 68 +++++++++++++++++-----------
 pkg/cli/option_parse.go              | 22 ++++-----
 pkg/climain/mlrcli_parse.go          |  2 +-
 8 files changed, 183 insertions(+), 147 deletions(-)

diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md
index 2d7a7d0d7..31b874f7d 100644
--- a/docs/src/file-formats.md
+++ b/docs/src/file-formats.md
@@ -712,17 +712,17 @@ As keystroke-savers for format-conversion you may use the following.
 The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
 JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-| In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-+----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-| CSV      |       | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-| TSV      | --t2c |       | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-| JSON     | --j2c | --j2t |        | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-| JSONL    | --l2c | --l2t |        |        | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-| DKVP     | --d2c | --d2t | --d2j  | --d2l  |        | --d2n  | --d2x  | --d2p  | --d2m    |
-| NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  |        | --n2x  | --n2p  | --n2m    |
-| XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  |        | --x2p  | --x2m    |
-| PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  |        | --p2m    |
-| Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
+| In\out   | CSV      | TSV      | JSON     | JSONL | DKVP  | NIDX  | XTAB  | PPRINT | Markdown |
++----------+----------+----------+----------+-------+-------+-------+-------+--------+----------|
+| CSV      | --c2c,-c | --c2t    | --c2j    | --c2l | --c2d | --c2n | --c2x | --c2p  | --c2m    |
+| TSV      | --t2c    | --t2t,-t | --t2j    | --t2l | --t2d | --t2n | --t2x | --t2p  | --t2m    |
+| JSON     | --j2c    | --j2t    | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p  | --j2m    |
+| JSONL    | --l2c    | --l2t    | --l2j    | --l2l | --l2d | --l2n | --l2x | --l2p  | --l2m    |
+| DKVP     | --d2c    | --d2t    | --d2j    | --d2l | --d2d | --d2n | --d2x | --d2p  | --d2m    |
+| NIDX     | --n2c    | --n2t    | --n2j    | --n2l | --n2d | --n2n | --n2x | --n2p  | --n2m    |
+| XTAB     | --x2c    | --x2t    | --x2j    | --x2l | --x2d | --x2n | --x2x | --x2p  | --x2m    |
+| PPRINT   | --p2c    | --p2t    | --p2j    | --p2l | --p2d | --p2n | --p2x | --p2p  | --p2m    |
+| Markdown | --m2c    | --m2t    | --m2j    | --m2l | --m2d | --m2n | --m2x | --m2p  |          |
 
 -p                       Keystroke-saver for `--nidx --fs space --repifs`.
 -T                       Keystroke-saver for `--nidx --fs tab`.
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 2a0656948..8f75d568d 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -198,8 +198,8 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        merge-fields most-frequent nest nothing put regularize remove-empty-columns
        rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
        skip-trivial-records sort sort-within-records sparsify split ssub stats1
-       stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
-       uniq unspace unsparsify
+       stats2 step sub summary surv tac tail tee template top utf8-to-latin1
+       unflatten uniq unspace unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
@@ -368,9 +368,9 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        are overridden in all cases by setting output format to `format2`.
 
        --asv or --asvlite       Use ASV format for input and output data.
-       --csv or -c              Use CSV format for input and output data.
+       --csv or -c or --c2c     Use CSV format for input and output data.
        --csvlite                Use CSV-lite format for input and output data.
-       --dkvp                   Use DKVP format for input and output data.
+       --dkvp or --d2d          Use DKVP format for input and output data.
        --gen-field-name         Specify field name for --igen. Defaults to "i".
        --gen-start              Specify start value for --igen. Defaults to 1.
        --gen-step               Specify step value for --igen. Defaults to 1.
@@ -394,9 +394,9 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        --itsvlite               Use TSV-lite format for input data.
        --iusv or --iusvlite     Use USV format for input data.
        --ixtab                  Use XTAB format for input data.
-       --json or -j             Use JSON format for input and output data.
-       --jsonl                  Use JSON Lines format for input and output data.
-       --nidx                   Use NIDX format for input and output data.
+       --json or -j or --j2j    Use JSON format for input and output data.
+       --jsonl or --l2l         Use JSON Lines format for input and output data.
+       --nidx or --n2n          Use NIDX format for input and output data.
        --oasv or --oasvlite     Use ASV format for output data.
        --ocsv                   Use CSV format for output data.
        --ocsvlite               Use CSV-lite format for output data.
@@ -410,11 +410,11 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        --otsvlite               Use TSV-lite format for output data.
        --ousv or --ousvlite     Use USV format for output data.
        --oxtab                  Use XTAB format for output data.
-       --pprint                 Use PPRINT format for input and output data.
-       --tsv or -t              Use TSV format for input and output data.
+       --pprint or --p2p        Use PPRINT format for input and output data.
+       --tsv or -t or --t2t     Use TSV format for input and output data.
        --tsvlite                Use TSV-lite format for input and output data.
        --usv or --usvlite       Use USV format for input and output data.
-       --xtab                   Use XTAB format for input and output data.
+       --xtab or --x2x          Use XTAB format for input and output data.
        --xvright                Right-justify values for XTAB format.
        -i {format name}         Use format name for input data. For example: `-i csv`
                                 is the same as `--icsv`.
@@ -432,32 +432,31 @@ This is simply a copy of what you should see on running `man mlr` at a command p
                                 formats. Defaults to `.`.
        --no-auto-flatten        When output is non-JSON, suppress the default
                                 auto-flatten behavior. Default: if `$y = [7,8,9]`
-                                then this flattens to `y.1=7,y.2=8,y.3=9, and
+                                then this flattens to `y.1=7,y.2=8,y.3=9`, and
                                 similarly for maps. With `--no-auto-flatten`, instead
                                 we get `$y=[1, 2, 3]`.
        --no-auto-unflatten      When input is non-JSON and output is JSON, suppress
                                 the default auto-unflatten behavior. Default: if the
                                 input has `y.1=7,y.2=8,y.3=9` then this unflattens to
-                                `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With
-                                `--no-auto-flatten`, instead we get
-                                `${y.1}=7,${y.2}=8,${y.3}=9`.
+                                `$y=[7,8,9]`. With `--no-auto-unflatten`, instead we
+                                get `${y.1}=7,${y.2}=8,${y.3}=9`.
 
 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m
        As keystroke-savers for format-conversion you may use the following.
        The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX,
        JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively.
 
-       | In\out   | CSV   | TSV   | JSON   | JSONL  | DKVP   | NIDX   | XTAB   | PPRINT | Markdown |
-       +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------|
-       | CSV      | --c2c | --c2t | --c2j  | --c2l  | --c2d  | --c2n  | --c2x  | --c2p  | --c2m    |
-       | TSV      | --t2c | --t2t | --t2j  | --t2l  | --t2d  | --t2n  | --t2x  | --t2p  | --t2m    |
-       | JSON     | --j2c | --j2t | --j2j  | --j2l  | --j2d  | --j2n  | --j2x  | --j2p  | --j2m    |
-       | JSONL    | --l2c | --l2t | --l2j  | --l2l  | --l2d  | --l2n  | --l2x  | --l2p  | --l2m    |
-       | DKVP     | --d2c | --d2t | --d2j  | --d2l  | --d2d  | --d2n  | --d2x  | --d2p  | --d2m    |
-       | NIDX     | --n2c | --n2t | --n2j  | --n2l  | --n2d  | --n2n  | --n2x  | --n2p  | --n2m    |
-       | XTAB     | --x2c | --x2t | --x2j  | --x2l  | --x2d  | --x2n  | --x2x  | --x2p  | --x2m    |
-       | PPRINT   | --p2c | --p2t | --p2j  | --p2l  | --p2d  | --p2n  | --p2x  | --p2p  | --p2m    |
-       | Markdown | --m2c | --m2t | --m2j  | --m2l  | --m2d  | --m2n  | --m2x  | --m2p  |          |
+       | In\out   | CSV      | TSV      | JSON     | JSONL | DKVP  | NIDX  | XTAB  | PPRINT | Markdown |
+       +----------+----------+----------+----------+-------+-------+-------+-------+--------+----------|
+       | CSV      | --c2c,-c | --c2t    | --c2j    | --c2l | --c2d | --c2n | --c2x | --c2p  | --c2m    |
+       | TSV      | --t2c    | --t2t,-t | --t2j    | --t2l | --t2d | --t2n | --t2x | --t2p  | --t2m    |
+       | JSON     | --j2c    | --j2t    | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p  | --j2m    |
+       | JSONL    | --l2c    | --l2t    | --l2j    | --l2l | --l2d | --l2n | --l2x | --l2p  | --l2m    |
+       | DKVP     | --d2c    | --d2t    | --d2j    | --d2l | --d2d | --d2n | --d2x | --d2p  | --d2m    |
+       | NIDX     | --n2c    | --n2t    | --n2j    | --n2l | --n2d | --n2n | --n2x | --n2p  | --n2m    |
+       | XTAB     | --x2c    | --x2t    | --x2j    | --x2l | --x2d | --x2n | --x2x | --x2p  | --x2m    |
+       | PPRINT   | --p2c    | --p2t    | --p2j    | --p2l | --p2d | --p2n | --p2x | --p2p  | --p2m    |
+       | Markdown | --m2c    | --m2t    | --m2j    | --m2l | --m2d | --m2n | --m2x | --m2p  |          |
 
        -p                       Keystroke-saver for `--nidx --fs space --repifs`.
        -T                       Keystroke-saver for `--nidx --fs tab`.
@@ -2108,6 +2107,15 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        --transpose        Show output with field names as column names..
        -h|--help Show this message.
 
+   1msurv0m
+       Usage: mlr surv -d {duration-field} -s {status-field}
+
+       Estimate Kaplan-Meier survival curve (right-censored).
+       Options:
+         -d {field}   Name of duration field (time-to-event or censoring).
+         -s {field}   Name of status field (0=censored, 1=event).
+         -h, --help   Show this message.
+
    1mtac0m
        Usage: mlr tac [options]
        Prints records in reverse order from the order in which they were encountered.
@@ -3737,5 +3745,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2024-12-23                         4mMILLER24m(1)
+                                  2025-07-03                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 7bd2f017a..78358a341 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -177,8 +177,8 @@ merge-fields most-frequent nest nothing put regularize remove-empty-columns rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort sort-within-records sparsify split ssub stats1 - stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten - uniq unspace unsparsify + stats2 step sub summary surv tac tail tee template top utf8-to-latin1 + unflatten uniq unspace unsparsify 1mFUNCTION LIST0m abs acos acosh antimode any append apply arrayify asin asinh asserting_absent @@ -347,9 +347,9 @@ are overridden in all cases by setting output format to `format2`. --asv or --asvlite Use ASV format for input and output data. - --csv or -c Use CSV format for input and output data. + --csv or -c or --c2c Use CSV format for input and output data. --csvlite Use CSV-lite format for input and output data. - --dkvp Use DKVP format for input and output data. + --dkvp or --d2d Use DKVP format for input and output data. --gen-field-name Specify field name for --igen. Defaults to "i". --gen-start Specify start value for --igen. Defaults to 1. --gen-step Specify step value for --igen. Defaults to 1. @@ -373,9 +373,9 @@ --itsvlite Use TSV-lite format for input data. --iusv or --iusvlite Use USV format for input data. --ixtab Use XTAB format for input data. - --json or -j Use JSON format for input and output data. - --jsonl Use JSON Lines format for input and output data. - --nidx Use NIDX format for input and output data. + --json or -j or --j2j Use JSON format for input and output data. + --jsonl or --l2l Use JSON Lines format for input and output data. + --nidx or --n2n Use NIDX format for input and output data. --oasv or --oasvlite Use ASV format for output data. --ocsv Use CSV format for output data. --ocsvlite Use CSV-lite format for output data. 
@@ -389,11 +389,11 @@ --otsvlite Use TSV-lite format for output data. --ousv or --ousvlite Use USV format for output data. --oxtab Use XTAB format for output data. - --pprint Use PPRINT format for input and output data. - --tsv or -t Use TSV format for input and output data. + --pprint or --p2p Use PPRINT format for input and output data. + --tsv or -t or --t2t Use TSV format for input and output data. --tsvlite Use TSV-lite format for input and output data. --usv or --usvlite Use USV format for input and output data. - --xtab Use XTAB format for input and output data. + --xtab or --x2x Use XTAB format for input and output data. --xvright Right-justify values for XTAB format. -i {format name} Use format name for input data. For example: `-i csv` is the same as `--icsv`. @@ -411,32 +411,31 @@ formats. Defaults to `.`. --no-auto-flatten When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` - then this flattens to `y.1=7,y.2=8,y.3=9, and + then this flattens to `y.1=7,y.2=8,y.3=9`, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. --no-auto-unflatten When input is non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With - `--no-auto-flatten`, instead we get - `${y.1}=7,${y.2}=8,${y.3}=9`. + `$y=[7,8,9]`. With `--no-auto-flatten`, instead we + get `${y.1}=7,${y.2}=8,${y.3}=9`. 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. 
- | In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | - +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------| - | CSV | --c2c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | - | TSV | --t2c | --t2t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | - | JSON | --j2c | --j2t | --j2j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | - | JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | - | DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | - | NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | - | XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | - | PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | --p2p | --p2m | - | Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | + | In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | + +----------+----------+----------+----------+-------+-------+-------+-------+--------+----------| + | CSV | --c2c,-c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | + | TSV | --t2c | --t2t,-t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | + | JSON | --j2c | --j2t | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | + | JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | + | DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | + | NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | + | XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | + | PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | -p2p | --p2m | + | Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. 
@@ -2087,6 +2086,15 @@ --transpose Show output with field names as column names.. -h|--help Show this message. + 1msurv0m + Usage: mlr surv -d {duration-field} -s {status-field} + + Estimate Kaplan-Meier survival curve (right-censored). + Options: + -d {field} Name of duration field (time-to-event or censoring). + -s {field} Name of status field (0=censored, 1=event). + -h, --help Show this message. + 1mtac0m Usage: mlr tac [options] Prints records in reverse order from the order in which they were encountered. @@ -3716,4 +3724,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-12-23 4mMILLER24m(1) + 2025-07-03 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 0294d4223..28adc12f6 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -144,9 +144,9 @@ are overridden in all cases by setting output format to `format2`. **Flags:** * `--asv or --asvlite`: Use ASV format for input and output data. -* `--csv or -c`: Use CSV format for input and output data. +* `--csv or -c or --c2c`: Use CSV format for input and output data. * `--csvlite`: Use CSV-lite format for input and output data. -* `--dkvp`: Use DKVP format for input and output data. +* `--dkvp or --d2d`: Use DKVP format for input and output data. * `--gen-field-name`: Specify field name for --igen. Defaults to "i". * `--gen-start`: Specify start value for --igen. Defaults to 1. * `--gen-step`: Specify step value for --igen. Defaults to 1. @@ -166,9 +166,9 @@ are overridden in all cases by setting output format to `format2`. * `--itsvlite`: Use TSV-lite format for input data. * `--iusv or --iusvlite`: Use USV format for input data. * `--ixtab`: Use XTAB format for input data. -* `--json or -j`: Use JSON format for input and output data. -* `--jsonl`: Use JSON Lines format for input and output data. -* `--nidx`: Use NIDX format for input and output data. 
+* `--json or -j or --j2j`: Use JSON format for input and output data. +* `--jsonl or --l2l`: Use JSON Lines format for input and output data. +* `--nidx or --n2n`: Use NIDX format for input and output data. * `--oasv or --oasvlite`: Use ASV format for output data. * `--ocsv`: Use CSV format for output data. * `--ocsvlite`: Use CSV-lite format for output data. @@ -182,11 +182,11 @@ are overridden in all cases by setting output format to `format2`. * `--otsvlite`: Use TSV-lite format for output data. * `--ousv or --ousvlite`: Use USV format for output data. * `--oxtab`: Use XTAB format for output data. -* `--pprint`: Use PPRINT format for input and output data. -* `--tsv or -t`: Use TSV format for input and output data. +* `--pprint or --p2p`: Use PPRINT format for input and output data. +* `--tsv or -t or --t2t`: Use TSV format for input and output data. * `--tsvlite`: Use TSV-lite format for input and output data. * `--usv or --usvlite`: Use USV format for input and output data. -* `--xtab`: Use XTAB format for input and output data. +* `--xtab or --x2x`: Use XTAB format for input and output data. * `--xvright`: Right-justify values for XTAB format. * `-i {format name}`: Use format name for input data. For example: `-i csv` is the same as `--icsv`. * `-o {format name}`: Use format name for output data. For example: `-o csv` is the same as `--ocsv`. @@ -210,17 +210,15 @@ The letters `c`, `t`, `j`, `d`, `n`, `x`, `p`, and `m` refer to formats CSV, TSV PPRINT, and markdown, respectively. Note that markdown format is available for output only. 
-| In out | **CSV** | **TSV** | **JSON** | **JSONL** | **DKVP** | **NIDX** | **XTAB** | **PPRINT** | **Markdown** | -|--------------|---------|---------|----------|-----------|----------|----------|----------|------------|--------------| -| **CSV** | --c2c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | -| **TSV** | --t2c | --t2t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | -| **JSON** | --j2c | --j2t | --j2j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | -| **JSONL** | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | -| **DKVP** | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | -| **NIDX** | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | -| **XTAB** | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | -| **PPRINT** | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | --p2p | --p2m | -| **Markdown** | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | +| In out | **CSV** | **TSV** | **JSON** | **DKVP** | **NIDX** | **XTAB** | **PPRINT** | **Markdown** | +|------------|---------|---------|----------|----------|----------|----------|------------|--------------| +| **CSV** | | `--c2t` | `--c2j` | `--c2d` | `--c2n` | `--c2x` | `--c2p` | `--c2m` | +| **TSV** | `--t2c` | | `--t2j` | `--t2d` | `--t2n` | `--t2x` | `--t2p` | `--t2m` | +| **JSON** | `--j2c` | `--j2t` | | `--j2d` | `--j2n` | `--j2x` | `--j2p` | `--j2m` | +| **DKVP** | `--d2c` | `--d2t` | `--d2j` | | `--d2n` | `--d2x` | `--d2p` | `--d2m` | +| **NIDX** | `--n2c` | `--n2t` | `--n2j` | `--n2d` | | `--n2x` | `--n2p` | `--n2m` | +| **XTAB** | `--x2c` | `--x2t` | `--x2j` | `--x2d` | `--x2n` | | `--x2p` | `--x2m` | +| **PPRINT** | `--p2c` | `--p2t` | `--p2j` | `--p2d` | `--p2n` | `--p2x` | | `--p2m` | Additionally: diff --git a/man/manpage.txt b/man/manpage.txt index 7bd2f017a..78358a341 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -177,8 +177,8 @@ merge-fields 
most-frequent nest nothing put regularize remove-empty-columns rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort sort-within-records sparsify split ssub stats1 - stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten - uniq unspace unsparsify + stats2 step sub summary surv tac tail tee template top utf8-to-latin1 + unflatten uniq unspace unsparsify 1mFUNCTION LIST0m abs acos acosh antimode any append apply arrayify asin asinh asserting_absent @@ -347,9 +347,9 @@ are overridden in all cases by setting output format to `format2`. --asv or --asvlite Use ASV format for input and output data. - --csv or -c Use CSV format for input and output data. + --csv or -c or --c2c Use CSV format for input and output data. --csvlite Use CSV-lite format for input and output data. - --dkvp Use DKVP format for input and output data. + --dkvp or --d2d Use DKVP format for input and output data. --gen-field-name Specify field name for --igen. Defaults to "i". --gen-start Specify start value for --igen. Defaults to 1. --gen-step Specify step value for --igen. Defaults to 1. @@ -373,9 +373,9 @@ --itsvlite Use TSV-lite format for input data. --iusv or --iusvlite Use USV format for input data. --ixtab Use XTAB format for input data. - --json or -j Use JSON format for input and output data. - --jsonl Use JSON Lines format for input and output data. - --nidx Use NIDX format for input and output data. + --json or -j or --j2j Use JSON format for input and output data. + --jsonl or --l2l Use JSON Lines format for input and output data. + --nidx or --n2n Use NIDX format for input and output data. --oasv or --oasvlite Use ASV format for output data. --ocsv Use CSV format for output data. --ocsvlite Use CSV-lite format for output data. @@ -389,11 +389,11 @@ --otsvlite Use TSV-lite format for output data. --ousv or --ousvlite Use USV format for output data. --oxtab Use XTAB format for output data. 
- --pprint Use PPRINT format for input and output data. - --tsv or -t Use TSV format for input and output data. + --pprint or --p2p Use PPRINT format for input and output data. + --tsv or -t or --t2t Use TSV format for input and output data. --tsvlite Use TSV-lite format for input and output data. --usv or --usvlite Use USV format for input and output data. - --xtab Use XTAB format for input and output data. + --xtab or --x2x Use XTAB format for input and output data. --xvright Right-justify values for XTAB format. -i {format name} Use format name for input data. For example: `-i csv` is the same as `--icsv`. @@ -411,32 +411,31 @@ formats. Defaults to `.`. --no-auto-flatten When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` - then this flattens to `y.1=7,y.2=8,y.3=9, and + then this flattens to `y.1=7,y.2=8,y.3=9`, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. --no-auto-unflatten When input is non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With - `--no-auto-flatten`, instead we get - `${y.1}=7,${y.2}=8,${y.3}=9`. + `$y=[7,8,9]`. With `--no-auto-flatten`, instead we + get `${y.1}=7,${y.2}=8,${y.3}=9`. 1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. 
- | In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | - +----------+-------+-------+--------+--------+--------+--------+--------+--------+----------| - | CSV | --c2c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | - | TSV | --t2c | --t2t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | - | JSON | --j2c | --j2t | --j2j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | - | JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | - | DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | - | NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | - | XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | - | PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | --p2p | --p2m | - | Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | + | In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | + +----------+----------+----------+----------+-------+-------+-------+-------+--------+----------| + | CSV | --c2c,-c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | + | TSV | --t2c | --t2t,-t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | + | JSON | --j2c | --j2t | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | + | JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | + | DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | + | NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | + | XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | + | PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | -p2p | --p2m | + | Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. 
@@ -2087,6 +2086,15 @@ --transpose Show output with field names as column names.. -h|--help Show this message. + 1msurv0m + Usage: mlr surv -d {duration-field} -s {status-field} + + Estimate Kaplan-Meier survival curve (right-censored). + Options: + -d {field} Name of duration field (time-to-event or censoring). + -s {field} Name of status field (0=censored, 1=event). + -h, --help Show this message. + 1mtac0m Usage: mlr tac [options] Prints records in reverse order from the order in which they were encountered. @@ -3716,4 +3724,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2024-12-23 4mMILLER24m(1) + 2025-07-03 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index ecf229958..028f696f4 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2024-12-23 +.\" Date: 2025-07-03 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2024-12-23" "\ \&" "\ \&" +.TH "MILLER" "1" "2025-07-03" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -220,8 +220,8 @@ json-parse json-stringify join label latin1-to-utf8 least-frequent merge-fields most-frequent nest nothing put regularize remove-empty-columns rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort sort-within-records sparsify split ssub stats1 -stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten -uniq unspace unsparsify +stats2 step sub summary surv tac tail tee template top utf8-to-latin1 +unflatten uniq unspace unsparsify .fi .if n \{\ .RE @@ -428,9 +428,9 @@ The latter sets up input and output flags for `format1`, not all of which are overridden in all cases by setting output format to `format2`. 
--asv or --asvlite Use ASV format for input and output data. ---csv or -c Use CSV format for input and output data. +--csv or -c or --c2c Use CSV format for input and output data. --csvlite Use CSV-lite format for input and output data. ---dkvp Use DKVP format for input and output data. +--dkvp or --d2d Use DKVP format for input and output data. --gen-field-name Specify field name for --igen. Defaults to "i". --gen-start Specify start value for --igen. Defaults to 1. --gen-step Specify step value for --igen. Defaults to 1. @@ -454,9 +454,9 @@ are overridden in all cases by setting output format to `format2`. --itsvlite Use TSV-lite format for input data. --iusv or --iusvlite Use USV format for input data. --ixtab Use XTAB format for input data. ---json or -j Use JSON format for input and output data. ---jsonl Use JSON Lines format for input and output data. ---nidx Use NIDX format for input and output data. +--json or -j or --j2j Use JSON format for input and output data. +--jsonl or --l2l Use JSON Lines format for input and output data. +--nidx or --n2n Use NIDX format for input and output data. --oasv or --oasvlite Use ASV format for output data. --ocsv Use CSV format for output data. --ocsvlite Use CSV-lite format for output data. @@ -470,11 +470,11 @@ are overridden in all cases by setting output format to `format2`. --otsvlite Use TSV-lite format for output data. --ousv or --ousvlite Use USV format for output data. --oxtab Use XTAB format for output data. ---pprint Use PPRINT format for input and output data. ---tsv or -t Use TSV format for input and output data. +--pprint or --p2p Use PPRINT format for input and output data. +--tsv or -t or --t2t Use TSV format for input and output data. --tsvlite Use TSV-lite format for input and output data. --usv or --usvlite Use USV format for input and output data. ---xtab Use XTAB format for input and output data. +--xtab or --x2x Use XTAB format for input and output data. --xvright Right-justify values for XTAB format. 
-i {format name} Use format name for input data. For example: `-i csv` is the same as `--icsv`. @@ -500,15 +500,14 @@ See the flatten/unflatten doc page https://miller.readthedocs.io/en/latest/flatt formats. Defaults to `.`. --no-auto-flatten When output is non-JSON, suppress the default auto-flatten behavior. Default: if `$y = [7,8,9]` - then this flattens to `y.1=7,y.2=8,y.3=9, and + then this flattens to `y.1=7,y.2=8,y.3=9`, and similarly for maps. With `--no-auto-flatten`, instead we get `$y=[1, 2, 3]`. --no-auto-unflatten When input is non-JSON and output is JSON, suppress the default auto-unflatten behavior. Default: if the input has `y.1=7,y.2=8,y.3=9` then this unflattens to - `$y=[7,8,9]`. flattens to `y.1=7,y.2=8,y.3=91. With - `--no-auto-flatten`, instead we get - `${y.1}=7,${y.2}=8,${y.3}=9`. + `$y=[7,8,9]`. With `--no-auto-flatten`, instead we + get `${y.1}=7,${y.2}=8,${y.3}=9`. .fi .if n \{\ .RE @@ -523,17 +522,17 @@ As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. 
-| In\eout | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | -+----------+-------+-------+--------+--------+--------+--------+--------+--------+----------| -| CSV | --c2c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | -| TSV | --t2c | --t2t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | -| JSON | --j2c | --j2t | --j2j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | -| JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | -| DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | -| NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | -| XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | -| PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | --p2p | --p2m | -| Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | +| In\eout | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | ++----------+----------+----------+----------+-------+-------+-------+-------+--------+----------| +| CSV | --c2c,-c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | +| TSV | --t2c | --t2t,-t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | +| JSON | --j2c | --j2t | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | +| JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | +| DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | +| NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | +| XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | +| PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | -p2p | --p2m | +| Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | | -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. 
@@ -2616,6 +2615,21 @@ Options: .fi .if n \{\ .RE +.SS "surv" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr surv -d {duration-field} -s {status-field} + +Estimate Kaplan-Meier survival curve (right-censored). +Options: + -d {field} Name of duration field (time-to-event or censoring). + -s {field} Name of status field (0=censored, 1=event). + -h, --help Show this message. +.fi +.if n \{\ +.RE .SS "tac" .if n \{\ .RS 0 diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 134accc24..156a056a3 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -1183,17 +1183,17 @@ func FormatConversionKeystrokeSaverPrintInfo() { The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. -| In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | -+----------+-------+-------+--------+--------+--------+--------+--------+--------+----------| -| CSV | --c2c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | -| TSV | --t2c | --t2t | --t2j | --t2l | --t2d | --t2n | --t2x | --t2p | --t2m | -| JSON | --j2c | --j2t | --j2j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | -| JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | -| DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | -| NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | -| XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | -| PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | --p2p | --p2m | -| Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | |`) +| In\out | CSV | TSV | JSON | JSONL | DKVP | NIDX | XTAB | PPRINT | Markdown | ++----------+----------+----------+----------+-------+-------+-------+-------+--------+----------| +| CSV | --c2c,-c | --c2t | --c2j | --c2l | --c2d | --c2n | --c2x | --c2p | --c2m | +| TSV | --t2c | --t2t,-t | --t2j | 
--t2l | --t2d | --t2n | --t2x | --t2p | --t2m | +| JSON | --j2c | --j2t | --j2j,-j | --j2l | --j2d | --j2n | --j2x | --j2p | --j2m | +| JSONL | --l2c | --l2t | --l2j | --l2l | --l2d | --l2n | --l2x | --l2p | --l2m | +| DKVP | --d2c | --d2t | --d2j | --d2l | --d2d | --d2n | --d2x | --d2p | --d2m | +| NIDX | --n2c | --n2t | --n2j | --n2l | --n2d | --n2n | --n2x | --n2p | --n2m | +| XTAB | --x2c | --x2t | --x2j | --x2l | --x2d | --x2n | --x2x | --x2p | --x2m | +| PPRINT | --p2c | --p2t | --p2j | --p2l | --p2d | --p2n | --p2x | -p2p | --p2m | +| Markdown | --m2c | --m2t | --m2j | --m2l | --m2d | --m2n | --m2x | --m2p | |`) } func init() { FormatConversionKeystrokeSaverFlagSection.Sort() } diff --git a/pkg/climain/mlrcli_parse.go b/pkg/climain/mlrcli_parse.go index f83a93373..68ba5abea 100644 --- a/pkg/climain/mlrcli_parse.go +++ b/pkg/climain/mlrcli_parse.go @@ -107,7 +107,7 @@ func ParseCommandLine( // main, verbs, and auxents) that multi-character options start with two // dashes, e.g. "--csv". (The sole exception is the sort verb's -nf/-nr // which are handled specially there.) - args = lib.Getoptify(args) + args = lib.Getoptify(args) // Pass one as described at the top of this file. 
flagSequences, terminalSequence, verbSequences, dataFileNames := parseCommandLinePassOne(args) From b7248bae98ae9a6d404e3432571aa53c014667d1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 4 Jul 2025 13:43:22 -0400 Subject: [PATCH 348/456] Doc copy edits (#1827) * Update index.md.in * more copy-editing * swipes.sh * swipes.sh * run `make docs` to generate `*.md` from `*.md.in` --- docs/src/10min.md | 2 +- docs/src/10min.md.in | 2 +- docs/src/features.md | 10 +- docs/src/features.md.in | 10 +- docs/src/file-formats.md | 43 ++++----- docs/src/file-formats.md.in | 43 ++++----- docs/src/how-to-release.md | 2 +- docs/src/how-to-release.md.in | 2 +- docs/src/index.md | 16 ++-- docs/src/index.md.in | 16 ++-- docs/src/installing-miller.md | 8 +- docs/src/installing-miller.md.in | 8 +- docs/src/keystroke-savers.md | 8 +- docs/src/keystroke-savers.md.in | 8 +- docs/src/miller-on-windows.md | 6 +- docs/src/miller-on-windows.md.in | 6 +- docs/src/miller-programming-language.md | 34 +++---- docs/src/miller-programming-language.md.in | 34 +++---- docs/src/new-in-miller-6.md | 95 +++++-------------- docs/src/new-in-miller-6.md.in | 95 +++++-------------- docs/src/originality.md | 18 ++-- docs/src/originality.md.in | 18 ++-- docs/src/record-heterogeneity.md | 50 ++++------ docs/src/record-heterogeneity.md.in | 50 ++++------ docs/src/reference-dsl-builtin-functions.md | 25 +---- .../src/reference-dsl-builtin-functions.md.in | 25 +---- docs/src/reference-dsl-complexity.md | 31 +----- docs/src/reference-dsl-complexity.md.in | 31 +----- docs/src/reference-dsl-control-structures.md | 22 ++--- .../reference-dsl-control-structures.md.in | 22 ++--- docs/src/reference-dsl-filter-statements.md | 2 +- .../src/reference-dsl-filter-statements.md.in | 2 +- .../reference-dsl-higher-order-functions.md | 67 ++++--------- ...reference-dsl-higher-order-functions.md.in | 67 ++++--------- docs/src/reference-dsl-operators.md | 11 +-- docs/src/reference-dsl-operators.md.in | 11 +-- 
docs/src/reference-dsl-output-statements.md | 65 +++++-------- .../src/reference-dsl-output-statements.md.in | 65 +++++-------- docs/src/reference-dsl-syntax.md | 10 +- docs/src/reference-dsl-syntax.md.in | 10 +- .../reference-dsl-user-defined-functions.md | 25 ++--- ...reference-dsl-user-defined-functions.md.in | 25 ++--- docs/src/reference-dsl-variables.md | 62 ++++++------ docs/src/reference-dsl-variables.md.in | 62 ++++++------ docs/src/reference-main-overview.md | 2 +- docs/src/reference-main-overview.md.in | 2 +- docs/src/structure-of-these-documents.md | 14 +-- docs/src/structure-of-these-documents.md.in | 14 +-- docs/src/swipes.sh | 6 ++ docs/src/unix-toolkit-context.md | 6 +- docs/src/unix-toolkit-context.md.in | 6 +- docs/src/why.md | 34 +++---- docs/src/why.md.in | 34 +++---- 53 files changed, 514 insertions(+), 828 deletions(-) create mode 100755 docs/src/swipes.sh diff --git a/docs/src/10min.md b/docs/src/10min.md index d9e4d2416..eaec2be05 100644 --- a/docs/src/10min.md +++ b/docs/src/10min.md @@ -20,7 +20,7 @@ Quick links: Let's take a quick look at some of the most useful Miller verbs -- file-format-aware, name-index-empowered equivalents of standard system commands. -For most of this section we'll use our [example.csv](./example.csv). +For most of this section, we'll use our [example.csv](./example.csv). `mlr cat` is like system `cat` (or `type` on Windows) -- it passes the data through unmodified: diff --git a/docs/src/10min.md.in b/docs/src/10min.md.in index 0fdc94bf1..32f06d7d7 100644 --- a/docs/src/10min.md.in +++ b/docs/src/10min.md.in @@ -4,7 +4,7 @@ Let's take a quick look at some of the most useful Miller verbs -- file-format-aware, name-index-empowered equivalents of standard system commands. -For most of this section we'll use our [example.csv](./example.csv). +For most of this section, we'll use our [example.csv](./example.csv). 
`mlr cat` is like system `cat` (or `type` on Windows) -- it passes the data through unmodified: diff --git a/docs/src/features.md b/docs/src/features.md index 36d4f66ee..ae1222a3f 100644 --- a/docs/src/features.md +++ b/docs/src/features.md @@ -16,7 +16,7 @@ Quick links: # Features -Miller is like awk, sed, cut, join, and sort for **name-indexed data such as +Miller is like awk, sed, cut, join, and sort for **name-indexed data, such as CSV, TSV, JSON, and JSON Lines**. You get to work with your data using named fields, without needing to count positional column indices. @@ -36,9 +36,9 @@ including but not limited to the familiar CSV, TSV, JSON, and JSON Lines. * Miller complements SQL **databases**: you can slice, dice, and reformat data on the client side on its way into or out of a database. (See [SQL Examples](sql-examples.md).) You can also reap some of the benefits of databases for quick, setup-free one-off tasks when you just need to query some data in disk files in a hurry. -* Miller also goes beyond the classic Unix tools by stepping fully into our modern, **no-SQL** world: its essential record-heterogeneity property allows Miller to operate on data where records with different schema (field names) are interleaved. +* Miller also goes beyond the classic Unix tools by stepping fully into our modern, **no-SQL** world: its essential record-heterogeneity property allows Miller to operate on data where records with different schemas (field names) are interleaved. -* Miller is **streaming**: most operations need only a single record in memory at a time, rather than ingesting all input before producing any output. For those operations which require deeper retention (`sort`, `tac`, `stats1`), Miller retains only as much data as needed. This means that whenever functionally possible, you can operate on files which are larger than your system's available RAM, and you can use Miller in **tail -f** contexts. 
+* Miller is **streaming**: most operations need only a single record in memory at a time, rather than ingesting all input before producing any output. For those operations that require deeper retention (`sort`, `tac`, `stats1`), Miller retains only as much data as needed. This means that whenever functionally possible, you can operate on files that are larger than your system's available RAM, and you can use Miller in **tail -f** contexts. * Miller is **pipe-friendly** and interoperates with the Unix toolkit @@ -46,10 +46,10 @@ including but not limited to the familiar CSV, TSV, JSON, and JSON Lines. * Miller does **conversion** between formats -* Miller's **processing is format-aware**: e.g. CSV `sort` and `tac` keep header lines first +* Miller's **processing is format-aware**: e.g., CSV `sort` and `tac` keep header lines first * Miller has high-throughput **performance** on par with the Unix toolkit -* Not unlike [jq](https://stedolan.github.io/jq/) (for JSON), Miller is written in Go which is a portable, modern language, and Miller has no runtime dependencies. You can download or compile a single binary, `scp` it to a faraway machine, and expect it to work. +* Not unlike [jq](https://stedolan.github.io/jq/) (for JSON), Miller is written in Go, which is a portable, modern language, and Miller has no runtime dependencies. You can download or compile a single binary, `scp` it to a faraway machine, and expect it to work. Releases and release notes: [https://github.com/johnkerl/miller/releases](https://github.com/johnkerl/miller/releases). diff --git a/docs/src/features.md.in b/docs/src/features.md.in index 22b2c5378..13ea25bb2 100644 --- a/docs/src/features.md.in +++ b/docs/src/features.md.in @@ -1,6 +1,6 @@ # Features -Miller is like awk, sed, cut, join, and sort for **name-indexed data such as +Miller is like awk, sed, cut, join, and sort for **name-indexed data, such as CSV, TSV, JSON, and JSON Lines**. 
You get to work with your data using named fields, without needing to count positional column indices. @@ -20,9 +20,9 @@ including but not limited to the familiar CSV, TSV, JSON, and JSON Lines. * Miller complements SQL **databases**: you can slice, dice, and reformat data on the client side on its way into or out of a database. (See [SQL Examples](sql-examples.md).) You can also reap some of the benefits of databases for quick, setup-free one-off tasks when you just need to query some data in disk files in a hurry. -* Miller also goes beyond the classic Unix tools by stepping fully into our modern, **no-SQL** world: its essential record-heterogeneity property allows Miller to operate on data where records with different schema (field names) are interleaved. +* Miller also goes beyond the classic Unix tools by stepping fully into our modern, **no-SQL** world: its essential record-heterogeneity property allows Miller to operate on data where records with different schemas (field names) are interleaved. -* Miller is **streaming**: most operations need only a single record in memory at a time, rather than ingesting all input before producing any output. For those operations which require deeper retention (`sort`, `tac`, `stats1`), Miller retains only as much data as needed. This means that whenever functionally possible, you can operate on files which are larger than your system's available RAM, and you can use Miller in **tail -f** contexts. +* Miller is **streaming**: most operations need only a single record in memory at a time, rather than ingesting all input before producing any output. For those operations that require deeper retention (`sort`, `tac`, `stats1`), Miller retains only as much data as needed. This means that whenever functionally possible, you can operate on files that are larger than your system's available RAM, and you can use Miller in **tail -f** contexts. 
* Miller is **pipe-friendly** and interoperates with the Unix toolkit @@ -30,10 +30,10 @@ including but not limited to the familiar CSV, TSV, JSON, and JSON Lines. * Miller does **conversion** between formats -* Miller's **processing is format-aware**: e.g. CSV `sort` and `tac` keep header lines first +* Miller's **processing is format-aware**: e.g., CSV `sort` and `tac` keep header lines first * Miller has high-throughput **performance** on par with the Unix toolkit -* Not unlike [jq](https://stedolan.github.io/jq/) (for JSON), Miller is written in Go which is a portable, modern language, and Miller has no runtime dependencies. You can download or compile a single binary, `scp` it to a faraway machine, and expect it to work. +* Not unlike [jq](https://stedolan.github.io/jq/) (for JSON), Miller is written in Go, which is a portable, modern language, and Miller has no runtime dependencies. You can download or compile a single binary, `scp` it to a faraway machine, and expect it to work. Releases and release notes: [https://github.com/johnkerl/miller/releases](https://github.com/johnkerl/miller/releases). diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 31b874f7d..5eaff8b13 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -20,7 +20,7 @@ Miller handles name-indexed data using several formats: some you probably know by name, such as CSV, TSV, JSON, and JSON Lines -- and other formats you're likely already seeing and using in your structured data. -Additionally, Miller gives you the option of including comments within your data. +Additionally, Miller gives you the option to include comments within your data. ## Examples @@ -102,13 +102,13 @@ NIDX: implicitly numerically indexed (Unix-toolkit style) ## CSV/TSV/ASV/USV/etc. -When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are found on the first record and values are taken from subsequent records. This includes the case of CSV-formatted files. 
See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream. +When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are found on the first record, and values are taken from subsequent records. This includes the case of CSV-formatted files. See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream. Miller has record separator `RS` and field separator `FS`, just as `awk` does. (See also the [separators page](reference-main-separators.md).) **CSV (comma-separated values):** Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180). -* This includes CRLF line-terminators by default, regardless of platform. +* This includes CRLF line terminators by default, regardless of platform. * Any cell containing a comma or a carriage return within it must be double-quoted. **TSV (tab-separated values):** Miller's `--tsv` supports [IANA TSV](https://www.iana.org/assignments/media-types/text/tab-separated-values). @@ -131,8 +131,8 @@ Here are the differences between CSV and CSV-lite: * CSV does not allow heterogeneous data; CSV-lite does (see also [Record Heterogeneity](record-heterogeneity.md)). -* TSV-lite is simply CSV-lite with field separator set to tab instead of comma. -In particular, no encode/decode of `\r`, `\n`, `\t`, or `\\` is done. +* TSV-lite is simply CSV-lite with the field separator set to tab instead of a comma. +In particular, no encoding/decoding of `\r`, `\n`, `\t`, or `\\` is done. * CSV-lite allows changing FS and/or RS to any values, perhaps multi-character. @@ -208,21 +208,21 @@ mlr: exiting due to data error. CSV, TSV, CSV-lite, and TSV-lite have in common the `--implicit-csv-header` flag for input and the `--headerless-csv-output` flag for output. 
-See also the [`--lazy-quotes` flag](reference-main-flag-list.md#csv-only-flags) which can help with CSV files which are not fully compliant with RFC-4180. +See also the [`--lazy-quotes` flag](reference-main-flag-list.md#csv-only-flags), which can help with CSV files that are not fully compliant with RFC-4180. ## JSON [JSON](https://json.org) is a format which supports scalars (numbers, strings, -boolean, etc.) as well as "objects" (maps) and "arrays" (lists), while Miller +booleans, etc.) as well as "objects" (maps) and "arrays" (lists), while Miller is a tool for handling **tabular data** only. By *tabular JSON* I mean the data is either a sequence of one or more objects, or an array consisting of one or more objects. Miller treats JSON objects as name-indexed records. This means Miller cannot (and should not) handle arbitrary JSON. In practice, -though, Miller can handle single JSON objects as well as list of them. The only -kinds of JSON that are unmillerable are single scalars (e.g. file contents `3`) -and arrays of non-object (e.g. file contents `[1,2,3,4,5]`). Check out -[jq](https://stedolan.github.io/jq/) for a tool which handles all valid JSON. +though, Miller can handle single JSON objects as well as lists of them. The only +kinds of JSON that are unmillerable are single scalars (e.g., file contents `3`) +and arrays of non-object (e.g., file contents `[1,2,3,4,5]`). Check out +[jq](https://stedolan.github.io/jq/) for a tool that handles all valid JSON. In short, if you have tabular data represented in JSON -- lists of objects, either with or without outermost `[...]` -- [then Miller can handle that for @@ -336,7 +336,7 @@ input as well as output in JSON format, JSON structure is preserved throughout t ] -But if the input format is JSON and the output format is not (or vice versa) then key-concatenation applies: +But if the input format is JSON and the output format is not (or vice versa), then key-concatenation applies:
 mlr --ijson --opprint head -n 4 data/json-example-2.json
@@ -355,7 +355,7 @@ Use `--jflatsep yourseparatorhere` to specify the string used for key concatenat
 
 ### JSON-in-CSV
 
-It's quite common to have CSV data which contains stringified JSON as a column.
+It's quite common to have CSV data that contains stringified JSON as a column.
 See the [JSON parse and stringify section](reference-main-data-types.md#json-parse-and-stringify) for ways to
 decode these in Miller.
 
@@ -410,7 +410,7 @@ records; using `--ojsonl`, you get no outermost `[...]`, and one line per record
 
 ## PPRINT: Pretty-printed tabular
 
-Miller's pretty-print format is like CSV, but column-aligned.  For example, compare
+Miller's pretty-print format is similar to CSV, but with column alignment.  For example, compare
 
 
 mlr --ocsv cat data/small
@@ -436,7 +436,7 @@ eks wye 4 0.381399 0.134188
 wye pan 5 0.573288 0.863624
 
-Note that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g. for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) pretty-print output won't work on `tail -f` contexts, where Miller will be waiting for an end-of-file marker which never arrives; (b) pretty-print output for large files is constrained by available machine memory. +Note that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g., for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) Pretty-print output will not work in `tail -f` contexts, where Miller will be waiting for an end-of-file marker that never arrives; (b) Pretty-print output for large files is constrained by the available machine memory. See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream. @@ -505,7 +505,7 @@ Markdown format looks like this: | wye | pan | 5 | 0.573288 | 0.863624 |
-which renders like this when dropped into various web tools (e.g. github comments): +which renders like this when dropped into various web tools (e.g. github.comments): ![pix/omd.png](pix/omd.png) @@ -594,7 +594,7 @@ a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Such data are easy to generate, e.g. in Ruby with +Such data is easy to generate, e.g., in Ruby with
 puts "host=#{hostname},seconds=#{t2-t1},message=#{msg}"
@@ -616,7 +616,7 @@ logger.log("type=3,user=$USER,date=$date\n");
 
 Fields lacking an IPS will have positional index (starting at 1) used as the key, as in NIDX format. For example, `dish=7,egg=8,flint` is parsed as `"dish" => "7", "egg" => "8", "3" => "flint"` and `dish,egg,flint` is parsed as `"1" => "dish", "2" => "egg", "3" => "flint"`.
 
-As discussed in [Record Heterogeneity](record-heterogeneity.md), Miller handles changes of field names within the same data stream. But using DKVP format this is particularly natural. One of my favorite use-cases for Miller is in application/server logs, where I log all sorts of lines such as
+As discussed in [Record Heterogeneity](record-heterogeneity.md), Miller handles changes of field names within the same data stream. But using DKVP format, this is particularly natural. One of my favorite use-cases for Miller is in application/server logs, where I log all sorts of lines such as
 
 
 resource=/path/to/file,loadsec=0.45,ok=true
@@ -624,10 +624,9 @@ record_count=100, resource=/path/to/file
 resource=/some/other/path,loadsec=0.97,ok=false
 
-etc. and I just log them as needed. Then later, I can use `grep`, `mlr --opprint group-like`, etc. -to analyze my logs. +etc., and I log them as needed. Then later, I can use `grep`, `mlr --opprint group-like`, etc. to analyze my logs. -See the [separators page](reference-main-separators.md) regarding how to specify separators other than the default equals-sign and comma. +See the [separators page](reference-main-separators.md) regarding how to specify separators other than the default equals sign and comma. ## NIDX: Index-numbered (toolkit style) @@ -730,7 +729,7 @@ JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. ## Comments in data -You can include comments within your data files, and either have them ignored, or passed directly through to the standard output as soon as they are encountered: +You can include comments within your data files, and either have them ignored or passed directly through to the standard output as soon as they are encountered:
 mlr help comments-in-data-flags
diff --git a/docs/src/file-formats.md.in b/docs/src/file-formats.md.in
index f72f81387..2ed581b19 100644
--- a/docs/src/file-formats.md.in
+++ b/docs/src/file-formats.md.in
@@ -4,7 +4,7 @@ Miller handles name-indexed data using several formats: some you probably know
 by name, such as CSV, TSV, JSON, and JSON Lines -- and other formats you're likely already
 seeing and using in your structured data.
 
-Additionally, Miller gives you the option of including comments within your data.
+Additionally, Miller gives you the option to include comments within your data.
 
 ## Examples
 
@@ -14,13 +14,13 @@ GENMD-EOF
 
 ## CSV/TSV/ASV/USV/etc.
 
-When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are found on the first record and values are taken from subsequent records.  This includes the case of CSV-formatted files.  See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream.
+When `mlr` is invoked with the `--csv` or `--csvlite` option, key names are found on the first record, and values are taken from subsequent records.  This includes the case of CSV-formatted files.  See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream.
 
 Miller has record separator `RS` and field separator `FS`, just as `awk` does. (See also the [separators page](reference-main-separators.md).)
 
 **CSV (comma-separated values):** Miller's `--csv` flag supports [RFC-4180 CSV](https://tools.ietf.org/html/rfc4180).
 
-* This includes CRLF line-terminators by default, regardless of platform.
+* This includes CRLF line terminators by default, regardless of platform.
 * Any cell containing a comma or a carriage return within it must be double-quoted.
 
 **TSV (tab-separated values):** Miller's `--tsv` supports [IANA TSV](https://www.iana.org/assignments/media-types/text/tab-separated-values).
@@ -43,8 +43,8 @@ Here are the differences between CSV and CSV-lite:
 
 * CSV does not allow heterogeneous data; CSV-lite does (see also [Record Heterogeneity](record-heterogeneity.md)).
 
-* TSV-lite is simply CSV-lite with field separator set to tab instead of comma.
-In particular, no encode/decode of  `\r`, `\n`, `\t`, or `\\` is done.
+* TSV-lite is simply CSV-lite with the field separator set to a tab instead of a comma.
+In particular, no encoding/decoding of  `\r`, `\n`, `\t`, or `\\` is done.
 
 * CSV-lite allows changing FS and/or RS to any values, perhaps multi-character.
 
@@ -77,21 +77,21 @@ GENMD-EOF
 
 CSV, TSV, CSV-lite, and TSV-lite have in common the `--implicit-csv-header` flag for input and the `--headerless-csv-output` flag for output.
 
-See also the [`--lazy-quotes` flag](reference-main-flag-list.md#csv-only-flags) which can help with CSV files which are not fully compliant with RFC-4180.
+See also the [`--lazy-quotes` flag](reference-main-flag-list.md#csv-only-flags), which can help with CSV files that are not fully compliant with RFC-4180.
 
 ## JSON
 
 [JSON](https://json.org) is a format which supports scalars (numbers, strings,
-boolean, etc.) as well as "objects" (maps) and "arrays" (lists), while Miller
+booleans, etc.) as well as "objects" (maps) and "arrays" (lists), while Miller
 is a tool for handling **tabular data** only.  By *tabular JSON* I mean the
 data is either a sequence of one or more objects, or an array consisting of one
 or more objects.  Miller treats JSON objects as name-indexed records.
 
 This means Miller cannot (and should not) handle arbitrary JSON.  In practice,
-though, Miller can handle single JSON objects as well as list of them. The only
-kinds of JSON that are unmillerable are single scalars (e.g. file contents `3`)
-and arrays of non-object (e.g. file contents `[1,2,3,4,5]`).  Check out
-[jq](https://stedolan.github.io/jq/) for a tool which handles all valid JSON.
+though, Miller can handle single JSON objects as well as lists of them. The only
+kinds of JSON that are unmillerable are single scalars (e.g., file contents `3`)
+and arrays of non-objects (e.g., file contents `[1,2,3,4,5]`).  Check out
+[jq](https://stedolan.github.io/jq/) for a tool that handles all valid JSON.
 
 In short, if you have tabular data represented in JSON -- lists of objects,
 either with or without outermost `[...]` -- [then Miller can handle that for
@@ -129,7 +129,7 @@ GENMD-RUN-COMMAND
 mlr --json head -n 2 data/json-example-2.json
 GENMD-EOF
 
-But if the input format is JSON and the output format is not (or vice versa) then key-concatenation applies:
+But if the input format is JSON and the output format is not (or vice versa), then key-concatenation applies:
 
 GENMD-RUN-COMMAND
 mlr --ijson --opprint head -n 4 data/json-example-2.json
@@ -141,7 +141,7 @@ Use `--jflatsep yourseparatorhere` to specify the string used for key concatenat
 
 ### JSON-in-CSV
 
-It's quite common to have CSV data which contains stringified JSON as a column.
+It's quite common to have CSV data that contains stringified JSON as a column.
 See the [JSON parse and stringify section](reference-main-data-types.md#json-parse-and-stringify) for ways to
 decode these in Miller.
 
@@ -170,7 +170,7 @@ records; using `--ojsonl`, you get no outermost `[...]`, and one line per record
 
 ## PPRINT: Pretty-printed tabular
 
-Miller's pretty-print format is like CSV, but column-aligned.  For example, compare
+Miller's pretty-print format is similar to CSV, but with column alignment.  For example, compare
 
 GENMD-RUN-COMMAND
 mlr --ocsv cat data/small
@@ -180,7 +180,7 @@ GENMD-RUN-COMMAND
 mlr --opprint cat data/small
 GENMD-EOF
 
-Note that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g. for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) pretty-print output won't work on `tail -f` contexts, where Miller will be waiting for an end-of-file marker which never arrives; (b) pretty-print output for large files is constrained by available machine memory.
+Note that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g., for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) pretty-print output will not work in `tail -f` contexts, where Miller will be waiting for an end-of-file marker that never arrives; (b) pretty-print output for large files is constrained by the available machine memory.
 
 See [Record Heterogeneity](record-heterogeneity.md) for how Miller handles changes of field names within a single data stream.
 
@@ -204,7 +204,7 @@ GENMD-RUN-COMMAND
 mlr --omd cat data/small
 GENMD-EOF
 
-which renders like this when dropped into various web tools (e.g. github comments):
+which renders like this when dropped into various web tools (e.g., GitHub comments):
 
 ![pix/omd.png](pix/omd.png)
 
@@ -280,7 +280,7 @@ GENMD-RUN-COMMAND
 mlr cat data/small
 GENMD-EOF
 
-Such data are easy to generate, e.g. in Ruby with
+Such data is easy to generate, e.g., in Ruby with
 
 GENMD-CARDIFY
 puts "host=#{hostname},seconds=#{t2-t1},message=#{msg}"
@@ -302,7 +302,7 @@ GENMD-EOF
 
 Fields lacking an IPS will have positional index (starting at 1) used as the key, as in NIDX format. For example, `dish=7,egg=8,flint` is parsed as `"dish" => "7", "egg" => "8", "3" => "flint"` and `dish,egg,flint` is parsed as `"1" => "dish", "2" => "egg", "3" => "flint"`.
 
-As discussed in [Record Heterogeneity](record-heterogeneity.md), Miller handles changes of field names within the same data stream. But using DKVP format this is particularly natural. One of my favorite use-cases for Miller is in application/server logs, where I log all sorts of lines such as
+As discussed in [Record Heterogeneity](record-heterogeneity.md), Miller handles changes of field names within the same data stream. But using DKVP format, this is particularly natural. One of my favorite use-cases for Miller is in application/server logs, where I log all sorts of lines such as
 
 GENMD-CARDIFY
 resource=/path/to/file,loadsec=0.45,ok=true
@@ -310,10 +310,9 @@ record_count=100, resource=/path/to/file
 resource=/some/other/path,loadsec=0.97,ok=false
 GENMD-EOF
 
-etc. and I just log them as needed. Then later, I can use `grep`, `mlr --opprint group-like`, etc.
-to analyze my logs.
+etc., and I log them as needed. Then later, I can use `grep`, `mlr --opprint group-like`, etc. to analyze my logs.
 
-See the [separators page](reference-main-separators.md) regarding how to specify separators other than the default equals-sign and comma.
+See the [separators page](reference-main-separators.md) regarding how to specify separators other than the default equals sign and comma.
 
 ## NIDX: Index-numbered (toolkit style)
 
@@ -361,7 +360,7 @@ GENMD-EOF
 
 ## Comments in data
 
-You can include comments within your data files, and either have them ignored, or passed directly through to the standard output as soon as they are encountered:
+You can include comments within your data files, and either have them ignored or passed directly through to the standard output as soon as they are encountered:
 
 GENMD-RUN-COMMAND
 mlr help comments-in-data-flags
diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md
index 57d39b2ff..58a445f8d 100644
--- a/docs/src/how-to-release.md
+++ b/docs/src/how-to-release.md
@@ -40,7 +40,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
     * This creates `miller-6.3.0.tar.gz` which we'll upload to GitHub, the URL of which will be in our `miller.spec`
     * Prepare the source RPM following [README-RPM.md](https://github.com/johnkerl/miller/blob/main/README-RPM.md).
 
-* Create the Github release tag:
+* Create the GitHub release tag:
 
     * Don't forget the `v` in `v6.3.0`
     * Write the release notes -- save as a pre-release until below
diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in
index b54b1be26..e96010f36 100644
--- a/docs/src/how-to-release.md.in
+++ b/docs/src/how-to-release.md.in
@@ -24,7 +24,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo
     * This creates `miller-6.3.0.tar.gz` which we'll upload to GitHub, the URL of which will be in our `miller.spec`
     * Prepare the source RPM following [README-RPM.md](https://github.com/johnkerl/miller/blob/main/README-RPM.md).
 
-* Create the Github release tag:
+* Create the GitHub release tag:
 
     * Don't forget the `v` in `v6.3.0`
     * Write the release notes -- save as a pre-release until below
diff --git a/docs/src/index.md b/docs/src/index.md
index fd39051a6..bcb69c8ed 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -16,20 +16,20 @@ Quick links:
 
 # Introduction
 
-**Miller is a command-line tool for querying, shaping, and reformatting data files in various formats including CSV, TSV, JSON, and JSON Lines.**
+**Miller is a command-line tool for querying, shaping, and reformatting data files in various formats, including CSV, TSV, JSON, and JSON Lines.**
 
-**The big picture:** Even well into the 21st century, our world is full of text-formatted data like CSV. Google _CSV memes_, for example. We need tooling to _thrive in this world_, nimbly manipulating data which is in CSVs. And we need tooling to _move beyond CSV_, to be able to pull data out and into other storage and processing systems. Miller is designed for both these goals.
+**The big picture:** Even well into the 21st century, our world is full of text-formatted data such as CSV. Google _CSV memes_, for example. We need tooling to _thrive in this world_, nimbly manipulating data which is in CSVs. And we need tooling to _move beyond CSV_, to be able to pull data out and into other storage and processing systems. Miller is designed for both of these goals.
 
 In several senses, Miller is more than one tool:
 
 **Format conversion:** You can convert CSV files to JSON, or vice versa, or
 pretty-print your data horizontally or vertically to make it easier to read.
 
-**Data manipulation:** With a few keystrokes you can remove columns you don't care about -- or, make new ones.
+**Data manipulation:** With a few keystrokes, you can remove columns you don't care about -- or make new ones.
 
-**Pre-processing/post-processing vs standalone use:** You can use Miller to clean data files and put them into standard formats, perhaps in preparation to load them into a database or a hands-off data-processing pipeline. Or you can use it post-process and summary database-query output. As well, you can use Miller to explore and analyze your data interactively.
+**Pre-processing/post-processing vs standalone use:** You can use Miller to clean data files and put them into standard formats, perhaps in preparation for loading them into a database or a hands-off data-processing pipeline. Or you can use it to post-process and summarize database-query output. As well, you can use Miller to explore and analyze your data interactively.
 
-**Compact verbs vs programming language:** For low-keystroking you can do things like
+**Compact verbs vs programming language:** For low-keystroking, you can do things like
 
 
 mlr --csv sort -f name input.csv
@@ -39,16 +39,16 @@ pretty-print your data horizontally or vertically to make it easier to read.
 mlr --json head -n 1 myfile.json
 
-The `sort`, `head`, etc are called *verbs*. They're analogs of familiar command-line tools like `sort`, `head`, and so on -- but they're aware of name-indexed, multi-line file formats like CSV, TSV, and JSON. In addition, though, using Miller's `put` verb you can use programming-language statements for expressions like +The `sort`, `head`, etc., are called *verbs*. They're analogs of familiar command-line tools like `sort`, `head`, and so on -- but they're aware of name-indexed, multi-line file formats like CSV, TSV, and JSON. In addition, though, using Miller's `put` verb, you can use programming-language statements for expressions like
 mlr --csv put '$rate = $units / $seconds' input.csv
 
-which allow you to succinctly express your own logic. +which allow you to express your own logic succinctly. **Multiple domains:** People use Miller for data analysis, data science, software engineering, devops/system-administration, journalism, scientific research, and more. -In the following you can see how CSV, TSV, tabular, JSON, and other **file formats** share a common theme which is **lists of key-value-pairs**. Miller embraces this common theme. +In the following, you can see how CSV, TSV, tabular, JSON, and other **file formats** share a common theme which is **lists of key-value-pairs**. Miller embraces this common theme. ![coverart/cover-combined.png](coverart/cover-combined.png) diff --git a/docs/src/index.md.in b/docs/src/index.md.in index 3722d45e6..25073a3f1 100644 --- a/docs/src/index.md.in +++ b/docs/src/index.md.in @@ -1,19 +1,19 @@ # Introduction -**Miller is a command-line tool for querying, shaping, and reformatting data files in various formats including CSV, TSV, JSON, and JSON Lines.** +**Miller is a command-line tool for querying, shaping, and reformatting data files in various formats, including CSV, TSV, JSON, and JSON Lines.** -**The big picture:** Even well into the 21st century, our world is full of text-formatted data like CSV. Google _CSV memes_, for example. We need tooling to _thrive in this world_, nimbly manipulating data which is in CSVs. And we need tooling to _move beyond CSV_, to be able to pull data out and into other storage and processing systems. Miller is designed for both these goals. +**The big picture:** Even well into the 21st century, our world is full of text-formatted data such as CSV. Google _CSV memes_, for example. We need tooling to _thrive in this world_, nimbly manipulating data which is in CSVs. And we need tooling to _move beyond CSV_, to be able to pull data out and into other storage and processing systems. Miller is designed for both of these goals. 
In several senses, Miller is more than one tool: **Format conversion:** You can convert CSV files to JSON, or vice versa, or pretty-print your data horizontally or vertically to make it easier to read. -**Data manipulation:** With a few keystrokes you can remove columns you don't care about -- or, make new ones. +**Data manipulation:** With a few keystrokes, you can remove columns you don't care about -- or make new ones. -**Pre-processing/post-processing vs standalone use:** You can use Miller to clean data files and put them into standard formats, perhaps in preparation to load them into a database or a hands-off data-processing pipeline. Or you can use it post-process and summary database-query output. As well, you can use Miller to explore and analyze your data interactively. +**Pre-processing/post-processing vs standalone use:** You can use Miller to clean data files and put them into standard formats, perhaps in preparation for loading them into a database or a hands-off data-processing pipeline. Or you can use it to post-process and summarize database-query output. As well, you can use Miller to explore and analyze your data interactively. -**Compact verbs vs programming language:** For low-keystroking you can do things like +**Compact verbs vs programming language:** For low-keystroking, you can do things like GENMD-SHOW-COMMAND mlr --csv sort -f name input.csv @@ -23,16 +23,16 @@ GENMD-SHOW-COMMAND mlr --json head -n 1 myfile.json GENMD-EOF -The `sort`, `head`, etc are called *verbs*. +The `sort`, `head`, etc., are called *verbs*. 
They're analogs of familiar command-line tools like `sort`, `head`, and so on -- but they're aware of name-indexed, multi-line file formats like CSV, TSV, and JSON. In addition, though, using Miller's `put` verb, you can use programming-language statements for expressions like GENMD-SHOW-COMMAND mlr --csv put '$rate = $units / $seconds' input.csv GENMD-EOF -which allow you to succinctly express your own logic. +which allow you to express your own logic succinctly. **Multiple domains:** People use Miller for data analysis, data science, software engineering, devops/system-administration, journalism, scientific research, and more. -In the following you can see how CSV, TSV, tabular, JSON, and other **file formats** share a common theme which is **lists of key-value-pairs**. Miller embraces this common theme. +In the following, you can see how CSV, TSV, tabular, JSON, and other **file formats** share a common theme which is **lists of key-value-pairs**. Miller embraces this common theme. ![coverart/cover-combined.png](coverart/cover-combined.png) diff --git a/docs/src/installing-miller.md b/docs/src/installing-miller.md index b5ae44227..d50b70d31 100644 --- a/docs/src/installing-miller.md +++ b/docs/src/installing-miller.md @@ -21,7 +21,7 @@ You can install Miller for various platforms as follows. Download a binary: * You can get binaries for several platforms on the [releases page](https://github.com/johnkerl/miller/releases). -* You can get latest (head) builds for Linux, MacOS, and Windows by visiting [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), selecting the latest build, and clicking _Artifacts_. (These are retained for 5 days after each commit.) +* You can get the latest (head) builds for Linux, MacOS, and Windows by visiting [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), selecting the latest build, and clicking _Artifacts_. (These are retained for 5 days after each commit.) 
* See also the [build page](build.md) if you prefer to build from source. Using a package manager: @@ -37,7 +37,7 @@ See also: * [@jauderho](https://github.com/jauderho)'s [docker images](https://hub.docker.com/r/jauderho/miller/tags) as discussed in [GitHub Discussions](https://github.com/johnkerl/miller/discussions/851#discussioncomment-1943255) * Example invocation: `docker run --rm -i jauderho/miller:latest --csv sort -f shape < ./example.csv` -Note that the [Miller releases page](https://github.com/johnkerl/miller/releases), `brew`, `macports`, `chocolatey`, and `conda` tend to have current versions; `yum` and `apt-get` may have outdate versions depending on your platform. +Note that the [Miller releases page](https://github.com/johnkerl/miller/releases), `brew`, `macports`, `chocolatey`, and `conda` tend to have current versions; `yum` and `apt-get` may have outdated versions depending on your platform. As a first check, you should be able to run `mlr --version` at your system's command prompt and see something like the following: @@ -50,7 +50,7 @@ mlr 6.0.0 A note on documentation: -* If you downloaded the Miller binary from a tagged release, or installed it using a package manager, you should see a version like `mlr 6.0.0` or `mlr 5.10.3` -- please see the [release docs page](release-docs.md) to find the documentation for your version. +* If you downloaded the Miller binary from a tagged release or installed it using a package manager, you should see a version like `mlr 6.0.0` or `mlr 5.10.3` -- please see the [release docs page](release-docs.md) to find the documentation for your version. * If you installed from source or using a recent build artifact from GitHub Actions, you should see a version like `mlr 6.0.0-dev` -- [https://miller.readthedocs.io](https://miller.readthedocs.io) is the correct reference, since it contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). 
As a second check, given [example.csv](./example.csv) you should be able to do @@ -89,6 +89,6 @@ yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430
-If you run into issues on these checks, please check out the resources on the [community page](community.md) for help. +If you encounter issues with these checks, please refer to the resources on the [community page](community.md) for help. Otherwise, let's go on to [Miller in 10 minutes](10min.md)! diff --git a/docs/src/installing-miller.md.in b/docs/src/installing-miller.md.in index da908cdc5..b735be725 100644 --- a/docs/src/installing-miller.md.in +++ b/docs/src/installing-miller.md.in @@ -5,7 +5,7 @@ You can install Miller for various platforms as follows. Download a binary: * You can get binaries for several platforms on the [releases page](https://github.com/johnkerl/miller/releases). -* You can get latest (head) builds for Linux, MacOS, and Windows by visiting [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), selecting the latest build, and clicking _Artifacts_. (These are retained for 5 days after each commit.) +* You can get the latest (head) builds for Linux, MacOS, and Windows by visiting [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), selecting the latest build, and clicking _Artifacts_. (These are retained for 5 days after each commit.) * See also the [build page](build.md) if you prefer to build from source. Using a package manager: @@ -21,7 +21,7 @@ See also: * [@jauderho](https://github.com/jauderho)'s [docker images](https://hub.docker.com/r/jauderho/miller/tags) as discussed in [GitHub Discussions](https://github.com/johnkerl/miller/discussions/851#discussioncomment-1943255) * Example invocation: `docker run --rm -i jauderho/miller:latest --csv sort -f shape < ./example.csv` -Note that the [Miller releases page](https://github.com/johnkerl/miller/releases), `brew`, `macports`, `chocolatey`, and `conda` tend to have current versions; `yum` and `apt-get` may have outdate versions depending on your platform. 
+Note that the [Miller releases page](https://github.com/johnkerl/miller/releases), `brew`, `macports`, `chocolatey`, and `conda` tend to have current versions; `yum` and `apt-get` may have outdated versions depending on your platform. As a first check, you should be able to run `mlr --version` at your system's command prompt and see something like the following: @@ -32,7 +32,7 @@ GENMD-EOF A note on documentation: -* If you downloaded the Miller binary from a tagged release, or installed it using a package manager, you should see a version like `mlr 6.0.0` or `mlr 5.10.3` -- please see the [release docs page](release-docs.md) to find the documentation for your version. +* If you downloaded the Miller binary from a tagged release or installed it using a package manager, you should see a version like `mlr 6.0.0` or `mlr 5.10.3` -- please see the [release docs page](release-docs.md) to find the documentation for your version. * If you installed from source or using a recent build artifact from GitHub Actions, you should see a version like `mlr 6.0.0-dev` -- [https://miller.readthedocs.io](https://miller.readthedocs.io) is the correct reference, since it contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). As a second check, given [example.csv](./example.csv) you should be able to do @@ -45,6 +45,6 @@ GENMD-RUN-COMMAND mlr --icsv --opprint cat example.csv GENMD-EOF -If you run into issues on these checks, please check out the resources on the [community page](community.md) for help. +If you encounter issues with these checks, please refer to the resources on the [community page](community.md) for help. Otherwise, let's go on to [Miller in 10 minutes](10min.md)! 
diff --git a/docs/src/keystroke-savers.md b/docs/src/keystroke-savers.md index c62231709..ec15e9308 100644 --- a/docs/src/keystroke-savers.md +++ b/docs/src/keystroke-savers.md @@ -18,7 +18,7 @@ Quick links: ## Short format specifiers, including --c2p -In our examples so far we've often made use of `mlr --icsv --opprint` or `mlr --icsv --ojson`. These are such frequently occurring patterns that they have short options like `--c2p` and `--c2j`: +In our examples so far, we've often made use of `mlr --icsv --opprint` or `mlr --icsv --ojson`. These are such frequently occurring patterns that they have short options like `--c2p` and `--c2j`:
 mlr --c2p head -n 2 example.csv
@@ -59,7 +59,7 @@ You can get the full list [here](file-formats.md#data-conversion-keystroke-saver
 
 ## File names up front, including --from
 
-Already we saw that you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command:
+Already, we saw that you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command:
 
 
 mlr --c2p --from example.csv sort -nr index then head -n 3
@@ -110,7 +110,7 @@ I think `mlr --csv ...` explains itself better than `mlr -c ...`. Nonetheless, t
 
 ## .mlrrc file
 
-If you want the default file format for Miller to be CSV, you can simply put `--csv` on a line by itself in your `~/.mlrrc` file. Then instead of `mlr --csv cat example.csv` you can just do `mlr cat example.csv`. This is just a personal default, though, so `mlr --opprint cat example.csv` will use default CSV format for input, and PPRINT (tabular) for output.
+If you want the default file format for Miller to be CSV, you can put `--csv` on a line by itself in your `~/.mlrrc` file. Then, instead of `mlr --csv cat example.csv` you can just do `mlr cat example.csv`. This is just a personal default, though, so `mlr --opprint cat example.csv` will use default CSV format for input, and PPRINT (tabular) for output.
 
 You can read more about this at the [Customization](customization.md) page.
 
@@ -126,6 +126,6 @@ fraction -f count \
 filename-which-varies.csv
 
-Typing this out can get a bit old, if the only thing that changes for you is the filename. +Typing this out can get a bit old if the only thing that changes for you is the filename. See [Scripting with Miller](scripting.md) for some keystroke-saving options. diff --git a/docs/src/keystroke-savers.md.in b/docs/src/keystroke-savers.md.in index 720dfad14..648c63fc7 100644 --- a/docs/src/keystroke-savers.md.in +++ b/docs/src/keystroke-savers.md.in @@ -2,7 +2,7 @@ ## Short format specifiers, including --c2p -In our examples so far we've often made use of `mlr --icsv --opprint` or `mlr --icsv --ojson`. These are such frequently occurring patterns that they have short options like `--c2p` and `--c2j`: +In our examples so far, we've often made use of `mlr --icsv --opprint` or `mlr --icsv --ojson`. These are such frequently occurring patterns that they have short options like `--c2p` and `--c2j`: GENMD-RUN-COMMAND mlr --c2p head -n 2 example.csv @@ -16,7 +16,7 @@ You can get the full list [here](file-formats.md#data-conversion-keystroke-saver ## File names up front, including --from -Already we saw that you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command: +Already, we saw that you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command: GENMD-RUN-COMMAND mlr --c2p --from example.csv sort -nr index then head -n 3 @@ -55,7 +55,7 @@ I think `mlr --csv ...` explains itself better than `mlr -c ...`. Nonetheless, t ## .mlrrc file -If you want the default file format for Miller to be CSV, you can simply put `--csv` on a line by itself in your `~/.mlrrc` file. Then instead of `mlr --csv cat example.csv` you can just do `mlr cat example.csv`. 
This is just a personal default, though, so `mlr --opprint cat example.csv` will use default CSV format for input, and PPRINT (tabular) for output. +If you want the default file format for Miller to be CSV, you can put `--csv` on a line by itself in your `~/.mlrrc` file. Then, instead of `mlr --csv cat example.csv` you can just do `mlr cat example.csv`. This is just a personal default, though, so `mlr --opprint cat example.csv` will use default CSV format for input, and PPRINT (tabular) for output. You can read more about this at the [Customization](customization.md) page. @@ -71,6 +71,6 @@ fraction -f count \ filename-which-varies.csv GENMD-EOF -Typing this out can get a bit old, if the only thing that changes for you is the filename. +Typing this out can get a bit old if the only thing that changes for you is the filename. See [Scripting with Miller](scripting.md) for some keystroke-saving options. diff --git a/docs/src/miller-on-windows.md b/docs/src/miller-on-windows.md index b45ce5c43..8ffb6a44b 100644 --- a/docs/src/miller-on-windows.md +++ b/docs/src/miller-on-windows.md @@ -18,7 +18,7 @@ Quick links: ## Native builds as of Miller 6 -Miller was originally developed for Unix-like operating systems including Linux and MacOS. Since Miller 5.2.0 which was the first version to support Windows at all, that support has been partial. But as of version 6.0.0, Miller builds directly on Windows. +Miller was originally developed for Unix-like operating systems, including Linux and MacOS. Since Miller 5.2.0, which was the first version to support Windows at all, that support has been partial. But as of version 6.0.0, Miller builds directly on Windows. **The experience is now almost the same on Windows as it is on Linux, NetBSD/FreeBSD, and MacOS.** @@ -28,7 +28,7 @@ See [Installation](installing-miller.md) for how to get a copy of `mlr.exe`. ## Setup -Simply place `mlr.exe` somewhere within your `PATH` variable. +Place `mlr.exe` somewhere within your `PATH` variable. 
![pix/miller-windows.png](pix/miller-windows.png) @@ -38,7 +38,7 @@ To use Miller from within MSYS2/Cygwin, also make sure `mlr.exe` is within the ` ## Differences -The Windows-support code within Miller makes effort to support Linux/Unix/MacOS-like command-line syntax including single-quoting of expressions for `mlr put` and `mlr filter` -- and in the examples above, this often works. However, there are still some cases where more complex expressions aren't successfully parsed from the Windows prompt, even though they are from MSYS2: +The Windows-support code within Miller makes an effort to support Linux/Unix/MacOS-like command-line syntax, including single-quoting of expressions for `mlr put` and `mlr filter` -- and in the examples above, this often works. However, there are still some cases where more complex expressions aren't successfully parsed from the Windows prompt, even though they are from MSYS2: ![pix/miller-windows-complex.png](pix/miller-windows-complex.png) diff --git a/docs/src/miller-on-windows.md.in b/docs/src/miller-on-windows.md.in index 1bd135d25..4b80ab7ae 100644 --- a/docs/src/miller-on-windows.md.in +++ b/docs/src/miller-on-windows.md.in @@ -2,7 +2,7 @@ ## Native builds as of Miller 6 -Miller was originally developed for Unix-like operating systems including Linux and MacOS. Since Miller 5.2.0 which was the first version to support Windows at all, that support has been partial. But as of version 6.0.0, Miller builds directly on Windows. +Miller was originally developed for Unix-like operating systems, including Linux and MacOS. Since Miller 5.2.0, which was the first version to support Windows at all, that support has been partial. But as of version 6.0.0, Miller builds directly on Windows. **The experience is now almost the same on Windows as it is on Linux, NetBSD/FreeBSD, and MacOS.** @@ -12,7 +12,7 @@ See [Installation](installing-miller.md) for how to get a copy of `mlr.exe`. 
## Setup -Simply place `mlr.exe` somewhere within your `PATH` variable. +Place `mlr.exe` somewhere within your `PATH` variable. ![pix/miller-windows.png](pix/miller-windows.png) @@ -22,7 +22,7 @@ To use Miller from within MSYS2/Cygwin, also make sure `mlr.exe` is within the ` ## Differences -The Windows-support code within Miller makes effort to support Linux/Unix/MacOS-like command-line syntax including single-quoting of expressions for `mlr put` and `mlr filter` -- and in the examples above, this often works. However, there are still some cases where more complex expressions aren't successfully parsed from the Windows prompt, even though they are from MSYS2: +The Windows-support code within Miller makes an effort to support Linux/Unix/MacOS-like command-line syntax, including single-quoting of expressions for `mlr put` and `mlr filter` -- and in the examples above, this often works. However, there are still some cases where more complex expressions aren't successfully parsed from the Windows prompt, even though they are from MSYS2: ![pix/miller-windows-complex.png](pix/miller-windows-complex.png) diff --git a/docs/src/miller-programming-language.md b/docs/src/miller-programming-language.md index e5da65233..2b87c5106 100644 --- a/docs/src/miller-programming-language.md +++ b/docs/src/miller-programming-language.md @@ -16,11 +16,11 @@ Quick links: # Intro to Miller's programming language -In the [Miller in 10 minutes](10min.md) page we took a tour of some of Miller's most-used [verbs](reference-verbs.md) including `cat`, `head`, `tail`, `cut`, and `sort`. These are analogs of familiar system commands, but empowered by field-name indexing and file-format awareness: the system `sort` command only knows about lines and column names like `1,2,3,4`, while `mlr sort` knows about CSV/TSV/JSON/etc records, and field names like `color,shape,flag,index`. 
+On the [Miller in 10 minutes](10min.md) page, we took a tour of some of Miller's most-used [verbs](reference-verbs.md), including `cat`, `head`, `tail`, `cut`, and `sort`. These are analogs of familiar system commands, but empowered by field-name indexing and file-format awareness: the system `sort` command only knows about lines and column names like `1,2,3,4`, while `mlr sort` knows about CSV/TSV/JSON/etc records, and field names like `color,shape,flag,index`. -We also caught a glimpse of Miller's `put` and `filter` verbs. These two are special since they let you express statements using Miller's programming language. It's a *embedded domain-specific language* since it's inside Miller: often referred to simply as the *Miller DSL*. +We also caught a glimpse of Miller's `put` and `filter` verbs. These two are special because they allow you to express statements using Miller's programming language. It's an *embedded domain-specific language* since it's inside Miller: often referred to simply as the *Miller DSL*. -In the [DSL reference](reference-dsl.md) page we have a complete reference to Miller's programming language. For now, let's take a quick look at key features -- you can use as few or as many features as you like. +On the [DSL reference](reference-dsl.md) page, we have a complete reference to Miller's programming language. For now, let's take a quick look at key features -- you can use as few or as many features as you like. ## Records and fields @@ -45,9 +45,9 @@ purple square false 10 91 72.3735 8.2430 596.5747605000001 When we type that, a few things are happening: -* We refer to fields in the input data using a dollar sign and then the field name, e.g. `$quantity`. (If a field name contains special characters like a dot or slash, just use curly braces: `${field.name}`.) +* We refer to fields in the input data using a dollar sign and then the field name, e.g., `$quantity`. 
(If a field name contains special characters like a dot or slash, just use curly braces: `${field.name}`.) * The expression `$cost = $quantity * $rate` is executed once per record of the data file. Our [example.csv](./example.csv) has 10 records so this expression was executed 10 times, with the field names `$quantity` and `$rate` each time bound to the current record's values for those fields. -* On the left-hand side we have the new field name `$cost` which didn't come from the input data. Assignments to new variables result in a new field being placed after all the other ones. If we'd assigned to an existing field name, it would have been updated in-place. +* On the left-hand side, we have the new field name `$cost`, which didn't come from the input data. Assignments to new variables result in a new field being placed after all the other ones. If we'd assigned to an existing field name, it would have been updated in place. * The entire expression is surrounded by single quotes (with an adjustment needed on [Windows](miller-on-windows.md)), to get it past the system shell. Inside those, only double quotes have meaning in Miller's programming language. ## Multi-line statements, and statements-from-file @@ -91,9 +91,9 @@ yellow circle true 9 8700 63.5058 8.3350 529.3208430000001 purple square false 10 9100 72.3735 8.2430 596.5747605000001
-Anything from a `#` character to end of line is a code comment. +Anything from a `#` character to the end of the line is a code comment. -One of Miller's key features is the ability to express data-transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`: +One of Miller's key features is the ability to express data transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`:
 cat dsl-example.mlr
@@ -120,13 +120,13 @@ yellow circle   true  9  8700  63.5058  8.3350 529.3208430000001
 purple square   false 10 9100  72.3735  8.2430 596.5747605000001
 
-This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here, but if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md). +This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here. Still, if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md). ## Out-of-stream variables, begin, and end -Above we saw that your expression is executed once per record -- if a file has a million records, your expression will be executed a million times, once for each record. But you can mark statements to only be executed once, either before the record stream begins, or after the record stream is ended. If you know about [AWK](https://en.wikipedia.org/wiki/AWK), you might have noticed that Miller's programming language is loosely inspired by it, including the `begin` and `end` statements. +Above, we saw that your expression is executed once per record: if a file has a million records, your expression will be executed a million times, once for each record. But you can mark statements only to be executed once, either before the record stream begins or after the record stream is ended. If you know about [AWK](https://en.wikipedia.org/wiki/AWK), you might have noticed that Miller's programming language is loosely inspired by it, including the `begin` and `end` statements. 
-Above we also saw that names like `$quantity` are bound to each record in turn. +Above, we also saw that names like `$quantity` are bound to each record in turn. To make `begin` and `end` statements useful, we need somewhere to put things that persist across the duration of the record stream, and a way to emit them. Miller uses [**out-of-stream variables**](reference-dsl-variables.md#out-of-stream-variables) (or **oosvars** for short) whose names start with an `@` sigil, along with the [`emit`](reference-dsl-output-statements.md#emit-statements) keyword to write them into the output record stream: @@ -210,7 +210,7 @@ Also inspired by [AWK](https://en.wikipedia.org/wiki/AWK), the Miller DSL has th * `FILENAME` -- the filename the current record came from. Especially useful in things like `mlr ... *.csv`. * `FILENUM` -- similarly, but integer 1,2,3,... rather than filename. -* `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value` then `NF` will increment. +* `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value`, then `NF` will increment. * `NR` -- starting from 1, counter of how many records processed so far. * `FNR` -- similar, but resets to 1 at the start of each file. @@ -290,12 +290,12 @@ purple square false 10 91 72.3735 8.2430 3628800 Note that here we used the `-f` flag to `put` to load our function definition, and also the `-e` flag to add another statement on the command line. (We could have also put `$fact = factorial(NR)` inside -`factorial-example.mlr` but that would have made that file less flexible for our +`factorial-example.mlr`, but that would have made that file less flexible for our future use.) ## If-statements, loops, and local variables -Suppose you want to only compute sums conditionally -- you can use an `if` statement: +Suppose you want only to compute sums conditionally -- you can use an `if` statement:
 cat if-example.mlr
@@ -331,7 +331,7 @@ page](reference-dsl-control-structures.md#for-loops), Miller has a few kinds of
 for-loops. In addition to the usual 3-part `for (i = 0; i < 10; i += 1)` kind
 that many programming languages have, Miller also lets you loop over
 [maps](reference-main-maps.md) and [arrays](reference-main-arrays.md). We
-haven't encountered maps and arrays yet in this introduction, but for now it
+haven't encountered maps and arrays yet in this introduction, but for now, it
 suffices to know that `$*` is a special variable holding the current record as
 a map:
 
@@ -375,14 +375,14 @@ Here we used the local variables `k` and `v`. Now we've seen four kinds of varia
 * Local variables like `k`
 * Built-in context variables like `NF` and `NR`
 
-If you're curious about scope and extent of local variables, you can read more in the [section on variables](reference-dsl-variables.md).
+If you're curious about the scope and extent of local variables, you can read more in the [section on variables](reference-dsl-variables.md).
 
 ## Arithmetic
 
 Numbers in Miller's programming language are intended to operate with the principle of least surprise:
 
 * Internally, numbers are either 64-bit signed integers or double-precision floating-point.
-* Sums, differences, and products of integers are also integers (so `2*3=6` not `6.0`) -- unless the result of the operation would overflow a 64-bit signed integer in which case the result is automatically converted to float. (If you ever want integer-to-integer arithmetic, use `x .+ y`, `x .* y`, etc.)
+* Sums, differences, and products of integers are also integers (so `2*3=6` not `6.0`) -- unless the result of the operation would overflow a 64-bit signed integer, in which case the result is automatically converted to float. (If you ever want integer-to-integer arithmetic, use `x .+ y`, `x .* y`, etc.)
 * Quotients of integers are integers if the division is exact, else floating-point:  so `6/2=3` but `7/2=3.5`.
 
 You can read more about this in the [arithmetic reference](reference-main-arithmetic.md).
@@ -397,7 +397,7 @@ see more in the [null-data reference](reference-main-null-data.md) but the
 basic idea is:
 
 * Adding a number to absent gives the number back. This means you don't have to put `@sum = 0` in your `begin` blocks.
-* Any variable which has the absent value is not assigned. This means you don't have to check presence of things from one record to the next.
+* Any variable that has the absent value is not assigned. This means you don't have to check the presence of things from one record to the next.
 
 For example, you can sum up all the `$a` values across records without having to check whether they're present or not:
 
diff --git a/docs/src/miller-programming-language.md.in b/docs/src/miller-programming-language.md.in
index 624a0dc9c..91b9499bf 100644
--- a/docs/src/miller-programming-language.md.in
+++ b/docs/src/miller-programming-language.md.in
@@ -1,10 +1,10 @@
 # Intro to Miller's programming language
 
-In the [Miller in 10 minutes](10min.md) page we took a tour of some of Miller's most-used [verbs](reference-verbs.md) including `cat`, `head`, `tail`, `cut`, and `sort`. These are analogs of familiar system commands, but empowered by field-name indexing and file-format awareness: the system `sort` command only knows about lines and column names like `1,2,3,4`, while `mlr sort` knows about CSV/TSV/JSON/etc records, and field names like `color,shape,flag,index`.
+On the [Miller in 10 minutes](10min.md) page, we took a tour of some of Miller's most-used [verbs](reference-verbs.md), including `cat`, `head`, `tail`, `cut`, and `sort`. These are analogs of familiar system commands, but empowered by field-name indexing and file-format awareness: the system `sort` command only knows about lines and column names like `1,2,3,4`, while `mlr sort` knows about CSV/TSV/JSON/etc records, and field names like `color,shape,flag,index`.
 
-We also caught a glimpse of Miller's `put` and `filter` verbs. These two are special since they let you express statements using Miller's programming language. It's a *embedded domain-specific language* since it's inside Miller: often referred to simply as the *Miller DSL*.
+We also caught a glimpse of Miller's `put` and `filter` verbs. These two are special because they allow you to express statements using Miller's programming language. It's an *embedded domain-specific language* since it's inside Miller: often referred to simply as the *Miller DSL*.
 
-In the [DSL reference](reference-dsl.md) page we have a complete reference to Miller's programming language. For now, let's take a quick look at key features -- you can use as few or as many features as you like.
+On the [DSL reference](reference-dsl.md) page, we have a complete reference to Miller's programming language. For now, let's take a quick look at key features -- you can use as few or as many features as you like.
 
 ## Records and fields
 
@@ -16,9 +16,9 @@ GENMD-EOF
 
 When we type that, a few things are happening:
 
-* We refer to fields in the input data using a dollar sign and then the field name, e.g. `$quantity`. (If a field name contains special characters like a dot or slash, just use curly braces: `${field.name}`.)
+* We refer to fields in the input data using a dollar sign and then the field name, e.g., `$quantity`. (If a field name contains special characters like a dot or slash, just use curly braces: `${field.name}`.)
 * The expression `$cost = $quantity * $rate` is executed once per record of the data file. Our [example.csv](./example.csv) has 10 records so this expression was executed 10 times, with the field names `$quantity` and `$rate` each time bound to the current record's values for those fields.
-* On the left-hand side we have the new field name `$cost` which didn't come from the input data. Assignments to new variables result in a new field being placed after all the other ones. If we'd assigned to an existing field name, it would have been updated in-place.
+* On the left-hand side, we have the new field name `$cost`, which didn't come from the input data. Assignments to new variables result in a new field being placed after all the other ones. If we'd assigned to an existing field name, it would have been updated in place.
 * The entire expression is surrounded by single quotes (with an adjustment needed on [Windows](miller-on-windows.md)), to get it past the system shell. Inside those, only double quotes have meaning in Miller's programming language.
 
 ## Multi-line statements, and statements-from-file
@@ -36,9 +36,9 @@ mlr --c2p put '
 ' example.csv
 GENMD-EOF
 
-Anything from a `#` character to end of line is a code comment.
+Anything from a `#` character to the end of the line is a code comment.
 
-One of Miller's key features is the ability to express data-transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`:
+One of Miller's key features is the ability to express data transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`:
 
 GENMD-RUN-COMMAND
 cat dsl-example.mlr
@@ -48,13 +48,13 @@ GENMD-RUN-COMMAND
 mlr --c2p put -f dsl-example.mlr example.csv
 GENMD-EOF
 
-This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here, but if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md).
+This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here. Still, if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md).
 
 ## Out-of-stream variables, begin, and end
 
-Above we saw that your expression is executed once per record -- if a file has a million records, your expression will be executed a million times, once for each record. But you can mark statements to only be executed once, either before the record stream begins, or after the record stream is ended. If you know about [AWK](https://en.wikipedia.org/wiki/AWK), you might have noticed that Miller's programming language is loosely inspired by it, including the `begin` and `end` statements.
+Above, we saw that your expression is executed once per record: if a file has a million records, your expression will be executed a million times, once for each record. But you can mark statements to be executed only once, either before the record stream begins or after the record stream has ended. If you know about [AWK](https://en.wikipedia.org/wiki/AWK), you might have noticed that Miller's programming language is loosely inspired by it, including the `begin` and `end` statements.
 
-Above we also saw that names like `$quantity` are bound to each record in turn.
+Above, we also saw that names like `$quantity` are bound to each record in turn.
 
 To make `begin` and `end` statements useful, we need somewhere to put things that persist across the duration of the record stream, and a way to emit them. Miller uses [**out-of-stream variables**](reference-dsl-variables.md#out-of-stream-variables) (or **oosvars** for short) whose names start with an `@` sigil, along with the [`emit`](reference-dsl-output-statements.md#emit-statements) keyword to write them into the output record stream:
 
@@ -95,7 +95,7 @@ Also inspired by [AWK](https://en.wikipedia.org/wiki/AWK), the Miller DSL has th
 
 * `FILENAME` -- the filename the current record came from. Especially useful in things like `mlr ... *.csv`.
 * `FILENUM` -- similarly, but integer 1,2,3,... rather than filename.
-* `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value` then `NF` will increment.
+* `NF` -- the number of fields in the current record. Note that if you assign `$newcolumn = some value`, then `NF` will increment.
 * `NR` -- starting from 1, counter of how many records processed so far.
 * `FNR` -- similar, but resets to 1 at the start of each file.
 
@@ -130,12 +130,12 @@ GENMD-EOF
 Note that here we used the `-f` flag to `put` to load our function
 definition, and also the `-e` flag to add another statement on the command
 line. (We could have also put `$fact = factorial(NR)` inside
-`factorial-example.mlr` but that would have made that file less flexible for our
+`factorial-example.mlr`, but that would have made that file less flexible for our
 future use.)
 
 ## If-statements, loops, and local variables
 
-Suppose you want to only compute sums conditionally -- you can use an `if` statement:
+Suppose you want to compute sums only conditionally -- you can use an `if` statement:
 
 GENMD-RUN-COMMAND
 cat if-example.mlr
@@ -152,7 +152,7 @@ page](reference-dsl-control-structures.md#for-loops), Miller has a few kinds of
 for-loops. In addition to the usual 3-part `for (i = 0; i < 10; i += 1)` kind
 that many programming languages have, Miller also lets you loop over
 [maps](reference-main-maps.md) and [arrays](reference-main-arrays.md). We
-haven't encountered maps and arrays yet in this introduction, but for now it
+haven't encountered maps and arrays yet in this introduction, but for now, it
 suffices to know that `$*` is a special variable holding the current record as
 a map:
 
@@ -175,14 +175,14 @@ Here we used the local variables `k` and `v`. Now we've seen four kinds of varia
 * Local variables like `k`
 * Built-in context variables like `NF` and `NR`
 
-If you're curious about scope and extent of local variables, you can read more in the [section on variables](reference-dsl-variables.md).
+If you're curious about the scope and extent of local variables, you can read more in the [section on variables](reference-dsl-variables.md).
 
 ## Arithmetic
 
 Numbers in Miller's programming language are intended to operate with the principle of least surprise:
 
 * Internally, numbers are either 64-bit signed integers or double-precision floating-point.
-* Sums, differences, and products of integers are also integers (so `2*3=6` not `6.0`) -- unless the result of the operation would overflow a 64-bit signed integer in which case the result is automatically converted to float. (If you ever want integer-to-integer arithmetic, use `x .+ y`, `x .* y`, etc.)
+* Sums, differences, and products of integers are also integers (so `2*3=6` not `6.0`) -- unless the result of the operation would overflow a 64-bit signed integer, in which case the result is automatically converted to float. (If you ever want integer-to-integer arithmetic, use `x .+ y`, `x .* y`, etc.)
 * Quotients of integers are integers if the division is exact, else floating-point:  so `6/2=3` but `7/2=3.5`.
 
 You can read more about this in the [arithmetic reference](reference-main-arithmetic.md).
@@ -197,7 +197,7 @@ see more in the [null-data reference](reference-main-null-data.md) but the
 basic idea is:
 
 * Adding a number to absent gives the number back. This means you don't have to put `@sum = 0` in your `begin` blocks.
-* Any variable which has the absent value is not assigned. This means you don't have to check presence of things from one record to the next.
+* Any variable that has the absent value is not assigned. This means you don't have to check the presence of things from one record to the next.
 
 For example, you can sum up all the `$a` values across records without having to check whether they're present or not:
 
diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md
index 32633b6f8..86a52a40d 100644
--- a/docs/src/new-in-miller-6.md
+++ b/docs/src/new-in-miller-6.md
@@ -24,43 +24,23 @@ TL;DRs: [install](installing-miller.md), [binaries](https://github.com/johnkerl/
 
 ### Performance
 
-Performance is on par with Miller 5 for simple processing, and is far better than Miller 5 for
-complex processing chains -- the latter due to improved multicore utilization. CSV I/O is notably
-improved.  See the [Performance benchmarks](#performance-benchmarks) section at the bottom of this
-page for details.
+Performance is on par with Miller 5 for simple processing, and is far better than Miller 5 for complex processing chains -- the latter due to improved multicore utilization. CSV I/O is notably improved.  See the [Performance benchmarks](#performance-benchmarks) section at the bottom of this page for details.
 
 ### Documentation improvements
 
 Documentation (what you're reading here) and online help (`mlr --help`) have been completely reworked.
 
-In the initial release, the focus was convincing users already familiar with
-`awk`/`grep`/`cut` that Miller was a viable alternative -- but over time it's
-become clear that many Miller users aren't expert with those tools. The focus
-has shifted toward a higher quantity of more introductory/accessible material
-for command-line data processing.
+In the initial release, the focus was on convincing users already familiar with `awk`, `grep`, and `cut` that Miller was a viable alternative; however, over time, it has become clear that many Miller users aren't experts with those tools. The focus has shifted toward a higher quantity of more introductory/accessible material for command-line data processing.
 
-Similarly, the FAQ/recipe material has been expanded to include more, and
-simpler, use-cases including resolved questions from
-[Miller Issues](https://github.com/johnkerl/miller/issues)
-and
-[Miller Discussions](https://github.com/johnkerl/miller/discussions);
-more complex/niche material has been pushed farther down. The long reference
-pages have been split up into separate pages. (See also
-[Structure of these documents](structure-of-these-documents.md).)
+Similarly, the FAQ/recipe material has been expanded to include more, and simpler, use-cases, including resolved questions from [Miller Issues](https://github.com/johnkerl/miller/issues) and [Miller Discussions](https://github.com/johnkerl/miller/discussions); more complex/niche material has been pushed farther down. The lengthy reference pages have been divided into separate pages. (See also [Structure of these documents](structure-of-these-documents.md).)
 
-One of the main feedback themes from the 2021 Miller User Survey was that some
-things should be easier to find. Namely, on each doc page there's now a banner
-across the top with things that should be one click away from the landing page
-(or any page): command-line flags, verbs, functions, glossary/acronyms, and a
-finder for docs by release.
+One of the main feedback themes from the 2021 Miller User Survey was that some things should be easier to find. Namely, on each doc page, there's now a banner across the top with things that should be one click away from the landing page (or any page): command-line flags, verbs, functions, glossary/acronyms, and a finder for docs by release.
 
-Since CSV is overwhelmingly the most popular data format for Miller, it is
-now discussed first, and more examples use CSV.
+Since CSV is overwhelmingly the most popular data format for Miller, it is now discussed first, and more examples use CSV.
 
 ### Improved Windows experience
 
-Stronger support for Windows (with or without MSYS2), with a couple of
-exceptions.  See [Miller on Windows](miller-on-windows.md) for more information.
+Stronger support for Windows (with or without MSYS2), with a couple of exceptions.  See [Miller on Windows](miller-on-windows.md) for more information.
 
 Binaries are reliably available using GitHub Actions: see also [Installation](installing-miller.md).
 
@@ -89,9 +69,7 @@ Parse error on token ">" at line 63 column 7.
 
 ### Scripting
 
-Scripting is now easier -- support for `#!` with `sh`, as always, along with now support for `#!` with `mlr -s`. For
-Windows, `mlr -s` can also be used.  These help reduce backslash-clutter and let you do more while typing less.
-See the [scripting page](scripting.md).
+Scripting is now easier -- there is support for `#!` with `sh`, as always, and now also support for `#!` with `mlr -s`. For Windows, `mlr -s` can also be used.  These help reduce backslash clutter and let you do more while typing less. See the [scripting page](scripting.md).
 
 ### REPL
 
@@ -143,7 +121,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe
 
 ### In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz`, you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. The same applies to `.z`, `.bz2`, and `.zst` files.  Please refer to the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ### Support for reading web URLs
 
@@ -171,9 +149,7 @@ purple,triangle,false,7,65,80.1405,5.8240
 
 ### Improved JSON / JSON Lines support, and arrays
 
-Arrays are now supported in Miller's `put`/`filter` programming language, as
-described in the [Arrays reference](reference-main-arrays.md). (Also, `array` is
-now a keyword so this is no longer usable as a local-variable or UDF name.)
+Arrays are now supported in Miller's `put`/`filter` programming language, as described in the [Arrays reference](reference-main-arrays.md). (Also, `array` is now a keyword, so this is no longer usable as a local variable or UDF name.)
 
 JSON support is improved:
 
@@ -196,24 +172,13 @@ See also the [Arrays reference](reference-main-arrays.md) for more information.
 
 ### Improved numeric conversion
 
-The most central part of Miller 6 is a deep refactor of how data values are parsed
-from file contents, how types are inferred, and how they're converted back to
-text into output files.
+The most central part of Miller 6 is a deep refactor of how data values are parsed from file contents, how types are inferred, and how they're converted back to text into output files.
 
 This was all initiated by [https://github.com/johnkerl/miller/issues/151](https://github.com/johnkerl/miller/issues/151).
 
-In Miller 5 and below, all values were stored as strings, then only converted
-to int/float as-needed, for example when a particular field was referenced in
-the `stats1` or `put` verbs. This led to awkwardnesses such as the `-S`
-and `-F` flags for `put` and `filter`.
+In Miller 5 and below, all values were stored as strings, then only converted to int/float as needed, for example, when a particular field was referenced in the `stats1` or `put` verbs. This led to awkwardnesses such as the `-S` and `-F` flags for `put` and `filter`.
 
-In Miller 6, things parseable as int/float are treated as such from the moment
-the input data is read, and these are passed along through the verb chain.  All
-values are typed from when they're read, and their types are passed along.
-Meanwhile the original string representation of each value is also retained. If
-a numeric field isn't modified during the processing chain, it's printed out
-the way it arrived. Also, quoted values in JSON strings are flagged as being
-strings throughout the processing chain.
+In Miller 6, values parseable as integers or floating-point numbers are treated as such from the moment the input data is read, and these are passed along through the verb chain.  All values are typed from when they're read, and their types are passed along. Meanwhile, the original string representation of each value is also retained. If a numeric field isn't modified during the processing chain, it's printed out the way it arrived. Additionally, quoted values in JSON strings are consistently flagged as strings throughout the processing chain.
 
 For example (see [https://github.com/johnkerl/miller/issues/178](https://github.com/johnkerl/miller/issues/178)) you can now do
 
@@ -242,30 +207,21 @@ For example (see [https://github.com/johnkerl/miller/issues/178](https://github.
 
 ### Deduping of repeated field names
 
-By default, field names are deduped for all file formats except JSON / JSON Lines. So if you
-have an input record with `x=8,x=9` then the second field's key is renamed to
-`x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr
---no-dedupe-field-names` to suppress this, and have the record be scanned as
-`x=9`.
+By default, field names are deduplicated for all file formats except JSON / JSON Lines. So if you have an input record with `x=8,x=9`, then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`.
 
-For JSON and JSON Lines, the last duplicated key in an input record is always retained,
-regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it
-were `{"x":9}`.
+For JSON and JSON Lines, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`.
 
 ### Regex support for IFS and IPS
 
-You can now split fields on whitespace when whitespace is a mix of tabs and
-spaces.  As well, you can use regular expressions for the input field-separator
-and the input pair-separator.  Please see the section on
-[multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
+You can now split fields on whitespace when whitespace is a mix of tabs and spaces.  As well, you can use regular expressions for the input field-separator and the input pair-separator.  Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
 
-In particular, for NIDX format, the default IFS now allows splitting on one or more of space or tab.
+In particular, for NIDX format, the default `IFS` now allows splitting on one or more of space or tab.
 
 ### Case-folded sorting options
 
-The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respetively.
+The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respectively.
 
-### New DSL functions / operators
+### New DSL functions and operators
 
 * Higher-order functions [`select`](reference-dsl-builtin-functions.md#select), [`apply`](reference-dsl-builtin-functions.md#apply), [`reduce`](reference-dsl-builtin-functions.md#reduce), [`fold`](reference-dsl-builtin-functions.md#fold), and [`sort`](reference-dsl-builtin-functions.md#sort).  See the [sorting page](sorting.md) and the [higher-order-functions page](reference-dsl-higher-order-functions.md) for more information.
 
@@ -293,30 +249,30 @@ The following differences are rather technical. If they don't sound familiar to
 
 ### Line endings
 
-The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise, LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line-ending is for the platform. E.g. reading CR/LF files on Linux will now produce LF output.
+The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise, LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line ending is for the platform. E.g., reading CR/LF files on Linux will now produce LF output.
 
 ### IFS and IPS as regular expressions
 
-IFS and IPS can be regular expressions now. Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
+IFS and IPS can now be regular expressions. Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators).
 
 ### JSON and JSON Lines formatting
 
 * `--jknquoteint` and `jquoteall` are ignored; they were workarounds for the (now much-improved) type-inference and type-tracking in Miller 6.
 * `--json-fatal-arrays-on-input`, `--json-map-arrays-on-input`, and `--json-skip-arrays-on-input` are ignored; Miller 6 now supports arrays fully.
 * See also `mlr help legacy-flags` or the [legacy-flags reference](reference-main-flag-list.md#legacy-flags).
-* Miller 5 accepted input records either with or without enclosing `[...]`; on output, by default it produced single-line records without outermost `[...]`.  Miller 5 let you customize output formatting using `--jvstack` (multi-line records) and `--jlistwrap` (write outermost `[...]`). _Thus, Miller 5's JSON output format, with default flags, was in fact [JSON Lines](file-formats.md#json-lines) all along._
+* Miller 5 accepted input records either with or without enclosing `[...]`; on output, by default, it produced single-line records without outermost `[...]`.  Miller 5 let you customize output formatting using `--jvstack` (multi-line records) and `--jlistwrap` (write outermost `[...]`). _Thus, Miller 5's JSON output format, with default flags, was in fact [JSON Lines](file-formats.md#json-lines) all along._
 * In Miller 6, [JSON Lines](file-formats.md#json-lines) is acknowledged explicitly.
 * On input, your records are accepted whether or not they have outermost `[...]`, and regardless of line breaks, whether the specified input format is JSON or JSON Lines. (This is similar to [jq](https://stedolan.github.io/jq/).)
 * With `--ojson`, output records are written multiline (pretty-printed), with outermost `[...]`.
 * With `--ojsonl`, output records are written single-line, without outermost `[...]`.
 * This makes `--jvstack` and `--jlistwrap` unnecessary. However, if you want outermost `[...]` with single-line records, you can use `--ojson --no-jvstack`.
-* Miller 5 tolerated trailing commas, which are not compliant with the JSON specification: for example, `{"x":1,"y":2,}`. Miller 6 uses a JSON parser which is compliant with the JSON specification and does not accept trailing commas.
+* Miller 5 tolerated trailing commas, which are not compliant with the JSON specification: for example, `{"x":1,"y":2,}`. Miller 6 uses a JSON parser that is compliant with the JSON specification and does not accept trailing commas.
 
 ### Type-inference
 
 * The `-S` and `-F` flags to `mlr put` and `mlr filter` are ignored, since type-inference is no longer done in `mlr put` and `mlr filter`, but rather, when records are first read. You can use `mlr -S` and `mlr -A`, respectively, instead to control type-inference within the record-readers.
 * Octal numbers like `0123` and `07` are type-inferred as string. Use `mlr -O` to infer them as octal integers. Note that `08` and `09` will then infer as decimal integers.
-* Any numbers prefix with `0o`, e.g. `0o377`, are already treated as octal regardless of `mlr -O` -- `mlr -O` only affects how leading-zero integers are handled.
+* Any numbers prefixed with `0o`, e.g., `0o377`, are already treated as octal, regardless of `mlr -O` -- `mlr -O` only affects how leading-zero integers are handled.
 * See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags).
 
 ### Emit statements
@@ -341,13 +297,12 @@ This works in Miller 6 (and worked in Miller 5 as well) and is supported:
 input=1
 
-Please see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) -for more information. +Please see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information. ## Developer-specific aspects * Miller has been ported from C to Go. Developer notes: [https://github.com/johnkerl/miller/blob/main/README-dev.md](https://github.com/johnkerl/miller/blob/main/README-dev.md). -* Regression testing has been completely reworked, including regression-testing now running fully on Windows (alongside Linux and Mac) [on each GitHub commit](https://github.com/johnkerl/miller/actions). +* Regression testing has been completely reworked, including regression-testing now running fully on Windows (alongside Linux and Mac) [on each GitHub commit](https://github.com/johnkerl/miller/actions). ## Performance benchmarks diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in index c450a9622..2da9d3feb 100644 --- a/docs/src/new-in-miller-6.md.in +++ b/docs/src/new-in-miller-6.md.in @@ -8,43 +8,23 @@ TL;DRs: [install](installing-miller.md), [binaries](https://github.com/johnkerl/ ### Performance -Performance is on par with Miller 5 for simple processing, and is far better than Miller 5 for -complex processing chains -- the latter due to improved multicore utilization. CSV I/O is notably -improved. See the [Performance benchmarks](#performance-benchmarks) section at the bottom of this -page for details. +Performance is on par with Miller 5 for simple processing, and is far better than Miller 5 for complex processing chains -- the latter due to improved multicore utilization. CSV I/O is notably improved. See the [Performance benchmarks](#performance-benchmarks) section at the bottom of this page for details. ### Documentation improvements Documentation (what you're reading here) and online help (`mlr --help`) have been completely reworked. 
-In the initial release, the focus was convincing users already familiar with -`awk`/`grep`/`cut` that Miller was a viable alternative -- but over time it's -become clear that many Miller users aren't expert with those tools. The focus -has shifted toward a higher quantity of more introductory/accessible material -for command-line data processing. +In the initial release, the focus was on convincing users already familiar with `awk`, `grep`, and `cut` that Miller was a viable alternative; however, over time, it has become clear that many Miller users aren't experts with those tools. The focus has shifted toward a higher quantity of more introductory/accessible material for command-line data processing. -Similarly, the FAQ/recipe material has been expanded to include more, and -simpler, use-cases including resolved questions from -[Miller Issues](https://github.com/johnkerl/miller/issues) -and -[Miller Discussions](https://github.com/johnkerl/miller/discussions); -more complex/niche material has been pushed farther down. The long reference -pages have been split up into separate pages. (See also -[Structure of these documents](structure-of-these-documents.md).) +Similarly, the FAQ/recipe material has been expanded to include more, and simpler, use-cases, including resolved questions from [Miller Issues](https://github.com/johnkerl/miller/issues) and [Miller Discussions](https://github.com/johnkerl/miller/discussions); more complex/niche material has been pushed farther down. The lengthy reference pages have been divided into separate pages. (See also [Structure of these documents](structure-of-these-documents.md).) -One of the main feedback themes from the 2021 Miller User Survey was that some -things should be easier to find. Namely, on each doc page there's now a banner -across the top with things that should be one click away from the landing page -(or any page): command-line flags, verbs, functions, glossary/acronyms, and a -finder for docs by release. 
+One of the main feedback themes from the 2021 Miller User Survey was that some things should be easier to find. Namely, on each doc page, there's now a banner across the top with things that should be one click away from the landing page (or any page): command-line flags, verbs, functions, glossary/acronyms, and a finder for docs by release. -Since CSV is overwhelmingly the most popular data format for Miller, it is -now discussed first, and more examples use CSV. +Since CSV is overwhelmingly the most popular data format for Miller, it is now discussed first, and more examples use CSV. ### Improved Windows experience -Stronger support for Windows (with or without MSYS2), with a couple of -exceptions. See [Miller on Windows](miller-on-windows.md) for more information. +Stronger support for Windows (with or without MSYS2), with a couple of exceptions. See [Miller on Windows](miller-on-windows.md) for more information. Binaries are reliably available using GitHub Actions: see also [Installation](installing-miller.md). @@ -73,9 +53,7 @@ GENMD-EOF ### Scripting -Scripting is now easier -- support for `#!` with `sh`, as always, along with now support for `#!` with `mlr -s`. For -Windows, `mlr -s` can also be used. These help reduce backslash-clutter and let you do more while typing less. -See the [scripting page](scripting.md). +Scripting is now easier -- support for `#!` with `sh`, as always, along with now support for `#!` with `mlr -s`. For Windows, `mlr -s` can also be used. These help reduce backslash clutter and let you do more while typing less. See the [scripting page](scripting.md). ### REPL @@ -125,7 +103,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe ### In-process support for compressed input -In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. 
In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files. Please see the page on [Compressed data](reference-main-compressed-data.md) for more information. +In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly, for `.z`, `.bz2`, and `.zst` files. Please refer to the page on [Compressed Data](reference-main-compressed-data.md) for more information. ### Support for reading web URLs @@ -140,9 +118,7 @@ GENMD-EOF ### Improved JSON / JSON Lines support, and arrays -Arrays are now supported in Miller's `put`/`filter` programming language, as -described in the [Arrays reference](reference-main-arrays.md). (Also, `array` is -now a keyword so this is no longer usable as a local-variable or UDF name.) +Arrays are now supported in Miller's `put`/`filter` programming language, as described in the [Arrays reference](reference-main-arrays.md). (Also, `array` is now a keyword, so this is no longer usable as a local variable or UDF name.) JSON support is improved: @@ -165,24 +141,13 @@ See also the [Arrays reference](reference-main-arrays.md) for more information. ### Improved numeric conversion -The most central part of Miller 6 is a deep refactor of how data values are parsed -from file contents, how types are inferred, and how they're converted back to -text into output files. +The most central part of Miller 6 is a deep refactor of how data values are parsed from file contents, how types are inferred, and how they're converted back to text into output files. This was all initiated by [https://github.com/johnkerl/miller/issues/151](https://github.com/johnkerl/miller/issues/151). 
-In Miller 5 and below, all values were stored as strings, then only converted -to int/float as-needed, for example when a particular field was referenced in -the `stats1` or `put` verbs. This led to awkwardnesses such as the `-S` -and `-F` flags for `put` and `filter`. +In Miller 5 and below, all values were stored as strings, then only converted to int/float as needed, for example, when a particular field was referenced in the `stats1` or `put` verbs. This led to awkwardnesses such as the `-S` and `-F` flags for `put` and `filter`. -In Miller 6, things parseable as int/float are treated as such from the moment -the input data is read, and these are passed along through the verb chain. All -values are typed from when they're read, and their types are passed along. -Meanwhile the original string representation of each value is also retained. If -a numeric field isn't modified during the processing chain, it's printed out -the way it arrived. Also, quoted values in JSON strings are flagged as being -strings throughout the processing chain. +In Miller 6, values parseable as integers or floating-point numbers are treated as such from the moment the input data is read, and these are passed along through the verb chain. All values are typed from when they're read, and their types are passed along. Meanwhile, the original string representation of each value is also retained. If a numeric field isn't modified during the processing chain, it's printed out the way it arrived. Additionally, quoted values in JSON strings are consistently flagged as strings throughout the processing chain. For example (see [https://github.com/johnkerl/miller/issues/178](https://github.com/johnkerl/miller/issues/178)) you can now do @@ -196,30 +161,21 @@ GENMD-EOF ### Deduping of repeated field names -By default, field names are deduped for all file formats except JSON / JSON Lines. 
So if you -have an input record with `x=8,x=9` then the second field's key is renamed to -`x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr ---no-dedupe-field-names` to suppress this, and have the record be scanned as -`x=9`. +By default, field names are deduplicated for all file formats except JSON / JSON Lines. So if you have an input record with `x=8,x=9`, then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. -For JSON and JSON Lines, the last duplicated key in an input record is always retained, -regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it -were `{"x":9}`. +For JSON and JSON Lines, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`. ### Regex support for IFS and IPS -You can now split fields on whitespace when whitespace is a mix of tabs and -spaces. As well, you can use regular expressions for the input field-separator -and the input pair-separator. Please see the section on -[multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). +You can now split fields on whitespace when whitespace is a mix of tabs and spaces. As well, you can use regular expressions for the input field-separator and the input pair-separator. Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). -In particular, for NIDX format, the default IFS now allows splitting on one or more of space or tab. +In particular, for NIDX format, the default `IFS` now allows splitting on one or more of space or tab. 
### Case-folded sorting options -The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respetively. +The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respectively. -### New DSL functions / operators +### New DSL functions and operators * Higher-order functions [`select`](reference-dsl-builtin-functions.md#select), [`apply`](reference-dsl-builtin-functions.md#apply), [`reduce`](reference-dsl-builtin-functions.md#reduce), [`fold`](reference-dsl-builtin-functions.md#fold), and [`sort`](reference-dsl-builtin-functions.md#sort). See the [sorting page](sorting.md) and the [higher-order-functions page](reference-dsl-higher-order-functions.md) for more information. @@ -247,30 +203,30 @@ The following differences are rather technical. If they don't sound familiar to ### Line endings -The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise, LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line-ending is for the platform. E.g. reading CR/LF files on Linux will now produce LF output. +The `--auto` flag is now ignored. Before, if a file had CR/LF (Windows-style) line endings on input (on any platform), it would have the same on output; likewise, LF (Unix-style) line endings. Now, files with CR/LF or LF line endings are processed on any platform, but the output line ending is for the platform. E.g., reading CR/LF files on Linux will now produce LF output. ### IFS and IPS as regular expressions -IFS and IPS can be regular expressions now. Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). +IFS and IPS can now be regular expressions. 
Please see the section on [multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). ### JSON and JSON Lines formatting * `--jknquoteint` and `jquoteall` are ignored; they were workarounds for the (now much-improved) type-inference and type-tracking in Miller 6. * `--json-fatal-arrays-on-input`, `--json-map-arrays-on-input`, and `--json-skip-arrays-on-input` are ignored; Miller 6 now supports arrays fully. * See also `mlr help legacy-flags` or the [legacy-flags reference](reference-main-flag-list.md#legacy-flags). -* Miller 5 accepted input records either with or without enclosing `[...]`; on output, by default it produced single-line records without outermost `[...]`. Miller 5 let you customize output formatting using `--jvstack` (multi-line records) and `--jlistwrap` (write outermost `[...]`). _Thus, Miller 5's JSON output format, with default flags, was in fact [JSON Lines](file-formats.md#json-lines) all along._ +* Miller 5 accepted input records either with or without enclosing `[...]`; on output, by default, it produced single-line records without outermost `[...]`. Miller 5 let you customize output formatting using `--jvstack` (multi-line records) and `--jlistwrap` (write outermost `[...]`). _Thus, Miller 5's JSON output format, with default flags, was in fact [JSON Lines](file-formats.md#json-lines) all along._ * In Miller 6, [JSON Lines](file-formats.md#json-lines) is acknowledged explicitly. * On input, your records are accepted whether or not they have outermost `[...]`, and regardless of line breaks, whether the specified input format is JSON or JSON Lines. (This is similar to [jq](https://stedolan.github.io/jq/).) * With `--ojson`, output records are written multiline (pretty-printed), with outermost `[...]`. * With `--ojsonl`, output records are written single-line, without outermost `[...]`. * This makes `--jvstack` and `--jlistwrap` unnecessary. 
However, if you want outermost `[...]` with single-line records, you can use `--ojson --no-jvstack`. -* Miller 5 tolerated trailing commas, which are not compliant with the JSON specification: for example, `{"x":1,"y":2,}`. Miller 6 uses a JSON parser which is compliant with the JSON specification and does not accept trailing commas. +* Miller 5 tolerated trailing commas, which are not compliant with the JSON specification: for example, `{"x":1,"y":2,}`. Miller 6 uses a JSON parser that is compliant with the JSON specification and does not accept trailing commas. ### Type-inference * The `-S` and `-F` flags to `mlr put` and `mlr filter` are ignored, since type-inference is no longer done in `mlr put` and `mlr filter`, but rather, when records are first read. You can use `mlr -S` and `mlr -A`, respectively, instead to control type-inference within the record-readers. * Octal numbers like `0123` and `07` are type-inferred as string. Use `mlr -O` to infer them as octal integers. Note that `08` and `09` will then infer as decimal integers. -* Any numbers prefix with `0o`, e.g. `0o377`, are already treated as octal regardless of `mlr -O` -- `mlr -O` only affects how leading-zero integers are handled. +* Any numbers prefixed with `0o`, e.g. `0o377`, are already treated as octal, regardless of `mlr -O` -- `mlr -O` only affects how leading-zero integers are handled. * See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags). ### Emit statements @@ -290,13 +246,12 @@ GENMD-RUN-COMMAND mlr -n put 'end {@input={"a":1}; emit1 {"input":@input["a"]}}' GENMD-EOF -Please see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) -for more information. +Please see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information. ## Developer-specific aspects * Miller has been ported from C to Go. 
Developer notes: [https://github.com/johnkerl/miller/blob/main/README-dev.md](https://github.com/johnkerl/miller/blob/main/README-dev.md). -* Regression testing has been completely reworked, including regression-testing now running fully on Windows (alongside Linux and Mac) [on each GitHub commit](https://github.com/johnkerl/miller/actions). +* Regression testing has been completely reworked, including regression-testing now running fully on Windows (alongside Linux and Mac) [on each GitHub commit](https://github.com/johnkerl/miller/actions). ## Performance benchmarks diff --git a/docs/src/originality.md b/docs/src/originality.md index 7ceb77ab3..6e7fd8c49 100644 --- a/docs/src/originality.md +++ b/docs/src/originality.md @@ -16,7 +16,7 @@ Quick links: # How original is Miller? -It isn't. Miller is one of many, many participants in the online-analytical-processing culture. Other key participants include `awk`, SQL, spreadsheets, etc. etc. etc. Far from being an original concept, Miller explicitly strives to imitate several existing tools: +It isn't. Miller is just one of many participants in the online analytical processing culture. Other key participants include `awk`, SQL, spreadsheets, etc. etc. etc. Far from being an original concept, Miller explicitly strives to imitate several existing tools: **The Unix toolkit**: Intentional similarities as described in [Unix-toolkit Context](unix-toolkit-context.md). @@ -26,7 +26,7 @@ Recipes abound for command-line data analysis using the Unix toolkit. Here are j * [http://www.gregreda.com/2013/07/15/unix-commands-for-data-science](http://www.gregreda.com/2013/07/15/unix-commands-for-data-science) * [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools) -**RecordStream**: Miller owes particular inspiration to [RecordStream](https://github.com/benbernard/RecordStream). 
The key difference is that RecordStream is a Perl-based tool for manipulating JSON (including requiring it to separately manipulate other formats such as CSV into and out of JSON), while Miller is fast Go which handles its formats natively. The similarities include the `sort`, `stats1` (analog of RecordStream's `collate`), and `delta` operations, as well as `filter` and `put`, and pretty-print formatting. +**RecordStream**: Miller owes particular inspiration to [RecordStream](https://github.com/benbernard/RecordStream). The key difference is that RecordStream is a Perl-based tool for manipulating JSON (including requiring it to separately manipulate other formats such as CSV into and out of JSON), while Miller is a fast Go tool that handles its formats natively. The similarities include the `sort`, `stats1` (analogous to RecordStream's `collate`), and `delta` operations, as well as `filter` and `put`, and the use of pretty-print formatting. **stats_m**: A third source of lineage is my Python [stats_m](https://github.com/johnkerl/scripts-math/tree/master/stats) module. This includes simple single-pass algorithms which form Miller's `stats1` and `stats2` subcommands. @@ -35,21 +35,21 @@ Recipes abound for command-line data analysis using the Unix toolkit. Here are j **Added value**: Miller's added values include: * Name-indexing, compared to the Unix toolkit's positional indexing. -* Raw speed, compared to `awk`, RecordStream, `stats_m`, or various other kinds of Python/Ruby/etc. scripts one can easily create. +* Raw speed, compared to `awk`, RecordStream, `stats_m`, or various other kinds of Python/Ruby/etc. scripts that one can easily create. * Compact keystroking for many common tasks, with a decent amount of flexibility. -* Ability to handle text files on the Unix pipe, without need for creating database tables, compared to SQL databases. +* Ability to handle text files on the Unix pipe, without the need for creating database tables, compared to SQL databases. 
* Various file formats, and on-the-fly format conversion. **jq**: Miller does for name-indexed text what [jq](https://stedolan.github.io/jq/) does for JSON. If you're not already familiar with `jq`, please check it out!. **What about similar tools?** -Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). Last I knew it doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well. As it turns out, I learned about most of these after writing Miller. +Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). Last I knew, it doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well. As it turns out, I learned about most of these after writing Miller. -**What about DOTADIW?** One of the key points of the [Unix philosophy](http://en.wikipedia.org/wiki/Unix_philosophy) is that a tool should do one thing and do it well. Hence `sort` and `cut` do just one thing. Why does Miller put `awk`-like processing, a few SQL-like operations, and statistical reduction all into one tool? This is a fair question. First note that many standard tools, such as `awk` and `perl`, do quite a few things -- as does `jq`. But I could have pushed for putting format awareness and name-indexing options into `cut`, `awk`, and so on (so you could do `cut -f hostname,uptime` or `awk '{sum += $x*$y}END{print sum}'`). Patching `cut`, `sort`, etc. on multiple operating systems is a non-starter in terms of uptake. Moreover, it makes sense for me to have Miller be a tool which collects together format-aware record-stream processing into one place, with good reuse of Miller-internal library code for its various features. +**What about DOTADIW?** One of the key points of the [Unix philosophy](http://en.wikipedia.org/wiki/Unix_philosophy) is that a tool should do one thing and do it well. 
Hence, `sort` and `cut` do just one thing. Why does Miller put `awk`-like processing, a few SQL-like operations, and statistical reduction all into one tool? This is a fair question. First, note that many standard tools, such as `awk` and `perl`, do quite a few things -- as does `jq`. But I could have pushed for putting format awareness and name-indexing options into `cut`, `awk`, and so on (so you could do `cut -f hostname,uptime` or `awk '{sum += $x*$y}END{print sum}'`). Patching `cut`, `sort`, etc., on multiple operating systems is a non-starter in terms of uptake. Moreover, it makes sense for me to have Miller be a tool that collects together format-aware record-stream processing into one place, with good reuse of Miller's internal library code for its various features. -**Why not use Perl/Python/Ruby etc.?** Maybe you should. With those tools you'll get far more expressive power, and sufficiently quick turnaround time for small-to-medium-sized data. Using Miller you'll get something less than a complete programming language, but which is fast, with moderate amounts of flexibility and much less keystroking. +**Why not use Perl/Python/Ruby, etc.?** Maybe you should. With those tools, you'll gain significantly more expressive power and a sufficiently quick turnaround time for small to medium-sized datasets. Using Miller, you'll get something less than a complete programming language, but which is fast, with moderate amounts of flexibility and much less keystroking. -When I was first developing Miller I made a survey of several languages. Using low-level implementation languages like C, Go, Rust, and Nim, I'd need to create my own domain-specific language (DSL) which would always be less featured than a full programming language, but I'd get better performance. Using high-level interpreted languages such as Perl/Python/Ruby I'd get the language's `eval` for free and I wouldn't need a DSL; Miller would have mainly been a set of format-specific I/O hooks. 
If I'd gotten good enough performance from the latter I'd have done it without question and Miller would be far more flexible. But low-level languages win the performance criteria by a landslide so we have Miller in Go with a custom DSL. +When I was first developing Miller, I made a survey of several languages. Using low-level implementation languages like C, Go, Rust, and Nim, I'd need to create my own domain-specific language (DSL), which would always be less featured than a full programming language, but I'd get better performance. Using high-level interpreted languages such as Perl/Python/Ruby, I'd get the language's `eval` for free and I wouldn't need a DSL; Miller would have mainly been a set of format-specific I/O hooks. If I'd gotten good enough performance from the latter, I'd have done it without question, and Miller would be far more flexible. But low-level languages win the performance criteria by a landslide, so we have Miller in Go with a custom DSL. -**No, really, why one more command-line data-manipulation tool?** I wrote Miller because I was frustrated with tools like `grep`, `sed`, and so on being *line-aware* without being *format-aware*. The single most poignant example I can think of is seeing people grep data lines out of their CSV files and sadly losing their header lines. While some lighter-than-SQL processing is very nice to have, at core I wanted the format-awareness of [RecordStream](https://github.com/benbernard/RecordStream) combined with the raw speed of the Unix toolkit. Miller does precisely that. +**No, really, why one more command-line data-manipulation tool?** I wrote Miller because I was frustrated with tools like `grep`, `sed`, and so on being *line-aware* without being *format-aware*. The single most poignant example I can think of is seeing people grep data lines from their CSV files and sadly losing their header lines. 
While some lighter-than-SQL processing is very nice to have, at core I wanted the format-awareness of [RecordStream](https://github.com/benbernard/RecordStream) combined with the raw speed of the Unix toolkit. Miller does precisely that. diff --git a/docs/src/originality.md.in b/docs/src/originality.md.in index d6825a9d1..15875e183 100644 --- a/docs/src/originality.md.in +++ b/docs/src/originality.md.in @@ -1,6 +1,6 @@ # How original is Miller? -It isn't. Miller is one of many, many participants in the online-analytical-processing culture. Other key participants include `awk`, SQL, spreadsheets, etc. etc. etc. Far from being an original concept, Miller explicitly strives to imitate several existing tools: +It isn't. Miller is just one of many participants in the online analytical processing culture. Other key participants include `awk`, SQL, spreadsheets, etc. etc. etc. Far from being an original concept, Miller explicitly strives to imitate several existing tools: **The Unix toolkit**: Intentional similarities as described in [Unix-toolkit Context](unix-toolkit-context.md). @@ -10,7 +10,7 @@ Recipes abound for command-line data analysis using the Unix toolkit. Here are j * [http://www.gregreda.com/2013/07/15/unix-commands-for-data-science](http://www.gregreda.com/2013/07/15/unix-commands-for-data-science) * [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools) -**RecordStream**: Miller owes particular inspiration to [RecordStream](https://github.com/benbernard/RecordStream). The key difference is that RecordStream is a Perl-based tool for manipulating JSON (including requiring it to separately manipulate other formats such as CSV into and out of JSON), while Miller is fast Go which handles its formats natively. The similarities include the `sort`, `stats1` (analog of RecordStream's `collate`), and `delta` operations, as well as `filter` and `put`, and pretty-print formatting. 
+**RecordStream**: Miller owes particular inspiration to [RecordStream](https://github.com/benbernard/RecordStream). The key difference is that RecordStream is a Perl-based tool for manipulating JSON (including requiring it to separately manipulate other formats such as CSV into and out of JSON), while Miller is a fast Go tool that handles its formats natively. The similarities include the `sort`, `stats1` (analogous to RecordStream's `collate`), and `delta` operations, as well as `filter` and `put`, and the use of pretty-print formatting. **stats_m**: A third source of lineage is my Python [stats_m](https://github.com/johnkerl/scripts-math/tree/master/stats) module. This includes simple single-pass algorithms which form Miller's `stats1` and `stats2` subcommands. @@ -19,21 +19,21 @@ Recipes abound for command-line data analysis using the Unix toolkit. Here are j **Added value**: Miller's added values include: * Name-indexing, compared to the Unix toolkit's positional indexing. -* Raw speed, compared to `awk`, RecordStream, `stats_m`, or various other kinds of Python/Ruby/etc. scripts one can easily create. +* Raw speed, compared to `awk`, RecordStream, `stats_m`, or various other kinds of Python/Ruby/etc. scripts that one can easily create. * Compact keystroking for many common tasks, with a decent amount of flexibility. -* Ability to handle text files on the Unix pipe, without need for creating database tables, compared to SQL databases. +* Ability to handle text files on the Unix pipe, without the need for creating database tables, compared to SQL databases. * Various file formats, and on-the-fly format conversion. **jq**: Miller does for name-indexed text what [jq](https://stedolan.github.io/jq/) does for JSON. If you're not already familiar with `jq`, please check it out!. **What about similar tools?** -Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). 
Last I knew it doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well. As it turns out, I learned about most of these after writing Miller. +Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). Last I knew, it doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well. As it turns out, I learned about most of these after writing Miller. -**What about DOTADIW?** One of the key points of the [Unix philosophy](http://en.wikipedia.org/wiki/Unix_philosophy) is that a tool should do one thing and do it well. Hence `sort` and `cut` do just one thing. Why does Miller put `awk`-like processing, a few SQL-like operations, and statistical reduction all into one tool? This is a fair question. First note that many standard tools, such as `awk` and `perl`, do quite a few things -- as does `jq`. But I could have pushed for putting format awareness and name-indexing options into `cut`, `awk`, and so on (so you could do `cut -f hostname,uptime` or `awk '{sum += $x*$y}END{print sum}'`). Patching `cut`, `sort`, etc. on multiple operating systems is a non-starter in terms of uptake. Moreover, it makes sense for me to have Miller be a tool which collects together format-aware record-stream processing into one place, with good reuse of Miller-internal library code for its various features. +**What about DOTADIW?** One of the key points of the [Unix philosophy](http://en.wikipedia.org/wiki/Unix_philosophy) is that a tool should do one thing and do it well. Hence, `sort` and `cut` do just one thing. Why does Miller put `awk`-like processing, a few SQL-like operations, and statistical reduction all into one tool? This is a fair question. First, note that many standard tools, such as `awk` and `perl`, do quite a few things -- as does `jq`. 
But I could have pushed for putting format awareness and name-indexing options into `cut`, `awk`, and so on (so you could do `cut -f hostname,uptime` or `awk '{sum += $x*$y}END{print sum}'`). Patching `cut`, `sort`, etc., on multiple operating systems is a non-starter in terms of uptake. Moreover, it makes sense for me to have Miller be a tool that collects together format-aware record-stream processing into one place, with good reuse of Miller's internal library code for its various features. -**Why not use Perl/Python/Ruby etc.?** Maybe you should. With those tools you'll get far more expressive power, and sufficiently quick turnaround time for small-to-medium-sized data. Using Miller you'll get something less than a complete programming language, but which is fast, with moderate amounts of flexibility and much less keystroking. +**Why not use Perl/Python/Ruby, etc.?** Maybe you should. With those tools, you'll gain significantly more expressive power and a sufficiently quick turnaround time for small to medium-sized datasets. Using Miller, you'll get something less than a complete programming language, but which is fast, with moderate amounts of flexibility and much less keystroking. -When I was first developing Miller I made a survey of several languages. Using low-level implementation languages like C, Go, Rust, and Nim, I'd need to create my own domain-specific language (DSL) which would always be less featured than a full programming language, but I'd get better performance. Using high-level interpreted languages such as Perl/Python/Ruby I'd get the language's `eval` for free and I wouldn't need a DSL; Miller would have mainly been a set of format-specific I/O hooks. If I'd gotten good enough performance from the latter I'd have done it without question and Miller would be far more flexible. But low-level languages win the performance criteria by a landslide so we have Miller in Go with a custom DSL. 
+When I was first developing Miller, I made a survey of several languages. Using low-level implementation languages like C, Go, Rust, and Nim, I'd need to create my own domain-specific language (DSL), which would always be less featured than a full programming language, but I'd get better performance. Using high-level interpreted languages such as Perl/Python/Ruby, I'd get the language's `eval` for free and I wouldn't need a DSL; Miller would have mainly been a set of format-specific I/O hooks. If I'd gotten good enough performance from the latter, I'd have done it without question, and Miller would be far more flexible. But low-level languages win the performance criteria by a landslide, so we have Miller in Go with a custom DSL. -**No, really, why one more command-line data-manipulation tool?** I wrote Miller because I was frustrated with tools like `grep`, `sed`, and so on being *line-aware* without being *format-aware*. The single most poignant example I can think of is seeing people grep data lines out of their CSV files and sadly losing their header lines. While some lighter-than-SQL processing is very nice to have, at core I wanted the format-awareness of [RecordStream](https://github.com/benbernard/RecordStream) combined with the raw speed of the Unix toolkit. Miller does precisely that. +**No, really, why one more command-line data-manipulation tool?** I wrote Miller because I was frustrated with tools like `grep`, `sed`, and so on being *line-aware* without being *format-aware*. The single most poignant example I can think of is seeing people grep data lines from their CSV files and sadly losing their header lines. While some lighter-than-SQL processing is very nice to have, at core I wanted the format-awareness of [RecordStream](https://github.com/benbernard/RecordStream) combined with the raw speed of the Unix toolkit. Miller does precisely that. 
diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index 57e929826..1eb7eb0b5 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -16,12 +16,11 @@ Quick links: # Record-heterogeneity -We think of CSV tables as rectangular: if there are 17 columns in the header -then there are 17 columns for every row, else the data have a formatting error. +We think of CSV tables as rectangular: if there are 17 columns in the header, then there are 17 columns for every row, else the data has a formatting error. But heterogeneous data abound -- log-file entries, JSON documents, no-SQL databases such as MongoDB, etc. -- not to mention **data-cleaning -opportunities** we'll look at in this page. Miller offers several ways to +opportunities** we'll look at on this page. Miller offers several ways to handle data heterogeneity. ## Terminology, examples, and solutions @@ -56,7 +55,7 @@ It has three records (written here using JSON Lines formatting): Here every row has the same keys, in the same order: `a,b,c`. -These are also sometimes called **rectangular** since if we pretty-print them we get a nice rectangle: +These are also sometimes called **rectangular** since if we pretty-print them, we get a nice rectangle:
 mlr --icsv --opprint cat data/het/hom.csv
@@ -94,7 +93,7 @@ a,b,c
 This example is still homogeneous, though: every row has the same keys, in the same order: `a,b,c`.
 Empty values don't make the data heterogeneous.
 
-Note however that we can use the [`fill-empty`](reference-verbs.md#fill-empty) verb to make these
+Note, however, that we can use the [`fill-empty`](reference-verbs.md#fill-empty) verb to make these
 values non-empty, if we like:
 
 
@@ -109,7 +108,7 @@ filler 8      9
 
 ### Ragged data
 
-Next let's look at non-well-formed CSV files. For a third example:
+Next, let's look at non-well-formed CSV files. For a third example:
 
 
 cat data/het/ragged.csv
@@ -132,14 +131,9 @@ a,b,c
 mlr: mlr: CSV header/data length mismatch 3 != 2 at filename data/het/ragged.csv row 3.
 
-There are two kinds of raggedness here. Since CSVs form records by zipping the -keys from the header line together with the values from each data line, the -second record has a missing value for key `c` (which ought to be fillable), -while the third record has a value `10` with no key for it. +There are two kinds of raggedness here. Since CSVs form records by zipping the keys from the header line together with the values from each data line, the second record has a missing value for key `c` (which ought to be fillable), while the third record has a value `10` with no key for it. -Using the [`--allow-ragged-csv-input` flag](reference-main-flag-list.md#csv-only-flags) -we can fill values in too-short rows, and provide a key (column number starting -with 1) for too-long rows: +Using the [`--allow-ragged-csv-input` flag](reference-main-flag-list.md#csv-only-flags), we can fill values in too-short rows and provide a key (column number starting with 1) for too-long rows:
 mlr --icsv --ojson --allow-ragged-csv-input cat data/het/ragged.csv
@@ -186,7 +180,7 @@ This kind of data arises often in practice. One reason is that, while many
 programming languages (including the Miller DSL) [preserve insertion
 order](reference-main-maps.md#insertion-order-is-preserved) in maps; others do
 not. So someone might have written `{"a":4,"b":5,"c":6}` in the source code,
-but the data may not have printed that way into a given data file.
+but the data may not have been printed that way into a given data file.
 
 We can use the [`regularize`](reference-verbs.md#regularize) or
 [`sort-within-records`](reference-verbs.md#sort-within-records) verb to order
@@ -203,13 +197,13 @@ the keys:
 
 The `regularize` verb tries to re-order subsequent rows to look like the first
 (whatever order that is); the `sort-within-records` verb simply uses
-alphabetical order (which is the same in the above example where the first
+alphabetical order (which is the same in the above example, where the first
 record has keys in the order `a,b,c`).
 
 ### Sparse data
 
 Here's another frequently occurring situation -- quite often, systems will log
-data for items which are present, but won't log data for items which aren't.
+data for items that are present, but won't log data for items that aren't.
 
 
 mlr --json cat data/het/sparse.json
@@ -236,8 +230,7 @@ data for items which are present, but won't log data for items which aren't.
 
 This data is called **sparse** (from the [data-storage term](https://en.wikipedia.org/wiki/Sparse_matrix)).
 
-We can use the [`unsparsify`](reference-verbs.md#unsparsify) verb to make sure
-every record has the same keys:
+We can use the [`unsparsify`](reference-verbs.md#unsparsify) verb to make sure every record has the same keys:
 
 
 mlr --json unsparsify data/het/sparse.json
@@ -282,12 +275,11 @@ xy55.east -       /dev/sda1 failover true
 
 ## Reading and writing heterogeneous data
 
-In the previous sections we saw different kinds of data heterogeneity, and ways
-to transform the data to make it homogeneous.
+In the previous sections, we saw different kinds of data heterogeneity and ways to transform the data to make it homogeneous.
 
 ### Non-rectangular file formats: JSON, XTAB, NIDX, DKVP
 
-For these formats, record-heterogeneity comes naturally:
+For these formats, record heterogeneity comes naturally:
 
 
 cat data/het/sparse.json
@@ -371,11 +363,11 @@ record_count=150,resource=/path/to/second/file
 
 ### Rectangular file formats: CSV and pretty-print
 
-CSV and pretty-print formats expect rectangular structure. But Miller lets you
+CSV and pretty-print formats expect a rectangular structure. But Miller lets you
 process non-rectangular using CSV and pretty-print.
 
-For CSV-lite and TSV-lite, Miller simply prints a newline and a new header when there is a schema
-change -- where by _schema_ we mean simply the list of record keys in the order they are
+For CSV-lite and TSV-lite, Miller prints a newline and a new header when there is a schema
+change -- where by _schema_ we mean the list of record keys in the order they are
 encountered. When there is no schema change, you get CSV per se as a special case. Likewise, Miller
 reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite
 is that the former is [RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter
@@ -470,9 +462,7 @@ mlr: CSV schema change: first keys "resource,loadsec,ok"; current keys "record_c
 mlr: exiting due to data error.
 
-Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if -there are implicit header changes (no intervening blank line and new header line) as seen above -- -you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). +Miller handles explicit header changes as shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`).
 mlr --csv --allow-ragged-csv-input cat data/het/ragged.csv
@@ -487,11 +477,11 @@ a,b,c
 ## Processing heterogeneous data
 
 Above we saw how to make heterogeneous data homogeneous, and then how to print heterogeneous data.
-As for other processing, record-heterogeneity is not a problem for Miller.
+As for other processing, record heterogeneity is not a problem for Miller.
 
 Miller operates on specified fields and takes the rest along: for example, if
-you are sorting on the `count` field then all records in the input stream must
-have a `count` field but the other fields can vary, and moreover the sorted-on
+you are sorting on the `count` field, then all records in the input stream must
+have a `count` field, but the other fields can vary -- and moreover, the sorted-on
 field name(s) don't need to be in the same position on each line:
 
 
diff --git a/docs/src/record-heterogeneity.md.in b/docs/src/record-heterogeneity.md.in
index 677098ee8..e3c128b57 100644
--- a/docs/src/record-heterogeneity.md.in
+++ b/docs/src/record-heterogeneity.md.in
@@ -1,11 +1,10 @@
 # Record-heterogeneity
 
-We think of CSV tables as rectangular: if there are 17 columns in the header
-then there are 17 columns for every row, else the data have a formatting error.
+We think of CSV tables as rectangular: if there are 17 columns in the header, then there are 17 columns for every row, else the data has a formatting error.
 
 But heterogeneous data abound -- log-file entries, JSON documents, no-SQL
 databases such as MongoDB, etc. -- not to mention **data-cleaning
-opportunities** we'll look at in this page. Miller offers several ways to
+opportunities** we'll look at on this page. Miller offers several ways to
 handle data heterogeneity.
 
 ## Terminology, examples, and solutions
@@ -29,7 +28,7 @@ GENMD-EOF
 
 Here every row has the same keys, in the same order: `a,b,c`.
 
-These are also sometimes called **rectangular** since if we pretty-print them we get a nice rectangle:
+These are also sometimes called **rectangular** since if we pretty-print them, we get a nice rectangle:
 
 GENMD-RUN-COMMAND
 mlr --icsv --opprint cat data/het/hom.csv
@@ -50,7 +49,7 @@ GENMD-EOF
 This example is still homogeneous, though: every row has the same keys, in the same order: `a,b,c`.
 Empty values don't make the data heterogeneous.
 
-Note however that we can use the [`fill-empty`](reference-verbs.md#fill-empty) verb to make these
+Note, however, that we can use the [`fill-empty`](reference-verbs.md#fill-empty) verb to make these
 values non-empty, if we like:
 
 GENMD-RUN-COMMAND
@@ -59,7 +58,7 @@ GENMD-EOF
 
 ### Ragged data
 
-Next let's look at non-well-formed CSV files. For a third example:
+Next, let's look at non-well-formed CSV files. For a third example:
 
 GENMD-RUN-COMMAND
 cat data/het/ragged.csv
@@ -71,14 +70,9 @@ GENMD-RUN-COMMAND-TOLERATING-ERROR
 mlr --csv cat data/het/ragged.csv
 GENMD-EOF
 
-There are two kinds of raggedness here. Since CSVs form records by zipping the
-keys from the header line together with the values from each data line, the
-second record has a missing value for key `c` (which ought to be fillable),
-while the third record has a value `10` with no key for it.
+There are two kinds of raggedness here. Since CSVs form records by zipping the keys from the header line together with the values from each data line, the second record has a missing value for key `c` (which ought to be fillable), while the third record has a value `10` with no key for it.
 
-Using the [`--allow-ragged-csv-input` flag](reference-main-flag-list.md#csv-only-flags)
-we can fill values in too-short rows, and provide a key (column number starting
-with 1) for too-long rows:
+Using the [`--allow-ragged-csv-input` flag](reference-main-flag-list.md#csv-only-flags), we can fill values in too-short rows and provide a key (column number starting with 1) for too-long rows:
 
 GENMD-RUN-COMMAND-TOLERATING-ERROR
 mlr --icsv --ojson --allow-ragged-csv-input cat data/het/ragged.csv
@@ -101,7 +95,7 @@ This kind of data arises often in practice. One reason is that, while many
 programming languages (including the Miller DSL) [preserve insertion
 order](reference-main-maps.md#insertion-order-is-preserved) in maps; others do
 not. So someone might have written `{"a":4,"b":5,"c":6}` in the source code,
-but the data may not have printed that way into a given data file.
+but the data may not have been printed that way into a given data file.
 
 We can use the [`regularize`](reference-verbs.md#regularize) or
 [`sort-within-records`](reference-verbs.md#sort-within-records) verb to order
@@ -113,13 +107,13 @@ GENMD-EOF
 
 The `regularize` verb tries to re-order subsequent rows to look like the first
 (whatever order that is); the `sort-within-records` verb simply uses
-alphabetical order (which is the same in the above example where the first
+alphabetical order (which is the same in the above example, where the first
 record has keys in the order `a,b,c`).
 
 ### Sparse data
 
 Here's another frequently occurring situation -- quite often, systems will log
-data for items which are present, but won't log data for items which aren't.
+data for items that are present, but won't log data for items that aren't.
 
 GENMD-RUN-COMMAND
 mlr --json cat data/het/sparse.json
@@ -127,8 +121,7 @@ GENMD-EOF
 
 This data is called **sparse** (from the [data-storage term](https://en.wikipedia.org/wiki/Sparse_matrix)).
 
-We can use the [`unsparsify`](reference-verbs.md#unsparsify) verb to make sure
-every record has the same keys:
+We can use the [`unsparsify`](reference-verbs.md#unsparsify) verb to make sure every record has the same keys:
 
 GENMD-RUN-COMMAND
 mlr --json unsparsify data/het/sparse.json
@@ -142,12 +135,11 @@ GENMD-EOF
 
 ## Reading and writing heterogeneous data
 
-In the previous sections we saw different kinds of data heterogeneity, and ways
-to transform the data to make it homogeneous.
+In the previous sections, we saw different kinds of data heterogeneity and ways to transform the data to make it homogeneous.
 
 ### Non-rectangular file formats: JSON, XTAB, NIDX, DKVP
 
-For these formats, record-heterogeneity comes naturally:
+For these formats, record heterogeneity comes naturally:
 
 GENMD-RUN-COMMAND
 cat data/het/sparse.json
@@ -177,11 +169,11 @@ GENMD-EOF
 
 ### Rectangular file formats: CSV and pretty-print
 
-CSV and pretty-print formats expect rectangular structure. But Miller lets you
+CSV and pretty-print formats expect a rectangular structure. But Miller lets you
 process non-rectangular using CSV and pretty-print.
 
-For CSV-lite and TSV-lite, Miller simply prints a newline and a new header when there is a schema
-change -- where by _schema_ we mean simply the list of record keys in the order they are
+For CSV-lite and TSV-lite, Miller prints a newline and a new header when there is a schema
+change -- where by _schema_ we mean the list of record keys in the order they are
 encountered. When there is no schema change, you get CSV per se as a special case. Likewise, Miller
 reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite
 is that the former is [RFC-4180-compliant](file-formats.md#csvtsvasvusvetc), while the latter
@@ -207,9 +199,7 @@ GENMD-RUN-COMMAND-TOLERATING-ERROR
 mlr --ijson --ocsv group-like data/het.json
 GENMD-EOF
 
-Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if
-there are implicit header changes (no intervening blank line and new header line) as seen above --
-you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`).
+Miller handles explicit header changes as shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`).
 
 GENMD-RUN-COMMAND
 mlr --csv --allow-ragged-csv-input cat data/het/ragged.csv
@@ -218,11 +208,11 @@ GENMD-EOF
 ## Processing heterogeneous data
 
 Above we saw how to make heterogeneous data homogeneous, and then how to print heterogeneous data.
-As for other processing, record-heterogeneity is not a problem for Miller.
+As for other processing, record heterogeneity is not a problem for Miller.
 
 Miller operates on specified fields and takes the rest along: for example, if
-you are sorting on the `count` field then all records in the input stream must
-have a `count` field but the other fields can vary, and moreover the sorted-on
+you are sorting on the `count` field, then all records in the input stream must
+have a `count` field, but the other fields can vary -- and moreover, the sorted-on
 field name(s) don't need to be in the same position on each line:
 
 GENMD-RUN-COMMAND
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md
index 529eddd77..880ffb19f 100644
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@@ -16,9 +16,7 @@ Quick links:
 
 # DSL built-in functions
 
-These are functions in the [Miller programming language](miller-programming-language.md)
-that you can call when you use `mlr put` and `mlr filter`. For example, when you type
-
+These are functions in the [Miller programming language](miller-programming-language.md) that you can call when you use `mlr put` and `mlr filter`. For example, when you type
 
 mlr --icsv --opprint --from example.csv put '
   $color = toupper($color);
@@ -43,26 +41,13 @@ the `toupper` and `gsub` bits are _functions_.
 
 ## Overview
 
-At the command line, you can use `mlr -f` and `mlr -F` for information much
-like what's on this page.
+At the command line, you can use `mlr -f` and `mlr -F` for information much like what's on this page.
 
-Each function takes a specific number of arguments, as shown below, except for
-functions marked as variadic such as `min` and `max`. (The latter compute min
-and max of any number of arguments.) There is no notion of optional or
-default-on-absent arguments. All argument-passing is positional rather than by
-name; arguments are passed by value, not by reference.
+Each function takes a specific number of arguments, as shown below, except for functions marked as variadic, such as `min` and `max`. (The latter compute the min and max of any number of arguments.) There is no notion of optional or default-on-absent arguments. All argument-passing is positional rather than by name; arguments are passed by value, not by reference.
 
-At the command line, you can get a list of all functions using `mlr -f`, with
-details using `mlr -F`.  (Or, `mlr help usage-functions-by-class` to get
-details in the order shown on this page.) You can get detail for a given
-function using `mlr help function namegoeshere`, e.g.  `mlr help function
-gsub`.
+At the command line, you can get a list of all functions using `mlr -f`, with details using `mlr -F`.  (Or, `mlr help usage-functions-by-class` to get details in the order shown on this page.) You can get details for a given function using `mlr help function namegoeshere`, e.g., `mlr help function gsub`.
 
-Operators are listed here along with functions. In this case, the
-argument-count is the number of items involved in the infix operator, e.g. we
-say `x+y` so the details for the `+` operator say that its number of arguments
-is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary
-`? :` operator shows an argument-count of 3.
+Operators are listed here along with functions. In this case, the argument count refers to the number of items involved in the infix operator. For example, we say `x+y`, so the details for the `+` operator indicate that it has two arguments. Unary operators such as `!` and `~` show an argument count of 1; the ternary `? :` operator shows an argument count of 3.
 
 
 ## Functions by class
diff --git a/docs/src/reference-dsl-builtin-functions.md.in b/docs/src/reference-dsl-builtin-functions.md.in
index 4bb51082c..b535cd907 100644
--- a/docs/src/reference-dsl-builtin-functions.md.in
+++ b/docs/src/reference-dsl-builtin-functions.md.in
@@ -1,8 +1,6 @@
 # DSL built-in functions
 
-These are functions in the [Miller programming language](miller-programming-language.md)
-that you can call when you use `mlr put` and `mlr filter`. For example, when you type
-
+These are functions in the [Miller programming language](miller-programming-language.md) that you can call when you use `mlr put` and `mlr filter`. For example, when you type
 GENMD-RUN-COMMAND
 mlr --icsv --opprint --from example.csv put '
   $color = toupper($color);
@@ -14,25 +12,12 @@ the `toupper` and `gsub` bits are _functions_.
 
 ## Overview
 
-At the command line, you can use `mlr -f` and `mlr -F` for information much
-like what's on this page.
+At the command line, you can use `mlr -f` and `mlr -F` for information much like what's on this page.
 
-Each function takes a specific number of arguments, as shown below, except for
-functions marked as variadic such as `min` and `max`. (The latter compute min
-and max of any number of arguments.) There is no notion of optional or
-default-on-absent arguments. All argument-passing is positional rather than by
-name; arguments are passed by value, not by reference.
+Each function takes a specific number of arguments, as shown below, except for functions marked as variadic, such as `min` and `max`. (The latter compute the min and max of any number of arguments.) There is no notion of optional or default-on-absent arguments. All argument-passing is positional rather than by name; arguments are passed by value, not by reference.
 
-At the command line, you can get a list of all functions using `mlr -f`, with
-details using `mlr -F`.  (Or, `mlr help usage-functions-by-class` to get
-details in the order shown on this page.) You can get detail for a given
-function using `mlr help function namegoeshere`, e.g.  `mlr help function
-gsub`.
+At the command line, you can get a list of all functions using `mlr -f`, with details using `mlr -F`.  (Or, `mlr help usage-functions-by-class` to get details in the order shown on this page.) You can get details for a given function using `mlr help function namegoeshere`, e.g., `mlr help function gsub`.
 
-Operators are listed here along with functions. In this case, the
-argument-count is the number of items involved in the infix operator, e.g. we
-say `x+y` so the details for the `+` operator say that its number of arguments
-is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary
-`? :` operator shows an argument-count of 3.
+Operators are listed here along with functions. In this case, the argument count refers to the number of items involved in the infix operator. For example, we say `x+y`, so the details for the `+` operator indicate that it has two arguments. Unary operators such as `!` and `~` show an argument count of 1; the ternary `? :` operator shows an argument count of 3.
 
 GENMD-RUN-CONTENT-GENERATOR(./mk-func-info.rb)
diff --git a/docs/src/reference-dsl-complexity.md b/docs/src/reference-dsl-complexity.md
index 5fb579155..de97fa3f0 100644
--- a/docs/src/reference-dsl-complexity.md
+++ b/docs/src/reference-dsl-complexity.md
@@ -16,34 +16,9 @@ Quick links:
 
 # A note on the complexity of Miller's expression language
 
-One of Miller's strengths is its brevity: it's much quicker -- and less
-error-prone -- to type `mlr stats1 -a sum -f x,y -g a,b` than having to track
-summation variables as in `awk`, or using Miller's [out-of-stream
-variables](reference-dsl-variables.md#out-of-stream-variables). And the more
-language features Miller's put-DSL has (for-loops, if-statements, nested
-control structures, user-defined functions, etc.) then the *less* powerful it
-begins to seem: because of the other programming-language features it *doesn't*
-have (classes, exceptions, and so on).
+One of Miller's strengths is its brevity: it's much quicker -- and less error-prone -- to type `mlr stats1 -a sum -f x,y -g a,b` than having to track summation variables as in `awk`, or using Miller's [out-of-stream variables](reference-dsl-variables.md#out-of-stream-variables). And the more language features Miller's put-DSL has (for-loops, if-statements, nested control structures, user-defined functions, etc.), then the *less* powerful it begins to seem: because of the other programming-language features it *doesn't* have (classes, exceptions, and so on).
 
-When I was originally prototyping Miller in 2015, the primary decision I had
-was whether to hand-code in a low-level language like C or Rust or Go, with my
-own hand-rolled DSL, or whether to use a higher-level language (like Python or
-Lua or Nim) and let the `put` statements be handled by the implementation
-language's own `eval`: the implementation language would take the place of a
-DSL. Multiple performance experiments showed me I could get better throughput
-using the former, by a wide margin. So Miller is Go under the hood with a
-hand-rolled DSL.
+When I was initially prototyping Miller in 2015, the primary decision I had was whether to hand-code in a low-level language like C or Rust or Go, with my hand-rolled DSL, or whether to use a higher-level language (like Python or Lua or Nim) and let the `put` statements be handled by the implementation language's own `eval`: the implementation language would take the place of a DSL. Multiple performance experiments showed me I could get better throughput using the former, by a wide margin. So Miller is Go under the hood with a hand-rolled DSL.
 
-I do want to keep focusing on what Miller is good at -- concise notation, low
-latency, and high throughput -- and not add too much in terms of
-high-level-language features to the DSL.  That said, some sort of
-customizability is a basic thing to want. As of 4.1.0 we have recursive
-`for`/`while`/`if` [structures](reference-dsl-control-structures.md) on about
-the same complexity level as `awk`; as of 5.0.0 we have [user-defined
-functions](reference-dsl-user-defined-functions.md) and [map-valued
-variables](reference-dsl-variables.md), again on about the same complexity level
-as `awk` along with optional type-declaration syntax; as of Miller 6 we have
-full support for [arrays](reference-main-arrays.md).  While I'm excited by these
-powerful language features, I hope to keep new features focused on Miller's
-sweet spot which is speed plus simplicity.
+I want to continue focusing on what Miller excels at — concise notation, low latency, and high throughput — and not add too many high-level language features to the DSL.  That said, some customizability is a basic thing to want. As of 4.1.0, we have recursive `for`/`while`/`if` [structures](reference-dsl-control-structures.md) on about the same complexity level as `awk`; as of 5.0.0, we have [user-defined functions](reference-dsl-user-defined-functions.md) and [map-valued variables](reference-dsl-variables.md), again on about the same complexity level as `awk` along with optional type-declaration syntax; as of Miller 6, we have full support for [arrays](reference-main-arrays.md).  While I'm excited by these powerful language features, I hope to keep new features focused on Miller's sweet spot, which is speed plus simplicity.
 
diff --git a/docs/src/reference-dsl-complexity.md.in b/docs/src/reference-dsl-complexity.md.in
index 81251b436..3087e00c1 100644
--- a/docs/src/reference-dsl-complexity.md.in
+++ b/docs/src/reference-dsl-complexity.md.in
@@ -1,33 +1,8 @@
 # A note on the complexity of Miller's expression language
 
-One of Miller's strengths is its brevity: it's much quicker -- and less
-error-prone -- to type `mlr stats1 -a sum -f x,y -g a,b` than having to track
-summation variables as in `awk`, or using Miller's [out-of-stream
-variables](reference-dsl-variables.md#out-of-stream-variables). And the more
-language features Miller's put-DSL has (for-loops, if-statements, nested
-control structures, user-defined functions, etc.) then the *less* powerful it
-begins to seem: because of the other programming-language features it *doesn't*
-have (classes, exceptions, and so on).
+One of Miller's strengths is its brevity: it's much quicker -- and less error-prone -- to type `mlr stats1 -a sum -f x,y -g a,b` than having to track summation variables as in `awk`, or using Miller's [out-of-stream variables](reference-dsl-variables.md#out-of-stream-variables). And the more language features Miller's put-DSL has (for-loops, if-statements, nested control structures, user-defined functions, etc.), then the *less* powerful it begins to seem: because of the other programming-language features it *doesn't* have (classes, exceptions, and so on).
 
-When I was originally prototyping Miller in 2015, the primary decision I had
-was whether to hand-code in a low-level language like C or Rust or Go, with my
-own hand-rolled DSL, or whether to use a higher-level language (like Python or
-Lua or Nim) and let the `put` statements be handled by the implementation
-language's own `eval`: the implementation language would take the place of a
-DSL. Multiple performance experiments showed me I could get better throughput
-using the former, by a wide margin. So Miller is Go under the hood with a
-hand-rolled DSL.
+When I was initially prototyping Miller in 2015, the primary decision I had was whether to hand-code in a low-level language like C or Rust or Go, with my hand-rolled DSL, or whether to use a higher-level language (like Python or Lua or Nim) and let the `put` statements be handled by the implementation language's own `eval`: the implementation language would take the place of a DSL. Multiple performance experiments showed me I could get better throughput using the former, by a wide margin. So Miller is Go under the hood with a hand-rolled DSL.
 
-I do want to keep focusing on what Miller is good at -- concise notation, low
-latency, and high throughput -- and not add too much in terms of
-high-level-language features to the DSL.  That said, some sort of
-customizability is a basic thing to want. As of 4.1.0 we have recursive
-`for`/`while`/`if` [structures](reference-dsl-control-structures.md) on about
-the same complexity level as `awk`; as of 5.0.0 we have [user-defined
-functions](reference-dsl-user-defined-functions.md) and [map-valued
-variables](reference-dsl-variables.md), again on about the same complexity level
-as `awk` along with optional type-declaration syntax; as of Miller 6 we have
-full support for [arrays](reference-main-arrays.md).  While I'm excited by these
-powerful language features, I hope to keep new features focused on Miller's
-sweet spot which is speed plus simplicity.
+I want to continue focusing on what Miller excels at — concise notation, low latency, and high throughput — and not add too many high-level language features to the DSL.  That said, some customizability is a basic thing to want. As of 4.1.0, we have recursive `for`/`while`/`if` [structures](reference-dsl-control-structures.md) on about the same complexity level as `awk`; as of 5.0.0, we have [user-defined functions](reference-dsl-user-defined-functions.md) and [map-valued variables](reference-dsl-variables.md), again on about the same complexity level as `awk` along with optional type-declaration syntax; as of Miller 6, we have full support for [arrays](reference-main-arrays.md).  While I'm excited by these powerful language features, I hope to keep new features focused on Miller's sweet spot, which is speed plus simplicity.
 
diff --git a/docs/src/reference-dsl-control-structures.md b/docs/src/reference-dsl-control-structures.md
index 16de01613..60bb52d95 100644
--- a/docs/src/reference-dsl-control-structures.md
+++ b/docs/src/reference-dsl-control-structures.md
@@ -18,7 +18,7 @@ Quick links:
 
 ## Pattern-action blocks
 
-These are reminiscent of `awk` syntax.  They can be used to allow assignments to be done only when appropriate -- e.g. for math-function domain restrictions, regex-matching, and so on:
+These are reminiscent of `awk` syntax.  They can be used to allow assignments to be done only when appropriate -- e.g., for math-function domain restrictions, regex-matching, and so on:
 
 
 mlr cat data/put-gating-example-1.dkvp
@@ -64,7 +64,7 @@ a=some other name
 a=xyz_789,b=left_xyz,c=right_789
 
-This produces heteregenous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)). But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements. This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally: +This produces heterogeneous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)). But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements. This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally:
 mlr --opprint put '
@@ -172,7 +172,7 @@ records](operating-on-all-records.md) for some options.
 
 ## For-loops
 
-While Miller's `while` and `do-while` statements are much as in many other languages, `for` loops are more idiosyncratic to Miller. They are loops over key-value pairs, whether in stream records, out-of-stream variables, local variables, or map-literals: more reminiscent of `foreach`, as in (for example) PHP. There are **for-loops over map keys** and **for-loops over key-value tuples**.  Additionally, Miller has a **C-style triple-for loop** with initialize, test, and update statements. Each is described below.
+While Miller's `while` and `do-while` statements are much like those in many other languages, `for` loops are more idiosyncratic to Miller. They are loops over key-value pairs, whether in stream records, out-of-stream variables, local variables, or map-literals: more reminiscent of `foreach`, as in (for example) PHP. There are **for-loops over map keys** and **for-loops over key-value tuples**.  Additionally, Miller has a **C-style triple-for loop** with initialize, test, and update statements. Each is described below.
 
 As with `while` and `do-while`, a `break` or `continue` within nested control structures will propagate to the innermost loop enclosing them, if any, and a `break` or `continue` outside a loop is a syntax error that will be flagged as soon as the expression is parsed, before any input records are ingested.
 
@@ -260,11 +260,9 @@ value: true valuetype: bool
 
 ### Key-value for-loops
 
-For [maps](reference-main-maps.md), the first loop variable is the key and the
-second is the value; for [arrays](reference-main-arrays.md), the first loop
-variable is the (1-up) array index and the second is the value.
+For [maps](reference-main-maps.md), the first loop variable is the key, and the second is the value. For [arrays](reference-main-arrays.md), the first loop variable is the (1-based) array index, and the second is the value.
 
-Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
+Single-level keys may be obtained using either `for(k,v)` or `for((k),v)`; multi-level keys may be obtained using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
 
 
 cat data/for-srec-example.tbl
@@ -333,7 +331,7 @@ eks wye 4 0.381399 0.134188 4.515587           18.062348
 wye pan 5 0.573288 0.863624 6.4369119999999995 25.747647999999998
 
-It can be confusing to modify the stream record while iterating over a copy of it, so instead you might find it simpler to use a local variable in the loop and only update the stream record after the loop: +It can be confusing to modify the stream record while iterating over a copy of it, so instead, you might find it simpler to use a local variable in the loop and only update the stream record after the loop:
 mlr --from data/small --opprint put '
@@ -355,7 +353,7 @@ eks wye 4 0.381399 0.134188 4.515587
 wye pan 5 0.573288 0.863624 6.4369119999999995
 
-You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables. The bound variables are bound to a copy of the sub-map as it was before the loop started. The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key-variables. The terminal values are bound to the loop value-variable whenever the keys are not too shallow. The value-variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows: +You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables. The bound variables are bound to a copy of the sub-map as it was before the loop started. The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key variables. The terminal values are bound to the loop value variable whenever the keys are not too shallow. The value variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows:
 # Parentheses are optional for single key:
@@ -516,15 +514,15 @@ wye pan 5 0.573288 0.863624 15   31
 
 Notes:
 
-* In `for (start; continuation; update) { body }`, the start, continuation, and update statements may be empty, single statements, or multiple comma-separated statements. If the continuation is empty (e.g. `for(i=1;;i+=1)`) it defaults to true.
+* In `for (start; continuation; update) { body }`, the start, continuation, and update statements may be empty, single statements, or multiple comma-separated statements. If the continuation is empty (e.g., `for(i=1;;i+=1)`), it defaults to true.
 
 * In particular, you may use `$`-variables and/or `@`-variables in the start, continuation, and/or update steps (as well as the body, of course).
 
-* The typedecls such as `int` or `num` are optional.  If a typedecl is provided (for a local variable), it binds a variable scoped to the for-loop regardless of whether a same-name variable is present in outer scope. If a typedecl is not provided, then the variable is scoped to the for-loop if no same-name variable is present in outer scope, or if a same-name variable is present in outer scope then it is modified.
+* The typedecls such as `int` or `num` are optional.  If a typedecl is provided (for a local variable), it binds a variable scoped to the for-loop regardless of whether a same-name variable is present in the outer scope. If a typedecl is not provided, then the variable is scoped to the for-loop if no same-name variable is present in the outer scope, or if a same-name variable is present in the outer scope, then it is modified.
 
 * Miller has no `++` or `--` operators.
 
-* As with all `for`/`if`/`while` statements in Miller, the curly braces are required even if the body is a single statement, or empty.
+* As with all `for`/`if`/`while` statements in Miller, the curly braces are required even if the body is a single statement or empty.
 
 ## Begin/end blocks
 
diff --git a/docs/src/reference-dsl-control-structures.md.in b/docs/src/reference-dsl-control-structures.md.in
index b7161804c..caffa9bdf 100644
--- a/docs/src/reference-dsl-control-structures.md.in
+++ b/docs/src/reference-dsl-control-structures.md.in
@@ -2,7 +2,7 @@
 
 ## Pattern-action blocks
 
-These are reminiscent of `awk` syntax.  They can be used to allow assignments to be done only when appropriate -- e.g. for math-function domain restrictions, regex-matching, and so on:
+These are reminiscent of `awk` syntax.  They can be used to allow assignments to be done only when appropriate -- e.g., for math-function domain restrictions, regex-matching, and so on:
 
 GENMD-RUN-COMMAND
 mlr cat data/put-gating-example-1.dkvp
@@ -24,7 +24,7 @@ mlr put '
   data/put-gating-example-2.dkvp
 GENMD-EOF
 
-This produces heteregenous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)).  But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements.  This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally:
+This produces heterogeneous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)).  But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements.  This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally:
 
 GENMD-RUN-COMMAND
 mlr --opprint put '
@@ -120,7 +120,7 @@ records](operating-on-all-records.md) for some options.
 
 ## For-loops
 
-While Miller's `while` and `do-while` statements are much as in many other languages, `for` loops are more idiosyncratic to Miller. They are loops over key-value pairs, whether in stream records, out-of-stream variables, local variables, or map-literals: more reminiscent of `foreach`, as in (for example) PHP. There are **for-loops over map keys** and **for-loops over key-value tuples**.  Additionally, Miller has a **C-style triple-for loop** with initialize, test, and update statements. Each is described below.
+While Miller's `while` and `do-while` statements are much like those in many other languages, `for` loops are more idiosyncratic to Miller. They are loops over key-value pairs, whether in stream records, out-of-stream variables, local variables, or map-literals: more reminiscent of `foreach`, as in (for example) PHP. There are **for-loops over map keys** and **for-loops over key-value tuples**.  Additionally, Miller has a **C-style triple-for loop** with initialize, test, and update statements. Each is described below.
 
 As with `while` and `do-while`, a `break` or `continue` within nested control structures will propagate to the innermost loop enclosing them, if any, and a `break` or `continue` outside a loop is a syntax error that will be flagged as soon as the expression is parsed, before any input records are ingested.
 
@@ -165,11 +165,9 @@ GENMD-EOF
 
 ### Key-value for-loops
 
-For [maps](reference-main-maps.md), the first loop variable is the key and the
-second is the value; for [arrays](reference-main-arrays.md), the first loop
-variable is the (1-up) array index and the second is the value.
+For [maps](reference-main-maps.md), the first loop variable is the key, and the second is the value. For [arrays](reference-main-arrays.md), the first loop variable is the (1-based) array index, and the second is the value.
 
-Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
+Single-level keys may be obtained using either `for(k,v)` or `for((k),v)`; multi-level keys may be obtained using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
 
 GENMD-RUN-COMMAND
 cat data/for-srec-example.tbl
@@ -210,7 +208,7 @@ mlr --from data/small --opprint put '
 '
 GENMD-EOF
 
-It can be confusing to modify the stream record while iterating over a copy of it, so instead you might find it simpler to use a local variable in the loop and only update the stream record after the loop:
+It can be confusing to modify the stream record while iterating over a copy of it, so instead, you might find it simpler to use a local variable in the loop and only update the stream record after the loop:
 
 GENMD-RUN-COMMAND
 mlr --from data/small --opprint put '
@@ -224,7 +222,7 @@ mlr --from data/small --opprint put '
 '
 GENMD-EOF
 
-You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables.  The bound variables are bound to a copy of the sub-map as it was before the loop started.  The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key-variables. The terminal values are bound to the loop value-variable whenever the keys are not too shallow. The value-variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows:
+You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables.  The bound variables are bound to a copy of the sub-map as it was before the loop started.  The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key variables. The terminal values are bound to the loop value variable whenever the keys are not too shallow. The value variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows:
 
 GENMD-INCLUDE-ESCAPED(data/for-oosvar-example-0a.txt)
 
@@ -333,15 +331,15 @@ GENMD-EOF
 
 Notes:
 
-* In `for (start; continuation; update) { body }`, the start, continuation, and update statements may be empty, single statements, or multiple comma-separated statements. If the continuation is empty (e.g. `for(i=1;;i+=1)`) it defaults to true.
+* In `for (start; continuation; update) { body }`, the start, continuation, and update statements may be empty, single statements, or multiple comma-separated statements. If the continuation is empty (e.g., `for(i=1;;i+=1)`), it defaults to true.
 
 * In particular, you may use `$`-variables and/or `@`-variables in the start, continuation, and/or update steps (as well as the body, of course).
 
-* The typedecls such as `int` or `num` are optional.  If a typedecl is provided (for a local variable), it binds a variable scoped to the for-loop regardless of whether a same-name variable is present in outer scope. If a typedecl is not provided, then the variable is scoped to the for-loop if no same-name variable is present in outer scope, or if a same-name variable is present in outer scope then it is modified.
+* The typedecls such as `int` or `num` are optional.  If a typedecl is provided (for a local variable), it binds a variable scoped to the for-loop regardless of whether a same-name variable is present in the outer scope. If a typedecl is not provided, then the variable is scoped to the for-loop if no same-name variable is present in the outer scope, or if a same-name variable is present in the outer scope, then it is modified.
 
 * Miller has no `++` or `--` operators.
 
-* As with all `for`/`if`/`while` statements in Miller, the curly braces are required even if the body is a single statement, or empty.
+* As with all `for`/`if`/`while` statements in Miller, the curly braces are required even if the body is a single statement or empty.
 
 ## Begin/end blocks
 
diff --git a/docs/src/reference-dsl-filter-statements.md b/docs/src/reference-dsl-filter-statements.md
index 0a2de3dd3..3d2d733f2 100644
--- a/docs/src/reference-dsl-filter-statements.md
+++ b/docs/src/reference-dsl-filter-statements.md
@@ -36,7 +36,7 @@ red,square,true,2,15,79.2778,0.0130
 red,circle,true,3,16,13.8103,2.9010
 
-The former, of course, is a little easier to type. For another example: +The former is a little easier to type. For another example:
 mlr --csv put '@running_sum += $quantity; filter @running_sum > 500' example.csv
diff --git a/docs/src/reference-dsl-filter-statements.md.in b/docs/src/reference-dsl-filter-statements.md.in
index c3acd41e1..7f363593e 100644
--- a/docs/src/reference-dsl-filter-statements.md.in
+++ b/docs/src/reference-dsl-filter-statements.md.in
@@ -10,7 +10,7 @@ GENMD-RUN-COMMAND
 mlr --csv put 'filter NR==2 || NR==3' example.csv
 GENMD-EOF
 
-The former, of course, is a little easier to type. For another example:
+The former is a little easier to type. For another example:
 
 GENMD-RUN-COMMAND
 mlr --csv put '@running_sum += $quantity; filter @running_sum > 500' example.csv
diff --git a/docs/src/reference-dsl-higher-order-functions.md b/docs/src/reference-dsl-higher-order-functions.md
index d40cfd1e7..6e41bd281 100644
--- a/docs/src/reference-dsl-higher-order-functions.md
+++ b/docs/src/reference-dsl-higher-order-functions.md
@@ -29,23 +29,15 @@ As of [Miller 6](new-in-miller-6.md) you can use
 intuitive operations on arrays and maps, as an alternative to things which
 would otherwise require for-loops.
 
-See also the [`get_keys`](reference-dsl-builtin-functions.md#get_keys) and
-[`get_values`](reference-dsl-builtin-functions.md#get_values) functions which,
-when given a map, return an array of its keys or an array of its values,
-respectively.
+See also the [`get_keys`](reference-dsl-builtin-functions.md#get_keys) and [`get_values`](reference-dsl-builtin-functions.md#get_values) functions which, when given a map, return an array of its keys or an array of its values, respectively.
 
 ## select
 
-The [`select`](reference-dsl-builtin-functions.md#select) function takes a map
-or array as its first argument and a function as second argument.  It includes
-each input element in the output if the function returns true.
+The [`select`](reference-dsl-builtin-functions.md#select) function takes a map or array as its first argument and a function as its second argument.  It includes each input element in the output if the function returns true.
 
-For arrays, that function should take one argument, for array element; for
-maps, it should take two, for map-element key and value. In either case it
-should return a boolean.
+For arrays, that function should take one argument, for an array element; for maps, it should take two, for a map element key and value. In either case, it should return a boolean.
 
-A perhaps helpful analogy: the `select` function is to arrays and maps as the
-[`filter`](reference-verbs.md#filter) is to records.
+A perhaps helpful analogy: the `select` function is to arrays and maps as the [`filter`](reference-verbs.md#filter) is to records.
 
 Array examples:
 
@@ -123,16 +115,11 @@ Values with last digit >= 5:
 
 ## apply
 
-The [`apply`](reference-dsl-builtin-functions.md#apply) function takes a map
-or array as its first argument and a function as second argument.  It applies
-the function to each element of the array or map.
+The [`apply`](reference-dsl-builtin-functions.md#apply) function takes a map or array as its first argument and a function as its second argument.  It applies the function to each element of the array or map.
 
-For arrays, the function should take one argument, for array element; it should
-return a new element. For maps, it should take two, for map-element key and
-value. It should return a new key-value pair (i.e. a single-entry map).
+For arrays, the function should take one argument, representing an array element, and return a new element. For maps, it should take two, for the map element key and value. It should return a new key-value pair (i.e., a single-entry map).
 
-A perhaps helpful analogy: the `apply` function is to arrays and maps as the
-[`put`](reference-verbs.md#put) is to records.
+A perhaps helpful analogy: the `apply` function is to arrays and maps as the [`put`](reference-verbs.md#put) is to records.
 
 Array examples:
 
@@ -232,17 +219,11 @@ Same, with upcased keys:
 
 ## reduce
 
-The [`reduce`](reference-dsl-builtin-functions.md#reduce) function takes a map
-or array as its first argument and a function as second argument. It accumulates entries into a final
-output -- for example, sum or product.
+The [`reduce`](reference-dsl-builtin-functions.md#reduce) function takes a map or array as its first argument and a function as its second argument. It accumulates entries into a final output, such as a sum or product.
 
-For arrays, the function should take two arguments, for accumulated value and
-array element; for maps, it should take four, for accumulated key and value
-and map-element key and value. In either case it should return the updated
-accumulator.
+For arrays, the function should take two arguments, for the accumulated value and the array element; for maps, it should take four, for the accumulated key and value, and the map-element key and value. In either case it should return the updated accumulator.
 
-The start value for the accumulator is the first element for arrays, or the
-first element's key-value pair for maps.
+The start value for the accumulator is the first element for arrays, or the first element's key-value pair for maps.
 
 
 mlr -n put '
@@ -370,10 +351,7 @@ String-join of values:
 
 ## fold
 
-The [`fold`](reference-dsl-builtin-functions.md#fold) function is the same as
-`reduce`, except that instead of the starting value for the accumulation being
-taken from the first entry of the array/map, you specify it as the third
-argument.
+The [`fold`](reference-dsl-builtin-functions.md#fold) function is the same as `reduce`, except that instead of the starting value for the accumulation being taken from the first entry of the array/map, you specify it as the third argument.
 
 
 mlr -n put '
@@ -469,22 +447,13 @@ Sum of values with fold and 1000000 initial value:
 
 ## sort
 
-The [`sort`](reference-dsl-builtin-functions.md#sort) function takes a map or
-array as its first argument, and it can take a function as second argument.
-Unlike the other higher-order functions, the second argument can be omitted
-when the natural ordering is desired -- ordered by array element for arrays, or by
-key for maps.
+The [`sort`](reference-dsl-builtin-functions.md#sort) function takes a map or array as its first argument, and it can take a function as its second argument. Unlike the other higher-order functions, the second argument can be omitted when the natural ordering is desired -- ordered by array element for arrays, or by key for maps.
 
-As a second option, character flags such as `r` for reverse or `c` for
-case-folded lexical sort can be supplied as the second argument.
+As a second option, character flags such as `r` for reverse or `c` for case-folded lexical sort can be supplied as the second argument.
 
 As a third option, a function can be supplied as the second argument.
 
-For arrays, that function should take two arguments `a` and `b`, returning a
-negative, zero, or positive number as `a<b`, `a==b`, or `a>b` respectively.
-For maps, the function should take four arguments `ak`, `av`, `bk`, and `bv`,
-again returning negative, zero, or positive, using `a` and `b`'s keys and
-values.
+For arrays, that function should take two arguments `a` and `b`, returning a negative, zero, or positive number as `a<b`, `a==b`, or `a>b` respectively. For maps, the function should take four arguments `ak`, `av`, `bk`, and `bv`, again returning negative, zero, or positive, using `a`'s and `b`'s keys and values.
 
 Array examples:
 
@@ -703,9 +672,7 @@ red    square   false 6 64    77.1991  9.5310
 
 ## Combined examples
 
-Using a paradigm from the [page on operating on all
-records](operating-on-all-records.md), we can retain a column from the input
-data as an array, then apply some higher-order functions to it:
+Using a paradigm from the [page on operating on all records](operating-on-all-records.md), we can retain a column from the input data as an array, then apply some higher-order functions to it:
 
 
 mlr --c2p cat example.csv
@@ -776,7 +743,7 @@ Sorted, then cubed, then summed:
 
 ### Remember return
 
-From other languages it's easy to accidentally write
+Coming from other languages, it's easy to accidentally write
 
 
 mlr -n put 'end { print select([1,2,3,4,5], func (e) { e >= 3 })}'
@@ -833,7 +800,7 @@ but this does:
 2187
 
-### Built-in functions currently unsupported as arguments +### Built-in functions are currently unsupported as arguments [Built-in functions](reference-dsl-user-defined-functions.md) are, as of September 2021, a bit separate from [user-defined diff --git a/docs/src/reference-dsl-higher-order-functions.md.in b/docs/src/reference-dsl-higher-order-functions.md.in index ed044c006..de5ccbdf9 100644 --- a/docs/src/reference-dsl-higher-order-functions.md.in +++ b/docs/src/reference-dsl-higher-order-functions.md.in @@ -13,23 +13,15 @@ As of [Miller 6](new-in-miller-6.md) you can use intuitive operations on arrays and maps, as an alternative to things which would otherwise require for-loops. -See also the [`get_keys`](reference-dsl-builtin-functions.md#get_keys) and -[`get_values`](reference-dsl-builtin-functions.md#get_values) functions which, -when given a map, return an array of its keys or an array of its values, -respectively. +See also the [`get_keys`](reference-dsl-builtin-functions.md#get_keys) and [`get_values`](reference-dsl-builtin-functions.md#get_values) functions which, when given a map, return an array of its keys or an array of its values, respectively. ## select -The [`select`](reference-dsl-builtin-functions.md#select) function takes a map -or array as its first argument and a function as second argument. It includes -each input element in the output if the function returns true. +The [`select`](reference-dsl-builtin-functions.md#select) function takes a map or array as its first argument and a function as its second argument. It includes each input element in the output if the function returns true. -For arrays, that function should take one argument, for array element; for -maps, it should take two, for map-element key and value. In either case it -should return a boolean. +For arrays, that function should take one argument, for an array element; for maps, it should take two, for a map element key and value. In either case, it should return a boolean. 
-A perhaps helpful analogy: the `select` function is to arrays and maps as the -[`filter`](reference-verbs.md#filter) is to records. +A perhaps helpful analogy: the `select` function is to arrays and maps as the [`filter`](reference-verbs.md#filter) is to records. Array examples: @@ -75,16 +67,11 @@ GENMD-EOF ## apply -The [`apply`](reference-dsl-builtin-functions.md#apply) function takes a map -or array as its first argument and a function as second argument. It applies -the function to each element of the array or map. +The [`apply`](reference-dsl-builtin-functions.md#apply) function takes a map or array as its first argument and a function as its second argument. It applies the function to each element of the array or map. -For arrays, the function should take one argument, for array element; it should -return a new element. For maps, it should take two, for map-element key and -value. It should return a new key-value pair (i.e. a single-entry map). +For arrays, the function should take one argument, representing an array element, and return a new element. For maps, it should take two, for the map element key and value. It should return a new key-value pair (i.e., a single-entry map). -A perhaps helpful analogy: the `apply` function is to arrays and maps as the -[`put`](reference-verbs.md#put) is to records. +A perhaps helpful analogy: the `apply` function is to arrays and maps as the [`put`](reference-verbs.md#put) is to records. Array examples: @@ -134,17 +121,11 @@ GENMD-EOF ## reduce -The [`reduce`](reference-dsl-builtin-functions.md#reduce) function takes a map -or array as its first argument and a function as second argument. It accumulates entries into a final -output -- for example, sum or product. +The [`reduce`](reference-dsl-builtin-functions.md#reduce) function takes a map or array as its first argument and a function as its second argument. It accumulates entries into a final output, such as a sum or product. 
-For arrays, the function should take two arguments, for accumulated value and -array element; for maps, it should take four, for accumulated key and value -and map-element key and value. In either case it should return the updated -accumulator. +For arrays, the function should take two arguments, for the accumulated value and the array element; for maps, it should take four, for the accumulated key and value, and the map-element key and value. In either case it should return the updated accumulator. -The start value for the accumulator is the first element for arrays, or the -first element's key-value pair for maps. +The start value for the accumulator is the first element for arrays, or the first element's key-value pair for maps. GENMD-RUN-COMMAND mlr -n put ' @@ -213,10 +194,7 @@ GENMD-EOF ## fold -The [`fold`](reference-dsl-builtin-functions.md#fold) function is the same as -`reduce`, except that instead of the starting value for the accumulation being -taken from the first entry of the array/map, you specify it as the third -argument. +The [`fold`](reference-dsl-builtin-functions.md#fold) function is the same as `reduce`, except that instead of the starting value for the accumulation being taken from the first entry of the array/map, you specify it as the third argument. GENMD-RUN-COMMAND mlr -n put ' @@ -269,22 +247,13 @@ GENMD-EOF ## sort -The [`sort`](reference-dsl-builtin-functions.md#sort) function takes a map or -array as its first argument, and it can take a function as second argument. -Unlike the other higher-order functions, the second argument can be omitted -when the natural ordering is desired -- ordered by array element for arrays, or by -key for maps. +The [`sort`](reference-dsl-builtin-functions.md#sort) function takes a map or array as its first argument, and it can take a function as its second argument. 
Unlike the other higher-order functions, the second argument can be omitted when the natural ordering is desired -- ordered by array element for arrays, or by key for maps. -As a second option, character flags such as `r` for reverse or `c` for -case-folded lexical sort can be supplied as the second argument. +As a second option, character flags such as `r` for reverse or `c` for case-folded lexical sort can be supplied as the second argument. As a third option, a function can be supplied as the second argument. -For arrays, that function should take two arguments `a` and `b`, returning a -negative, zero, or positive number as `ab` respectively. -For maps, the function should take four arguments `ak`, `av`, `bk`, and `bv`, -again returning negative, zero, or positive, using `a` and `b`'s keys and -values. +For arrays, that function should take two arguments `a` and `b`, returning a negative, zero, or positive number as `a<b`, `a==b`, or `a>b` respectively. For maps, the function should take four arguments `ak`, `av`, `bk`, and `bv`, again returning negative, zero, or positive, using `a`'s and `b`'s keys and values. 
Array examples: @@ -379,9 +348,7 @@ GENMD-EOF ## Combined examples -Using a paradigm from the [page on operating on all -records](operating-on-all-records.md), we can retain a column from the input -data as an array, then apply some higher-order functions to it: +Using a paradigm from the [page on operating on all records](operating-on-all-records.md), we can retain a column from the input data as an array, then apply some higher-order functions to it: GENMD-RUN-COMMAND mlr --c2p cat example.csv @@ -426,7 +393,7 @@ GENMD-EOF ### Remember return -From other languages it's easy to accidentally write +From other languages, it's easy to write accidentally GENMD-RUN-COMMAND-TOLERATING-ERROR mlr -n put 'end { print select([1,2,3,4,5], func (e) { e >= 3 })}' @@ -465,7 +432,7 @@ mlr -n put ' ' GENMD-EOF -### Built-in functions currently unsupported as arguments +### Built-in functions are currently unsupported as arguments [Built-in functions](reference-dsl-user-defined-functions.md) are, as of September 2021, a bit separate from [user-defined diff --git a/docs/src/reference-dsl-operators.md b/docs/src/reference-dsl-operators.md index 1b1173444..cdba1ca55 100644 --- a/docs/src/reference-dsl-operators.md +++ b/docs/src/reference-dsl-operators.md @@ -22,7 +22,7 @@ Operators are listed on the [DSL built-in functions page](reference-dsl-builtin- ## Operator precedence -Operators are listed in order of decreasing precedence, highest first. +Operators are listed in order of decreasing precedence, from highest to lowest. | Operators | Associativity | |-------------------------------|---------------| @@ -46,14 +46,13 @@ Operators are listed in order of decreasing precedence, highest first. | `? :` | right to left | | `=` | N/A for Miller (there is no $a=$b=$c) | -See also the [section on parsing and operator precedence in the REPL](repl.md#parsing-and-operator-precedence) -for information on how to examine operator precedence interactively. 
+See also the [section on parsing and operator precedence in the REPL](repl.md#parsing-and-operator-precedence) for information on how to examine operator precedence interactively. ## Operator and function semantics * Functions are often pass-throughs straight to the system-standard Go libraries. -* The [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max) functions are different from other multi-argument functions which return null if any of their inputs are null: for [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max), by contrast, if one argument is absent-null, the other is returned. Empty-null loses min or max against numeric or boolean; empty-null is less than any other string. +* The [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max) functions are different from other multi-argument functions, which return null if any of their inputs are null: for [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max), by contrast, if one argument is absent-null, the other is returned. Empty-null loses min or max against numeric or boolean; empty-null is less than any other string. * Symmetrically with respect to the bitwise OR, AND, and XOR operators [`|`](reference-dsl-builtin-functions.md#bitwise-or), @@ -71,7 +70,7 @@ for information on how to examine operator precedence interactively. The main use for the `.` operator is for string concatenation: `"abc" . "def"` is `"abc.def"`. -However, in Miller 6 it has optional use for map traversal. Example: +However, in Miller 6, it has an optional use for map traversal. Example:
 cat data/server-log.json
@@ -146,7 +145,7 @@ This also works on the left-hand sides of assignment statements:
 
 A few caveats:
 
-* This is why `.` has higher precedece than `+` in the table above -- in Miller 5 and below, where `.` was only used for concatenation, it had the same precedence as `+`. So you can now do this:
+* This is why `.` has higher precedence than `+` in the table above -- in Miller 5 and below, where `.` was only used for concatenation, it had the same precedence as `+`. So you can now do this:
 
 
 mlr --json --from data/server-log.json put -q '
diff --git a/docs/src/reference-dsl-operators.md.in b/docs/src/reference-dsl-operators.md.in
index 73a92d9e1..a4b0322f4 100644
--- a/docs/src/reference-dsl-operators.md.in
+++ b/docs/src/reference-dsl-operators.md.in
@@ -6,7 +6,7 @@ Operators are listed on the [DSL built-in functions page](reference-dsl-builtin-
 
 ## Operator precedence
 
-Operators are listed in order of decreasing precedence, highest first.
+Operators are listed in order of decreasing precedence, from highest to lowest.
 
 | Operators                     | Associativity |
 |-------------------------------|---------------|
@@ -30,14 +30,13 @@ Operators are listed in order of decreasing precedence, highest first.
 | `? :`                         | right to left |
 | `=`                           |  N/A for Miller (there is no $a=$b=$c) |
 
-See also the [section on parsing and operator precedence in the REPL](repl.md#parsing-and-operator-precedence)
-for information on how to examine operator precedence interactively.
+See also the [section on parsing and operator precedence in the REPL](repl.md#parsing-and-operator-precedence) for information on how to examine operator precedence interactively.
 
 ## Operator and function semantics
 
 * Functions are often pass-throughs straight to the system-standard Go libraries.
 
-* The [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max) functions are different from other multi-argument functions which return null if any of their inputs are null: for [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max), by contrast, if one argument is absent-null, the other is returned. Empty-null loses min or max against numeric or boolean; empty-null is less than any other string.
+* The [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max) functions are different from other multi-argument functions, which return null if any of their inputs are null: for [`min`](reference-dsl-builtin-functions.md#min) and [`max`](reference-dsl-builtin-functions.md#max), by contrast, if one argument is absent-null, the other is returned. Empty-null loses min or max against numeric or boolean; empty-null is less than any other string.
 
 * Symmetrically with respect to the bitwise OR, AND, and XOR operators
 [`|`](reference-dsl-builtin-functions.md#bitwise-or),
@@ -55,7 +54,7 @@ for information on how to examine operator precedence interactively.
 
 The main use for the `.` operator is for string concatenation: `"abc" . "def"` is `"abcdef"`.
 
-However, in Miller 6 it has optional use for map traversal. Example:
+However, in Miller 6, it has an optional use for map traversal. Example:
 
 GENMD-RUN-COMMAND
 cat data/server-log.json
@@ -78,7 +77,7 @@ GENMD-EOF
 
 A few caveats:
 
-* This is why `.` has higher precedece than `+` in the table above -- in Miller 5 and below, where `.` was only used for concatenation, it had the same precedence as `+`. So you can now do this:
+* This is why `.` has higher precedence than `+` in the table above -- in Miller 5 and below, where `.` was only used for concatenation, it had the same precedence as `+`. So you can now do this:
 
 GENMD-RUN-COMMAND
 mlr --json --from data/server-log.json put -q '
diff --git a/docs/src/reference-dsl-output-statements.md b/docs/src/reference-dsl-output-statements.md
index 0984b1fd5..cca9fc4ea 100644
--- a/docs/src/reference-dsl-output-statements.md
+++ b/docs/src/reference-dsl-output-statements.md
@@ -22,15 +22,15 @@ You can **output** variable-values or expressions in **five ways**:
 
 * Use **emit1**/**emit**/**emitp**/**emitf** to send out-of-stream variables' current values to the output record stream, e.g.  `@sum += $x; emit1 @sum` which produces an extra record such as `sum=3.1648382`. These records, just like records from input file(s), participate in downstream [then-chaining](reference-main-then-chaining.md) to other verbs.
 
-* Use the **print** or **eprint** keywords which immediately print an expression *directly to standard output or standard error*, respectively. Note that `dump`, `edump`, `print`, and `eprint` don't output records which participate in `then`-chaining; rather, they're just immediate prints to stdout/stderr. The `printn` and `eprintn` keywords are the same except that they don't print final newlines. Additionally, you can print to a specified file instead of stdout/stderr.
+* Use the **print** or **eprint** keywords which immediately print an expression *directly to standard output or standard error*, respectively. Note that `dump`, `edump`, `print`, and `eprint` don't output records that participate in `then`-chaining; rather, they're just immediate prints to stdout/stderr. The `printn` and `eprintn` keywords are the same except that they don't print final newlines. Additionally, you can print to a specified file instead of stdout/stderr.
 
 * Use the **dump** or **edump** keywords, which *immediately print all out-of-stream variables as a JSON data structure to the standard output or standard error* (respectively).
 
-* Use **tee** which formats the current stream record (not just an arbitrary string as with **print**) to a specific file.
+* Use **tee**, which formats the current stream record (not just an arbitrary string as with **print**) to a specific file.
 
-For the first two options you are populating the output-records stream which feeds into the next verb in a `then`-chain (if any), or which otherwise is formatted for output using `--o...` flags.
+For the first two options, you are populating the output-records stream which feeds into the next verb in a `then`-chain (if any), or which otherwise is formatted for output using `--o...` flags.
 
-For the last three options you are sending output directly to standard output, standard error, or a file.
+For the last three options, you are sending output directly to standard output, standard error, or a file.
 
 ## Print statements
 
@@ -38,7 +38,7 @@ The `print` statement is perhaps self-explanatory, but with a few light caveats:
 
 * There are four variants: `print` goes to stdout with final newline, `printn` goes to stdout without final newline (you can include one using "\n" in your output string), `eprint` goes to stderr with final newline, and `eprintn` goes to stderr without final newline.
 
-* Output goes directly to stdout/stderr, respectively: data produced this way do not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
+* Output goes directly to stdout/stderr, respectively: data produced this way does not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
 
 * Print statements are for strings (`print "hello"`), or things which can be made into strings: numbers (`print 3`, `print $a + $b`), or concatenations thereof (`print "a + b = " . ($a + $b)`). Maps (in `$*`, map-valued out-of-stream or local variables, and map literals) as well as arrays are printed as JSON.
 
@@ -62,9 +62,9 @@ The `dump` statement is for printing expressions, including maps, directly to st
 
 * There are two variants: `dump` prints to stdout; `edump` prints to stderr.
 
-* Output goes directly to stdout/stderr, respectively: data produced this way do not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
+* Output goes directly to stdout/stderr, respectively: data produced this way does not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
 
-* You can use `dump` to output single strings, numbers, or expressions including map-valued data. Map-valued data are printed as JSON.
+* You can use `dump` to output single strings, numbers, or expressions including map-valued data. Map-valued data is printed as JSON.
 
 * If you use `dump` (or `edump`) with no arguments, you get a JSON structure representing the current values of all out-of-stream variables.
 
@@ -76,7 +76,7 @@ The `dump` statement is for printing expressions, including maps, directly to st
 
 Records produced by a `mlr put` go downstream to the next verb in your `then`-chain, if any, or otherwise to standard output.  If you want to additionally copy out records to files, you can do that using `tee`.
 
-The syntax is, by example:
+The syntax is, for example:
 
 
 mlr --from myfile.dat put 'tee > "tap.dat", $*' then sort -n index
@@ -84,8 +84,7 @@ The syntax is, by example:
 
 First is `tee >`, then the filename expression (which can be an expression such as `"tap.".$a.".dat"`), then a comma, then `$*`. (Nothing else but `$*` is teeable.)
 
-You can also write to a variable file name -- for example, you can split a
-single file into multiple ones on field names:
+You can also write to a variable file name -- for example, you can split a single file into multiple ones on field names:
 
 
 mlr --csv cat example.csv
@@ -324,26 +323,12 @@ There are four variants: `emit1`, `emitf`, `emit`, and `emitp`. These are used
 to insert new records into the record stream -- or, optionally, redirect them
 to files.
 
-Keep in mind that out-of-stream variables are a nested, multi-level
-[map](reference-main-maps.md) (directly viewable as JSON using `dump`), while
-Miller record values are as well during processing -- but records may be
-flattened down for output to tabular formats. See the page [Flatten/unflatten:
-JSON vs. tabular formats](flatten-unflatten.md) for more information.
+Keep in mind that out-of-stream variables are a nested, multi-level [map](reference-main-maps.md) (directly viewable as JSON using `dump`), while Miller record values are as well during processing -- but records may be flattened down for output to tabular formats. See the page [Flatten/unflatten: JSON vs. tabular formats](flatten-unflatten.md) for more information.
 
-* You can use `emit1` to emit any map-valued expression, including `$*`,
-  map-valued out-of-stream variables, the entire out-of-stream-variable
-  collection `@*`, map-valued local variables, map literals, or map-valued
-  function return values.
-* For `emit`, `emitp`, and `emitf`, you can emit map-valued local variables,
-  map-valued field attributes (with `$`), map-va out-of-stream variables (with
-  `@`), `$*`, `@*`, or map literals (with outermost `{...}`) -- but not arbitrary
-  expressions which evaluate to map (such as function return values).
+* You can use `emit1` to emit any map-valued expression, including `$*`, map-valued out-of-stream variables, the entire out-of-stream-variable collection `@*`, map-valued local variables, map literals, or map-valued function return values.
+* For `emit`, `emitp`, and `emitf`, you can emit map-valued local variables, map-valued field attributes (with `$`), map-valued out-of-stream variables (with `@`), `$*`, `@*`, or map literals (with outermost `{...}`) -- but not arbitrary expressions which evaluate to a map (such as function return values).
 
-The reason for this is part historical and part technical. As we'll see below,
-you can do lots of syntactical things with `emit`, `emitp`, and `emitf`,
-including printing them side-by-side, index them, redirect the output to files,
-etc. What this means syntactically is that Miller's parser needs to handle all
-sorts of commas, parentheses, and so on:
+The reason for this is partly historical and partly technical. As we'll see below, you can do lots of syntactical things with `emit`, `emitp`, and `emitf`, including printing them side-by-side, indexing them, redirecting the output to files, etc. What this means syntactically is that Miller's parser needs to handle all sorts of commas, parentheses, and so on:
 
 
   emitf @count, @sum
@@ -352,12 +337,7 @@ sorts of commas, parentheses, and so on:
   # etc
 
-When we try to allow `emitf`/`emit`/`emitp` to handle arbitrary map-valued -expressions, like `mapexcept($*, mymap)` and so on, this inserts more syntactic -complexity in terms of commas, parentheses, and so on. The technical term is -_LR-1 shift-reduce conflicts_, but we can simply think of this in terms of the -parser not being able to efficiently disambiguate all the punctuational -opportunities. +When we try to allow `emitf`/`emit`/`emitp` to handle arbitrary map-valued expressions, like `mapexcept($*, mymap)` and so on, this inserts more syntactic complexity in terms of commas, parentheses, and so on. The technical term is _LR-1 shift-reduce conflicts_, but we can think of this in terms of the parser being unable to efficiently disambiguate all the punctuational opportunities. So, `emit1` can handle syntactic richness in the one thing being emitted; `emitf`, `emit`, and `emitp` can handle syntactic richness in the side-by-side @@ -365,7 +345,7 @@ placement, indexing, and redirection. (Mnemonic: If all you want is to insert a new record into the record stream, `emit1` is probably the _one_ you want.) -What this means is that if you want to emit an expression which evaluates to a map, you can do quite simply +What this means is that if you want to emit an expression that evaluates to a map, you can do it quite simply:
 mlr --c2p --from example.csv put -q '
@@ -386,7 +366,7 @@ id color  shape    flag  k  index quantity rate
 10 purple square   false 10 91    72.3735  8.2430
 
-And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other emit variants: +And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other `emit` variants:
 mlr --c2p --from example.csv put -q '
@@ -410,7 +390,7 @@ id color  shape    flag  k  index quantity rate
 
 ## Emitf statements
 
-Use **emitf** to output several out-of-stream variables side-by-side in the same output record. For `emitf` these mustn't have indexing using `@name[...]`. Example:
+Use **emitf** to output several out-of-stream variables side-by-side in the same output record. For `emitf`, these mustn't have indexing using `@name[...]`. Example:
 
 
 mlr put -q '
@@ -426,7 +406,7 @@ count=5,x_sum=2.26476,y_sum=2.585083
 
 ## Emit statements
 
-Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get a simple key-value pair:
+Use **emit** to output an out-of-stream variable. If it's non-indexed, you'll get a simple key-value pair:
 
 
 cat data/small
@@ -455,7 +435,7 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624
 sum=2.26476
 
-If it's indexed then use as many names after `emit` as there are indices: +If it's indexed, then use as many names after `emit` as there are indices:
 mlr put -q '@sum[$a] += $x; end { dump }' data/small
@@ -624,8 +604,7 @@ sum.wye.wye 0.204603
 sum.wye.pan 0.573288
 
-Use **--flatsep** to specify the character which joins multilevel -keys for `emitp` (it defaults to a colon): +Use **--flatsep** to specify the character that joins multilevel keys for `emitp` (it defaults to a colon):
 mlr --flatsep / put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
@@ -703,11 +682,11 @@ hat hat 182.8535323148762  381     0.47993053101017374
 hat pan 168.5538067327806  363     0.4643355557376876
 
-What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g. `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**. +What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g., `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**. ## Emit-all statements -Use **emit all** (or `emit @*` which is synonymous) to output all out-of-stream variables. You can use the following idiom to get various accumulators output side-by-side (reminiscent of `mlr stats1`): +Use **emit all** (or `emit @*`, which is synonymous) to output all out-of-stream variables. You can use the following idiom to get various accumulators' output side-by-side (reminiscent of `mlr stats1`):
 mlr --from data/small --opprint put -q '
diff --git a/docs/src/reference-dsl-output-statements.md.in b/docs/src/reference-dsl-output-statements.md.in
index 3b42c2bc7..bfc142209 100644
--- a/docs/src/reference-dsl-output-statements.md.in
+++ b/docs/src/reference-dsl-output-statements.md.in
@@ -6,15 +6,15 @@ You can **output** variable-values or expressions in **five ways**:
 
 * Use **emit1**/**emit**/**emitp**/**emitf** to send out-of-stream variables' current values to the output record stream, e.g.  `@sum += $x; emit1 @sum` which produces an extra record such as `sum=3.1648382`. These records, just like records from input file(s), participate in downstream [then-chaining](reference-main-then-chaining.md) to other verbs.
 
-* Use the **print** or **eprint** keywords which immediately print an expression *directly to standard output or standard error*, respectively. Note that `dump`, `edump`, `print`, and `eprint` don't output records which participate in `then`-chaining; rather, they're just immediate prints to stdout/stderr. The `printn` and `eprintn` keywords are the same except that they don't print final newlines. Additionally, you can print to a specified file instead of stdout/stderr.
+* Use the **print** or **eprint** keywords which immediately print an expression *directly to standard output or standard error*, respectively. Note that `dump`, `edump`, `print`, and `eprint` don't output records that participate in `then`-chaining; rather, they're just immediate prints to stdout/stderr. The `printn` and `eprintn` keywords are the same except that they don't print final newlines. Additionally, you can print to a specified file instead of stdout/stderr.
 
 * Use the **dump** or **edump** keywords, which *immediately print all out-of-stream variables as a JSON data structure to the standard output or standard error* (respectively).
 
-* Use **tee** which formats the current stream record (not just an arbitrary string as with **print**) to a specific file.
+* Use **tee**, which formats the current stream record (not just an arbitrary string as with **print**) to a specific file.
 
-For the first two options you are populating the output-records stream which feeds into the next verb in a `then`-chain (if any), or which otherwise is formatted for output using `--o...` flags.
+For the first two options, you are populating the output-records stream which feeds into the next verb in a `then`-chain (if any), or which otherwise is formatted for output using `--o...` flags.
 
-For the last three options you are sending output directly to standard output, standard error, or a file.
+For the last three options, you are sending output directly to standard output, standard error, or a file.
 
 ## Print statements
 
@@ -22,7 +22,7 @@ The `print` statement is perhaps self-explanatory, but with a few light caveats:
 
 * There are four variants: `print` goes to stdout with final newline, `printn` goes to stdout without final newline (you can include one using "\n" in your output string), `eprint` goes to stderr with final newline, and `eprintn` goes to stderr without final newline.
 
-* Output goes directly to stdout/stderr, respectively: data produced this way do not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
+* Output goes directly to stdout/stderr, respectively: data produced this way does not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
 
 * Print statements are for strings (`print "hello"`), or things which can be made into strings: numbers (`print 3`, `print $a + $b`), or concatenations thereof (`print "a + b = " . ($a + $b)`). Maps (in `$*`, map-valued out-of-stream or local variables, and map literals) as well as arrays are printed as JSON.
 
@@ -46,9 +46,9 @@ The `dump` statement is for printing expressions, including maps, directly to st
 
 * There are two variants: `dump` prints to stdout; `edump` prints to stderr.
 
-* Output goes directly to stdout/stderr, respectively: data produced this way do not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
+* Output goes directly to stdout/stderr, respectively: data produced this way does not go downstream to the next verb in a `then`-chain. (Use `emit` for that.)
 
-* You can use `dump` to output single strings, numbers, or expressions including map-valued data. Map-valued data are printed as JSON.
+* You can use `dump` to output single strings, numbers, or expressions including map-valued data. Map-valued data is printed as JSON.
 
 * If you use `dump` (or `edump`) with no arguments, you get a JSON structure representing the current values of all out-of-stream variables.
 
@@ -60,7 +60,7 @@ The `dump` statement is for printing expressions, including maps, directly to st
 
 Records produced by a `mlr put` go downstream to the next verb in your `then`-chain, if any, or otherwise to standard output.  If you want to additionally copy out records to files, you can do that using `tee`.
 
-The syntax is, by example:
+The syntax is, for example:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 mlr --from myfile.dat put 'tee > "tap.dat", $*' then sort -n index
@@ -68,8 +68,7 @@ GENMD-EOF
 
 First is `tee >`, then the filename expression (which can be an expression such as `"tap.".$a.".dat"`), then a comma, then `$*`. (Nothing else but `$*` is teeable.)
 
-You can also write to a variable file name -- for example, you can split a
-single file into multiple ones on field names:
+You can also write to a variable file name -- for example, you can split a single file into multiple ones on field names:
 
 GENMD-RUN-COMMAND
 mlr --csv cat example.csv
@@ -135,26 +134,12 @@ There are four variants: `emit1`, `emitf`, `emit`, and `emitp`. These are used
 to insert new records into the record stream -- or, optionally, redirect them
 to files.
 
-Keep in mind that out-of-stream variables are a nested, multi-level
-[map](reference-main-maps.md) (directly viewable as JSON using `dump`), while
-Miller record values are as well during processing -- but records may be
-flattened down for output to tabular formats. See the page [Flatten/unflatten:
-JSON vs. tabular formats](flatten-unflatten.md) for more information.
+Keep in mind that out-of-stream variables are a nested, multi-level [map](reference-main-maps.md) (directly viewable as JSON using `dump`), while Miller record values are as well during processing -- but records may be flattened down for output to tabular formats. See the page [Flatten/unflatten: JSON vs. tabular formats](flatten-unflatten.md) for more information.
 
-* You can use `emit1` to emit any map-valued expression, including `$*`,
-  map-valued out-of-stream variables, the entire out-of-stream-variable
-  collection `@*`, map-valued local variables, map literals, or map-valued
-  function return values.
-* For `emit`, `emitp`, and `emitf`, you can emit map-valued local variables,
-  map-valued field attributes (with `$`), map-va out-of-stream variables (with
-  `@`), `$*`, `@*`, or map literals (with outermost `{...}`) -- but not arbitrary
-  expressions which evaluate to map (such as function return values).
+* You can use `emit1` to emit any map-valued expression, including `$*`, map-valued out-of-stream variables, the entire out-of-stream-variable collection `@*`, map-valued local variables, map literals, or map-valued function return values.
+* For `emit`, `emitp`, and `emitf`, you can emit map-valued local variables, map-valued field attributes (with `$`), map-valued out-of-stream variables (with `@`), `$*`, `@*`, or map literals (with outermost `{...}`) -- but not arbitrary expressions which evaluate to a map (such as function return values).
 
-The reason for this is part historical and part technical. As we'll see below,
-you can do lots of syntactical things with `emit`, `emitp`, and `emitf`,
-including printing them side-by-side, index them, redirect the output to files,
-etc. What this means syntactically is that Miller's parser needs to handle all
-sorts of commas, parentheses, and so on:
+The reason for this is partly historical and partly technical. As we'll see below, you can do lots of syntactical things with `emit`, `emitp`, and `emitf`, including printing them side-by-side, indexing them, redirecting the output to files, etc. What this means syntactically is that Miller's parser needs to handle all sorts of commas, parentheses, and so on:
 
 GENMD-CARDIFY
   emitf @count, @sum
@@ -163,12 +148,7 @@ GENMD-CARDIFY
   # etc
 GENMD-EOF
 
-When we try to allow `emitf`/`emit`/`emitp` to handle arbitrary map-valued
-expressions, like `mapexcept($*, mymap)` and so on, this inserts more syntactic
-complexity in terms of commas, parentheses, and so on. The technical term is
-_LR-1 shift-reduce conflicts_, but we can simply think of this in terms of the
-parser not being able to efficiently disambiguate all the punctuational
-opportunities.
+When we try to allow `emitf`/`emit`/`emitp` to handle arbitrary map-valued expressions, like `mapexcept($*, mymap)` and so on, this inserts more syntactic complexity in terms of commas, parentheses, and so on. The technical term is _LR-1 shift-reduce conflicts_, but we can think of this in terms of the parser being unable to efficiently disambiguate all the punctuational opportunities.
 
 So, `emit1` can handle syntactic richness in the one thing being emitted;
 `emitf`, `emit`, and `emitp` can handle syntactic richness in the side-by-side
@@ -176,7 +156,7 @@ placement, indexing, and redirection.
 
 (Mnemonic: If all you want is to insert a new record into the record stream, `emit1` is probably the _one_ you want.)
 
-What this means is that if you want to emit an expression which evaluates to a map, you can do quite simply
+What this means is that if you want to emit an expression that evaluates to a map, you can do it quite simply:
 
 GENMD-RUN-COMMAND
 mlr --c2p --from example.csv put -q '
@@ -184,7 +164,7 @@ mlr --c2p --from example.csv put -q '
 '
 GENMD-EOF
 
-And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other emit variants:
+And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other `emit` variants:
 
 GENMD-RUN-COMMAND
 mlr --c2p --from example.csv put -q '
@@ -195,7 +175,7 @@ GENMD-EOF
 
 ## Emitf statements
 
-Use **emitf** to output several out-of-stream variables side-by-side in the same output record. For `emitf` these mustn't have indexing using `@name[...]`. Example:
+Use **emitf** to output several out-of-stream variables side-by-side in the same output record. For `emitf`, these mustn't have indexing using `@name[...]`. Example:
 
 GENMD-RUN-COMMAND
 mlr put -q '
@@ -208,7 +188,7 @@ GENMD-EOF
 
 ## Emit statements
 
-Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get a simple key-value pair:
+Use **emit** to output an out-of-stream variable. If it's non-indexed, you'll get a simple key-value pair:
 
 GENMD-RUN-COMMAND
 cat data/small
@@ -222,7 +202,7 @@ GENMD-RUN-COMMAND
 mlr put -q '@sum += $x; end { emit @sum }' data/small
 GENMD-EOF
 
-If it's indexed then use as many names after `emit` as there are indices:
+If it's indexed, then use as many names after `emit` as there are indices:
 
 GENMD-RUN-COMMAND
 mlr put -q '@sum[$a] += $x; end { dump }' data/small
@@ -277,8 +257,7 @@ GENMD-RUN-COMMAND
 mlr --oxtab put -q '@sum[$a][$b] += $x; end { emitp @sum }' data/small
 GENMD-EOF
 
-Use **--flatsep** to specify the character which joins multilevel
-keys for `emitp` (it defaults to a colon):
+Use **--flatsep** to specify the character that joins multilevel keys for `emitp` (it defaults to a colon):
 
 GENMD-RUN-COMMAND
 mlr --flatsep / put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
@@ -313,11 +292,11 @@ mlr --from data/medium --opprint put -q '
 '
 GENMD-EOF
 
-What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g. `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**.
+What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g., `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**.
 
 ## Emit-all statements
 
-Use **emit all** (or `emit @*` which is synonymous) to output all out-of-stream variables. You can use the following idiom to get various accumulators output side-by-side (reminiscent of `mlr stats1`):
+Use **emit all** (or `emit @*`, which is synonymous) to output all out-of-stream variables. You can use the following idiom to get various accumulators' output side-by-side (reminiscent of `mlr stats1`):
 
 GENMD-RUN-COMMAND
 mlr --from data/small --opprint put -q '
diff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md
index f2a8b45cb..9b51cdd61 100644
--- a/docs/src/reference-dsl-syntax.md
+++ b/docs/src/reference-dsl-syntax.md
@@ -63,7 +63,7 @@ hat wye 10002 0.321507044286237609 0.568893318795083758 5  9  4   2       data/s
 pan zee 10003 0.272054845593895200 0.425789896597056627 5  10 5   2       data/small2
 
-Anything from a `#` character to end of line is a code comment. +Anything from a `#` character to the end of the line is a code comment.
 mlr --opprint filter '($x > 0.5 && $y < 0.5) || ($x < 0.5 && $y > 0.5)' \
@@ -147,11 +147,11 @@ a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655
 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489
 
-A suggested use-case here is defining functions in files, and calling them from command-line expressions. +A suggested use case here is defining functions in files and calling them from command-line expressions. -Another suggested use-case is putting default parameter values in files, e.g. using `begin{@count=is_present(@count)?@count:10}` in the file, where you can precede that using `begin{@count=40}` using `-e`. +Another suggested use case is putting default parameter values in files, e.g., using `begin{@count=is_present(@count)?@count:10}` in the file, where you can precede that using `begin{@count=40}` using `-e`. -Moreover, you can have one or more `-f` expressions (maybe one function per file, for example) and one or more `-e` expressions on the command line. If you mix `-f` and `-e` then the expressions are evaluated in the order encountered. +Moreover, you can have one or more `-f` expressions (maybe one function per file, for example) and one or more `-e` expressions on the command line. If you mix `-f` and `-e`, then the expressions are evaluated in the order encountered. ## Semicolons, commas, newlines, and curly braces @@ -180,7 +180,7 @@ x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
-Semicolons are required between statements even if those statements are on separate lines. **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines: +Semicolons are required between statements, even if those statements are on separate lines. **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines:
 mlr put '
diff --git a/docs/src/reference-dsl-syntax.md.in b/docs/src/reference-dsl-syntax.md.in
index aa918c944..46e71b81f 100644
--- a/docs/src/reference-dsl-syntax.md.in
+++ b/docs/src/reference-dsl-syntax.md.in
@@ -21,7 +21,7 @@ mlr --opprint put '
 ' data/small data/small2
 GENMD-EOF
 
-Anything from a `#` character to end of line is a code comment.
+Anything from a `#` character to the end of the line is a code comment.
 
 GENMD-RUN-COMMAND
 mlr --opprint filter '($x > 0.5 && $y < 0.5) || ($x < 0.5 && $y > 0.5)' \
@@ -62,11 +62,11 @@ GENMD-RUN-COMMAND
 mlr --from data/small put -f data/fe-example-4.mlr -e '$xy = f($x, $y)'
 GENMD-EOF
 
-A suggested use-case here is defining functions in files, and calling them from command-line expressions.
+A suggested use case here is defining functions in files and calling them from command-line expressions.
 
-Another suggested use-case is putting default parameter values in files, e.g. using `begin{@count=is_present(@count)?@count:10}` in the file, where you can precede that using `begin{@count=40}` using `-e`.
+Another suggested use case is putting default parameter values in files, e.g., using `begin{@count=is_present(@count)?@count:10}` in the file, where you can precede that using `begin{@count=40}` using `-e`.
 
-Moreover, you can have one or more `-f` expressions (maybe one function per file, for example) and one or more `-e` expressions on the command line.  If you mix `-f` and `-e` then the expressions are evaluated in the order encountered.
+Moreover, you can have one or more `-f` expressions (maybe one function per file, for example) and one or more `-e` expressions on the command line.  If you mix `-f` and `-e`, then the expressions are evaluated in the order encountered.
 
 ## Semicolons, commas, newlines, and curly braces
 
@@ -84,7 +84,7 @@ GENMD-RUN-COMMAND
 echo x=1,y=2 | mlr put 'while (NF < 10) { $[NF+1] = ""}; $foo = "bar"'
 GENMD-EOF
 
-Semicolons are required between statements even if those statements are on separate lines.  **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines:
+Semicolons are required between statements, even if those statements are on separate lines.  **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines:
 
 GENMD-INCLUDE-ESCAPED(data/newline-example.txt)
 
diff --git a/docs/src/reference-dsl-user-defined-functions.md b/docs/src/reference-dsl-user-defined-functions.md
index d2be5a162..5197701de 100644
--- a/docs/src/reference-dsl-user-defined-functions.md
+++ b/docs/src/reference-dsl-user-defined-functions.md
@@ -16,7 +16,7 @@ Quick links:
 
 # DSL user-defined functions
 
-As of Miller 5.0.0 you can define your own functions, as well as subroutines.
+As of Miller 5.0.0, you can define your own functions, as well as subroutines.
 
 ## User-defined functions
 
@@ -49,7 +49,7 @@ wye pan 5 0.573288 0.863624 211.38663947090302 120
 
 Properties of user-defined functions:
 
-* Function bodies start with `func` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e. the Miller DSL has no nested functions.)
+* Function bodies start with `func` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e., the Miller DSL has no nested functions.)
 
 * A function (uniqified by its name) may not be redefined: either by redefining a user-defined function, or by redefining a built-in function. However, functions and subroutines have separate namespaces: you can define a subroutine `log` (for logging messages to stderr, say) which does not clash with the mathematical `log` (logarithm) function.
 
@@ -61,7 +61,7 @@ Properties of user-defined functions:
 
 * When a return value is not implicitly returned, this results in a return value of [absent-null](reference-main-null-data.md). (In the example above, if there were records for which the argument to `f` is non-numeric, the assignments would be skipped.) See also the [null-data reference page](reference-main-null-data.md).
 
-* See the section on [Local variables](reference-dsl-variables.md#local-variables) for information on scope and extent of arguments, as well as for information on the use of local variables within functions.
+* See the section on [Local variables](reference-dsl-variables.md#local-variables) for information on the scope and extent of arguments, as well as for information on the use of local variables within functions.
 
 * See the section on [Expressions from files](reference-dsl-syntax.md#expressions-from-files) for information on the use of `-f` and `-e` flags.
 
@@ -103,7 +103,7 @@ numcalls=15
 
 Properties of user-defined subroutines:
 
-* Subroutine bodies start with `subr` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e. the Miller DSL has no nested subroutines.)
+* Subroutine bodies start with `subr` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e., the Miller DSL has no nested subroutines.)
 
 * A subroutine (uniqified by its name) may not be redefined. However, functions and subroutines have separate namespaces: you can define a subroutine `log` which does not clash with the mathematical `log` function.
 
@@ -115,7 +115,7 @@ Properties of user-defined subroutines:
 
 * Argument values may be reassigned: they are not read-only.
 
-* See the section on [local variables](reference-dsl-variables.md#local-variables) for information on scope and extent of arguments, as well as for information on the use of local variables within functions.
+* See the section on [local variables](reference-dsl-variables.md#local-variables) for information on the scope and extent of arguments, as well as for information on the use of local variables within functions.
 
 * See the section on [Expressions from files](reference-dsl-syntax.md#expressions-from-files) for information on the use of `-f` and `-e` flags.
 
@@ -123,15 +123,11 @@ Properties of user-defined subroutines:
 
 Subroutines cannot return values, and they are invoked by the keyword `call`.
 
-In hindsight, subroutines needn't have been invented. If `foo` is a function
-then you can write `foo(1,2,3)` while ignoring its return value, and that plays
-the role of subroutine quite well.
+In hindsight, subroutines needn't have been invented. If `foo` is a function, then you can write `foo(1,2,3)` while ignoring its return value, and that plays the role of a subroutine quite well.
 
 ## Loading a library of functions
 
-If you have a file with UDFs you use frequently, say `my-udfs.mlr`, you can use
-`--load` or `--mload` to define them for your Miller scripts. For example, in
-your shell,
+If you have a file with UDFs you use frequently, say `my-udfs.mlr`, you can use `--load` or `--mload` to define them for your Miller scripts. For example, in your shell,
 
 
 alias mlr='mlr --load ~/my-functions.mlr'
@@ -149,8 +145,7 @@ See the [miscellaneous-flags page](reference-main-flag-list.md#miscellaneous-fla
 
 You can define unnamed functions and assign them to variables, or pass them to functions.
 
-See also the [page on higher-order functions](reference-dsl-higher-order-functions.md)
-for more information on
+See also the [page on higher-order functions](reference-dsl-higher-order-functions.md) for more information on
 [`select`](reference-dsl-builtin-functions.md#select),
 [`apply`](reference-dsl-builtin-functions.md#apply),
 [`reduce`](reference-dsl-builtin-functions.md#reduce),
@@ -209,9 +204,7 @@ purple square   false 10 91    72.3735  8.2430 purple:square above
 
 Note that you need a semicolon after the closing curly brace of the function literal.
 
-Unlike named functions, function literals (also known as unnamed functions)
-have access to local variables defined in their enclosing scope. That's
-so you can do things like this:
+Unlike named functions, function literals (also known as unnamed functions) have access to local variables defined in their enclosing scope. That's so you can do things like this:
 
 
 mlr --c2p --from example.csv put '
diff --git a/docs/src/reference-dsl-user-defined-functions.md.in b/docs/src/reference-dsl-user-defined-functions.md.in
index c9f0c6d7c..4d8bb0c18 100644
--- a/docs/src/reference-dsl-user-defined-functions.md.in
+++ b/docs/src/reference-dsl-user-defined-functions.md.in
@@ -1,6 +1,6 @@
 # DSL user-defined functions
 
-As of Miller 5.0.0 you can define your own functions, as well as subroutines.
+As of Miller 5.0.0, you can define your own functions, as well as subroutines.
 
 ## User-defined functions
 
@@ -25,7 +25,7 @@ GENMD-EOF
 
 Properties of user-defined functions:
 
-* Function bodies start with `func` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e. the Miller DSL has no nested functions.)
+* Function bodies start with `func` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e., the Miller DSL has no nested functions.)
 
 * A function (uniqified by its name) may not be redefined: either by redefining a user-defined function, or by redefining a built-in function. However, functions and subroutines have separate namespaces: you can define a subroutine `log` (for logging messages to stderr, say) which does not clash with the mathematical `log` (logarithm) function.
 
@@ -37,7 +37,7 @@ Properties of user-defined functions:
 
 * When a return value is not implicitly returned, this results in a return value of [absent-null](reference-main-null-data.md). (In the example above, if there were records for which the argument to `f` is non-numeric, the assignments would be skipped.) See also the [null-data reference page](reference-main-null-data.md).
 
-* See the section on [Local variables](reference-dsl-variables.md#local-variables) for information on scope and extent of arguments, as well as for information on the use of local variables within functions.
+* See the section on [Local variables](reference-dsl-variables.md#local-variables) for information on the scope and extent of arguments, as well as for information on the use of local variables within functions.
 
 * See the section on [Expressions from files](reference-dsl-syntax.md#expressions-from-files) for information on the use of `-f` and `-e` flags.
 
@@ -67,7 +67,7 @@ GENMD-EOF
 
 Properties of user-defined subroutines:
 
-* Subroutine bodies start with `subr` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e. the Miller DSL has no nested subroutines.)
+* Subroutine bodies start with `subr` and a parameter list, defined outside of `begin`, `end`, or other `func` or `subr` blocks. (I.e., the Miller DSL has no nested subroutines.)
 
 * A subroutine (uniqified by its name) may not be redefined. However, functions and subroutines have separate namespaces: you can define a subroutine `log` which does not clash with the mathematical `log` function.
 
@@ -79,7 +79,7 @@ Properties of user-defined subroutines:
 
 * Argument values may be reassigned: they are not read-only.
 
-* See the section on [local variables](reference-dsl-variables.md#local-variables) for information on scope and extent of arguments, as well as for information on the use of local variables within functions.
+* See the section on [local variables](reference-dsl-variables.md#local-variables) for information on the scope and extent of arguments, as well as for information on the use of local variables within functions.
 
 * See the section on [Expressions from files](reference-dsl-syntax.md#expressions-from-files) for information on the use of `-f` and `-e` flags.
 
@@ -87,15 +87,11 @@ Properties of user-defined subroutines:
 
 Subroutines cannot return values, and they are invoked by the keyword `call`.
 
-In hindsight, subroutines needn't have been invented. If `foo` is a function
-then you can write `foo(1,2,3)` while ignoring its return value, and that plays
-the role of subroutine quite well.
+In hindsight, subroutines needn't have been invented. If `foo` is a function, then you can write `foo(1,2,3)` while ignoring its return value, and that plays the role of a subroutine quite well.
 
 ## Loading a library of functions
 
-If you have a file with UDFs you use frequently, say `my-udfs.mlr`, you can use
-`--load` or `--mload` to define them for your Miller scripts. For example, in
-your shell,
+If you have a file with UDFs you use frequently, say `my-udfs.mlr`, you can use `--load` or `--mload` to define them for your Miller scripts. For example, in your shell,
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 alias mlr='mlr --load ~/my-functions.mlr'
@@ -113,8 +109,7 @@ See the [miscellaneous-flags page](reference-main-flag-list.md#miscellaneous-fla
 
 You can define unnamed functions and assign them to variables, or pass them to functions.
 
-See also the [page on higher-order functions](reference-dsl-higher-order-functions.md)
-for more information on
+See also the [page on higher-order functions](reference-dsl-higher-order-functions.md) for more information on
 [`select`](reference-dsl-builtin-functions.md#select),
 [`apply`](reference-dsl-builtin-functions.md#apply),
 [`reduce`](reference-dsl-builtin-functions.md#reduce),
@@ -147,9 +142,7 @@ GENMD-EOF
 
 Note that you need a semicolon after the closing curly brace of the function literal.
 
-Unlike named functions, function literals (also known as unnamed functions)
-have access to local variables defined in their enclosing scope. That's
-so you can do things like this:
+Unlike named functions, function literals (also known as unnamed functions) have access to local variables defined in their enclosing scope. That's so you can do things like this:
 
 GENMD-RUN-COMMAND
 mlr --c2p --from example.csv put '
diff --git a/docs/src/reference-dsl-variables.md b/docs/src/reference-dsl-variables.md
index 37590a365..161afc018 100644
--- a/docs/src/reference-dsl-variables.md
+++ b/docs/src/reference-dsl-variables.md
@@ -18,11 +18,11 @@ Quick links:
 
 Miller has the following kinds of variables:
 
-**Fields of stream records**, accessed using the `$` prefix. These refer to fields of the current data-stream record. For example, in `echo x=1,y=2 | mlr put '$z = $x + $y'`, `$x` and `$y` refer to input fields, and `$z` refers to a new, computed output field. In a few contexts, presented below, you can refer to the entire record as `$*`.
+**Fields of stream records**, accessed using the `$` prefix. These refer to fields of the current data-stream record. For example, in `echo x=1,y=2 | mlr put '$z = $x + $y'`, `$x` and `$y` refer to input fields, and `$z` refers to a new, computed output field. In a few contexts, presented below, you can refer to the entire record as `$*`.
 
-**Out-of-stream variables** accessed using the `@` prefix. These refer to data which persist from one record to the next, including in `begin` and `end` blocks (which execute before/after the record stream is consumed, respectively). You use them to remember values across records, such as sums, differences, counters, and so on.  In a few contexts, presented below, you can refer to the entire out-of-stream-variables collection as `@*`.
+**Out-of-stream variables**, accessed using the `@` prefix. These refer to data that persists from one record to the next, including in `begin` and `end` blocks (which execute before/after the record stream is consumed, respectively). You use them to remember values across records, such as sums, differences, and counters, among other things.  In a few contexts, presented below, you can refer to the entire out-of-stream-variables collection as `@*`.
 
-**Local variables** are limited in scope and extent to the current statements being executed: these include function arguments, bound variables in for loops, and local variables.
+**Local variables** are limited in scope and extent to the current statements being executed. These include function arguments, bound variables in for loops, and local variables.
 
 **Built-in variables** such as `NF`, `NR`, `FILENAME`, `M_PI`, and `M_E`.  These are all capital letters and are read-only (although some of them change value from one record to another).
 
@@ -32,7 +32,7 @@ Miller has the following kinds of variables:
 
 Names of fields within stream records must be specified using a `$` in [filter and put expressions](reference-dsl.md), even though the dollar signs don't appear in the data stream itself. For integer-indexed data, this looks like `awk`'s `$1,$2,$3`, except that Miller allows non-numeric names such as `$quantity` or `$hostname`.  Likewise, enclose string literals in double quotes in `filter` expressions even though they don't appear in file data.  In particular, `mlr filter '$x=="abc"'` passes through the record `x=abc`.
 
-If field names have **special characters** such as `.` then you can use braces, e.g. `'${field.name}'`.
+If field names have **special characters** such as `.`, then you can use braces, e.g., `'${field.name}'`.
 
 You may also use a **computed field name** in square brackets, e.g.
 
@@ -55,7 +55,7 @@ Their **extent** is limited to the current record; their **scope** is the `filte
 
 These are **read-write**: you can do `$y=2*$x`, `$x=$x+1`, etc.
 
-Records are Miller's output: field names present in the input stream are passed through to output (written to standard output) unless fields are removed with `cut`, or records are excluded with `filter` or `put -q`, etc. Simply assign a value to a field and it will be output.
+Records are Miller's output: field names present in the input stream are passed through to output (written to standard output) unless fields are removed with `cut`, or records are excluded with `filter` or `put -q`, etc. Simply assign a value to a field, and it will be output.
 
 ## Positional field names
 
@@ -63,7 +63,7 @@ Even though Miller's main selling point is name-indexing, sometimes you really w
 
 Use `$[[3]]` to access the name of field 3.  More generally, any expression evaluating to an integer can go between `$[[` and `]]`.
 
-Then using a computed field name, `$[ $[[3]] ]` is the value in the third field. This has the shorter equivalent notation `$[[[3]]]`.
+Then, using a computed field name, `$[ $[[3]] ]` is the value in the third field. This has the shorter equivalent notation `$[[[3]]]`.
 
 
 mlr cat data/small
@@ -131,7 +131,7 @@ a=eks,b=wye,i=4,x=NEW,y=0.134188
 a=wye,b=pan,i=5,x=0.573288,y=NEW
 
-Right-hand side accesses to non-existent fields -- i.e. with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields which already exist. For example, if a field has 5 records then assigning the name or value of the 6th (or 600th) field results in a no-op. +Right-hand side accesses to non-existent fields -- i.e., with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields that already exist. For example, if a record has 5 fields, then assigning the name or value of the 6th (or 600th) field results in a no-op.
 mlr put '$[[6]] = "NEW"' data/small
@@ -157,13 +157,13 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624
 
 !!! note
 
-    You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e. only with the verbs `put` and `filter`.
+    You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e., only with the verbs `put` and `filter`.
 
 ## Out-of-stream variables
 
-These are prefixed with an at-sign, e.g. `@sum`.  Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression which evaluates to string or integer: e.g.  `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc.
+These are prefixed with an at-sign, e.g., `@sum`.  Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression that evaluates to a string or an integer: e.g., `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc.
 
-Their names and their values are entirely under your control; they change only when you assign to them.
+Their names and their values are entirely under your control; they change only when you assign to them.
 
 Just as for field names in stream records, if you want to define out-of-stream variables with **special characters** such as `.` then you can use braces, e.g. `'@{variable.name}["index"]'`.
 
@@ -198,13 +198,13 @@ sum=5
 sum=50
 
-Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e. every time the `put` or `filter` statement referring to them is executed. +Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e., every time the `put` or `filter` statement referring to them is executed. Out-of-stream variables are **read-write**: you can do `$sum=@sum`, `@sum=$sum`, etc. ## Indexed out-of-stream variables -Using an index on the `@count` and `@sum` variables, we get the benefit of the `-g` (group-by) option which `mlr stats1` and various other Miller commands have: +Using an index on the `@count` and `@sum` variables, we get the benefit of the `-g` (group-by) option, which `mlr stats1` and various other Miller commands have:
 mlr put -q '
@@ -309,8 +309,8 @@ Local variables are similar to out-of-stream variables, except that their extent
 For example:
 
 
-# Here I'm using a specified random-number seed so this example always
-# produces the same output for this web document: in everyday practice we
+# Here I'm using a specified random-number seed, so this example always
+# produces the same output for this web document: in everyday practice, we
 # would leave off the --seed 12345 part.
 mlr --seed 12345 seqgen --start 1 --stop 10 then put '
   func f(a, b) {                          # function arguments a and b
@@ -341,7 +341,7 @@ i=10,o=15.37686787628025
 
 Things which are completely unsurprising, resembling many other languages:
 
-* Parameter names are bound to their arguments but can be reassigned, e.g. if there is a parameter named `a` then you can reassign the value of `a` to be something else within the function if you like.
+* Parameter names are bound to their arguments but can be reassigned, e.g., if there is a parameter named `a`, then you can reassign the value of `a` to be something else within the function if you like.
 
 * However, you cannot redeclare the *type* of an argument or a local: `var a=1; var a=2` is an error but `var a=1;  a=2` is OK.
 
@@ -355,13 +355,13 @@ Things which are completely unsurprising, resembling many other languages:
 
 Things which are perhaps surprising compared to other languages:
 
-* Type declarations using `var`, or typed using `num`, `int`, `float`, `str`, `bool`, `arr`, `map`, `funct` are not necessary to declare local variables.  Function arguments and variables bound in for-loops over stream records and out-of-stream variables are *implicitly* declared using `var`. (Some examples are shown below.)
+* Type declarations using `var`, or typed using `num`, `int`, `float`, `str`, `bool`, `arr`, `map`, `funct`, are not necessary to declare local variables.  Function arguments and variables bound in for-loops over stream records and out-of-stream variables are *implicitly* declared using `var`. (Some examples are shown below.)
 
-* Type-checking is done at assignment time. For example, `float f = 0` is an error (since `0` is an integer), as is `float f = 0.0; f = 1`. For this reason I prefer to use `num` over `float` in most contexts since `num` encompasses integer and floating-point values. More information is at [Type-checking](reference-dsl-variables.md#type-checking).
+* Type-checking is done at assignment time. For example, `float f = 0` is an error (since `0` is an integer), as is `float f = 0.0; f = 1`. For this reason, I prefer to use `num` over `float` in most contexts, as `num` encompasses both integer and floating-point values. For more information, refer to [Type-checking](reference-dsl-variables.md#type-checking).
 
 * Bound variables in for-loops over stream records and out-of-stream variables are implicitly local to that block. E.g. in `for (k, v in $*) { ... }` `for ((k1, k2), v in @*) { ... }` if there are `k`, `v`, etc. in the enclosing scope then those will be masked by the loop-local bound variables in the loop, and moreover the values of the loop-local bound variables are not available after the end of the loop.
 
-* For C-style triple-for loops, if a for-loop variable is defined using `var`, `int`, etc. then it is scoped to that for-loop. E.g. `for (i = 0; i < 10; i += 1) { ... }` and `for (int i = 0; i < 10; i += 1) { ... }`. (This is unsurprising.). If there is no typedecl and an outer-scope variable of that name exists, then it is used. (This is also unsurprising.) But if there is no outer-scope variable of that name, then the variable is scoped to the for-loop only.
+* For C-style triple-for loops, if a for-loop variable is defined using `var`, `int`, etc., then it is scoped to that for-loop. E.g., `for (i = 0; i < 10; i += 1) { ... }` and `for (int i = 0; i < 10; i += 1) { ... }`. (This is unsurprising.) If there is no typedecl and an outer-scope variable of that name exists, then it is used. (This is also unsurprising.) But if there is no outer-scope variable of that name, then the variable is scoped to the for-loop only.
 
 The following example demonstrates the scope rules:
 
@@ -478,7 +478,7 @@ print "outer j =", j;       # j is undefined in this scope.
 
 ## Map literals
 
-Miller's `put`/`filter` DSL has four kinds of maps. **Stream records** are (single-level) maps from name to value. **Out-of-stream variables** and **local variables** can also be maps, although they can be multi-level maps (e.g. `@sum[$x][$y]`).  The fourth kind is **map literals**. These cannot be on the left-hand side of assignment expressions. Syntactically they look like JSON, although Miller allows string and integer keys in its map literals while JSON allows only string keys (e.g. `"3"` rather than `3`). Note though that integer keys become stringified in Miller: `@mymap[3]=4` results in `@mymap` being `{"3":4}`.
+Miller's `put`/`filter` DSL has four kinds of maps. **Stream records** are (single-level) maps from name to value. **Out-of-stream variables** and **local variables** can also be maps, although they can be multi-level maps (e.g. `@sum[$x][$y]`).  The fourth kind is **map literals**. These cannot be on the left-hand side of assignment expressions. Syntactically, they look like JSON, although Miller allows string and integer keys in its map literals while JSON allows only string keys (e.g., `"3"` rather than `3`). Note, though, that integer keys become stringified in Miller: `@mymap[3]=4` results in `@mymap` being `{"3":4}`.
 
 For example, the following swaps the input stream's `a` and `i` fields, modifies `y`, and drops the rest:
 
@@ -565,7 +565,7 @@ there are the read-only separator variables `IRS`, `ORS`, `IFS`, `OFS`, `IPS`,
 and `OPS` as discussed on the [separators page](reference-main-separators.md),
 and the flatten/unflatten separator `FLATSEP` discussed on the
 [flatten/unflatten page](flatten-unflatten.md).  Lastly, the `ENV` map allows
-read/write access to environment variables, e.g.  `ENV["HOME"]` or
+read/write access to environment variables, e.g., `ENV["HOME"]` or
 `ENV["foo_".$hostname]` or `ENV["VERSION"]="1.2.3"`.
 
 
@@ -608,7 +608,7 @@ system environment variables at the time Miller starts. Any changes made to
 `ENV` by assigning to it will affect any subprocesses, such as using
 [piped tee](reference-dsl-output-statements.md#redirected-output-statements).
 
-Their **scope is global**: you can refer to them in any `filter` or `put` statement. Their values are assigned by the input-record reader:
+Their **scope is global**: you can refer to them in any `filter` or `put` statement. The input-record reader assigns their values:
 
 
 mlr --csv put '$nr = NR' data/a.csv
@@ -634,11 +634,11 @@ a,b,c,nr
 
 The **extent** is for the duration of the put/filter: in a `begin` statement (which executes before the first input record is consumed) you will find `NR=1` and in an `end` statement (which is executed after the last input record is consumed) you will find `NR` to be the total number of records ingested.
 
-These are all **read-only** for the `mlr put` and `mlr filter` DSL: they may be assigned from, e.g. `$nr=NR`, but they may not be assigned to: `NR=100` is a syntax error.
+These are all **read-only** for the `mlr put` and `mlr filter` DSL: they may be assigned from, e.g., `$nr=NR`, but they may not be assigned to: `NR=100` is a syntax error.
 
 ## Type-checking
 
-Miller's `put`/`filter` DSL supports two optional kinds of type-checking.  One is inline **type-tests** and **type-assertions** within expressions.  The other is **type declarations** for assignments to local variables, binding of arguments to user-defined functions, and return values from user-defined functions, These are discussed in the following subsections.
+Miller's `put`/`filter` DSL supports two optional kinds of type-checking.  One is inline **type tests** and **type assertions** within expressions.  The other is **type declarations** for assignments to local variables, binding of arguments to user-defined functions, and return values from user-defined functions. These are discussed in the following subsections.
 
 Use of type-checking is entirely up to you: omit it if you want flexibility with heterogeneous data; use it if you want to help catch misspellings in your DSL code or unexpected irregularities in your input data.
 
@@ -699,22 +699,22 @@ asserting_string
 
 See [Data-cleaning Examples](data-cleaning-examples.md) for examples of how to use these.
 
-### Type-declarations for local variables, function parameter, and function return values
+### Type declarations for local variables, function parameters, and function return values
 
 Local variables can be defined either untyped as in `x = 1`, or typed as in `int x = 1`. Types include **var** (explicitly untyped), **int**, **float**, **num** (int or float), **str**, **bool**, **arr**, **map**, and **funct**. These optional type declarations are enforced at the time values are assigned to variables: whether at the initial value assignment as in `int x = 1` or in any subsequent assignments to the same variable farther down in the scope.
 
 The reason for `num` is that `int` and `float` typedecls are very precise:
 
 
-float a = 0;   # Runtime error since 0 is int not float
-int   b = 1.0; # Runtime error since 1.0 is float not int
+float a = 0;   # Runtime error since 0 is int, not float
+int   b = 1.0; # Runtime error since 1.0 is float, not int
 num   c = 0;   # OK
 num   d = 1.0; # OK
 
-A suggestion is to use `num` for general use when you want numeric content, and use `int` when you genuinely want integer-only values, e.g. in loop indices or map keys (since Miller map keys can only be strings or ints). +A suggestion is to use `num` for general use when you want numeric content, and use `int` when you genuinely want integer-only values, e.g., in loop indices or map keys (since Miller map keys can only be strings or ints). -The `var` type declaration indicates no type restrictions, e.g. `var x = 1` has the same type restrictions on `x` as `x = 1`. The difference is in intentional shadowing: if you have `x = 1` in outer scope and `x = 2` in inner scope (e.g. within a for-loop or an if-statement) then outer-scope `x` has value 2 after the second assignment. But if you have `var x = 2` in the inner scope, then you are declaring a variable scoped to the inner block.) For example: +The `var` type declaration indicates no type restrictions, e.g., `var x = 1` has the same type restrictions on `x` as `x = 1`. The difference is in intentional shadowing: if you have `x = 1` in outer scope and `x = 2` in inner scope (e.g., within a for-loop or an if-statement) then outer-scope `x` has value 2 after the second assignment. But if you have `var x = 2` in the inner scope, then you are declaring a variable scoped to the inner block. For example:
 x = 1;
@@ -732,7 +732,7 @@ if (NR == 4) {
 print x;     # Value of this x is still 1
 
-Likewise function arguments can optionally be typed, with type enforced when the function is called: +Likewise, function arguments can optionally be typed, with type enforced when the function is called:
 func f(map m, int i) {
@@ -764,7 +764,7 @@ func f(map m, int i): bool {
   }
   ...
   ...
-  # In Miller if your functions don't explicitly return a value, they return absent-null.
+  # In Miller, if your functions don't explicitly return a value, they return absent-null.
   # So it would also be a runtime error on reaching the end of this function without
   # an explicit return statement.
 }
@@ -845,7 +845,7 @@ Example recursive copy of out-of-stream variables:
 }
 
-Example of out-of-stream variable assigned to full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that: +Example of an out-of-stream variable assigned to the full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that:
 mlr put 'NR == 2 {@keep = $*}; NR == 4 {$* = @keep}' data/small
diff --git a/docs/src/reference-dsl-variables.md.in b/docs/src/reference-dsl-variables.md.in
index 7871b148e..0b9ddf60b 100644
--- a/docs/src/reference-dsl-variables.md.in
+++ b/docs/src/reference-dsl-variables.md.in
@@ -2,11 +2,11 @@
 
 Miller has the following kinds of variables:
 
-**Fields of stream records**, accessed using the `$` prefix. These refer to fields of the current data-stream record. For example, in `echo x=1,y=2 | mlr put '$z = $x + $y'`, `$x` and `$y` refer to input fields, and `$z` refers to a new, computed output field. In a few contexts, presented below, you can refer to the entire record as `$*`.
+**Fields of stream records**, accessed using the `$` prefix. These refer to fields of the current data-stream record. For example, in `echo x=1,y=2 | mlr put '$z = $x + $y'`, `$x` and `$y` refer to input fields, and `$z` refers to a new, computed output field. In a few contexts, described below, you can refer to the entire record as `$*`.
 
-**Out-of-stream variables** accessed using the `@` prefix. These refer to data which persist from one record to the next, including in `begin` and `end` blocks (which execute before/after the record stream is consumed, respectively). You use them to remember values across records, such as sums, differences, counters, and so on.  In a few contexts, presented below, you can refer to the entire out-of-stream-variables collection as `@*`.
+**Out-of-stream variables**, accessed using the `@` prefix. These refer to data that persists from one record to the next, including in `begin` and `end` blocks (which execute before/after the record stream is consumed, respectively). You use them to remember values across records, such as sums, differences, and counters.  In a few contexts, described below, you can refer to the entire out-of-stream-variables collection as `@*`.
 
-**Local variables** are limited in scope and extent to the current statements being executed: these include function arguments, bound variables in for loops, and local variables.
+**Local variables** are limited in scope and extent to the current statements being executed; these include function arguments, bound variables in for loops, and local variables.
 
 **Built-in variables** such as `NF`, `NR`, `FILENAME`, `M_PI`, and `M_E`.  These are all capital letters and are read-only (although some of them change value from one record to another).
 
@@ -16,7 +16,7 @@ Miller has the following kinds of variables:
 
 Names of fields within stream records must be specified using a `$` in [filter and put expressions](reference-dsl.md), even though the dollar signs don't appear in the data stream itself. For integer-indexed data, this looks like `awk`'s `$1,$2,$3`, except that Miller allows non-numeric names such as `$quantity` or `$hostname`.  Likewise, enclose string literals in double quotes in `filter` expressions even though they don't appear in file data.  In particular, `mlr filter '$x=="abc"'` passes through the record `x=abc`.
 
-If field names have **special characters** such as `.` then you can use braces, e.g. `'${field.name}'`.
+If field names have **special characters** such as `.`, then you can use braces, e.g. `'${field.name}'`.
 
 You may also use a **computed field name** in square brackets, e.g.
 
@@ -36,7 +36,7 @@ Their **extent** is limited to the current record; their **scope** is the `filte
 
 These are **read-write**: you can do `$y=2*$x`, `$x=$x+1`, etc.
 
-Records are Miller's output: field names present in the input stream are passed through to output (written to standard output) unless fields are removed with `cut`, or records are excluded with `filter` or `put -q`, etc. Simply assign a value to a field and it will be output.
+Records are Miller's output: field names present in the input stream are passed through to output (written to standard output) unless fields are removed with `cut`, or records are excluded with `filter` or `put -q`, etc. Simply assign a value to a field, and it will be output.
 
 ## Positional field names
 
@@ -44,7 +44,7 @@ Even though Miller's main selling point is name-indexing, sometimes you really w
 
 Use `$[[3]]` to access the name of field 3.  More generally, any expression evaluating to an integer can go between `$[[` and `]]`.
 
-Then using a computed field name, `$[ $[[3]] ]` is the value in the third field. This has the shorter equivalent notation `$[[[3]]]`.
+Then, using a computed field name, `$[ $[[3]] ]` is the value in the third field. This has the shorter equivalent notation `$[[[3]]]`.
 
 GENMD-RUN-COMMAND
 mlr cat data/small
@@ -70,7 +70,7 @@ GENMD-RUN-COMMAND
 mlr put '$[[[NR]]] = "NEW"' data/small
 GENMD-EOF
 
-Right-hand side accesses to non-existent fields -- i.e. with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields which already exist. For example, if a field has 5 records then assigning the name or value of the 6th (or 600th) field results in a no-op.
+Right-hand side accesses to non-existent fields -- i.e., with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields that already exist. For example, if a record has 5 fields, then assigning the name or value of the 6th (or 600th) field results in a no-op.
 
 GENMD-RUN-COMMAND
 mlr put '$[[6]] = "NEW"' data/small
@@ -82,13 +82,13 @@ GENMD-EOF
 
 !!! note
 
-    You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e. only with the verbs `put` and `filter`.
+    You can use positional field names only in the [Miller DSL](reference-dsl.md), i.e., only with the verbs `put` and `filter`.
 
 ## Out-of-stream variables
 
-These are prefixed with an at-sign, e.g. `@sum`.  Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression which evaluates to string or integer: e.g.  `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc.
+These are prefixed with an at-sign, e.g., `@sum`.  Furthermore, unlike built-in variables and stream-record fields, they are maintained in an arbitrarily nested map: you can do `@sum += $quantity`, or `@sum[$color] += $quantity`, or `@sum[$color][$shape] += $quantity`. The keys for the multi-level map can be any expression that evaluates to a string or an integer, e.g., `@sum[NR] = $a + $b`, `@sum[$a."-".$b] = $x`, etc.
 
-Their names and their values are entirely under your control; they change only when you assign to them.
+Their names and their values are entirely under your control; they change only when you assign to them.
 
 Just as for field names in stream records, if you want to define out-of-stream variables with **special characters** such as `.` then you can use braces, e.g. `'@{variable.name}["index"]'`.
 
@@ -110,13 +110,13 @@ mlr put '@sum += $a; end {emit @sum}' \
   data/a.dkvp
 GENMD-EOF
 
-Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e. every time the `put` or `filter` statement referring to them is executed.
+Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e., every time the `put` or `filter` statement referring to them is executed.
 
 Out-of-stream variables are **read-write**: you can do `$sum=@sum`, `@sum=$sum`, etc.
 
 ## Indexed out-of-stream variables
 
-Using an index on the `@count` and `@sum` variables, we get the benefit of the `-g` (group-by) option which `mlr stats1` and various other Miller commands have:
+Using an index on the `@count` and `@sum` variables, we get the benefit of the `-g` (group-by) option, which `mlr stats1` and various other Miller commands have:
 
 GENMD-RUN-COMMAND
 mlr put -q '
@@ -173,8 +173,8 @@ Local variables are similar to out-of-stream variables, except that their extent
 For example:
 
 GENMD-RUN-COMMAND
-# Here I'm using a specified random-number seed so this example always
-# produces the same output for this web document: in everyday practice we
+# Here I'm using a specified random-number seed, so this example always
+# produces the same output for this web document: in everyday practice, we
 # would leave off the --seed 12345 part.
 mlr --seed 12345 seqgen --start 1 --stop 10 then put '
   func f(a, b) {                          # function arguments a and b
@@ -193,7 +193,7 @@ GENMD-EOF
 
 Things which are completely unsurprising, resembling many other languages:
 
-* Parameter names are bound to their arguments but can be reassigned, e.g. if there is a parameter named `a` then you can reassign the value of `a` to be something else within the function if you like.
+* Parameter names are bound to their arguments but can be reassigned, e.g., if there is a parameter named `a`, then you can reassign the value of `a` to be something else within the function if you like.
 
 * However, you cannot redeclare the *type* of an argument or a local: `var a=1; var a=2` is an error but `var a=1;  a=2` is OK.
 
@@ -207,13 +207,13 @@ Things which are completely unsurprising, resembling many other languages:
 
 Things which are perhaps surprising compared to other languages:
 
-* Type declarations using `var`, or typed using `num`, `int`, `float`, `str`, `bool`, `arr`, `map`, `funct` are not necessary to declare local variables.  Function arguments and variables bound in for-loops over stream records and out-of-stream variables are *implicitly* declared using `var`. (Some examples are shown below.)
+* Type declarations using `var`, or typed using `num`, `int`, `float`, `str`, `bool`, `arr`, `map`, `funct`, are not necessary to declare local variables.  Function arguments and variables bound in for-loops over stream records and out-of-stream variables are *implicitly* declared using `var`. (Some examples are shown below.)
 
-* Type-checking is done at assignment time. For example, `float f = 0` is an error (since `0` is an integer), as is `float f = 0.0; f = 1`. For this reason I prefer to use `num` over `float` in most contexts since `num` encompasses integer and floating-point values. More information is at [Type-checking](reference-dsl-variables.md#type-checking).
+* Type-checking is done at assignment time. For example, `float f = 0` is an error (since `0` is an integer), as is `float f = 0.0; f = 1`. For this reason, I prefer to use `num` over `float` in most contexts, as `num` encompasses both integer and floating-point values. For more information, refer to [Type-checking](reference-dsl-variables.md#type-checking).
 
 * Bound variables in for-loops over stream records and out-of-stream variables are implicitly local to that block. E.g. in `for (k, v in $*) { ... }` `for ((k1, k2), v in @*) { ... }` if there are `k`, `v`, etc. in the enclosing scope then those will be masked by the loop-local bound variables in the loop, and moreover the values of the loop-local bound variables are not available after the end of the loop.
 
-* For C-style triple-for loops, if a for-loop variable is defined using `var`, `int`, etc. then it is scoped to that for-loop. E.g. `for (i = 0; i < 10; i += 1) { ... }` and `for (int i = 0; i < 10; i += 1) { ... }`. (This is unsurprising.). If there is no typedecl and an outer-scope variable of that name exists, then it is used. (This is also unsurprising.) But if there is no outer-scope variable of that name, then the variable is scoped to the for-loop only.
+* For C-style triple-for loops, if a for-loop variable is defined using `var`, `int`, etc., then it is scoped to that for-loop. E.g., `for (i = 0; i < 10; i += 1) { ... }` and `for (int i = 0; i < 10; i += 1) { ... }`. (This is unsurprising.) If there is no typedecl and an outer-scope variable of that name exists, then it is used. (This is also unsurprising.) But if there is no outer-scope variable of that name, then the variable is scoped to the for-loop only.
 
 The following example demonstrates the scope rules:
 
@@ -237,7 +237,7 @@ GENMD-EOF
 
 ## Map literals
 
-Miller's `put`/`filter` DSL has four kinds of maps. **Stream records** are (single-level) maps from name to value. **Out-of-stream variables** and **local variables** can also be maps, although they can be multi-level maps (e.g. `@sum[$x][$y]`).  The fourth kind is **map literals**. These cannot be on the left-hand side of assignment expressions. Syntactically they look like JSON, although Miller allows string and integer keys in its map literals while JSON allows only string keys (e.g. `"3"` rather than `3`). Note though that integer keys become stringified in Miller: `@mymap[3]=4` results in `@mymap` being `{"3":4}`.
+Miller's `put`/`filter` DSL has four kinds of maps. **Stream records** are (single-level) maps from name to value. **Out-of-stream variables** and **local variables** can also be maps, although they can be multi-level maps (e.g. `@sum[$x][$y]`).  The fourth kind is **map literals**. These cannot be on the left-hand side of assignment expressions. Syntactically, they look like JSON, although Miller allows string and integer keys in its map literals while JSON allows only string keys (e.g., `"3"` rather than `3`). Note, though, that integer keys become stringified in Miller: `@mymap[3]=4` results in `@mymap` being `{"3":4}`.
 
 For example, the following swaps the input stream's `a` and `i` fields, modifies `y`, and drops the rest:
 
@@ -300,7 +300,7 @@ there are the read-only separator variables `IRS`, `ORS`, `IFS`, `OFS`, `IPS`,
 and `OPS` as discussed on the [separators page](reference-main-separators.md),
 and the flatten/unflatten separator `FLATSEP` discussed on the
 [flatten/unflatten page](flatten-unflatten.md).  Lastly, the `ENV` map allows
-read/write access to environment variables, e.g.  `ENV["HOME"]` or
+read/write access to environment variables, e.g., `ENV["HOME"]` or
 `ENV["foo_".$hostname]` or `ENV["VERSION"]="1.2.3"`.
 
 
@@ -320,7 +320,7 @@ system environment variables at the time Miller starts. Any changes made to
 `ENV` by assigning to it will affect any subprocesses, such as using
 [piped tee](reference-dsl-output-statements.md#redirected-output-statements).
 
-Their **scope is global**: you can refer to them in any `filter` or `put` statement. Their values are assigned by the input-record reader:
+Their **scope is global**: you can refer to them in any `filter` or `put` statement. The input-record reader assigns their values:
 
 GENMD-RUN-COMMAND
 mlr --csv put '$nr = NR' data/a.csv
@@ -332,11 +332,11 @@ GENMD-EOF
 
 The **extent** is for the duration of the put/filter: in a `begin` statement (which executes before the first input record is consumed) you will find `NR=1` and in an `end` statement (which is executed after the last input record is consumed) you will find `NR` to be the total number of records ingested.
 
-These are all **read-only** for the `mlr put` and `mlr filter` DSL: they may be assigned from, e.g. `$nr=NR`, but they may not be assigned to: `NR=100` is a syntax error.
+These are all **read-only** for the `mlr put` and `mlr filter` DSL: they may be assigned from, e.g., `$nr=NR`, but they may not be assigned to: `NR=100` is a syntax error.
 
 ## Type-checking
 
-Miller's `put`/`filter` DSL supports two optional kinds of type-checking.  One is inline **type-tests** and **type-assertions** within expressions.  The other is **type declarations** for assignments to local variables, binding of arguments to user-defined functions, and return values from user-defined functions, These are discussed in the following subsections.
+Miller's `put`/`filter` DSL supports two optional kinds of type-checking.  One is inline **type tests** and **type assertions** within expressions.  The other is **type declarations** for assignments to local variables, binding of arguments to user-defined functions, and return values from user-defined functions. These are discussed in the following subsections.
 
 Use of type-checking is entirely up to you: omit it if you want flexibility with heterogeneous data; use it if you want to help catch misspellings in your DSL code or unexpected irregularities in your input data.
 
@@ -354,22 +354,22 @@ GENMD-EOF
 
 See [Data-cleaning Examples](data-cleaning-examples.md) for examples of how to use these.
 
-### Type-declarations for local variables, function parameter, and function return values
+### Type declarations for local variables, function parameters, and function return values
 
 Local variables can be defined either untyped as in `x = 1`, or typed as in `int x = 1`. Types include **var** (explicitly untyped), **int**, **float**, **num** (int or float), **str**, **bool**, **arr**, **map**, and **funct**. These optional type declarations are enforced at the time values are assigned to variables: whether at the initial value assignment as in `int x = 1` or in any subsequent assignments to the same variable farther down in the scope.
 
 The reason for `num` is that `int` and `float` typedecls are very precise:
 
 GENMD-CARDIFY
-float a = 0;   # Runtime error since 0 is int not float
-int   b = 1.0; # Runtime error since 1.0 is float not int
+float a = 0;   # Runtime error since 0 is int, not float
+int   b = 1.0; # Runtime error since 1.0 is float, not int
 num   c = 0;   # OK
 num   d = 1.0; # OK
 GENMD-EOF
 
-A suggestion is to use `num` for general use when you want numeric content, and use `int` when you genuinely want integer-only values, e.g. in loop indices or map keys (since Miller map keys can only be strings or ints).
+A suggestion is to use `num` for general use when you want numeric content, and use `int` when you genuinely want integer-only values, e.g., in loop indices or map keys (since Miller map keys can only be strings or ints).
 
-The `var` type declaration indicates no type restrictions, e.g. `var x = 1` has the same type restrictions on `x` as `x = 1`. The difference is in intentional shadowing: if you have `x = 1` in outer scope and `x = 2` in inner scope (e.g. within a for-loop or an if-statement) then outer-scope `x` has value 2 after the second assignment.  But if you have `var x = 2` in the inner scope, then you are declaring a variable scoped to the inner block.) For example:
+The `var` type declaration indicates no type restrictions, e.g., `var x = 1` has the same type restrictions on `x` as `x = 1`. The difference is in intentional shadowing: if you have `x = 1` in outer scope and `x = 2` in inner scope (e.g., within a for-loop or an if-statement), then outer-scope `x` has value 2 after the second assignment.  But if you have `var x = 2` in the inner scope, then you are declaring a variable scoped to the inner block. For example:
 
 GENMD-CARDIFY
 x = 1;
@@ -387,7 +387,7 @@ if (NR == 4) {
 print x;     # Value of this x is still 1
 GENMD-EOF
 
-Likewise function arguments can optionally be typed, with type enforced when the function is called:
+Likewise, function arguments can optionally be typed, with type enforced when the function is called:
 
 GENMD-CARDIFY
 func f(map m, int i) {
@@ -419,7 +419,7 @@ func f(map m, int i): bool {
   }
   ...
   ...
-  # In Miller if your functions don't explicitly return a value, they return absent-null.
+  # In Miller, if your functions don't explicitly return a value, they return absent-null.
   # So it would also be a runtime error on reaching the end of this function without
   # an explicit return statement.
 }
@@ -482,7 +482,7 @@ mlr --opprint --from data/small put -q '
 '
 GENMD-EOF
 
-Example of out-of-stream variable assigned to full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that:
+Example of an out-of-stream variable assigned to the full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that:
 
 GENMD-RUN-COMMAND
 mlr put 'NR == 2 {@keep = $*}; NR == 4 {$* = @keep}' data/small
diff --git a/docs/src/reference-main-overview.md b/docs/src/reference-main-overview.md
index cc9c3a0b3..b7e1a97c4 100644
--- a/docs/src/reference-main-overview.md
+++ b/docs/src/reference-main-overview.md
@@ -66,7 +66,7 @@ See also the [Glossary](glossary.md) for more about terms such as
 
 When you type `mlr {something} myfile.dat`, the `{something}` part is called a **verb**. It specifies how you want to transform your data. Most of the verbs are counterparts of built-in system tools like `cut` and `sort` -- but with file-format awareness, and giving you the ability to refer to fields by name.
 
-The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found at on the [Intro to Miller's programming language page](miller-programming-language.md); see also [DSL reference](reference-dsl.md) for more details.
+The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found on the [Intro to Miller's Programming Language page](miller-programming-language.md); see also the [DSL Reference](reference-dsl.md) for more details.
 
 Here's a comparison of verbs and `put`/`filter` DSL expressions:
 
diff --git a/docs/src/reference-main-overview.md.in b/docs/src/reference-main-overview.md.in
index 413b358e7..42c3b8f0c 100644
--- a/docs/src/reference-main-overview.md.in
+++ b/docs/src/reference-main-overview.md.in
@@ -35,7 +35,7 @@ See also the [Glossary](glossary.md) for more about terms such as
 
 When you type `mlr {something} myfile.dat`, the `{something}` part is called a **verb**. It specifies how you want to transform your data. Most of the verbs are counterparts of built-in system tools like `cut` and `sort` -- but with file-format awareness, and giving you the ability to refer to fields by name.
 
-The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found at on the [Intro to Miller's programming language page](miller-programming-language.md); see also [DSL reference](reference-dsl.md) for more details.
+The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found on the [Intro to Miller's Programming Language page](miller-programming-language.md); see also the [DSL Reference](reference-dsl.md) for more details.
 
 Here's a comparison of verbs and `put`/`filter` DSL expressions:
 
diff --git a/docs/src/structure-of-these-documents.md b/docs/src/structure-of-these-documents.md
index 5d2993ee7..cdaeef8a9 100644
--- a/docs/src/structure-of-these-documents.md
+++ b/docs/src/structure-of-these-documents.md
@@ -19,13 +19,13 @@ Quick links:
 The goal is _multiple levels of detail_.
 
 * The [Introduction page](index.md) is the shortest: headlines and **essential summary**.
-* The _Getting started_ section is for **new or near-new users** who want some simple examples along with connecting narrative. The goal is to get a new user up and running, able to do some interesting things with their own data.
+* The _Getting started_ section is for **new or near-new users** who want some simple examples along with a connecting narrative. The goal is to get a new user up and running, enabling them to perform interesting tasks with their own data.
 * The _Miller in more detail_ section is just-past-introductory, **tell-me-more material** about some of the things that make Miller unique: what file formats it handles (and how it handles them), how it relates to other tools in the Unix toolkit, and so on.
 * The _FAQs and examples_ section is non-introductory for people looking for various ways to do things by example. The discussion is pragmatic rather than theoretical, and **use-case-driven**.
 * The _Background_ section is some **non-essential historical** and meta material on why Miller was created.
-* The _Reference_ section aims to answer all questions the previous sections didn't. The discussion is **concept-driven**, although there are still plenty of examples throughout for concreteness.
-    * _Main reference_ goes carefully through various aspects of Miller, concept by concept.
-    * _DSL reference_ focuses on the [Miller programming language](miller-programming-language.md), again following a concept-at-a-time approach.
-    * _Misc. reference_ is aptly named, with things like build-from-source notes.
-    * _Documents for previous releases_ is not only for historical curiosity -- experience has shown that various Linux/BSD distros update their Miller versions on their own cadences, so the version on your system (as shown by `mlr --version`) might be best-served by its respective documentation version.
-* Lastly, new with the Miller 6 documents is a very easy-to-access **Search field** at the top of each page.
+* The _Reference_ section aims to answer all questions that the previous sections didn't. The discussion is **concept-driven**, although it includes numerous examples throughout for concreteness.
+    * The _main reference_ carefully examines various aspects of Miller, concept by concept.
+    * The _DSL reference_ focuses on the [Miller programming language](miller-programming-language.md), again following a concept-at-a-time approach.
+    * The _miscellaneous reference_ is aptly named, with things like build-from-source notes.
+    * _Documents for previous releases_ is not only for historical curiosity -- experience has shown that various Linux/BSD distros update their Miller versions on their own cadences, so the version on your system (as shown by `mlr --version`) might be best served by its respective documentation version.
+* Lastly, new with the Miller 6 documents is an easy-to-access **Search field** at the top of each page.
diff --git a/docs/src/structure-of-these-documents.md.in b/docs/src/structure-of-these-documents.md.in
index 9cb40d3a1..29a558d51 100644
--- a/docs/src/structure-of-these-documents.md.in
+++ b/docs/src/structure-of-these-documents.md.in
@@ -3,13 +3,13 @@
 The goal is _multiple levels of detail_.
 
 * The [Introduction page](index.md) is the shortest: headlines and **essential summary**.
-* The _Getting started_ section is for **new or near-new users** who want some simple examples along with connecting narrative. The goal is to get a new user up and running, able to do some interesting things with their own data.
+* The _Getting started_ section is for **new or near-new users** who want some simple examples along with a connecting narrative. The goal is to get a new user up and running, enabling them to perform interesting tasks with their own data.
 * The _Miller in more detail_ section is just-past-introductory, **tell-me-more material** about some of the things that make Miller unique: what file formats it handles (and how it handles them), how it relates to other tools in the Unix toolkit, and so on.
 * The _FAQs and examples_ section is non-introductory for people looking for various ways to do things by example. The discussion is pragmatic rather than theoretical, and **use-case-driven**.
 * The _Background_ section is some **non-essential historical** and meta material on why Miller was created.
-* The _Reference_ section aims to answer all questions the previous sections didn't. The discussion is **concept-driven**, although there are still plenty of examples throughout for concreteness.
-    * _Main reference_ goes carefully through various aspects of Miller, concept by concept.
-    * _DSL reference_ focuses on the [Miller programming language](miller-programming-language.md), again following a concept-at-a-time approach.
-    * _Misc. reference_ is aptly named, with things like build-from-source notes.
-    * _Documents for previous releases_ is not only for historical curiosity -- experience has shown that various Linux/BSD distros update their Miller versions on their own cadences, so the version on your system (as shown by `mlr --version`) might be best-served by its respective documentation version.
-* Lastly, new with the Miller 6 documents is a very easy-to-access **Search field** at the top of each page.
+* The _Reference_ section aims to answer all questions that the previous sections didn't. The discussion is **concept-driven**, although it includes numerous examples throughout for concreteness.
+    * The _main reference_ carefully examines various aspects of Miller, concept by concept.
+    * The _DSL reference_ focuses on the [Miller programming language](miller-programming-language.md), again following a concept-at-a-time approach.
+    * The _miscellaneous reference_ is aptly named, with things like build-from-source notes.
+    * _Documents for previous releases_ is not only for historical curiosity -- experience has shown that various Linux/BSD distros update their Miller versions on their own cadences, so the version on your system (as shown by `mlr --version`) might be best served by its respective documentation version.
+* Lastly, new with the Miller 6 documents is an easy-to-access **Search field** at the top of each page.
diff --git a/docs/src/swipes.sh b/docs/src/swipes.sh
new file mode 100755
index 000000000..f5f1064f2
--- /dev/null
+++ b/docs/src/swipes.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for x in *.md.in; do
+    sed -i .emd 's/  *$//' $x
+    rm $x.emd
+done
diff --git a/docs/src/unix-toolkit-context.md b/docs/src/unix-toolkit-context.md
index 1687f4868..ffc8ede78 100644
--- a/docs/src/unix-toolkit-context.md
+++ b/docs/src/unix-toolkit-context.md
@@ -63,9 +63,9 @@ Likewise with `mlr sort`, `mlr tac`, and so on.
 
 ## awk-like features: mlr filter and mlr put
 
-* `mlr filter` includes/excludes records based on a filter expression, e.g. `mlr filter '$count > 10'`.
+* `mlr filter` includes/excludes records based on a filter expression, e.g., `mlr filter '$count > 10'`.
 
-* `mlr put` adds a new field as a function of others, e.g. `mlr put '$xy = $x * $y'` or `mlr put '$counter = NR'`.
+* `mlr put` adds a new field as a function of others, e.g., `mlr put '$xy = $x * $y'` or `mlr put '$counter = NR'`.
 
 * The `$name` syntax is straight from `awk`'s `$1 $2 $3` (adapted to name-based indexing), as are the variables `FS`, `OFS`, `RS`, `ORS`, `NF`, `NR`, and `FILENAME`. The `ENV[...]` syntax is from Ruby.
 
@@ -73,7 +73,7 @@ Likewise with `mlr sort`, `mlr tac`, and so on.
 
 * Like `awk`, Miller (as of v5.0.0) allows you to define new functions within its `put` and `filter` expression language.  Further programmability comes from chaining with `then`.
 
-* As with `awk`, `$`-variables are stream variables and all verbs (such as `cut`, `stats1`, `put`, etc.) as well as `put`/`filter` statements operate on streams.  This means that you define actions to be done on each record and then stream your data through those actions.  The built-in variables `NF`, `NR`, etc.  change from one record to another, `$x` is a label for field `x` in the current record, and the input to `sqrt($x)` changes from one record to the next.  The expression language for the `put` and `filter` verbs additionally allows you to define `begin {...}` and `end {...}` blocks for actions to be taken before and after records are processed, respectively.
+* As with `awk`, `$`-variables are stream variables and all verbs (such as `cut`, `stats1`, `put`, etc.) as well as `put`/`filter` statements operate on streams.  This means that you define actions to be done on each record and then stream your data through those actions.  The built-in variables `NF`, `NR`, etc., change from one record to another, `$x` is a label for field `x` in the current record, and the input to `sqrt($x)` changes from one record to the next.  The expression language for the `put` and `filter` verbs additionally allows you to define `begin {...}` and `end {...}` blocks for actions to be taken before and after records are processed, respectively.
 
 * As with `awk`, Miller's `put`/`filter` language lets you set `@sum=0` before records are read, then update that sum on each record, then print its value at the end.  Unlike `awk`, Miller makes syntactically explicit the difference between variables with extent across all records (names starting with `@`, such as `@sum`) and variables which are local to the current expression invocation (names starting without `@`, such as `sum`).
 
diff --git a/docs/src/unix-toolkit-context.md.in b/docs/src/unix-toolkit-context.md.in
index bea7b27f3..14da2d777 100644
--- a/docs/src/unix-toolkit-context.md.in
+++ b/docs/src/unix-toolkit-context.md.in
@@ -26,9 +26,9 @@ Likewise with `mlr sort`, `mlr tac`, and so on.
 
 ## awk-like features: mlr filter and mlr put
 
-* `mlr filter` includes/excludes records based on a filter expression, e.g. `mlr filter '$count > 10'`.
+* `mlr filter` includes/excludes records based on a filter expression, e.g., `mlr filter '$count > 10'`.
 
-* `mlr put` adds a new field as a function of others, e.g. `mlr put '$xy = $x * $y'` or `mlr put '$counter = NR'`.
+* `mlr put` adds a new field as a function of others, e.g., `mlr put '$xy = $x * $y'` or `mlr put '$counter = NR'`.
 
 * The `$name` syntax is straight from `awk`'s `$1 $2 $3` (adapted to name-based indexing), as are the variables `FS`, `OFS`, `RS`, `ORS`, `NF`, `NR`, and `FILENAME`. The `ENV[...]` syntax is from Ruby.
 
@@ -36,7 +36,7 @@ Likewise with `mlr sort`, `mlr tac`, and so on.
 
 * Like `awk`, Miller (as of v5.0.0) allows you to define new functions within its `put` and `filter` expression language.  Further programmability comes from chaining with `then`.
 
-* As with `awk`, `$`-variables are stream variables and all verbs (such as `cut`, `stats1`, `put`, etc.) as well as `put`/`filter` statements operate on streams.  This means that you define actions to be done on each record and then stream your data through those actions.  The built-in variables `NF`, `NR`, etc.  change from one record to another, `$x` is a label for field `x` in the current record, and the input to `sqrt($x)` changes from one record to the next.  The expression language for the `put` and `filter` verbs additionally allows you to define `begin {...}` and `end {...}` blocks for actions to be taken before and after records are processed, respectively.
+* As with `awk`, `$`-variables are stream variables and all verbs (such as `cut`, `stats1`, `put`, etc.) as well as `put`/`filter` statements operate on streams.  This means that you define actions to be done on each record and then stream your data through those actions.  The built-in variables `NF`, `NR`, etc., change from one record to another, `$x` is a label for field `x` in the current record, and the input to `sqrt($x)` changes from one record to the next.  The expression language for the `put` and `filter` verbs additionally allows you to define `begin {...}` and `end {...}` blocks for actions to be taken before and after records are processed, respectively.
 
 * As with `awk`, Miller's `put`/`filter` language lets you set `@sum=0` before records are read, then update that sum on each record, then print its value at the end.  Unlike `awk`, Miller makes syntactically explicit the difference between variables with extent across all records (names starting with `@`, such as `@sum`) and variables which are local to the current expression invocation (names starting without `@`, such as `sum`).
 
diff --git a/docs/src/why.md b/docs/src/why.md
index 937bd0386..aa00458be 100644
--- a/docs/src/why.md
+++ b/docs/src/why.md
@@ -20,44 +20,44 @@ Someone asked me the other day about design, tradeoffs, thought process, why I f
 
 ## Who is Miller for?
 
-For background, I'm a software engineer, with a heavy devops bent and a non-trivial amount of data-engineering in my career. **Initially I wrote Miller mainly for myself:** I'm coder-friendly (being a coder); I'm Github-friendly; most of my data are well-structured or easily structurable (TSV-formatted SQL-query output, CSV files, log files, JSON data structures); I care about interoperability between all the various formats Miller supports (I've encountered them all); I do all my work on Linux or OS X.
+For background, I'm a software engineer with a heavy devops bent and a non-trivial amount of data engineering in my career. **Initially, I wrote Miller mainly for myself:** I'm coder-friendly (being a coder); I'm Github-friendly; most of my data is either well-structured or easily structurable (TSV-formatted SQL-query output, CSV files, log files, JSON data structures); I care about interoperability between all the various formats Miller supports (I've encountered them all); I do all my work on Linux or OS X.
 
-But now there's this neat little tool **which seems to be useful for people in various disciplines**. I don't even know entirely *who*. I can click through Github starrers and read a bit about what they seem to do, but not everyone that uses Miller is even *on* Github (or stars things). I've gotten a lot of feature requests through Github -- but only from people who are Github users.  Not everyone's a coder (it seems like a lot of Miller's Github starrers are devops folks like myself, or data-science-ish people, or biology/genomics folks.) A lot of people care 100% about CSV. And so on.
+But now there's this neat little tool **which seems to be useful for people in various disciplines**. I don't even know entirely *who*. I can click through Github starrers and read a bit about what they seem to do, but not everyone who uses Miller is even *on* Github (or stars things). I've gotten a lot of feature requests through Github -- but only from people who are Github users.  Not everyone's a coder (it seems like many of Miller's Github starrers are devops folks like myself, or data-science-ish people, or biology/genomics folks). A lot of people care 100% about CSV. And so on.
 
-So the reason for the [Miller User Survey](https://github.com/johnkerl/miller/discussions/542) is to answer questions such as: does Miller do what you need? Do you use it for all sorts of things, or just one or two nice things? Are there things you wish it did but it doesn't? Is it almost there, or just nowhere near what you want? Are there not enough features or way too many? Are the docs too complicated; do you have a hard time finding out how to do what you want? Should I think differently about what this tool even *is* in the first place? Should I think differently about who it's for?
+So the reason for the [Miller User Survey](https://github.com/johnkerl/miller/discussions/542) is to answer questions such as: does Miller do what you need? Do you use it for all sorts of things, or just one or two nice things? Are there things you wish it did, but it doesn't? Is it almost there, or just nowhere near what you want? Are there not enough features or way too many? Are the docs too complicated? Do you have a hard time finding out how to do what you want? Should I think differently about what this tool even *is* in the first place? Should I think differently about who it's for?
 
 ## What was Miller created to do?
 
-First: there are tools like `xsv` which handles CSV marvelously and `jq` which handles JSON marvelously, and so on -- but I over the years of my career in the software industry I've found myself, and others, doing a lot of ad-hoc things which really were fundamentally the same *except* for format. So the number one thing about Miller is doing common things while supporting **multiple formats**: (a) ingest a list of records where a record is a list of key-value pairs (however represented in the input files); (b) transform that stream of records; (c) emit the transformed stream -- either in the same format as input, or in a different format.
+The first thing: there are tools like `xsv` which handles CSV marvelously and `jq` which handles JSON marvelously, and so on -- but over the years of my career in the software industry I've found myself, and others, doing a lot of ad-hoc things which were fundamentally the same *except* for format. So the number one thing about Miller is doing common things while supporting **multiple formats**: (a) ingest a list of records where a record is a list of key-value pairs (however represented in the input files); (b) transform that stream of records; (c) emit the transformed stream -- either in the same format as input, or in a different format.
 
-Second thing, a lot like the first: just as I didn't want to build something only for a single file format, I didn't want to build something only for one problem domain. In my work doing software engineering, devops, data engineering, etc. I saw a lot of commonalities and I wanted to **solve as many problems simultaneously as possible**.
+The second thing is a lot like the first: just as I didn't want to build something only for a single file format, I didn't want to build something only for one problem domain. In my work doing software engineering, devops, data engineering, etc., I saw a lot of commonalities, and I wanted to **solve as many problems simultaneously as possible**.
 
-Third: it had to be **streaming**. As time goes by and we (some of us, sometimes) have machines with tens or hundreds of GB of RAM, it's maybe less important, but I'm unhappy with tools which ingest all data, then do stuff, then emit all data. One reason is to be able to handle files bigger than available RAM. Another reason is to be able to handle input which trickles in, e.g.  you have some process emitting data now and then and you can pipe it to Miller and it will emit transformed records one at a time.
+Third: it had to be **streaming**. As time goes by and we (some of us, sometimes) have machines with tens or hundreds of GB of RAM, it's less important, but I'm unhappy with tools that ingest all data, then do stuff, then emit all data. One reason is to be able to handle files bigger than available RAM. Another reason is to be able to handle input which trickles in, e.g., you have some process emitting data now and then, and you can pipe it to Miller and it will emit transformed records one at a time.
 
-Fourth: it had to be **fast**. This precludes all sorts of very nice things written in Ruby, for example. I love Ruby as a very expressive language, and I have several very useful little utility scripts written in Ruby. But a few years ago I ported over some of my old tried-and-true C programs and the lines-of-code count was a *lot* lower -- it was great! Until I ran them on multi-GB files and realized they took 60x as long to complete.  So I couldn't write Miller in Ruby, or in languages like it. I was going to have to do something in a low-level language in order to make it performant.
+Fourth: it had to be **fast**. This precludes all sorts of very nice things written in Ruby, for example. I love Ruby as a very expressive language, and I have several very useful little utility scripts written in Ruby. But a few years ago, I ported over some of my old tried-and-true C programs to Ruby and the lines-of-code count was a *lot* lower -- it was great! Until I ran them on multi-GB files and realized they took 60x as long to complete.  So I couldn't write Miller in Ruby, or languages like it. I was going to have to do something in a low-level language in order to make it performant.
 
-Fifth thing: I wanted Miller to be **pipe-friendly and interoperate with other command-line tools**.  Since the basic paradigm is ingest records, transform records, emit records -- where the input and output formats can be the same or different, and the transform can be complex, or just pass-through -- this means you can use it to transform data, or re-format it, or both. So if you just want to do data-cleaning/prep/formatting and do all the "real" work in R, you can. If you just want a little glue script between other tools you can get that. And if you want to do non-trivial data-reduction in Miller you can.
+The fifth thing: I wanted Miller to be **pipe-friendly and interoperate with other command-line tools**.  Since the basic paradigm is ingest records, transform records, emit records -- where the input and output formats can be the same or different, and the transform can be complex, or just pass-through -- this means you can use it to transform data, or re-format it, or both. So if you just want to do data-cleaning/prep/formatting and do all the "real" work in R, you can. If you want a little glue script between other tools, you can get that. And if you want to do non-trivial data-reduction in Miller, you can.
 
-Sixth thing: Must have **comprehensive documentation and unit-test**. Since Miller handles a lot of formats and solves a lot of problems, there's a lot to test and a lot to keep working correctly as I add features or optimize. And I wanted it to be able to explain itself -- not only through web docs like the one you're reading but also through `man mlr` and `mlr --help`, `mlr sort --help`, etc.
+Sixth thing: Must have **comprehensive documentation and unit tests**. Since Miller handles a wide range of formats and solves numerous problems, there's a lot to test and a lot to keep working correctly as I add features or optimize. And I wanted it to be able to explain itself -- not only through web docs like the one you're reading but also through `man mlr` and `mlr --help`, `mlr sort --help`, etc.
 
-Seventh thing: **Must have a domain-specific language** (DSL) **but also must let you do common things without it**. All those little verbs Miller has to help you *avoid* having to write for-loops are great. I use them for keystroke-saving: `mlr stats1 -a mean,stddev,min,max -f quantity`, for example, without you having to write for-loops or define accumulator variables. But you also have to be able to break out of that and write arbitrary code when you want to: `mlr put '$distance = $rate * $time'` or anything else you can think up. In Perl/AWK/etc.  it's all DSL. In xsv et al.  it's all verbs. In Miller I like having the combination.
+Seventh thing: **Must have a domain-specific language** (DSL) **but also must let you do everyday things without it**. All those little verbs Miller has to help you *avoid* having to write for-loops are great. I use them for keystroke-saving: `mlr stats1 -a mean,stddev,min,max -f quantity`, for example, without you having to write for-loops or define accumulator variables. But you also have to be able to break out of that and write arbitrary code when you want to: `mlr put '$distance = $rate * $time'` or anything else you can think up. In Perl/AWK/etc.  it's all DSL. In xsv et al.  it's all verbs. In Miller, I like having the combination.
 
-Eighth thing: It's an **awful lot of fun to write**. In my experience I didn't find any tools which do multi-format, streaming, efficient, multi-purpose, with DSL and non-DSL, so I wrote one. But I don't guarantee it's unique in the world. It fills a niche in the world (people use it) but it also fills a niche in my life.
+Eighth thing: It's an **awful lot of fun to write**. In my experience, I didn't find any tools that do multi-format, streaming, efficient, multi-purpose, with DSL and non-DSL, so I wrote one. But I don't guarantee it's unique in the world. It fills a niche in the world (people use it), but it also fills a niche in my life.
 
 ## Tradeoffs
 
-Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. I consider that price worth paying for the tool-niche which Miller occupies.
+Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. That price is worth paying for the tool-niche which Miller occupies.
 
-Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats.
+Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data that can be expressed in a variety of formats.
 
-A third tradeoff is doing build-from-scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would just be an `eval` of Python code. And it would run slower, but maybe not enough slower to be a problem for most folks. Later I found out about the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should check out `rows` as well.
+A third tradeoff is building from scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would be an `eval` of Python code. And it would run slower, but maybe not slow enough to be a problem for most people. Later, I discovered the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should also check out `rows`.
 
-A fourth tradeoff is in the DSL (more visibly so in 5.0.0 but already in pre-5.0.0): how much to make it dynamically typed -- so you can just say `y=x+1` with a minimum number of keystrokes -- vs. having it do a good job of telling you when you've made a typo. This is a common paradigm across *all* languages.  Some like Ruby you don't declare anything and they're quick to code little stuff in but programs of even a few thousand lines (which isn't large in the software world) become insanely unmanageable.  Then, Java at the other extreme, does scale and is very typesafe -- but you have to type in a lot of punctuation, angle brackets, datatypes, repetition, etc. just to be able to get anything done. And some in the middle like Go are typesafe but with type-inference which aim to do the best of both. In the Miller (5.0.0) DSL you get `y=x+1` by default but you can have things like `int y = x+1` etc. so the typesafety is opt-in. See also the [Type-checking page](reference-dsl-variables.md#type-checking) for more information on this.
+A fourth tradeoff is in the DSL (more visibly so in 5.0.0 but already in pre-5.0.0): how much to make it dynamically typed -- so you can just say `y=x+1` with a minimum number of keystrokes -- vs. having it do a good job of telling you when you've made a typo. This is a common paradigm across *all* languages.  In some languages, like Ruby, you don't declare anything, and they're quick to code little stuff in, but programs of even a few thousand lines (which isn't large in the software world) become insanely unmanageable.  Then Java, at the other extreme, does scale and is very typesafe -- but you have to type in a lot of punctuation, angle brackets, datatypes, repetition, etc., just to be able to get anything done. And some in the middle, like Go, are typesafe but with type inference, aiming to get the best of both. In the Miller (5.0.0) DSL, you get `y=x+1` by default, but you can have things like `int y = x+1` etc., so the typesafety is opt-in. See also the [Type-checking page](reference-dsl-variables.md#type-checking) for more information on this.
 
 ## Related tools
 
-Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). It doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well.
+Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). It doesn't mention [rows](https://github.com/turicas/rows), so here's a plug for that as well.
 
 ## Moving forward
 
-I originally aimed Miller at people who already know what `sed`/`awk`/`cut`/`sort`/`join` are and wanted some options. But as time goes by I realize that tools like this can be useful to folks who *don't* know what those things are; people who aren't primarily coders; people who are scientists, or data scientists. These days some journalists do data analysis.  So moving forward in terms of docs, I am working on having more cookbook, follow-by-example stuff in addition to the existing language-reference kinds of stuff.  And continuing to seek out input from people who use Miller on where to go next.
+I initially aimed Miller at people who already know what `sed`/`awk`/`cut`/`sort`/`join` are and wanted some options. But as time goes by, I realize that tools like this can be helpful to folks who *don't* know what those things are; people who aren't primarily coders; people who are scientists, or data scientists. These days some journalists do data analysis. Moving forward in terms of docs, I am working on having more cookbook, follow-by-example stuff in addition to the existing language-reference kinds of stuff.  And continuing to seek out input from people who use Miller on where to go next.
diff --git a/docs/src/why.md.in b/docs/src/why.md.in
index e33529ba2..3e176a460 100644
--- a/docs/src/why.md.in
+++ b/docs/src/why.md.in
@@ -4,44 +4,44 @@ Someone asked me the other day about design, tradeoffs, thought process, why I f
 
 ## Who is Miller for?
 
-For background, I'm a software engineer, with a heavy devops bent and a non-trivial amount of data-engineering in my career. **Initially I wrote Miller mainly for myself:** I'm coder-friendly (being a coder); I'm Github-friendly; most of my data are well-structured or easily structurable (TSV-formatted SQL-query output, CSV files, log files, JSON data structures); I care about interoperability between all the various formats Miller supports (I've encountered them all); I do all my work on Linux or OS X.
+For background, I'm a software engineer with a heavy devops bent and a non-trivial amount of data engineering in my career. **Initially, I wrote Miller mainly for myself:** I'm coder-friendly (being a coder); I'm Github-friendly; most of my data is either well-structured or easily structurable (TSV-formatted SQL-query output, CSV files, log files, JSON data structures); I care about interoperability between all the various formats Miller supports (I've encountered them all); I do all my work on Linux or OS X.
 
-But now there's this neat little tool **which seems to be useful for people in various disciplines**. I don't even know entirely *who*. I can click through Github starrers and read a bit about what they seem to do, but not everyone that uses Miller is even *on* Github (or stars things). I've gotten a lot of feature requests through Github -- but only from people who are Github users.  Not everyone's a coder (it seems like a lot of Miller's Github starrers are devops folks like myself, or data-science-ish people, or biology/genomics folks.) A lot of people care 100% about CSV. And so on.
+But now there's this neat little tool **which seems to be useful for people in various disciplines**. I don't even know entirely *who*. I can click through Github starrers and read a bit about what they seem to do, but not everyone who uses Miller is even *on* Github (or stars things). I've gotten a lot of feature requests through Github -- but only from people who are Github users.  Not everyone's a coder (it seems like many of Miller's Github starrers are devops folks like myself, or data-science-ish people, or biology/genomics folks.) A lot of people care 100% about CSV. And so on.
 
-So the reason for the [Miller User Survey](https://github.com/johnkerl/miller/discussions/542) is to answer questions such as: does Miller do what you need? Do you use it for all sorts of things, or just one or two nice things? Are there things you wish it did but it doesn't? Is it almost there, or just nowhere near what you want? Are there not enough features or way too many? Are the docs too complicated; do you have a hard time finding out how to do what you want? Should I think differently about what this tool even *is* in the first place? Should I think differently about who it's for?
+So the reason for the [Miller User Survey](https://github.com/johnkerl/miller/discussions/542) is to answer questions such as: does Miller do what you need? Do you use it for all sorts of things, or just one or two nice things? Are there things you wish it did, but it doesn't? Is it almost there, or just nowhere near what you want? Are there not enough features or way too many? Are the docs too complicated? Do you have a hard time finding out how to do what you want? Should I think differently about what this tool even *is* in the first place? Should I think differently about who it's for?
 
 ## What was Miller created to do?
 
-First: there are tools like `xsv` which handles CSV marvelously and `jq` which handles JSON marvelously, and so on -- but I over the years of my career in the software industry I've found myself, and others, doing a lot of ad-hoc things which really were fundamentally the same *except* for format. So the number one thing about Miller is doing common things while supporting **multiple formats**: (a) ingest a list of records where a record is a list of key-value pairs (however represented in the input files); (b) transform that stream of records; (c) emit the transformed stream -- either in the same format as input, or in a different format.
+The first thing: there are tools like `xsv`, which handles CSV marvelously, and `jq`, which handles JSON marvelously, and so on -- but over the years of my career in the software industry, I've found myself, and others, doing a lot of ad-hoc things which were fundamentally the same *except* for format. So the number one thing about Miller is doing common things while supporting **multiple formats**: (a) ingest a list of records where a record is a list of key-value pairs (however represented in the input files); (b) transform that stream of records; (c) emit the transformed stream -- either in the same format as input, or in a different format.
 
-Second thing, a lot like the first: just as I didn't want to build something only for a single file format, I didn't want to build something only for one problem domain. In my work doing software engineering, devops, data engineering, etc. I saw a lot of commonalities and I wanted to **solve as many problems simultaneously as possible**.
+The second thing is a lot like the first: just as I didn't want to build something only for a single file format, I didn't want to build something only for one problem domain. In my work doing software engineering, devops, data engineering, etc. I saw a lot of commonalities, and I wanted to **solve as many problems simultaneously as possible**.
 
-Third: it had to be **streaming**. As time goes by and we (some of us, sometimes) have machines with tens or hundreds of GB of RAM, it's maybe less important, but I'm unhappy with tools which ingest all data, then do stuff, then emit all data. One reason is to be able to handle files bigger than available RAM. Another reason is to be able to handle input which trickles in, e.g.  you have some process emitting data now and then and you can pipe it to Miller and it will emit transformed records one at a time.
+Third: it had to be **streaming**. As time goes by and we (some of us, sometimes) have machines with tens or hundreds of GB of RAM, it's less important, but I'm unhappy with tools that ingest all data, then do stuff, then emit all data. One reason is to be able to handle files bigger than available RAM. Another reason is to be able to handle input which trickles in, e.g., you have some process emitting data now and then, and you can pipe it to Miller and it will emit transformed records one at a time.
 
-Fourth: it had to be **fast**. This precludes all sorts of very nice things written in Ruby, for example. I love Ruby as a very expressive language, and I have several very useful little utility scripts written in Ruby. But a few years ago I ported over some of my old tried-and-true C programs and the lines-of-code count was a *lot* lower -- it was great! Until I ran them on multi-GB files and realized they took 60x as long to complete.  So I couldn't write Miller in Ruby, or in languages like it. I was going to have to do something in a low-level language in order to make it performant.
+Fourth: it had to be **fast**. This precludes all sorts of very nice things written in Ruby, for example. I love Ruby as a very expressive language, and I have several very useful little utility scripts written in Ruby. But a few years ago, I ported some of my old tried-and-true C programs over to Ruby, and the lines-of-code count was a *lot* lower -- it was great! Until I ran them on multi-GB files and realized they took 60x as long to complete.  So I couldn't write Miller in Ruby, or languages like it. I was going to have to do something in a low-level language in order to make it performant.
 
-Fifth thing: I wanted Miller to be **pipe-friendly and interoperate with other command-line tools**.  Since the basic paradigm is ingest records, transform records, emit records -- where the input and output formats can be the same or different, and the transform can be complex, or just pass-through -- this means you can use it to transform data, or re-format it, or both. So if you just want to do data-cleaning/prep/formatting and do all the "real" work in R, you can. If you just want a little glue script between other tools you can get that. And if you want to do non-trivial data-reduction in Miller you can.
+The fifth thing: I wanted Miller to be **pipe-friendly and interoperate with other command-line tools**.  Since the basic paradigm is ingest records, transform records, emit records -- where the input and output formats can be the same or different, and the transform can be complex, or just pass-through -- this means you can use it to transform data, or re-format it, or both. So if you just want to do data-cleaning/prep/formatting and do all the "real" work in R, you can. If you want a little glue script between other tools, you can get that. And if you want to do non-trivial data-reduction in Miller, you can.
 
-Sixth thing: Must have **comprehensive documentation and unit-test**. Since Miller handles a lot of formats and solves a lot of problems, there's a lot to test and a lot to keep working correctly as I add features or optimize. And I wanted it to be able to explain itself -- not only through web docs like the one you're reading but also through `man mlr` and `mlr --help`, `mlr sort --help`, etc.
+Sixth thing: Must have **comprehensive documentation and unit-test**. Since Miller handles a wide range of formats and solves numerous problems, there's a lot to test and a lot to keep working correctly as I add features or optimize. And I wanted it to be able to explain itself -- not only through web docs like the one you're reading but also through `man mlr` and `mlr --help`, `mlr sort --help`, etc.
 
-Seventh thing: **Must have a domain-specific language** (DSL) **but also must let you do common things without it**. All those little verbs Miller has to help you *avoid* having to write for-loops are great. I use them for keystroke-saving: `mlr stats1 -a mean,stddev,min,max -f quantity`, for example, without you having to write for-loops or define accumulator variables. But you also have to be able to break out of that and write arbitrary code when you want to: `mlr put '$distance = $rate * $time'` or anything else you can think up. In Perl/AWK/etc.  it's all DSL. In xsv et al.  it's all verbs. In Miller I like having the combination.
+Seventh thing: **Must have a domain-specific language** (DSL) **but also must let you do everyday things without it**. All those little verbs Miller has to help you *avoid* having to write for-loops are great. I use them for keystroke-saving: `mlr stats1 -a mean,stddev,min,max -f quantity`, for example, without you having to write for-loops or define accumulator variables. But you also have to be able to break out of that and write arbitrary code when you want to: `mlr put '$distance = $rate * $time'` or anything else you can think up. In Perl/AWK/etc.  it's all DSL. In xsv et al.  it's all verbs. In Miller, I like having the combination.
 
-Eighth thing: It's an **awful lot of fun to write**. In my experience I didn't find any tools which do multi-format, streaming, efficient, multi-purpose, with DSL and non-DSL, so I wrote one. But I don't guarantee it's unique in the world. It fills a niche in the world (people use it) but it also fills a niche in my life.
+Eighth thing: It's an **awful lot of fun to write**. In my experience, I didn't find any tools that do multi-format, streaming, efficient, multi-purpose, with DSL and non-DSL, so I wrote one. But I don't guarantee it's unique in the world. It fills a niche in the world (people use it), but it also fills a niche in my life.
 
 ## Tradeoffs
 
-Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. I consider that price worth paying for the tool-niche which Miller occupies.
+Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. That price is worth paying for the tool-niche which Miller occupies.
 
-Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats.
+Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data, which can be expressed in a variety of formats.
 
-A third tradeoff is doing build-from-scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would just be an `eval` of Python code. And it would run slower, but maybe not enough slower to be a problem for most folks. Later I found out about the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should check out `rows` as well.
+A third tradeoff is building from scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would be an `eval` of Python code. And it would run slower, but maybe not slow enough to be a problem for most people. Later, I discovered the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should also check out `rows`.
 
-A fourth tradeoff is in the DSL (more visibly so in 5.0.0 but already in pre-5.0.0): how much to make it dynamically typed -- so you can just say `y=x+1` with a minimum number of keystrokes -- vs. having it do a good job of telling you when you've made a typo. This is a common paradigm across *all* languages.  Some like Ruby you don't declare anything and they're quick to code little stuff in but programs of even a few thousand lines (which isn't large in the software world) become insanely unmanageable.  Then, Java at the other extreme, does scale and is very typesafe -- but you have to type in a lot of punctuation, angle brackets, datatypes, repetition, etc. just to be able to get anything done. And some in the middle like Go are typesafe but with type-inference which aim to do the best of both. In the Miller (5.0.0) DSL you get `y=x+1` by default but you can have things like `int y = x+1` etc. so the typesafety is opt-in. See also the [Type-checking page](reference-dsl-variables.md#type-checking) for more information on this.
+A fourth tradeoff is in the DSL (more visibly so in 5.0.0 but already in pre-5.0.0): how much to make it dynamically typed -- so you can just say `y=x+1` with a minimum number of keystrokes -- vs. having it do a good job of telling you when you've made a typo. This is a common paradigm across *all* languages.  In some languages, like Ruby, you don't declare anything, and they're quick to code little stuff in, but programs of even a few thousand lines (which isn't large in the software world) become insanely unmanageable.  Then, Java at the other extreme, does scale and is very typesafe -- but you have to type in a lot of punctuation, angle brackets, datatypes, repetition, etc., just to be able to get anything done. And some in the middle, like Go, are typesafe but with type inference, which aim to do the best of both. In the Miller (5.0.0) DSL, you get `y=x+1` by default, but you can have things like `int y = x+1` etc., so the typesafety is opt-in. See also the [Type-checking page](reference-dsl-variables.md#type-checking) for more information on this.
 
 ## Related tools
 
-Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). It doesn't mention [rows](https://github.com/turicas/rows) so here's a plug for that as well.
+Here's a comprehensive list: [https://github.com/dbohdan/structured-text-tools](https://github.com/dbohdan/structured-text-tools). It doesn't mention [rows](https://github.com/turicas/rows), so here's a plug for that as well.
 
 ## Moving forward
 
-I originally aimed Miller at people who already know what `sed`/`awk`/`cut`/`sort`/`join` are and wanted some options. But as time goes by I realize that tools like this can be useful to folks who *don't* know what those things are; people who aren't primarily coders; people who are scientists, or data scientists. These days some journalists do data analysis.  So moving forward in terms of docs, I am working on having more cookbook, follow-by-example stuff in addition to the existing language-reference kinds of stuff.  And continuing to seek out input from people who use Miller on where to go next.
+I initially aimed Miller at people who already know what `sed`/`awk`/`cut`/`sort`/`join` are and wanted some options. But as time goes by, I realize that tools like this can be helpful to folks who *don't* know what those things are; people who aren't primarily coders; people who are scientists, or data scientists. These days some journalists do data analysis. Moving forward in terms of docs, I am working on having more cookbook, follow-by-example stuff in addition to the existing language-reference kinds of stuff.  I am also continuing to seek out input from people who use Miller on where to go next.

From 7a6958926d139a3018ec38acccc3d98eabaff493 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Fri, 4 Jul 2025 13:55:56 -0400
Subject: [PATCH 349/456] Miller 6.14.0 (#1828)

---
 docs/src/manpage.md    | 4 ++--
 docs/src/manpage.txt   | 4 ++--
 man/manpage.txt        | 4 ++--
 man/mlr.1              | 6 +++---
 miller.spec            | 5 ++++-
 pkg/version/version.go | 2 +-
 6 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 8f75d568d..3f4b26f43 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.13.0-dev.
+       manpage documents mlr 6.14.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -3745,5 +3745,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2025-07-03                         4mMILLER24m(1)
+                                  2025-07-04                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 78358a341..198610def 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.13.0-dev. + manpage documents mlr 6.14.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3724,4 +3724,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-03 4mMILLER24m(1) + 2025-07-04 4mMILLER24m(1) diff --git a/man/manpage.txt b/man/manpage.txt index 78358a341..198610def 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.13.0-dev. + manpage documents mlr 6.14.0. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3724,4 +3724,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-03 4mMILLER24m(1) + 2025-07-04 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 028f696f4..a361b0c9c 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2025-07-03 +.\" Date: 2025-07-04 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2025-07-03" "\ \&" "\ \&" +.TH "MILLER" "1" "2025-07-04" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.13.0-dev. +a special case.) This manpage documents mlr 6.14.0. 
.SH "EXAMPLES" .sp diff --git a/miller.spec b/miller.spec index 77b5d9012..3a43b6426 100644 --- a/miller.spec +++ b/miller.spec @@ -1,6 +1,6 @@ Summary: Name-indexed data processing tool Name: miller -Version: 6.13.0 +Version: 6.14.0 Release: 1%{?dist} License: BSD Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz @@ -36,6 +36,9 @@ make install %{_mandir}/man1/mlr.1* %changelog +* Fri Jul 4 2025 John Kerl - 6.14.0-1 +- 6.14.0 release + * Sat Oct 5 2024 John Kerl - 6.13.0-1 - 6.13.0 release diff --git a/pkg/version/version.go b/pkg/version/version.go index 1a6792614..4654d775d 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. -var STRING string = "6.13.0-dev" +var STRING string = "6.14.0" From fefb3046509d7eac385ae42fe091b6e30d8bfea8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 4 Jul 2025 14:15:59 -0400 Subject: [PATCH 350/456] Update release docs on xattr trick for MacOS --- docs/src/how-to-release.md | 2 +- docs/src/how-to-release.md.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/how-to-release.md b/docs/src/how-to-release.md index 58a445f8d..b19529094 100644 --- a/docs/src/how-to-release.md +++ b/docs/src/how-to-release.md @@ -48,7 +48,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * Thanks to [PR 822](https://github.com/johnkerl/miller/pull/822) which introduces [goreleaser](https://github.com/johnkerl/miller/blob/main/.goreleaser.yml) there are versions for many platforms auto-built and auto-attached to the GitHub release. * Attach the release tarball and SRPM. Double-check assets were successfully uploaded. * Publish the release in pre-release mode, until all CI jobs finish successfully. 
Note that gorelease will create and attach the rest of the binaries. - * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. + * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. MacOS: `xattr -d com.apple.quarantine ./mlr` first. * Then mark the release as public. * Build the release-specific docs: diff --git a/docs/src/how-to-release.md.in b/docs/src/how-to-release.md.in index e96010f36..522cdbfa9 100644 --- a/docs/src/how-to-release.md.in +++ b/docs/src/how-to-release.md.in @@ -32,7 +32,7 @@ In this example I am using version 6.2.0 to 6.3.0; of course that will change fo * Thanks to [PR 822](https://github.com/johnkerl/miller/pull/822) which introduces [goreleaser](https://github.com/johnkerl/miller/blob/main/.goreleaser.yml) there are versions for many platforms auto-built and auto-attached to the GitHub release. * Attach the release tarball and SRPM. Double-check assets were successfully uploaded. * Publish the release in pre-release mode, until all CI jobs finish successfully. Note that gorelease will create and attach the rest of the binaries. - * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. + * Before marking the release as public, download an executable from among the generated binaries and make sure its `mlr version` prints what you expect -- else, restart this process. MacOS: `xattr -d com.apple.quarantine ./mlr` first. * Then mark the release as public. 
* Build the release-specific docs: From 0ba6710a798178d6e713910c5b416b49e92fddaf Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 4 Jul 2025 15:10:26 -0400 Subject: [PATCH 351/456] Update `main` version to 6.14.0-dev --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- pkg/version/version.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 3f4b26f43..79b2c5a0e 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0. + manpage documents mlr 6.14.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 198610def..6cd711699 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0. + manpage documents mlr 6.14.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/manpage.txt b/man/manpage.txt index 198610def..6cd711699 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0. + manpage documents mlr 6.14.0-dev. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index a361b0c9c..23b6162f0 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.14.0. +a special case.) This manpage documents mlr 6.14.0-dev. .SH "EXAMPLES" .sp diff --git a/pkg/version/version.go b/pkg/version/version.go index 4654d775d..40cdef3ed 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. -var STRING string = "6.14.0" +var STRING string = "6.14.0-dev" From 313731386783fdb1cbe7c26866bb6836bf1fac46 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 4 Jul 2025 15:18:08 -0400 Subject: [PATCH 352/456] Release-specific docs for 6.14.0 --- docs/src/release-docs.md | 1 + docs/src/release-docs.md.in | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index 0e9fcc1c8..22924b141 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -24,6 +24,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.14.0 | [Miller 6.14.0](https://miller.readthedocs.io/en/6.14.0) | [Survival curve, misc. 
features and bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.14.0) | 6.13.0 | [Miller 6.13.0](https://miller.readthedocs.io/en/6.13.0) | [File-stat DSL function, new stats accumulator, misc. bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.13.0) | 6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | diff --git a/docs/src/release-docs.md.in b/docs/src/release-docs.md.in index 7a840c6f0..4b89cf87d 100644 --- a/docs/src/release-docs.md.in +++ b/docs/src/release-docs.md.in @@ -8,6 +8,7 @@ If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, witho | Release | Docs | Release notes | |---------|---------------------------------------------------------------------|---------------| main | [main branch](https://miller.readthedocs.io/en/main) | N/A | +6.14.0 | [Miller 6.14.0](https://miller.readthedocs.io/en/6.14.0) | [Survival curve, misc. features and bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.14.0) | 6.13.0 | [Miller 6.13.0](https://miller.readthedocs.io/en/6.13.0) | [File-stat DSL function, new stats accumulator, misc. 
bugfixes](https://github.com/johnkerl/miller/releases/tag/v6.13.0) | 6.12.0 | [Miller 6.12.0](https://miller.readthedocs.io/en/6.12.0) | [New sparsify verb, wide-table performance improvement, thousands separator for fmtnum function](https://github.com/johnkerl/miller/releases/tag/v6.12.0) | 6.11.0 | [Miller 6.11.0](https://miller.readthedocs.io/en/6.11.0) | [CSV/TSV auto-unsparsify, regex-fieldname support for reorder/sub/ssub/gsub, strmatch DSL function, and more](https://github.com/johnkerl/miller/releases/tag/v6.11.0) | From f673c1a30ef0284d8cd498284bf25ec88691a330 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jul 2025 12:01:21 -0400 Subject: [PATCH 353/456] Bump golang.org/x/sys from 0.33.0 to 0.34.0 (#1832) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.33.0 to 0.34.0. - [Commits](https://github.com/golang/sys/compare/v0.33.0...v0.34.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.34.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 526d27725..bc9af88c7 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.33.0 + golang.org/x/sys v0.34.0 golang.org/x/term v0.32.0 golang.org/x/text v0.26.0 ) diff --git a/go.sum b/go.sum index a902b4c81..af032d07f 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= From 23acc8424a1548512b199b32faa5120c14d2f556 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jul 2025 12:05:38 -0400 Subject: [PATCH 354/456] Bump golang.org/x/term from 0.32.0 to 0.33.0 (#1831) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.32.0 to 0.33.0. 
- [Commits](https://github.com/golang/term/compare/v0.32.0...v0.33.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.33.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index bc9af88c7..c2d4cad85 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.34.0 - golang.org/x/term v0.32.0 + golang.org/x/term v0.33.0 golang.org/x/text v0.26.0 ) diff --git a/go.sum b/go.sum index af032d07f..a42f6ee1f 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= +golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= From 865c9cc5638103298759c8119e3a1262c41c5a97 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jul 2025 12:31:58 -0400 Subject: [PATCH 355/456] Bump golang.org/x/text from 0.26.0 to 0.27.0 (#1830) Bumps 
[golang.org/x/text](https://github.com/golang/text) from 0.26.0 to 0.27.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.26.0...v0.27.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.27.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index c2d4cad85..cef18a93e 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.34.0 golang.org/x/term v0.33.0 - golang.org/x/text v0.26.0 + golang.org/x/text v0.27.0 ) require ( @@ -41,7 +41,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.33.0 // indirect + golang.org/x/tools v0.34.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index a42f6ee1f..32dc93901 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/text v0.27.0 
h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From e7fe363d9a65ef6693e3de7528b6c49b0dc948a8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Jul 2025 12:41:04 -0400 Subject: [PATCH 356/456] `mlr sort -b` feature (#1833) * `mlr sort -b` feature * mlr regtest -p test/cases/cli-help && make dev --- docs/src/data-diving-examples.md | 4 ++-- docs/src/date-time-examples.md | 2 +- docs/src/manpage.md | 3 ++- docs/src/manpage.txt | 3 ++- docs/src/online-help.md | 1 + docs/src/reference-verbs.md | 5 +++-- man/manpage.txt | 3 ++- man/mlr.1 | 5 +++-- pkg/transformers/sort.go | 16 ++++++++++++++++ test/cases/cli-help/0001/expout | 1 + 10 files changed, 33 insertions(+), 10 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..297eca211 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -26,7 +26,7 @@ Vertical-tabular format is good for a quick look at CSV data layout -- seeing wh wc -l data/flins.csv
-   36635 data/flins.csv
+36635 data/flins.csv
 
@@ -227,7 +227,7 @@ Peek at the data:
 wc -l data/colored-shapes.dkvp
 
-   10078 data/colored-shapes.dkvp
+10078 data/colored-shapes.dkvp
 
diff --git a/docs/src/date-time-examples.md b/docs/src/date-time-examples.md
index 5bcbdac01..cab74de3c 100644
--- a/docs/src/date-time-examples.md
+++ b/docs/src/date-time-examples.md
@@ -68,7 +68,7 @@ date,qoh
 wc -l data/miss-date.csv
 
-    1372 data/miss-date.csv
+1372 data/miss-date.csv
 
Since there are 1372 lines in the data file, some automation is called for. To find the missing dates, you can convert the dates to seconds since the epoch using `strptime`, then compute adjacent differences (the `cat -n` simply inserts record-counters): diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 79b2c5a0e..9cf6f281f 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1837,6 +1837,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p -nf {comma-separated field names} Same as -n -nr {comma-separated field names} Numerical descending; nulls sort first -t {comma-separated field names} Natural ascending + -b Move sort fields to start of record, as in reorder -b -tr|-rt {comma-separated field names} Natural descending -h|--help Show this message. @@ -3745,5 +3746,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-04 4mMILLER24m(1) + 2025-07-11 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 6cd711699..b2f31f3d1 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1816,6 +1816,7 @@ -nf {comma-separated field names} Same as -n -nr {comma-separated field names} Numerical descending; nulls sort first -t {comma-separated field names} Natural ascending + -b Move sort fields to start of record, as in reorder -b -tr|-rt {comma-separated field names} Natural descending -h|--help Show this message. @@ -3724,4 +3725,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-04 4mMILLER24m(1) + 2025-07-11 4mMILLER24m(1) diff --git a/docs/src/online-help.md b/docs/src/online-help.md index 5bbee15a1..f1b8d4d18 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -230,6 +230,7 @@ Options: -nf {comma-separated field names} Same as -n -nr {comma-separated field names} Numerical descending; nulls sort first -t {comma-separated field names} Natural ascending +-b Move sort fields to start of record, as in reorder -b -tr|-rt {comma-separated field names} Natural descending -h|--help Show this message. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index ab7a599fa..49c212b97 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -2960,6 +2960,7 @@ Options: -nf {comma-separated field names} Same as -n -nr {comma-separated field names} Numerical descending; nulls sort first -t {comma-separated field names} Natural ascending +-b Move sort fields to start of record, as in reorder -b -tr|-rt {comma-separated field names} Natural descending -h|--help Show this message. @@ -4133,7 +4134,7 @@ There are two main ways to use `mlr uniq`: the first way is with `-g` to specify wc -l data/colored-shapes.csv
-   10079 data/colored-shapes.csv
+10079 data/colored-shapes.csv
 
@@ -4290,7 +4291,7 @@ color=purple,shape=square,flag=0
 wc -l data/repeats.dkvp
 
-      57 data/repeats.dkvp
+57 data/repeats.dkvp
 
diff --git a/man/manpage.txt b/man/manpage.txt
index 6cd711699..b2f31f3d1 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1816,6 +1816,7 @@
        -nf {comma-separated field names}  Same as -n
        -nr {comma-separated field names}  Numerical descending; nulls sort first
        -t  {comma-separated field names}  Natural ascending
+       -b                                 Move sort fields to start of record, as in reorder -b
        -tr|-rt {comma-separated field names}  Natural descending
        -h|--help Show this message.
 
@@ -3724,4 +3725,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2025-07-04                         4mMILLER24m(1)
+                                  2025-07-11                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 23b6162f0..fd96eaa0b 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2025-07-04
+.\"      Date: 2025-07-11
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2025-07-04" "\ \&" "\ \&"
+.TH "MILLER" "1" "2025-07-11" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2289,6 +2289,7 @@ Options:
 -nf {comma-separated field names}  Same as -n
 -nr {comma-separated field names}  Numerical descending; nulls sort first
 -t  {comma-separated field names}  Natural ascending
+-b                                 Move sort fields to start of record, as in reorder -b
 -tr|-rt {comma-separated field names}  Natural descending
 -h|--help Show this message.
 
diff --git a/pkg/transformers/sort.go b/pkg/transformers/sort.go
index 945c6e581..6342192bb 100644
--- a/pkg/transformers/sort.go
+++ b/pkg/transformers/sort.go
@@ -83,6 +83,7 @@ func transformerSortUsage(
 	fmt.Fprintf(o, "-nf {comma-separated field names}  Same as -n\n")
 	fmt.Fprintf(o, "-nr {comma-separated field names}  Numerical descending; nulls sort first\n")
 	fmt.Fprintf(o, "-t  {comma-separated field names}  Natural ascending\n")
+	fmt.Fprintf(o, "-b                                 Move sort fields to start of record, as in reorder -b\n")
 	fmt.Fprintf(o, "-tr|-rt {comma-separated field names}  Natural descending\n")
 	fmt.Fprintf(o, "-h|--help Show this message.\n")
 	fmt.Fprintf(o, "\n")
@@ -107,6 +108,7 @@ func transformerSortParseCLI(
 
 	groupByFieldNames := make([]string, 0)
 	comparatorFuncs := make([]mlrval.CmpFuncInt, 0)
+	doMoveToHead := false
 
 	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
 		opt := args[argi]
@@ -255,6 +257,9 @@ func transformerSortParseCLI(
 				comparatorFuncs = append(comparatorFuncs, mlrval.NumericDescendingComparator)
 			}
 
+		} else if opt == "-b" {
+			doMoveToHead = true
+
 		} else {
 			transformerSortUsage(os.Stderr)
 			os.Exit(1)
@@ -274,6 +279,7 @@ func transformerSortParseCLI(
 	transformer, err := NewTransformerSort(
 		groupByFieldNames,
 		comparatorFuncs,
+		doMoveToHead,
 	)
 	if err != nil {
 		fmt.Fprintln(os.Stderr, err)
@@ -304,6 +310,7 @@ type TransformerSort struct {
 	// -- Input
 	groupByFieldNames []string
 	comparatorFuncs   []mlrval.CmpFuncInt
+	doMoveToHead      bool
 
 	// -- State
 	// Map from string to *list.List:
@@ -316,11 +323,13 @@ type TransformerSort struct {
 func NewTransformerSort(
 	groupByFieldNames []string,
 	comparatorFuncs []mlrval.CmpFuncInt,
+	doMoveToHead bool,
 ) (*TransformerSort, error) {
 
 	tr := &TransformerSort{
 		groupByFieldNames: groupByFieldNames,
 		comparatorFuncs:   comparatorFuncs,
+		doMoveToHead:      doMoveToHead,
 
 		recordListsByGroup: lib.NewOrderedMap(),
 		groupHeads:         lib.NewOrderedMap(),
@@ -346,6 +355,13 @@ func (tr *TransformerSort) Transform(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
+		if tr.doMoveToHead {
+			n := len(tr.groupByFieldNames)
+			for i := n - 1; i >= 0; i-- {
+				inrec.MoveToHead(tr.groupByFieldNames[i])
+			}
+		}
+
 		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(
 			tr.groupByFieldNames,
 		)
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index a451ac0c5..c211e2cd9 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -982,6 +982,7 @@ Options:
 -nf {comma-separated field names}  Same as -n
 -nr {comma-separated field names}  Numerical descending; nulls sort first
 -t  {comma-separated field names}  Natural ascending
+-b                                 Move sort fields to start of record, as in reorder -b
 -tr|-rt {comma-separated field names}  Natural descending
 -h|--help Show this message.
 

From d264f562dc17030ef58ddc488279e137145e6e50 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 20 Jul 2025 16:36:50 -0400
Subject: [PATCH 357/456] Fix doc typo re empty and multiplication (#1838)

* Fix docs typo re empty and multiplication

* Run `make dev`
---
 docs/src/data-diving-examples.md        | 4 ++--
 docs/src/date-time-examples.md          | 2 +-
 docs/src/manpage.md                     | 2 +-
 docs/src/manpage.txt                    | 2 +-
 docs/src/reference-main-null-data.md    | 2 +-
 docs/src/reference-main-null-data.md.in | 2 +-
 docs/src/reference-verbs.md             | 4 ++--
 man/manpage.txt                         | 2 +-
 man/mlr.1                               | 4 ++--
 9 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 297eca211..100716ec2 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -26,7 +26,7 @@ Vertical-tabular format is good for a quick look at CSV data layout -- seeing wh
 wc -l data/flins.csv
 
-36635 data/flins.csv
+   36635 data/flins.csv
 
@@ -227,7 +227,7 @@ Peek at the data:
 wc -l data/colored-shapes.dkvp
 
-10078 data/colored-shapes.dkvp
+   10078 data/colored-shapes.dkvp
 
diff --git a/docs/src/date-time-examples.md b/docs/src/date-time-examples.md
index cab74de3c..5bcbdac01 100644
--- a/docs/src/date-time-examples.md
+++ b/docs/src/date-time-examples.md
@@ -68,7 +68,7 @@ date,qoh
 wc -l data/miss-date.csv
 
-1372 data/miss-date.csv
+    1372 data/miss-date.csv
 
Since there are 1372 lines in the data file, some automation is called for. To find the missing dates, you can convert the dates to seconds since the epoch using `strptime`, then compute adjacent differences (the `cat -n` simply inserts record-counters): diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 9cf6f281f..e6cf26b77 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3746,5 +3746,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-11 4mMILLER24m(1) + 2025-07-20 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index b2f31f3d1..130bbcabd 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -3725,4 +3725,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-11 4mMILLER24m(1) + 2025-07-20 4mMILLER24m(1) diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md index 63bfffaa9..175ae2ad2 100644 --- a/docs/src/reference-main-null-data.md +++ b/docs/src/reference-main-null-data.md @@ -125,7 +125,7 @@ with the exception that the `min` and `max` functions are special: if one argume x=,y=3,a=3,b=
-Likewise, empty works like 0 for addition and subtraction, and multiplication: +Likewise, empty works like 0 for addition and subtraction, and like 1 for multiplication:
 echo 'x=,y=3' | mlr put '$a = $x + $y; $b = $x - $y; $c = $x * $y'
diff --git a/docs/src/reference-main-null-data.md.in b/docs/src/reference-main-null-data.md.in
index 087edaa78..3ac1051ac 100644
--- a/docs/src/reference-main-null-data.md.in
+++ b/docs/src/reference-main-null-data.md.in
@@ -54,7 +54,7 @@ GENMD-RUN-COMMAND
 echo 'x=,y=3' | mlr put '$a=min($x,$y);$b=max($x,$y)'
 GENMD-EOF
 
-Likewise, empty works like 0 for addition and subtraction, and multiplication:
+Likewise, empty works like 0 for addition and subtraction, and like 1 for multiplication:
 
 GENMD-RUN-COMMAND
 echo 'x=,y=3' | mlr put '$a = $x + $y; $b = $x - $y; $c = $x * $y'
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 49c212b97..3cf5cc8d6 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -4134,7 +4134,7 @@ There are two main ways to use `mlr uniq`: the first way is with `-g` to specify
 wc -l data/colored-shapes.csv
 
-10079 data/colored-shapes.csv
+   10079 data/colored-shapes.csv
 
@@ -4291,7 +4291,7 @@ color=purple,shape=square,flag=0
 wc -l data/repeats.dkvp
 
-57 data/repeats.dkvp
+      57 data/repeats.dkvp
 
diff --git a/man/manpage.txt b/man/manpage.txt
index b2f31f3d1..130bbcabd 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -3725,4 +3725,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2025-07-11                         4mMILLER24m(1)
+                                  2025-07-20                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index fd96eaa0b..3dca0b9a7 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2025-07-11
+.\"      Date: 2025-07-20
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2025-07-11" "\ \&" "\ \&"
+.TH "MILLER" "1" "2025-07-20" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From fccdf215e6c0df617c8b02108de21da2a006b2b0 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 20 Jul 2025 17:05:24 -0400
Subject: [PATCH 358/456] DKVP `--incr-key` option (#1839)

* Code support for --incr-key

* Add source code for online help for new flag

* Run `make dev`
---
 docs/src/manpage.md                  | 11 ++++++++++
 docs/src/manpage.txt                 | 11 ++++++++++
 docs/src/online-help.md              |  1 +
 docs/src/reference-main-flag-list.md |  9 ++++++++
 man/manpage.txt                      | 11 ++++++++++
 man/mlr.1                            | 19 ++++++++++++++++
 pkg/cli/option_parse.go              | 26 ++++++++++++++++++++++
 pkg/cli/option_types.go              | 11 +++++-----
 pkg/input/record_reader_dkvp_nidx.go | 33 +++++++++++++++++++++-------
 9 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index e6cf26b77..2bc383fe7 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -145,6 +145,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p
          mlr help comments-in-data-flags
          mlr help compressed-data-flags
          mlr help csv/tsv-only-flags
+         mlr help dkvp-only-flags
          mlr help file-format-flags
          mlr help flatten-unflatten-flags
          mlr help format-conversion-keystroke-saver-flags
@@ -356,6 +357,16 @@ This is simply a copy of what you should see on running `man mlr` at a command p
        -N                       Keystroke-saver for `--implicit-csv-header
                                 --headerless-csv-output`.
 
+1mDKVP-ONLY FLAGS0m
+       These are flags which are applicable to DKVP format.
+
+       --incr-key               Without this option, keyless DKVP fields are keyed by
+                                field number. For example: `a=10,b=20,30,d=40,50` is
+                                ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
+                                this option, they're keyed by a running counter of
+                                keyless fields. For example: `a=10,b=20,30,d=40,50`
+                                is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
+
 1mFILE-FORMAT FLAGS0m
        See the File formats doc page, and or `mlr help file-formats`, for more
        about file formats Miller supports.
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 130bbcabd..bf4e6c618 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -124,6 +124,7 @@
          mlr help comments-in-data-flags
          mlr help compressed-data-flags
          mlr help csv/tsv-only-flags
+         mlr help dkvp-only-flags
          mlr help file-format-flags
          mlr help flatten-unflatten-flags
          mlr help format-conversion-keystroke-saver-flags
@@ -335,6 +336,16 @@
        -N                       Keystroke-saver for `--implicit-csv-header
                                 --headerless-csv-output`.
 
+1mDKVP-ONLY FLAGS0m
+       These are flags which are applicable to DKVP format.
+
+       --incr-key               Without this option, keyless DKVP fields are keyed by
+                                field number. For example: `a=10,b=20,30,d=40,50` is
+                                ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
+                                this option, they're keyed by a running counter of
+                                keyless fields. For example: `a=10,b=20,30,d=40,50`
+                                is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
+
 1mFILE-FORMAT FLAGS0m
        See the File formats doc page, and or `mlr help file-formats`, for more
        about file formats Miller supports.
diff --git a/docs/src/online-help.md b/docs/src/online-help.md
index f1b8d4d18..bb8185e10 100644
--- a/docs/src/online-help.md
+++ b/docs/src/online-help.md
@@ -55,6 +55,7 @@ Flags:
   mlr help comments-in-data-flags
   mlr help compressed-data-flags
   mlr help csv/tsv-only-flags
+  mlr help dkvp-only-flags
   mlr help file-format-flags
   mlr help flatten-unflatten-flags
   mlr help format-conversion-keystroke-saver-flags
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index 28adc12f6..86d3ce042 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -128,6 +128,15 @@ These are flags which are applicable to CSV format.
 * `--quote-all`: Force double-quoting of CSV fields.
 * `-N`: Keystroke-saver for `--implicit-csv-header --headerless-csv-output`.
 
+## DKVP-only flags
+
+These are flags which are applicable to DKVP format.
+
+
+**Flags:**
+
+* `--incr-key`: Without this option, keyless DKVP fields are keyed by field number.  For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`.  With this option, they're keyed by a running counter of keyless fields.  For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
+
 ## File-format flags
 
 See the File formats doc page, and or `mlr help file-formats`, for more
diff --git a/man/manpage.txt b/man/manpage.txt
index 130bbcabd..bf4e6c618 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -124,6 +124,7 @@
          mlr help comments-in-data-flags
          mlr help compressed-data-flags
          mlr help csv/tsv-only-flags
+         mlr help dkvp-only-flags
          mlr help file-format-flags
          mlr help flatten-unflatten-flags
          mlr help format-conversion-keystroke-saver-flags
@@ -335,6 +336,16 @@
        -N                       Keystroke-saver for `--implicit-csv-header
                                 --headerless-csv-output`.
 
+1mDKVP-ONLY FLAGS0m
+       These are flags which are applicable to DKVP format.
+
+       --incr-key               Without this option, keyless DKVP fields are keyed by
+                                field number. For example: `a=10,b=20,30,d=40,50` is
+                                ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
+                                this option, they're keyed by a running counter of
+                                keyless fields. For example: `a=10,b=20,30,d=40,50`
+                                is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
+
 1mFILE-FORMAT FLAGS0m
        See the File formats doc page, and or `mlr help file-formats`, for more
        about file formats Miller supports.
diff --git a/man/mlr.1 b/man/mlr.1
index 3dca0b9a7..8d32a9817 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -161,6 +161,7 @@ Flags:
   mlr help comments-in-data-flags
   mlr help compressed-data-flags
   mlr help csv/tsv-only-flags
+  mlr help dkvp-only-flags
   mlr help file-format-flags
   mlr help flatten-unflatten-flags
   mlr help format-conversion-keystroke-saver-flags
@@ -410,6 +411,24 @@ These are flags which are applicable to CSV format.
 .fi
 .if n \{\
 .RE
+.SH "DKVP-ONLY FLAGS"
+.sp
+
+.if n \{\
+.RS 0
+.\}
+.nf
+These are flags which are applicable to DKVP format.
+
+--incr-key               Without this option, keyless DKVP fields are keyed by
+                         field number. For example: `a=10,b=20,30,d=40,50` is
+                         ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`. With
+                         this option, they're keyed by a running counter of
+                         keyless fields. For example: `a=10,b=20,30,d=40,50`
+                         is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.
+.fi
+.if n \{\
+.RE
 .SH "FILE-FORMAT FLAGS"
 .sp
 
diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go
index 156a056a3..dd5ede99f 100644
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
@@ -104,6 +104,7 @@ var FLAG_TABLE = FlagTable{
 		&CSVTSVOnlyFlagSection,
 		&JSONOnlyFlagSection,
 		&PPRINTOnlyFlagSection,
+		&DKVPOnlyFlagSection,
 		&CompressedDataFlagSection,
 		&CommentsInDataFlagSection,
 		&OutputColorizationFlagSection,
@@ -523,6 +524,31 @@ var PPRINTOnlyFlagSection = FlagSection{
 	},
 }
 
+// ================================================================
+// DKVP-ONLY FLAGS
+
+func DKVPOnlyPrintInfo() {
+	fmt.Println("These are flags which are applicable to DKVP format.")
+}
+
+func init() { DKVPOnlyFlagSection.Sort() }
+
+var DKVPOnlyFlagSection = FlagSection{
+	name:        "DKVP-only flags",
+	infoPrinter: DKVPOnlyPrintInfo,
+	flags: []Flag{
+
+		{
+			name: "--incr-key",
+			help: "Without this option, keyless DKVP fields are keyed by field number.  For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$3=30,$d=40,$5=50`.  With this option, they're keyed by a running counter of keyless fields.  For example: `a=10,b=20,30,d=40,50` is ingested as `$a=10,$b=20,$1=30,$d=40,$2=50`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.IncrementImplicitKey = true // reader-side flag, checked by the DKVP line splitter
+				*pargi += 1
+			},
+		},
+	},
+}
+
 // ================================================================
 // LEGACY FLAGS
 
diff --git a/pkg/cli/option_types.go b/pkg/cli/option_types.go
index 19227fd73..58917728a 100644
--- a/pkg/cli/option_types.go
+++ b/pkg/cli/option_types.go
@@ -53,11 +53,12 @@ type TReaderOptions struct {
 	irsWasSpecified            bool
 	allowRepeatIFSWasSpecified bool
 
-	UseImplicitHeader   bool
-	AllowRaggedCSVInput bool
-	CSVLazyQuotes       bool
-	CSVTrimLeadingSpace bool
-	BarredPprintInput   bool
+	UseImplicitHeader    bool
+	AllowRaggedCSVInput  bool
+	CSVLazyQuotes        bool
+	CSVTrimLeadingSpace  bool
+	BarredPprintInput    bool
+	IncrementImplicitKey bool
 
 	CommentHandling TCommentHandling
 	CommentString   string
diff --git a/pkg/input/record_reader_dkvp_nidx.go b/pkg/input/record_reader_dkvp_nidx.go
index efc0ae385..6a53c8c26 100644
--- a/pkg/input/record_reader_dkvp_nidx.go
+++ b/pkg/input/record_reader_dkvp_nidx.go
@@ -14,7 +14,7 @@ import (
 	"github.com/johnkerl/miller/v6/pkg/types"
 )
 
-// splitter_DKVP_NIDX is a function type for the one bit of code differing
+// line_splitter_DKVP_NIDX is a function type for the one bit of code differing
 // between the DKVP reader and the NIDX reader, namely, how it splits lines.
 type line_splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error)
 
@@ -169,25 +169,42 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
 
 	pairs := reader.fieldSplitter.Split(line)
 
+	// Without --incr-key:
+	//   echo 'a,z=b,c' | mlr cat gives 1=a,z=b,3=c
+	//   I.e. implicit keys are taken from the 1-up field counter.
+	// With it:
+	//   echo 'a,z=b,c' | mlr cat gives 1=a,z=b,2=c
+	//   I.e. implicit keys are taken from a 1-up count of fields lacking explicit keys.
+	incr_key := 0
+
 	for i, pair := range pairs {
 		kv := reader.pairSplitter.Split(pair)
 
 		if len(kv) == 0 || (len(kv) == 1 && kv[0] == "") {
 			// Ignore. This is expected when splitting with repeated IFS.
 		} else if len(kv) == 1 {
-			// E.g the pair has no equals sign: "a" rather than "a=1" or
+			// E.g. the pair has no equals sign: "a" rather than "a=1" or
 			// "a=".  Here we use the positional index as the key. This way
 			// DKVP is a generalization of NIDX.
-			key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up
+			//
+			// Also: recall that Miller userspace indices are 1-up.
+			var int_key int
+			if reader.readerOptions.IncrementImplicitKey {
+				int_key = incr_key
+			} else {
+				int_key = i
+			}
+			str_key := strconv.Itoa(int_key + 1)
+			incr_key++
 			value := mlrval.FromDeferredType(kv[0])
-			_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
+			_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
 			if err != nil {
 				return nil, err
 			}
 		} else {
-			key := kv[0]
+			str_key := kv[0]
 			value := mlrval.FromDeferredType(kv[1])
-			_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
+			_, err := record.PutReferenceMaybeDedupe(str_key, value, dedupeFieldNames)
 			if err != nil {
 				return nil, err
 			}
@@ -204,9 +221,9 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrm
 	var i int = 0
 	for _, value := range values {
 		i++
-		key := strconv.Itoa(i)
+		str_key := strconv.Itoa(i)
 		mval := mlrval.FromDeferredType(value)
-		record.PutReference(key, mval)
+		record.PutReference(str_key, mval)
 	}
 	return record, nil
 }

From 9445046bfe02b4dd7ae1d7ccdc31d2230248e598 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sun, 20 Jul 2025 17:42:37 -0400
Subject: [PATCH 359/456] Force decimal formatting for ints on JSON output
 (#1840)

* Force decimal formatting for ints on JSON output

* update a test case
---
 pkg/mlrval/mlrval_json.go                 | 12 +++++++++++-
 test/cases/verb-format-values/0003/expout |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go
index d1fb880ed..1a193aa14 100644
--- a/pkg/mlrval/mlrval_json.go
+++ b/pkg/mlrval/mlrval_json.go
@@ -13,6 +13,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"strconv"
 
 	"github.com/johnkerl/miller/v6/pkg/colorizer"
 	"github.com/johnkerl/miller/v6/pkg/lib"
@@ -406,7 +407,16 @@ func millerJSONEncodeString(input string) string {
 // ----------------------------------------------------------------
 func (mv *Mlrval) marshalJSONInt(outputIsStdout bool) (string, error) {
 	lib.InternalCodingErrorIf(mv.mvtype != MT_INT)
-	return colorizer.MaybeColorizeValue(mv.String(), outputIsStdout), nil
+	// Other formats would use mv.String(): for example, if the user used hex
+	// format, we would emit whatever they set. However, JSON number syntax is
+	// decimal-only, so here we must override the user's formatting (e.g. emit
+	// 12345, not 0x3039). See https://github.com/johnkerl/miller/issues/1761.
+	ival, ok := mv.GetIntValue()
+	if !ok {
+		panic("Internal coding error: int-typed mlrval denied int access")
+	}
+	s := strconv.FormatInt(ival, 10)
+	return colorizer.MaybeColorizeValue(s, outputIsStdout), nil
 }
 
 // ----------------------------------------------------------------
diff --git a/test/cases/verb-format-values/0003/expout b/test/cases/verb-format-values/0003/expout
index 06216b5ca..9a45bc186 100644
--- a/test/cases/verb-format-values/0003/expout
+++ b/test/cases/verb-format-values/0003/expout
@@ -1,7 +1,7 @@
 [
 {
   "hostname": "localhost",
-  "pid": 0x3039,
+  "pid": 12345,
   "req": {
     "id": 6789,
     "method": "GET",

From b77d9826eacf0df82f10547e0bf60b91b470f61e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 21 Jul 2025 09:47:44 -0400
Subject: [PATCH 360/456] Bump github/codeql-action from 3.29.2 to 3.29.3
 (#1841)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.2 to 3.29.3.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/181d5eefc20863364f96762470ba6f862bdef56b...d6bbdef45e766d081b84a2def353b0055f728d3e)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 3.29.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index b3dd4bcec..62fdff6d2 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@181d5eefc20863364f96762470ba6f862bdef56b
+      uses: github/codeql-action/init@d6bbdef45e766d081b84a2def353b0055f728d3e
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@181d5eefc20863364f96762470ba6f862bdef56b
+      uses: github/codeql-action/autobuild@d6bbdef45e766d081b84a2def353b0055f728d3e
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@181d5eefc20863364f96762470ba6f862bdef56b
+      uses: github/codeql-action/analyze@d6bbdef45e766d081b84a2def353b0055f728d3e

From c4c3ae21197a47ccdf0e9a1db49e93332385adb1 Mon Sep 17 00:00:00 2001
From: Duncan Lock 
Date: Tue, 22 Jul 2025 06:10:01 -0700
Subject: [PATCH 361/456] Add scoop install to README.md (#1842)

Add `scoop install main/miller` to Windows installation options.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0819f272e..cebdd7166 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ There's a good chance you can get Miller pre-built for your system:
 |---|---|
 |Linux|`yum install miller`
`apt-get install miller`| |Mac|`brew install miller`
`port install miller`| -|Windows|`choco install miller`
`winget install Miller.Miller`| +|Windows|`choco install miller`
`winget install Miller.Miller`
`scoop install main/miller`| See also [README-versions.md](./README-versions.md) for a full list of package versions. Note that long-term-support (LtS) releases will likely be on older versions. From 52b7a47ae9084a9d11950152a48ed5a54c806688 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 22 Jul 2025 20:15:48 -0400 Subject: [PATCH 362/456] Use Go 1.24.5 (#1843) --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- docs/src/build.md | 2 +- docs/src/build.md.in | 2 +- go.mod | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 402382e47..fc312e97e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -20,7 +20,7 @@ jobs: - name: Set up Go uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 with: - go-version: 1.21 + go-version: 1.24 - name: Build run: make build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a0f0a5178..da61f0925 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: env: - GO_VERSION: 1.21.1 + GO_VERSION: 1.24.5 jobs: release: diff --git a/docs/src/build.md b/docs/src/build.md index 0bceafb0d..b6678282f 100644 --- a/docs/src/build.md +++ b/docs/src/build.md @@ -18,7 +18,7 @@ Quick links: Please also see [Installation](installing-miller.md) for information about pre-built executables. -You will need to first install Go version 1.15 or higher: please see [https://go.dev](https://go.dev). +You will need to first install Go ([this version](https://github.com/johnkerl/miller/blob/main/go.mod#L17)): please see [https://go.dev](https://go.dev). ## Miller license diff --git a/docs/src/build.md.in b/docs/src/build.md.in index ef3e4aa7d..3d35ee560 100644 --- a/docs/src/build.md.in +++ b/docs/src/build.md.in @@ -2,7 +2,7 @@ Please also see [Installation](installing-miller.md) for information about pre-built executables. 
-You will need to first install Go version 1.15 or higher: please see [https://go.dev](https://go.dev). +You will need to first install Go ([this version](https://github.com/johnkerl/miller/blob/main/go.mod#L17)): please see [https://go.dev](https://go.dev). ## Miller license diff --git a/go.mod b/go.mod index cef18a93e..72e0d34ca 100644 --- a/go.mod +++ b/go.mod @@ -14,9 +14,9 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.23.0 +go 1.24.0 -toolchain go1.24.2 +toolchain go1.24.5 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb From f3fdfc4e298d33d59bfd57b9e1723abfbec05345 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:11:25 -0400 Subject: [PATCH 363/456] try to fix build error (#1757) Co-authored-by: John Kerl --- go.mod | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 72e0d34ca..9038f3e8d 100644 --- a/go.mod +++ b/go.mod @@ -14,9 +14,7 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.24.0 - -toolchain go1.24.5 +go 1.24.5 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb From cf03b6d49c0a9710be371452234820fb3aa04439 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 10:53:04 -0400 Subject: [PATCH 364/456] Bump github.com/klauspost/compress from 1.17.11 to 1.18.0 (#1844) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.11 to 1.18.0. 
- [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.11...v1.18.0) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-version: 1.18.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 9038f3e8d..b6d66acff 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.17.11 + github.com/klauspost/compress v1.18.0 github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 github.com/lestrrat-go/strftime v1.1.0 github.com/mattn/go-isatty v0.0.20 diff --git a/go.sum b/go.sum index 32dc93901..d8a7f600e 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 
github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48= github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU= github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY= From 226c9555efed1fc9366e027e2424a8c963761d76 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 09:21:32 -0400 Subject: [PATCH 365/456] Bump github/codeql-action from 3.29.3 to 3.29.4 (#1845) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.3 to 3.29.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/d6bbdef45e766d081b84a2def353b0055f728d3e...4e828ff8d448a8a6e532957b1811f387a63867e8) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 62fdff6d2..f7bdba080 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@d6bbdef45e766d081b84a2def353b0055f728d3e + uses: github/codeql-action/init@4e828ff8d448a8a6e532957b1811f387a63867e8 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@d6bbdef45e766d081b84a2def353b0055f728d3e + uses: github/codeql-action/autobuild@4e828ff8d448a8a6e532957b1811f387a63867e8 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@d6bbdef45e766d081b84a2def353b0055f728d3e + uses: github/codeql-action/analyze@4e828ff8d448a8a6e532957b1811f387a63867e8 From 1ef87c6278bfe40ff934993e0c287af3d7bc1d10 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Jul 2025 13:10:43 -0400 Subject: [PATCH 366/456] add github.com/GuilloteauQ/miller-exercises to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cebdd7166..b9c7a7cab 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ key-value-pair data in a variety of data formats. 
* [Miller in 10 minutes](https://miller.readthedocs.io/en/latest/10min) * [A Guide To Command-Line Data Manipulation](https://www.smashingmagazine.com/2022/12/guide-command-line-data-manipulation-cli-miller) * [A quick tutorial on Miller](https://www.ict4g.net/adolfo/notes/data-analysis/miller-quick-tutorial.html) +* [Miller Exercises](https://github.com/GuilloteauQ/miller-exercises) * [Tools to manipulate CSV files from the Command Line](https://www.ict4g.net/adolfo/notes/data-analysis/tools-to-manipulate-csv.html) * [www.togaware.com/linux/survivor/CSV_Files.html](https://www.togaware.com/linux/survivor/CSV_Files.html) * [MLR for CSV manipulation](https://guillim.github.io/terminal/2018/06/19/MLR-for-CSV-manipulation.html) From e6ca3f68565004642906d752ae6920f03285e068 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 09:12:48 -0400 Subject: [PATCH 367/456] Bump github.com/lestrrat-go/strftime from 1.1.0 to 1.1.1 (#1846) Bumps [github.com/lestrrat-go/strftime](https://github.com/lestrrat-go/strftime) from 1.1.0 to 1.1.1. - [Release notes](https://github.com/lestrrat-go/strftime/releases) - [Changelog](https://github.com/lestrrat-go/strftime/blob/master/Changes) - [Commits](https://github.com/lestrrat-go/strftime/compare/v1.1.0...v1.1.1) --- updated-dependencies: - dependency-name: github.com/lestrrat-go/strftime dependency-version: 1.1.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b6d66acff..3abe83fa2 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/klauspost/compress v1.18.0 github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 - github.com/lestrrat-go/strftime v1.1.0 + github.com/lestrrat-go/strftime v1.1.1 github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 diff --git a/go.sum b/go.sum index d8a7f600e..74f0f0883 100644 --- a/go.sum +++ b/go.sum @@ -25,8 +25,8 @@ github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0C github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1/go.mod h1:uvVFnikBpVz7S1pdsyUI+BBRlz64vmU6Q+kviiB+fpU= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= -github.com/lestrrat-go/strftime v1.1.0 h1:gMESpZy44/4pXLO/m+sL0yBd1W6LjgjrrD4a68Gapyg= -github.com/lestrrat-go/strftime v1.1.0/go.mod h1:uzeIB52CeUJenCo1syghlugshMysrqUT51HlxphXVeI= +github.com/lestrrat-go/strftime v1.1.1 h1:zgf8QCsgj27GlKBy3SU9/8MMgegZ8UCzlCyHYrUF0QU= +github.com/lestrrat-go/strftime v1.1.1/go.mod h1:YDrzHJAODYQ+xxvrn5SG01uFIQAeDTzpxNVppCz7Nmw= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 h1:NC4H8hewgaktBqMI5yzy6L/Vln5/H7BEziyxaE2fX3Y= From 3b8668d06f8501e19fee4ec0da1e00a9b2d3447c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 09:32:31 -0400 Subject: [PATCH 368/456] Bump github/codeql-action from 3.29.4 to 3.29.5 (#1847) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.4 to 3.29.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/4e828ff8d448a8a6e532957b1811f387a63867e8...51f77329afa6477de8c49fc9c7046c15b9a4e79d) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.5 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f7bdba080..0a1b2d112 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4e828ff8d448a8a6e532957b1811f387a63867e8 + uses: github/codeql-action/init@51f77329afa6477de8c49fc9c7046c15b9a4e79d with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4e828ff8d448a8a6e532957b1811f387a63867e8 + uses: github/codeql-action/autobuild@51f77329afa6477de8c49fc9c7046c15b9a4e79d # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4e828ff8d448a8a6e532957b1811f387a63867e8 + uses: github/codeql-action/analyze@51f77329afa6477de8c49fc9c7046c15b9a4e79d From 19e72f9dac4035bbfa2e46d09356ed2f9faf736f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 5 Aug 2025 18:11:27 -0500 Subject: [PATCH 369/456] Preserve file mods on `mlr -I` (#1849) * extract a helper function * Preserve file mode on mlr -I --- pkg/entrypoint/entrypoint.go | 171 ++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 73 deletions(-) diff --git a/pkg/entrypoint/entrypoint.go b/pkg/entrypoint/entrypoint.go index 0d2b8d3a9..d8c56c8cf 100644 --- a/pkg/entrypoint/entrypoint.go +++ b/pkg/entrypoint/entrypoint.go @@ -50,7 +50,7 @@ func Main() MainReturn { if !options.DoInPlace { err = processToStdout(options, recordTransformers) } else { - err = processInPlace(options) + err = processFilesInPlace(options) } if err != nil { fmt.Fprintf(os.Stderr, "mlr: %v.\n", err) @@ -73,7 +73,7 @@ func processToStdout( } // ---------------------------------------------------------------- -// processInPlace is in-place processing without mlr -I. +// processFilesInPlace is in-place processing without mlr -I. // // For in-place mode, reconstruct the transformers on each input file. E.g. // 'mlr -I head -n 2 foo bar' should do head -n 2 on foo as well as on bar. @@ -85,7 +85,7 @@ func processToStdout( // frequently used code path, this would likely lead to latent bugs. So this // approach leads to greater code stability. 
-func processInPlace( +func processFilesInPlace( originalOptions *cli.TOptions, ) error { // This should have been already checked by the CLI parser when validating @@ -98,79 +98,104 @@ func processInPlace( copy(fileNames, originalOptions.FileNames) for _, fileName := range fileNames { - - if _, err := os.Stat(fileName); os.IsNotExist(err) { - return err - } - - // Reconstruct the transformers for each file name, and allocate - // reader, mappers, and writer individually for each file name. This - // way CSV headers appear in each file, head -n 10 puts 10 rows for - // each output file, and so on. - options, recordTransformers, err := climain.ParseCommandLine(os.Args) + err := processFileInPlace(fileName, originalOptions) if err != nil { return err } - - // We can't in-place update http://, https://, etc. Also, anything with - // --prepipe or --prepipex, we won't try to guess how to invert that - // command to produce re-compressed output. - err = lib.IsUpdateableInPlace(fileName, options.ReaderOptions.Prepipe) - if err != nil { - return err - } - - containingDirectory := path.Dir(fileName) - // Names like ./mlr-in-place-2148227797 and ./mlr-in-place-1792078347, - // as revealed by printing handle.Name(). - handle, err := os.CreateTemp(containingDirectory, "mlr-in-place-") - if err != nil { - return err - } - tempFileName := handle.Name() - - // If the input file is compressed and we'll be doing in-process - // decompression as we read the input file, try to do in-process - // compression as we write the output. - inputFileEncoding := lib.FindInputEncoding(fileName, options.ReaderOptions.FileInputEncoding) - - // Get a handle with, perhaps, a recompression wrapper around it. - wrappedHandle, isNew, err := lib.WrapOutputHandle(handle, inputFileEncoding) - if err != nil { - os.Remove(tempFileName) - return err - } - - // Run the Miller processing stream from the input file to the temp-output file. 
- err = stream.Stream([]string{fileName}, options, recordTransformers, wrappedHandle, false) - if err != nil { - os.Remove(tempFileName) - return err - } - - // Close the recompressor handle, if any recompression is being applied. - if isNew { - err = wrappedHandle.Close() - if err != nil { - os.Remove(tempFileName) - return err - } - } - - // Close the handle to the output file. This may force final writes, so - // it must be error-checked. - err = handle.Close() - if err != nil { - os.Remove(tempFileName) - return err - } - - // Rename the temp-output file on top of the input file. - err = os.Rename(tempFileName, fileName) - if err != nil { - os.Remove(tempFileName) - return err - } } return nil } + +func processFileInPlace( + fileName string, + originalOptions *cli.TOptions, +) error { + + if _, err := os.Stat(fileName); os.IsNotExist(err) { + return err + } + + // Reconstruct the transformers for each file name, and allocate + // reader, mappers, and writer individually for each file name. This + // way CSV headers appear in each file, head -n 10 puts 10 rows for + // each output file, and so on. + options, recordTransformers, err := climain.ParseCommandLine(os.Args) + if err != nil { + return err + } + + // We can't in-place update http://, https://, etc. Also, anything with + // --prepipe or --prepipex, we won't try to guess how to invert that + // command to produce re-compressed output. + err = lib.IsUpdateableInPlace(fileName, options.ReaderOptions.Prepipe) + if err != nil { + return err + } + + // Get the original file's mode so we can preserve it. + fileInfo, err := os.Stat(fileName) + if err != nil { + return err + } + originalMode := fileInfo.Mode() + + containingDirectory := path.Dir(fileName) + // Names like ./mlr-in-place-2148227797 and ./mlr-in-place-1792078347, + // as revealed by printing handle.Name(). 
+ handle, err := os.CreateTemp(containingDirectory, "mlr-in-place-") + if err != nil { + return err + } + tempFileName := handle.Name() + + // If the input file is compressed and we'll be doing in-process + // decompression as we read the input file, try to do in-process + // compression as we write the output. + inputFileEncoding := lib.FindInputEncoding(fileName, options.ReaderOptions.FileInputEncoding) + + // Get a handle with, perhaps, a recompression wrapper around it. + wrappedHandle, isNew, err := lib.WrapOutputHandle(handle, inputFileEncoding) + if err != nil { + os.Remove(tempFileName) + return err + } + + // Run the Miller processing stream from the input file to the temp-output file. + err = stream.Stream([]string{fileName}, options, recordTransformers, wrappedHandle, false) + if err != nil { + os.Remove(tempFileName) + return err + } + + // Close the recompressor handle, if any recompression is being applied. + if isNew { + err = wrappedHandle.Close() + if err != nil { + os.Remove(tempFileName) + return err + } + } + + // Close the handle to the output file. This may force final writes, so + // it must be error-checked. + err = handle.Close() + if err != nil { + os.Remove(tempFileName) + return err + } + + // Rename the temp-output file on top of the input file. + err = os.Rename(tempFileName, fileName) + if err != nil { + os.Remove(tempFileName) + return err + } + + // Set the mode to match the original. + err = os.Chmod(fileName, originalMode) + if err != nil { + return err + } + + return nil +} From 44ddaea65169fa51461cbf8cee12666381c27328 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 16:53:54 -0500 Subject: [PATCH 370/456] Bump actions/cache from 4.2.3 to 4.2.4 (#1854) Bumps [actions/cache](https://github.com/actions/cache) from 4.2.3 to 4.2.4. 
- [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/5a3ec84eff668545956fd18022155c47e93e2684...0400d5f644dc74513175e3cd8d07132dd4860809) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 4.2.4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index da61f0925..047d632e0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 + uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 with: path: | ~/.cache/go-build From ab7a80cbf49ae723d77f8733035308a4446a18fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 16:54:11 -0500 Subject: [PATCH 371/456] Bump github/codeql-action from 3.29.7 to 3.29.8 (#1853) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.7 to 3.29.8. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/51f77329afa6477de8c49fc9c7046c15b9a4e79d...76621b61decf072c1cee8dd1ce2d2a82d33c17ed) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.8 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 0a1b2d112..ada8ffb82 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@51f77329afa6477de8c49fc9c7046c15b9a4e79d + uses: github/codeql-action/init@76621b61decf072c1cee8dd1ce2d2a82d33c17ed with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@51f77329afa6477de8c49fc9c7046c15b9a4e79d + uses: github/codeql-action/autobuild@76621b61decf072c1cee8dd1ce2d2a82d33c17ed # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@51f77329afa6477de8c49fc9c7046c15b9a4e79d + uses: github/codeql-action/analyze@76621b61decf072c1cee8dd1ce2d2a82d33c17ed From 24a6e9870949bb5730fe5e00856044020e71c3ab Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 16:54:39 -0500 Subject: [PATCH 372/456] Bump golang.org/x/sys from 0.34.0 to 0.35.0 (#1852) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.34.0 to 0.35.0. 
- [Commits](https://github.com/golang/sys/compare/v0.34.0...v0.35.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.35.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 3abe83fa2..55ec743ca 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 - golang.org/x/sys v0.34.0 + golang.org/x/sys v0.35.0 golang.org/x/term v0.33.0 golang.org/x/text v0.27.0 ) diff --git a/go.sum b/go.sum index 74f0f0883..f9de9055f 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= -golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= From f3a8fd42bc561f0f0d07338438c767333a7f79b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 17:12:29 -0500 Subject: [PATCH 
373/456] Bump golang.org/x/term from 0.33.0 to 0.34.0 (#1851) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.33.0 to 0.34.0. - [Commits](https://github.com/golang/term/compare/v0.33.0...v0.34.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.34.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 55ec743ca..bb2157c5d 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.35.0 - golang.org/x/term v0.33.0 + golang.org/x/term v0.34.0 golang.org/x/text v0.27.0 ) diff --git a/go.sum b/go.sum index f9de9055f..8746f1c0f 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= -golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= From d4ace7527b26ba0ffb3b2e12345e70575eafddeb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 
Aug 2025 09:16:11 -0400 Subject: [PATCH 374/456] Bump golang.org/x/text from 0.27.0 to 0.28.0 (#1850) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.27.0 to 0.28.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.27.0...v0.28.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.28.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index bb2157c5d..f86fe979d 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/stretchr/testify v1.10.0 golang.org/x/sys v0.35.0 golang.org/x/term v0.34.0 - golang.org/x/text v0.27.0 + golang.org/x/text v0.28.0 ) require ( @@ -39,7 +39,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.34.0 // indirect + golang.org/x/tools v0.35.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 8746f1c0f..7f93593b1 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= -golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= -golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= 
-golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 78da99707738f7f7286f01afbfd9c9aeb36ee8e4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:18:26 -0400 Subject: [PATCH 375/456] Bump github/codeql-action from 3.29.8 to 3.29.9 (#1856) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.8 to 3.29.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/76621b61decf072c1cee8dd1ce2d2a82d33c17ed...df559355d593797519d70b90fc8edd5db049e7a2) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.9 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ada8ffb82..c3023c791 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@76621b61decf072c1cee8dd1ce2d2a82d33c17ed + uses: github/codeql-action/init@df559355d593797519d70b90fc8edd5db049e7a2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@76621b61decf072c1cee8dd1ce2d2a82d33c17ed + uses: github/codeql-action/autobuild@df559355d593797519d70b90fc8edd5db049e7a2 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@76621b61decf072c1cee8dd1ce2d2a82d33c17ed + uses: github/codeql-action/analyze@df559355d593797519d70b90fc8edd5db049e7a2 From 369156b70d1fa14ee7d6f7243a6b405301a5a791 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 14:43:42 -0400 Subject: [PATCH 376/456] Bump actions/checkout from 4.2.2 to 5.0.0 (#1857) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.2.2 to 5.0.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/11bd71901bbe5b1630ceea73d27597364c9af683...08c6903cd8c0fde910a37f88322edcfb5dd907a8) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: 5.0.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c3023c791..6341b76bd 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index fff2e3c40..0f641b76c 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index fc312e97e..7710a010d 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - name: Set up Go uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 047d632e0..f8c7c84c9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out 
code into the Go module directory - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 with: fetch-depth: 0 From 06e16ea3ee997a32b7c7590acfcdc60b62b92ece Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 13 Aug 2025 17:07:32 -0500 Subject: [PATCH 377/456] Don't parse CSV comments (#1859) * `mlr sort -b` feature * mlr regtest -p test/cases/cli-help && make dev * Don't parse CSV comments * Add tests for PR 1346 * Add tests for PR 1787 * Add test CSV files --- pkg/go-csv/csv_reader.go | 25 ++++++++--- pkg/input/record_reader_csv.go | 44 ++++++------------- test/cases/io-skip-pass-comments/pr-1346/cmd | 1 + .../io-skip-pass-comments/pr-1346/experr | 1 + .../io-skip-pass-comments/pr-1346/expout | 5 +++ .../io-skip-pass-comments/pr-1346/should-fail | 0 .../cases/io-skip-pass-comments/pr-1787-a/cmd | 1 + .../io-skip-pass-comments/pr-1787-a/experr | 1 + .../io-skip-pass-comments/pr-1787-a/expout | 2 + .../pr-1787-a/should-fail | 0 .../cases/io-skip-pass-comments/pr-1787-b/cmd | 1 + .../io-skip-pass-comments/pr-1787-b/experr | 0 .../io-skip-pass-comments/pr-1787-b/expout | 4 ++ .../cases/io-skip-pass-comments/pr-1787-c/cmd | 1 + .../io-skip-pass-comments/pr-1787-c/experr | 0 .../io-skip-pass-comments/pr-1787-c/expout | 3 ++ test/input/pr-1346.csv | 6 +++ test/input/pr-1787.csv | 4 ++ 18 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 test/cases/io-skip-pass-comments/pr-1346/cmd create mode 100644 test/cases/io-skip-pass-comments/pr-1346/experr create mode 100644 test/cases/io-skip-pass-comments/pr-1346/expout create mode 100644 test/cases/io-skip-pass-comments/pr-1346/should-fail create mode 100644 test/cases/io-skip-pass-comments/pr-1787-a/cmd create mode 100644 test/cases/io-skip-pass-comments/pr-1787-a/experr create mode 100644 test/cases/io-skip-pass-comments/pr-1787-a/expout create mode 100644 test/cases/io-skip-pass-comments/pr-1787-a/should-fail create mode 100644 
test/cases/io-skip-pass-comments/pr-1787-b/cmd create mode 100644 test/cases/io-skip-pass-comments/pr-1787-b/experr create mode 100644 test/cases/io-skip-pass-comments/pr-1787-b/expout create mode 100644 test/cases/io-skip-pass-comments/pr-1787-c/cmd create mode 100644 test/cases/io-skip-pass-comments/pr-1787-c/experr create mode 100644 test/cases/io-skip-pass-comments/pr-1787-c/expout create mode 100644 test/input/pr-1346.csv create mode 100644 test/input/pr-1787.csv diff --git a/pkg/go-csv/csv_reader.go b/pkg/go-csv/csv_reader.go index 507e9a94c..5a0820a01 100644 --- a/pkg/go-csv/csv_reader.go +++ b/pkg/go-csv/csv_reader.go @@ -311,15 +311,28 @@ func (r *Reader) readRecord(dst []string) ([]string, error) { var errRead error for errRead == nil { line, errRead = r.readLine() - if r.Comment != 0 && nextRune(line) == r.Comment { - line = nil - continue // Skip comment lines - } + + // MILLER-SPECIFIC UPDATE: DO NOT DO THIS + // if r.Comment != 0 && nextRune(line) == r.Comment { + // line = nil + // continue // Skip comment lines + // } + // MILLER-SPECIFIC UPDATE: DO NOT DO THIS // if errRead == nil && len(line) == lengthNL(line) { - // line = nil - // continue // Skip empty lines + // line = nil + // continue // Skip empty lines // } + + // MILLER-SPECIFIC UPDATE: If the line starts with the comment character, + // don't attempt to CSV-parse it -- just hand it back as a single field. + // This allows two things: + // * User comments get passed through as intended, without being reformatted; + // * Users can do things like `# a"b` in their comments without getting an + // imbalanced-double-quote error. 
+ if r.Comment != 0 && nextRune(line) == r.Comment { + return []string{string(line)}, nil + } break } if errRead == io.EOF { diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index 6ed07250d..a154ac8ba 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -1,7 +1,6 @@ package input import ( - "bytes" "container/list" "fmt" "io" @@ -109,6 +108,14 @@ func (reader *RecordReaderCSV) processHandle( csvReader.Comma = rune(reader.ifs0) csvReader.LazyQuotes = reader.csvLazyQuotes csvReader.TrimLeadingSpace = reader.csvTrimLeadingSpace + + if reader.readerOptions.CommentHandling != cli.CommentsAreData { + if len(reader.readerOptions.CommentString) == 1 { + // Use our modified fork of the go-csv package + csvReader.Comment = rune(reader.readerOptions.CommentString[0]) + } + } + csvRecordsChannel := make(chan *list.List, recordsPerBatch) go channelizedCSVRecordScanner(csvReader, csvRecordsChannel, downstreamDoneChannel, errorChannel, recordsPerBatch) @@ -318,42 +325,17 @@ func (reader *RecordReaderCSV) maybeConsumeComment( // However, sadly, bytes.Buffer does not implement io.Writer because // its Write method has pointer receiver. So we have a WorkaroundBuffer // struct below which has non-pointer receiver. 
- buffer := NewWorkaroundBuffer() - csvWriter := csv.NewWriter(buffer) - csvWriter.Comma = rune(reader.ifs0) - csvWriter.Write(csvRecord) - csvWriter.Flush() - recordsAndContexts.PushBack(types.NewOutputString(buffer.String(), context)) + + // Contract with our fork of the go-csv CSV Reader + lib.InternalCodingErrorIf(len(csvRecord) != 1) + recordsAndContexts.PushBack(types.NewOutputString(csvRecord[0], context)) + } else /* reader.readerOptions.CommentHandling == cli.SkipComments */ { // discard entirely } return false } -// ---------------------------------------------------------------- -// As noted above: wraps a bytes.Buffer, whose Write method has pointer -// receiver, in a struct with non-pointer receiver so that it implements -// io.Writer. - -type WorkaroundBuffer struct { - pbuffer *bytes.Buffer -} - -func NewWorkaroundBuffer() WorkaroundBuffer { - var buffer bytes.Buffer - return WorkaroundBuffer{ - pbuffer: &buffer, - } -} - -func (wb WorkaroundBuffer) Write(p []byte) (n int, err error) { - return wb.pbuffer.Write(p) -} - -func (wb WorkaroundBuffer) String() string { - return wb.pbuffer.String() -} - // ---------------------------------------------------------------- // BOM-stripping // diff --git a/test/cases/io-skip-pass-comments/pr-1346/cmd b/test/cases/io-skip-pass-comments/pr-1346/cmd new file mode 100644 index 000000000..611187612 --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1346/cmd @@ -0,0 +1 @@ +mlr --skip-comments --csv --pass-comments cat test/input/pr-1346.csv diff --git a/test/cases/io-skip-pass-comments/pr-1346/experr b/test/cases/io-skip-pass-comments/pr-1346/experr new file mode 100644 index 000000000..10864f8ab --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1346/experr @@ -0,0 +1 @@ +mlr: mlr: CSV header/data length mismatch 2 != 1 at filename test/input/pr-1346.csv row 4. 
diff --git a/test/cases/io-skip-pass-comments/pr-1346/expout b/test/cases/io-skip-pass-comments/pr-1346/expout new file mode 100644 index 000000000..b7872a7a9 --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1346/expout @@ -0,0 +1,5 @@ +field1,field2 +a,b +# that was the first record +c,d +# that was the second record, and there is no more data diff --git a/test/cases/io-skip-pass-comments/pr-1346/should-fail b/test/cases/io-skip-pass-comments/pr-1346/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-skip-pass-comments/pr-1787-a/cmd b/test/cases/io-skip-pass-comments/pr-1787-a/cmd new file mode 100644 index 000000000..8ecdde63e --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-a/cmd @@ -0,0 +1 @@ +mlr --csv cat test/input/pr-1787.csv diff --git a/test/cases/io-skip-pass-comments/pr-1787-a/experr b/test/cases/io-skip-pass-comments/pr-1787-a/experr new file mode 100644 index 000000000..9e02e68bc --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-a/experr @@ -0,0 +1 @@ +mlr: parse error on line 3, column 4: bare " in non-quoted-field. 
diff --git a/test/cases/io-skip-pass-comments/pr-1787-a/expout b/test/cases/io-skip-pass-comments/pr-1787-a/expout new file mode 100644 index 000000000..bfde6bfa0 --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-a/expout @@ -0,0 +1,2 @@ +a,b,c +1,2,3 diff --git a/test/cases/io-skip-pass-comments/pr-1787-a/should-fail b/test/cases/io-skip-pass-comments/pr-1787-a/should-fail new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-skip-pass-comments/pr-1787-b/cmd b/test/cases/io-skip-pass-comments/pr-1787-b/cmd new file mode 100644 index 000000000..c79588a16 --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-b/cmd @@ -0,0 +1 @@ +mlr --csv --pass-comments cat test/input/pr-1787.csv diff --git a/test/cases/io-skip-pass-comments/pr-1787-b/experr b/test/cases/io-skip-pass-comments/pr-1787-b/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-skip-pass-comments/pr-1787-b/expout b/test/cases/io-skip-pass-comments/pr-1787-b/expout new file mode 100644 index 000000000..23b8c638c --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-b/expout @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +# x"y +4,5,6 diff --git a/test/cases/io-skip-pass-comments/pr-1787-c/cmd b/test/cases/io-skip-pass-comments/pr-1787-c/cmd new file mode 100644 index 000000000..8e17a1f3e --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-c/cmd @@ -0,0 +1 @@ +mlr --csv --skip-comments cat test/input/pr-1787.csv diff --git a/test/cases/io-skip-pass-comments/pr-1787-c/experr b/test/cases/io-skip-pass-comments/pr-1787-c/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-skip-pass-comments/pr-1787-c/expout b/test/cases/io-skip-pass-comments/pr-1787-c/expout new file mode 100644 index 000000000..88700c714 --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-c/expout @@ -0,0 +1,3 @@ +a,b,c +1,2,3 +4,5,6 diff --git a/test/input/pr-1346.csv b/test/input/pr-1346.csv new file mode 100644 index 
000000000..6a46e0994 --- /dev/null +++ b/test/input/pr-1346.csv @@ -0,0 +1,6 @@ +field1,field2 +a,b +# that was the first record +c,d +# that was the second record, and there is no more data + diff --git a/test/input/pr-1787.csv b/test/input/pr-1787.csv new file mode 100644 index 000000000..23b8c638c --- /dev/null +++ b/test/input/pr-1787.csv @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +# x"y +4,5,6 From 4ebef873d244ab9cce001b4ed79ee78a4d680599 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 14 Aug 2025 18:00:22 -0400 Subject: [PATCH 378/456] Miller 6.15.0 (#1860) * miller 6.15.0 * make dev --- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- man/manpage.txt | 4 ++-- man/mlr.1 | 6 +++--- miller.spec | 5 ++++- pkg/version/version.go | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 2bc383fe7..62df70f07 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0-dev. + manpage documents mlr 6.15.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3757,5 +3757,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-20 4mMILLER24m(1) + 2025-08-14 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index bf4e6c618..2f2f51484 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0-dev. + manpage documents mlr 6.15.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3736,4 +3736,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-20 4mMILLER24m(1) + 2025-08-14 4mMILLER24m(1) diff --git a/man/manpage.txt b/man/manpage.txt index bf4e6c618..2f2f51484 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.14.0-dev. + manpage documents mlr 6.15.0. 
1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3736,4 +3736,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-07-20 4mMILLER24m(1) + 2025-08-14 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 8d32a9817..57c1b31d2 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2025-07-20 +.\" Date: 2025-08-14 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2025-07-20" "\ \&" "\ \&" +.TH "MILLER" "1" "2025-08-14" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.14.0-dev. +a special case.) This manpage documents mlr 6.15.0. 
.SH "EXAMPLES" .sp diff --git a/miller.spec b/miller.spec index 3a43b6426..9211e0c25 100644 --- a/miller.spec +++ b/miller.spec @@ -1,6 +1,6 @@ Summary: Name-indexed data processing tool Name: miller -Version: 6.14.0 +Version: 6.15.0 Release: 1%{?dist} License: BSD Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz @@ -36,6 +36,9 @@ make install %{_mandir}/man1/mlr.1* %changelog +* Thu Aug 14 2025 John Kerl - 6.15.0-1 +- 6.15.0 release + * Fri Jul 4 2025 John Kerl - 6.14.0-1 - 6.14.0 release diff --git a/pkg/version/version.go b/pkg/version/version.go index 40cdef3ed..143c21e17 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. -var STRING string = "6.14.0-dev" +var STRING string = "6.15.0" From cd6431f7aaefd829a7fe4cb7d2b84fabe971a333 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Aug 2025 08:07:06 -0400 Subject: [PATCH 379/456] Bump goreleaser/goreleaser-action from 6.3.0 to 6.4.0 (#1863) Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 6.3.0 to 6.4.0. - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](https://github.com/goreleaser/goreleaser-action/compare/9c156ee8a17a598857849441385a2041ef570552...e435ccd777264be153ace6237001ef4d979d3a7a) --- updated-dependencies: - dependency-name: goreleaser/goreleaser-action dependency-version: 6.4.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f8c7c84c9..85b887b20 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: # https://goreleaser.com/ci/actions/ - name: Run GoReleaser - uses: goreleaser/goreleaser-action@9c156ee8a17a598857849441385a2041ef570552 + uses: goreleaser/goreleaser-action@e435ccd777264be153ace6237001ef4d979d3a7a #if: startsWith(github.ref, 'refs/tags/v') with: version: latest From 4d83e88ff6b7804d54ed3a28e39ab2eeccb737d4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 15 Aug 2025 19:45:18 -0400 Subject: [PATCH 380/456] Note that comment prefix for CSV must be single-character --- pkg/cli/option_parse.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index dd5ede99f..912d689f1 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -2678,7 +2678,7 @@ var CommentsInDataFlagSection = FlagSection{ { name: "--skip-comments-with", arg: "{string}", - help: "Ignore commented lines within input, with specified prefix.", + help: "Ignore commented lines within input, with specified prefix. For CSV input format, the prefix must be a single character.", parser: func(args []string, argc int, pargi *int, options *TOptions) { CheckArgCount(args, *pargi, argc, 2) options.ReaderOptions.CommentString = args[*pargi+1] @@ -2700,7 +2700,7 @@ var CommentsInDataFlagSection = FlagSection{ { name: "--pass-comments-with", arg: "{string}", - help: "Immediately print commented lines within input, with specified prefix.", + help: "Immediately print commented lines within input, with specified prefix. 
For CSV input format, the prefix must be a single character.", parser: func(args []string, argc int, pargi *int, options *TOptions) { CheckArgCount(args, *pargi, argc, 2) options.ReaderOptions.CommentString = args[*pargi+1] From 8b524b3ada0099db178be666e7d53a0bab810499 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 15 Aug 2025 19:48:48 -0400 Subject: [PATCH 381/456] make dev --- docs/src/file-formats.md | 6 ++++-- docs/src/manpage.md | 8 +++++--- docs/src/manpage.txt | 8 +++++--- docs/src/reference-main-flag-list.md | 4 ++-- man/manpage.txt | 8 +++++--- man/mlr.1 | 10 ++++++---- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 5eaff8b13..8a09dac54 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -757,12 +757,14 @@ Notes: within the input. --pass-comments-with {string} Immediately print commented lines within input, with - specified prefix. + specified prefix. For CSV input format, the prefix + must be a single character. --skip-comments Ignore commented lines (prefixed by `#`) within the input. --skip-comments-with {string} Ignore commented lines within input, with specified - prefix. + prefix. For CSV input format, the prefix must be a + single character.
Examples: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 62df70f07..96a1e292a 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -255,12 +255,14 @@ This is simply a copy of what you should see on running `man mlr` at a command p within the input. --pass-comments-with {string} Immediately print commented lines within input, with - specified prefix. + specified prefix. For CSV input format, the prefix + must be a single character. --skip-comments Ignore commented lines (prefixed by `#`) within the input. --skip-comments-with {string} Ignore commented lines within input, with specified - prefix. + prefix. For CSV input format, the prefix must be a + single character. 1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files @@ -3757,5 +3759,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-14 4mMILLER24m(1) + 2025-08-15 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 2f2f51484..d63c071e4 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -234,12 +234,14 @@ within the input. --pass-comments-with {string} Immediately print commented lines within input, with - specified prefix. + specified prefix. For CSV input format, the prefix + must be a single character. --skip-comments Ignore commented lines (prefixed by `#`) within the input. --skip-comments-with {string} Ignore commented lines within input, with specified - prefix. + prefix. For CSV input format, the prefix must be a + single character. 1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files @@ -3736,4 +3738,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-14 4mMILLER24m(1) + 2025-08-15 4mMILLER24m(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 86d3ce042..e0f36f3af 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -63,9 +63,9 @@ Notes: **Flags:** * `--pass-comments`: Immediately print commented lines (prefixed by `#`) within the input. -* `--pass-comments-with {string}`: Immediately print commented lines within input, with specified prefix. +* `--pass-comments-with {string}`: Immediately print commented lines within input, with specified prefix. For CSV input format, the prefix must be a single character. * `--skip-comments`: Ignore commented lines (prefixed by `#`) within the input. -* `--skip-comments-with {string}`: Ignore commented lines within input, with specified prefix. +* `--skip-comments-with {string}`: Ignore commented lines within input, with specified prefix. For CSV input format, the prefix must be a single character. 
## Compressed-data flags diff --git a/man/manpage.txt b/man/manpage.txt index 2f2f51484..d63c071e4 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -234,12 +234,14 @@ within the input. --pass-comments-with {string} Immediately print commented lines within input, with - specified prefix. + specified prefix. For CSV input format, the prefix + must be a single character. --skip-comments Ignore commented lines (prefixed by `#`) within the input. --skip-comments-with {string} Ignore commented lines within input, with specified - prefix. + prefix. For CSV input format, the prefix must be a + single character. 1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files @@ -3736,4 +3738,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-14 4mMILLER24m(1) + 2025-08-15 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 57c1b31d2..13fe9a03b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2025-08-14 +.\" Date: 2025-08-15 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2025-08-14" "\ \&" "\ \&" +.TH "MILLER" "1" "2025-08-15" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -291,12 +291,14 @@ Notes: within the input. --pass-comments-with {string} Immediately print commented lines within input, with - specified prefix. + specified prefix. For CSV input format, the prefix + must be a single character. --skip-comments Ignore commented lines (prefixed by `#`) within the input. --skip-comments-with {string} Ignore commented lines within input, with specified - prefix. + prefix. For CSV input format, the prefix must be a + single character. 
.fi .if n \{\ .RE From d2925aafe56e879cd73c2af084c5249f57171124 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 15 Aug 2025 19:52:37 -0400 Subject: [PATCH 382/456] error-handling in the CSV-reader's constructor --- pkg/input/record_reader_csv.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index a154ac8ba..aa7dec084 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -39,6 +39,11 @@ func NewRecordReaderCSV( if len(readerOptions.IFS) != 1 { return nil, fmt.Errorf("for CSV, IFS can only be a single character") } + if readerOptions.CommentHandling != cli.CommentsAreData { + if len(readerOptions.CommentString) != 1 { + return nil, fmt.Errorf("for CSV, the comment prefix must be a single character") + } + } return &RecordReaderCSV{ readerOptions: readerOptions, ifs0: readerOptions.IFS[0], @@ -326,7 +331,7 @@ func (reader *RecordReaderCSV) maybeConsumeComment( // its Write method has pointer receiver. So we have a WorkaroundBuffer // struct below which has non-pointer receiver. - // Contract with our fork of the go-csv CSV Reader + // Contract with our fork of the go-csv CSV Reader, and, our own constructor. 
lib.InternalCodingErrorIf(len(csvRecord) != 1) recordsAndContexts.PushBack(types.NewOutputString(csvRecord[0], context)) From 3ad00b5686519471e56c88d36d562cef22c78a46 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 15 Aug 2025 19:54:36 -0400 Subject: [PATCH 383/456] unit-test coverage for error-handling --- test/cases/io-skip-pass-comments/pr-1787-d/cmd | 1 + test/cases/io-skip-pass-comments/pr-1787-d/experr | 1 + test/cases/io-skip-pass-comments/pr-1787-d/expout | 0 test/cases/io-skip-pass-comments/pr-1787-d/should-fail | 0 4 files changed, 2 insertions(+) create mode 100644 test/cases/io-skip-pass-comments/pr-1787-d/cmd create mode 100644 test/cases/io-skip-pass-comments/pr-1787-d/experr create mode 100644 test/cases/io-skip-pass-comments/pr-1787-d/expout create mode 100644 test/cases/io-skip-pass-comments/pr-1787-d/should-fail diff --git a/test/cases/io-skip-pass-comments/pr-1787-d/cmd b/test/cases/io-skip-pass-comments/pr-1787-d/cmd new file mode 100644 index 000000000..9db12e96e --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-d/cmd @@ -0,0 +1 @@ +mlr --csv --skip-comments-with '##' cat test/input/pr-1787.csv diff --git a/test/cases/io-skip-pass-comments/pr-1787-d/experr b/test/cases/io-skip-pass-comments/pr-1787-d/experr new file mode 100644 index 000000000..f8b7d1e1a --- /dev/null +++ b/test/cases/io-skip-pass-comments/pr-1787-d/experr @@ -0,0 +1 @@ +mlr: for CSV, the comment prefix must be a single character. 
diff --git a/test/cases/io-skip-pass-comments/pr-1787-d/expout b/test/cases/io-skip-pass-comments/pr-1787-d/expout new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-skip-pass-comments/pr-1787-d/should-fail b/test/cases/io-skip-pass-comments/pr-1787-d/should-fail new file mode 100644 index 000000000..e69de29bb From 3c2d4b22d25edab62f3fc6eeed84e7da0a7b5f63 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 15 Aug 2025 19:55:46 -0400 Subject: [PATCH 384/456] Miller 6.15.0-dev (#1862) * 6.15.0-dev * make dev --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- pkg/version/version.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 96a1e292a..e031ab096 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.15.0. + manpage documents mlr 6.15.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index d63c071e4..f0fcee8e3 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.15.0. + manpage documents mlr 6.15.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/manpage.txt b/man/manpage.txt index d63c071e4..f0fcee8e3 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. 
This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.15.0. + manpage documents mlr 6.15.0-dev. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv diff --git a/man/mlr.1 b/man/mlr.1 index 13fe9a03b..37d5c2a2b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as -a special case.) This manpage documents mlr 6.15.0. +a special case.) This manpage documents mlr 6.15.0-dev. .SH "EXAMPLES" .sp diff --git a/pkg/version/version.go b/pkg/version/version.go index 143c21e17..98dd71c33 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. 
-var STRING string = "6.15.0" +var STRING string = "6.15.0-dev" From db11c17e54887ba7157529f89bdcebf88524f0dc Mon Sep 17 00:00:00 2001 From: kz6fittycent Date: Fri, 15 Aug 2025 19:30:56 -0500 Subject: [PATCH 385/456] initial snap commit (#1864) --- .github/workflows/test-snap-can-build.yml | 28 +++++++++++++ README.md | 7 ++-- snap/snapcraft.yaml | 50 +++++++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test-snap-can-build.yml create mode 100644 snap/snapcraft.yaml diff --git a/.github/workflows/test-snap-can-build.yml b/.github/workflows/test-snap-can-build.yml new file mode 100644 index 000000000..3796e95ed --- /dev/null +++ b/.github/workflows/test-snap-can-build.yml @@ -0,0 +1,28 @@ +name: 🧪 Snap Builds + +on: + push: + branches: '*' + pull_request: + branches: '*' + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [20.x] + + steps: + - uses: actions/checkout@v4 + + - uses: snapcore/action-build@v1 + id: build + + - uses: diddlesnaps/snapcraft-review-action@v1 + with: + snap: ${{ steps.build.outputs.snap }} + isClassic: 'false' + # Plugs and Slots declarations to override default denial (requires store assertion to publish) + # plugs: ./plug-declaration.json + # slots: ./slot-declaration.json diff --git a/README.md b/README.md index b9c7a7cab..d8f4490d1 100644 --- a/README.md +++ b/README.md @@ -46,9 +46,7 @@ key-value-pair data in a variety of data formats.
* [Active issues](https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc) # Installing - There's a good chance you can get Miller pre-built for your system: - [![Ubuntu](https://img.shields.io/badge/distros-ubuntu-db4923.svg)](https://launchpad.net/ubuntu/+source/miller) [![Ubuntu 16.04 LTS](https://img.shields.io/badge/distros-ubuntu1604lts-db4923.svg)](https://launchpad.net/ubuntu/xenial/+package/miller) [![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://packages.fedoraproject.org/pkgs/miller/miller/) @@ -62,6 +60,9 @@ There's a good chance you can get Miller pre-built for your system: [![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/) [![Anaconda](https://img.shields.io/badge/distros-anaconda-63ad41.svg)](https://anaconda.org/conda-forge/miller/) + +[![Snap](https://snapcraft.io/)](https://snapcraft.io/) + [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller) [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller) [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller) @@ -69,7 +70,7 @@ There's a good chance you can get Miller pre-built for your system: |OS|Installation command| |---|---| -|Linux|`yum install miller`
`apt-get install miller`| +|Linux|`yum install miller`
`apt-get install miller`
`snap install miller`| |Mac|`brew install miller`
`port install miller`| |Windows|`choco install miller`
`winget install Miller.Miller`
`scoop install main/miller`| diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml new file mode 100644 index 000000000..c88b8adc1 --- /dev/null +++ b/snap/snapcraft.yaml @@ -0,0 +1,50 @@ +name: miller +adopt-info: miller +summary: Miller is like awk, sed, cut, join and sort +description: | + Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, JSON, JSON Lines, and positionally-indexed. + +contact: https://github.com/johnkerl/miller/issues +issues: https://github.com/johnkerl/miller/issues +source-code: https://github.com/johnkerl/miller + +license: BSD-2-Clause +base: core24 +grade: stable +confinement: strict +compression: lzo + +platforms: + amd64: + build-on: [amd64] + build-for: [amd64] + arm64: + build-on: [arm64] + build-for: [arm64] + armhf: + build-on: [armhf] + build-for: [armhf] + s390x: + build-on: [s390x] + build-for: [s390x] + ppc64el: + build-on: [ppc64el] + build-for: [ppc64el] + +apps: + miller: + command: usr/local/bin/mlr + plugs: + - home + +parts: + miller: + source: https://github.com/johnkerl/miller + source-type: git + plugin: make + build-snaps: + - go + + override-pull: | + craftctl default + craftctl set version="$(git describe --tags | sed 's/^v//' | cut -d "-" -f1)" From 6509ed45869a28033c8d035c649cbc76316b4b84 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 09:53:09 -0700 Subject: [PATCH 386/456] Bump actions/checkout from 4 to 5 (#1866) Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. - [Release notes](https://github.com/actions/checkout/releases) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/test-snap-can-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-snap-can-build.yml b/.github/workflows/test-snap-can-build.yml index 3796e95ed..c139c9b49 100644 --- a/.github/workflows/test-snap-can-build.yml +++ b/.github/workflows/test-snap-can-build.yml @@ -14,7 +14,7 @@ jobs: node-version: [20.x] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: snapcore/action-build@v1 id: build From 6266a869eb9ea640947b1e1213b7e9784a0ae8f5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 09:55:58 -0700 Subject: [PATCH 387/456] Bump github/codeql-action from 3.29.9 to 3.29.10 (#1867) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.9 to 3.29.10. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/df559355d593797519d70b90fc8edd5db049e7a2...96f518a34f7a870018057716cc4d7a5c014bd61c) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.10 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 6341b76bd..2449cb727 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@df559355d593797519d70b90fc8edd5db049e7a2 + uses: github/codeql-action/init@96f518a34f7a870018057716cc4d7a5c014bd61c with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@df559355d593797519d70b90fc8edd5db049e7a2 + uses: github/codeql-action/autobuild@96f518a34f7a870018057716cc4d7a5c014bd61c # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@df559355d593797519d70b90fc8edd5db049e7a2 + uses: github/codeql-action/analyze@96f518a34f7a870018057716cc4d7a5c014bd61c From 120e977c1e84c30c2ba1bd8a59a423790785c583 Mon Sep 17 00:00:00 2001 From: Andrea Borruso Date: Wed, 20 Aug 2025 19:11:34 +0200 Subject: [PATCH 388/456] Update subs.go (#1868) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If I read “Convert all field names,” I think the verb acts on the field names. 
I think it would be better to write “Convert all fields.” --- pkg/transformers/subs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/transformers/subs.go b/pkg/transformers/subs.go index 59a8e92de..b5530bb17 100644 --- a/pkg/transformers/subs.go +++ b/pkg/transformers/subs.go @@ -50,7 +50,7 @@ func transformerSubUsage( fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") - fmt.Fprintf(o, "-a Convert all field names.\n") + fmt.Fprintf(o, "-a Convert all fields.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -64,7 +64,7 @@ func transformerGsubUsage( fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") - fmt.Fprintf(o, "-a Convert all field names.\n") + fmt.Fprintf(o, "-a Convert all fields.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -77,7 +77,7 @@ func transformerSsubUsage( fmt.Fprintf(o, "Options:\n") fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") fmt.Fprintf(o, "-r {regex} Regular expression for field names to convert.\n") - fmt.Fprintf(o, "-a Convert all field names.\n") + fmt.Fprintf(o, "-a Convert all fields.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } From d0f824aefe83b0f14750bc588eec42629e4a18a0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 20 Aug 2025 13:21:51 -0400 Subject: [PATCH 389/456] Run `make dev` after merge of PR 1868 (#1869) --- docs/src/manpage.md | 8 ++++---- docs/src/manpage.txt | 8 ++++---- docs/src/reference-verbs.md | 6 +++--- man/manpage.txt | 8 ++++---- man/mlr.1 | 10 +++++----- test/cases/cli-help/0001/expout | 6 +++--- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index e031ab096..14cc7ce9c 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -1288,7 +1288,7 @@ This is 
simply a copy of what you should see on running `man mlr` at a command p Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mhaving-fields0m @@ -1926,7 +1926,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mstats10m @@ -2075,7 +2075,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1msummary0m @@ -3759,5 +3759,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-15 4mMILLER24m(1) + 2025-08-20 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index f0fcee8e3..4e12b811c 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1267,7 +1267,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mhaving-fields0m @@ -1905,7 +1905,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mstats10m @@ -2054,7 +2054,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1msummary0m @@ -3738,4 +3738,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-15 4mMILLER24m(1) + 2025-08-20 4mMILLER24m(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 3cf5cc8d6..f33e8b488 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1465,7 +1465,7 @@ See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message.
@@ -3214,7 +3214,7 @@ the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verb Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message.
@@ -3720,7 +3720,7 @@ See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message.
diff --git a/man/manpage.txt b/man/manpage.txt index f0fcee8e3..4e12b811c 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -1267,7 +1267,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mhaving-fields0m @@ -1905,7 +1905,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1mstats10m @@ -2054,7 +2054,7 @@ Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. - -a Convert all field names. + -a Convert all fields. -h|--help Show this message. 1msummary0m @@ -3738,4 +3738,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-15 4mMILLER24m(1) + 2025-08-20 4mMILLER24m(1) diff --git a/man/mlr.1 b/man/mlr.1 index 37d5c2a2b..24a9ea61a 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2025-08-15 +.\" Date: 2025-08-20 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2025-08-15" "\ \&" "\ \&" +.TH "MILLER" "1" "2025-08-20" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1586,7 +1586,7 @@ See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message. .fi .if n \{\ @@ -2410,7 +2410,7 @@ the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verb Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. 
--a Convert all field names. +-a Convert all fields. -h|--help Show this message. .fi .if n \{\ @@ -2583,7 +2583,7 @@ See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message. .fi .if n \{\ diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index c211e2cd9..19a201c62 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -393,7 +393,7 @@ See also the `sub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message. ================================================================ @@ -1062,7 +1062,7 @@ the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verb Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message. ================================================================ @@ -1215,7 +1215,7 @@ See also the `gsub` and `ssub` verbs. Options: -f {a,b,c} Field names to convert. -r {regex} Regular expression for field names to convert. --a Convert all field names. +-a Convert all fields. -h|--help Show this message. ================================================================ From 43f6fa9ea6d392389455a5b8c967f01dc9504d3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:41:54 -0400 Subject: [PATCH 390/456] Bump github/codeql-action from 3.29.10 to 3.29.11 (#1873) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.10 to 3.29.11. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/96f518a34f7a870018057716cc4d7a5c014bd61c...3c3833e0f8c1c83d449a7478aa59c036a9165498) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.29.11 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2449cb727..8b93e7fe3 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@96f518a34f7a870018057716cc4d7a5c014bd61c + uses: github/codeql-action/init@3c3833e0f8c1c83d449a7478aa59c036a9165498 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@96f518a34f7a870018057716cc4d7a5c014bd61c + uses: github/codeql-action/autobuild@3c3833e0f8c1c83d449a7478aa59c036a9165498 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@96f518a34f7a870018057716cc4d7a5c014bd61c + uses: github/codeql-action/analyze@3c3833e0f8c1c83d449a7478aa59c036a9165498 From 2f38933a879b0284b03b3c3e6dd69c2a65410637 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Aug 2025 18:26:46 -0400 Subject: [PATCH 391/456] Bump github.com/stretchr/testify from 1.10.0 to 1.11.0 (#1874) Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.10.0 to 1.11.0. - [Release notes](https://github.com/stretchr/testify/releases) - [Commits](https://github.com/stretchr/testify/compare/v1.10.0...v1.11.0) --- updated-dependencies: - dependency-name: github.com/stretchr/testify dependency-version: 1.11.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index f86fe979d..145473158 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.0 golang.org/x/sys v0.35.0 golang.org/x/term v0.34.0 golang.org/x/text v0.28.0 diff --git a/go.sum b/go.sum index 7f93593b1..5cbbe6019 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.0 h1:ib4sjIrwZKxE5u/Japgo/7SJV3PvgjGiRNAvTVGqQl8= +github.com/stretchr/testify v1.11.0/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= From 74e8e3cef64bbe3bc88c4c73e06c0cdac259de83 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 09:04:25 -0400 Subject: [PATCH 392/456] Bump github.com/stretchr/testify from 1.11.0 to 1.11.1 (#1875) Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.11.0 to 1.11.1. - [Release notes](https://github.com/stretchr/testify/releases) - [Commits](https://github.com/stretchr/testify/compare/v1.11.0...v1.11.1) --- updated-dependencies: - dependency-name: github.com/stretchr/testify dependency-version: 1.11.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 145473158..1d7e689a8 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/mattn/go-isatty v0.0.20 github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 - github.com/stretchr/testify v1.11.0 + github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.35.0 golang.org/x/term v0.34.0 golang.org/x/text v0.28.0 diff --git a/go.sum b/go.sum index 5cbbe6019..8be1dad25 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.11.0 h1:ib4sjIrwZKxE5u/Japgo/7SJV3PvgjGiRNAvTVGqQl8= -github.com/stretchr/testify v1.11.0/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= From 2f3b6d38f95b652a5a54d78cb3cc8466588113d7 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 29 Aug 2025 15:02:26 +0200 Subject: [PATCH 393/456] Allow any Go 1.24 version (#1876) Miller doesn't require 1.24.5 specifically, reduce the language level to 1.24.0. This allows building with any 1.24 toolchain. 
Signed-off-by: Stephen Kitt --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1d7e689a8..b6233ad5a 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ module github.com/johnkerl/miller/v6 // Local development: // replace github.com/johnkerl/lumin => /Users/kerl/git/johnkerl/lumin -go 1.24.5 +go 1.24.0 require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb From 05429ee3ba010f284e7c96f09d775e7cda8e7a0a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Sep 2025 20:15:19 -0400 Subject: [PATCH 394/456] Bump github/codeql-action from 3.29.11 to 3.30.0 (#1878) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.29.11 to 3.30.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/3c3833e0f8c1c83d449a7478aa59c036a9165498...2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 8b93e7fe3..2ee3a6c62 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@3c3833e0f8c1c83d449a7478aa59c036a9165498 + uses: github/codeql-action/init@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@3c3833e0f8c1c83d449a7478aa59c036a9165498 + uses: github/codeql-action/autobuild@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@3c3833e0f8c1c83d449a7478aa59c036a9165498 + uses: github/codeql-action/analyze@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d From 3b9f169162dc0f267962f546b636d5c31538a5d1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 2 Sep 2025 16:47:19 -0400 Subject: [PATCH 395/456] Support `-o jsonl` as well as `--ojsonl` (#1879) * `mlr sort -b` feature * mlr regtest -p test/cases/cli-help && make dev * Support `-o jsonl` as well as `--ojsonl` --- pkg/cli/option_parse.go | 8 ++------ pkg/output/record_writer_factory.go | 2 ++ ...d_writer_json.go => record_writer_json_jsonl.go} | 13 +++++++++++++ test/cases/io-jsonl-io/0004/cmd | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) rename pkg/output/{record_writer_json.go => record_writer_json_jsonl.go} (87%) diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 912d689f1..0070b60c8 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -979,9 +979,7 @@ var FileFormatFlagSection = FlagSection{ name: "--ojsonl", help: "Use JSON Lines format for output data.", parser: func(args []string, argc int, pargi *int, options *TOptions) { - 
options.WriterOptions.OutputFileFormat = "json" - options.WriterOptions.WrapJSONOutputInOuterList = false - options.WriterOptions.JSONOutputMultiline = false + options.WriterOptions.OutputFileFormat = "jsonl" *pargi += 1 }, }, @@ -1148,9 +1146,7 @@ var FileFormatFlagSection = FlagSection{ altNames: []string{"--l2l"}, parser: func(args []string, argc int, pargi *int, options *TOptions) { options.ReaderOptions.InputFileFormat = "json" - options.WriterOptions.OutputFileFormat = "json" - options.WriterOptions.WrapJSONOutputInOuterList = false - options.WriterOptions.JSONOutputMultiline = false + options.WriterOptions.OutputFileFormat = "jsonl" *pargi += 1 }, }, diff --git a/pkg/output/record_writer_factory.go b/pkg/output/record_writer_factory.go index 84ff64cfe..bb6aba5fa 100644 --- a/pkg/output/record_writer_factory.go +++ b/pkg/output/record_writer_factory.go @@ -16,6 +16,8 @@ func Create(writerOptions *cli.TWriterOptions) (IRecordWriter, error) { return NewRecordWriterDKVP(writerOptions) case "json": return NewRecordWriterJSON(writerOptions) + case "jsonl": + return NewRecordWriterJSONLines(writerOptions) case "md": return NewRecordWriterMarkdown(writerOptions) case "markdown": diff --git a/pkg/output/record_writer_json.go b/pkg/output/record_writer_json_jsonl.go similarity index 87% rename from pkg/output/record_writer_json.go rename to pkg/output/record_writer_json_jsonl.go index d0be01461..8c43d43ff 100644 --- a/pkg/output/record_writer_json.go +++ b/pkg/output/record_writer_json_jsonl.go @@ -35,6 +35,19 @@ func NewRecordWriterJSON(writerOptions *cli.TWriterOptions) (*RecordWriterJSON, }, nil } +// ---------------------------------------------------------------- +func NewRecordWriterJSONLines(writerOptions *cli.TWriterOptions) (*RecordWriterJSON, error) { + wopt := *writerOptions + wopt.WrapJSONOutputInOuterList = false + wopt.JSONOutputMultiline = false + return &RecordWriterJSON{ + writerOptions: &wopt, + jsonFormatting: mlrval.JSON_SINGLE_LINE, + jvQuoteAll: 
writerOptions.JVQuoteAll, + wroteAnyRecords: false, + }, nil +} + // ---------------------------------------------------------------- func (writer *RecordWriterJSON) Write( outrec *mlrval.Mlrmap, diff --git a/test/cases/io-jsonl-io/0004/cmd b/test/cases/io-jsonl-io/0004/cmd index 380bba0ca..8aa87f37e 100644 --- a/test/cases/io-jsonl-io/0004/cmd +++ b/test/cases/io-jsonl-io/0004/cmd @@ -1 +1 @@ -mlr --ojsonl cat test/input/json-output-options.dkvp +mlr -o jsonl cat test/input/json-output-options.dkvp From d87bd9f7d3d7a57cb8281224905323c6c189254d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 10:31:45 -0400 Subject: [PATCH 396/456] Bump actions/setup-go from 5.5.0 to 6.0.0 (#1880) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.5.0 to 6.0.0. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/d35c59abb061a4a6fb18e82ac0862c26744d6ab5...44694675825211faa026b3c33043df3e48a5fa00) --- updated-dependencies: - dependency-name: actions/setup-go dependency-version: 6.0.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 7710a010d..a1d2d7f55 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - name: Set up Go - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 + uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 with: go-version: 1.24 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 85b887b20..09e3df2ca 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 + uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 with: go-version: ${{ env.GO_VERSION }} id: go From 46653f0a8ff101f10a42115207c213d44e1a6890 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:40:04 -0400 Subject: [PATCH 397/456] Bump golang.org/x/text from 0.28.0 to 0.29.0 (#1885) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.28.0 to 0.29.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.28.0...v0.29.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.29.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index b6233ad5a..34d976e55 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.35.0 golang.org/x/term v0.34.0 - golang.org/x/text v0.28.0 + golang.org/x/text v0.29.0 ) require ( @@ -39,7 +39,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.35.0 // indirect + golang.org/x/tools v0.36.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 8be1dad25..7c3a3ea1c 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= 
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 26826a0b4b84043413e071f74106476d690f9f49 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:41:52 -0400 Subject: [PATCH 398/456] Bump github/codeql-action from 3.30.0 to 3.30.1 (#1882) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.0 to 3.30.1. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d...f1f6e5f6af878fb37288ce1c627459e94dbf7d01) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2ee3a6c62..f016a7342 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d + uses: github/codeql-action/init@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d + uses: github/codeql-action/autobuild@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@2d92b76c45b91eb80fc44c74ce3fce0ee94e8f9d + uses: github/codeql-action/analyze@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 From aec5c03093a3555b8e7f39e5f999b520bddb1d42 Mon Sep 17 00:00:00 2001 From: Rehan Daphedar Date: Mon, 8 Sep 2025 18:15:57 +0530 Subject: [PATCH 399/456] fix `go install` command (#1881) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d8f4490d1..ab988a4f8 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ See also [building from source](https://miller.readthedocs.io/en/latest/build.ht * Without `make`: * To build: `go build github.com/johnkerl/miller/v6/cmd/mlr`. * To run tests: `go test github.com/johnkerl/miller/v6/pkg/...` and `mlr regtest`. - * To install: `go install github.com/johnkerl/miller/v6/cmd/mlr` will install to _GOPATH_`/bin/mlr`. + * To install: `go install github.com/johnkerl/miller/v6/cmd/mlr@latest` will install to _GOPATH_`/bin/mlr`. * See also the doc page on [building from source](https://miller.readthedocs.io/en/latest/build). * For more developer information please see [README-dev.md](./README-dev.md). From 2d29beb204d303b5eb1cf2b6832638aff669d63b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:46:45 -0400 Subject: [PATCH 400/456] Bump golang.org/x/sys from 0.35.0 to 0.36.0 (#1884) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.35.0 to 0.36.0. 
- [Commits](https://github.com/golang/sys/compare/v0.35.0...v0.36.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.36.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 34d976e55..d1daecf09 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 - golang.org/x/sys v0.35.0 + golang.org/x/sys v0.36.0 golang.org/x/term v0.34.0 golang.org/x/text v0.29.0 ) diff --git a/go.sum b/go.sum index 7c3a3ea1c..bb2b19657 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= From 46a86503ea3c501306356bed78b7e8deaf5b3cc5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:58:40 -0400 Subject: [PATCH 
401/456] Bump golang.org/x/term from 0.34.0 to 0.35.0 (#1883) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.34.0 to 0.35.0. - [Commits](https://github.com/golang/term/compare/v0.34.0...v0.35.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.35.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d1daecf09..6d5f0fc74 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.36.0 - golang.org/x/term v0.34.0 + golang.org/x/term v0.35.0 golang.org/x/text v0.29.0 ) diff --git a/go.sum b/go.sum index bb2b19657..77ff41556 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= -golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= From fbe1143e8a75af0994031364aa618c2e15ffaab6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 
Sep 2025 08:31:04 -0400 Subject: [PATCH 402/456] Bump github/codeql-action from 3.30.1 to 3.30.2 (#1886) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.1 to 3.30.2. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f1f6e5f6af878fb37288ce1c627459e94dbf7d01...d3678e237b9c32a6c9bffb3315c335f976f3549f) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f016a7342..25bb74c97 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 + uses: github/codeql-action/init@d3678e237b9c32a6c9bffb3315c335f976f3549f with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 + uses: github/codeql-action/autobuild@d3678e237b9c32a6c9bffb3315c335f976f3549f # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f1f6e5f6af878fb37288ce1c627459e94dbf7d01 + uses: github/codeql-action/analyze@d3678e237b9c32a6c9bffb3315c335f976f3549f From 14e0229c34f168b9f63b884c608c4115fb9aed02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:21:57 -0400 Subject: [PATCH 403/456] Bump github/codeql-action from 3.30.2 to 3.30.3 (#1887) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.2 to 3.30.3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/d3678e237b9c32a6c9bffb3315c335f976f3549f...192325c86100d080feab897ff886c34abd4c83a3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 25bb74c97..4110ffa6d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@d3678e237b9c32a6c9bffb3315c335f976f3549f + uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@d3678e237b9c32a6c9bffb3315c335f976f3549f + uses: github/codeql-action/autobuild@192325c86100d080feab897ff886c34abd4c83a3 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@d3678e237b9c32a6c9bffb3315c335f976f3549f + uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 From 5c5281fe28f3f5cf8f76208f9d3e6b4ef16aa09e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 09:40:06 -0400 Subject: [PATCH 404/456] Bump github/codeql-action from 3.30.3 to 3.30.4 (#1890) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.3 to 3.30.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/192325c86100d080feab897ff886c34abd4c83a3...303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 4110ffa6d..ad1a4f722 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 + uses: github/codeql-action/init@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@192325c86100d080feab897ff886c34abd4c83a3 + uses: github/codeql-action/autobuild@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 + uses: github/codeql-action/analyze@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 From f350581175ec21872c54799d0ccea29c36855157 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 09:41:02 -0400 Subject: [PATCH 405/456] Bump actions/cache from 4.2.4 to 4.3.0 (#1889) Bumps [actions/cache](https://github.com/actions/cache) from 4.2.4 to 4.3.0. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/0400d5f644dc74513175e3cd8d07132dd4860809...0057852bfaa89a56745cba8c7296529d2fc39830) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 4.3.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 09e3df2ca..92604cf1f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 with: path: | ~/.cache/go-build From eac178575672abc1090b5f3a651f46f321ffa2ed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Sep 2025 10:57:13 -0400 Subject: [PATCH 406/456] Bump github/codeql-action from 3.30.4 to 3.30.5 (#1891) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.4 to 3.30.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9...3599b3baa15b485a2e49ef411a7a4bb2452e7f93) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.5 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ad1a4f722..732e05fb6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 + uses: github/codeql-action/init@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 + uses: github/codeql-action/autobuild@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 + uses: github/codeql-action/analyze@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 From f485bc07a58c17ddec569700f19543bb282cb7e4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 09:24:44 -0400 Subject: [PATCH 407/456] Bump github/codeql-action from 3.30.5 to 3.30.6 (#1892) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.5 to 3.30.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/3599b3baa15b485a2e49ef411a7a4bb2452e7f93...64d10c13136e1c5bce3e5fbde8d4906eeaafc885) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 3.30.6 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 732e05fb6..c5bb6f4ca 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 + uses: github/codeql-action/init@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 + uses: github/codeql-action/autobuild@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 + uses: github/codeql-action/analyze@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 From f5226e87fed8191cb188d8a8d5611125f0b92554 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:08:08 +0200 Subject: [PATCH 408/456] Bump github/codeql-action from 3.30.6 to 4.30.7 (#1893) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.30.6 to 4.30.7. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/64d10c13136e1c5bce3e5fbde8d4906eeaafc885...e296a935590eb16afc0c0108289f68c87e2a89a5) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.30.7 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c5bb6f4ca..0bc159be4 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 + uses: github/codeql-action/init@e296a935590eb16afc0c0108289f68c87e2a89a5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 + uses: github/codeql-action/autobuild@e296a935590eb16afc0c0108289f68c87e2a89a5 # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@64d10c13136e1c5bce3e5fbde8d4906eeaafc885 + uses: github/codeql-action/analyze@e296a935590eb16afc0c0108289f68c87e2a89a5 From 8f882b2f7592a3247722fec9d87e65b054071e15 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:08:43 +0200 Subject: [PATCH 409/456] Bump golang.org/x/sys from 0.36.0 to 0.37.0 (#1896) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.36.0 to 0.37.0. - [Commits](https://github.com/golang/sys/compare/v0.36.0...v0.37.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.37.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6d5f0fc74..e4f60a51c 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 - golang.org/x/sys v0.36.0 + golang.org/x/sys v0.37.0 golang.org/x/term v0.35.0 golang.org/x/text v0.29.0 ) diff --git a/go.sum b/go.sum index 77ff41556..8af6afc20 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod 
h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= From 1557e47ae119640453ed96329c65c3bbe1a21ee8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:14:58 +0200 Subject: [PATCH 410/456] Bump golang.org/x/text from 0.29.0 to 0.30.0 (#1894) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.29.0 to 0.30.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.29.0...v0.30.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.30.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index e4f60a51c..b8c1ad3a0 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.37.0 golang.org/x/term v0.35.0 - golang.org/x/text v0.29.0 + golang.org/x/text v0.30.0 ) require ( @@ -39,7 +39,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.36.0 // indirect + golang.org/x/tools v0.37.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 8af6afc20..34f601e42 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= 
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From e71b36d8c119d9ceb4d87740d6c108ae16502bfb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:22:41 +0200 Subject: [PATCH 411/456] Bump golang.org/x/term from 0.35.0 to 0.36.0 (#1895) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.35.0 to 0.36.0. - [Commits](https://github.com/golang/term/compare/v0.35.0...v0.36.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.36.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b8c1ad3a0..f14b8f35a 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.37.0 - golang.org/x/term v0.35.0 + golang.org/x/term v0.36.0 golang.org/x/text v0.30.0 ) diff --git a/go.sum b/go.sum index 34f601e42..fb08be032 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= 
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= From 74f4901d05002f96eb6b0900693007dcce231702 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 09:38:05 -0400 Subject: [PATCH 412/456] Bump github/codeql-action from 4.30.7 to 4.30.8 (#1897) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.30.7 to 4.30.8. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/e296a935590eb16afc0c0108289f68c87e2a89a5...f443b600d91635bebf5b0d9ebc620189c0d6fba5) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.30.8 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 0bc159be4..772ad77a0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@e296a935590eb16afc0c0108289f68c87e2a89a5 + uses: github/codeql-action/init@f443b600d91635bebf5b0d9ebc620189c0d6fba5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@e296a935590eb16afc0c0108289f68c87e2a89a5 + uses: github/codeql-action/autobuild@f443b600d91635bebf5b0d9ebc620189c0d6fba5 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@e296a935590eb16afc0c0108289f68c87e2a89a5 + uses: github/codeql-action/analyze@f443b600d91635bebf5b0d9ebc620189c0d6fba5 From 3e374f8861a18963c2cf393137c3c435a92057c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:59:32 -0400 Subject: [PATCH 413/456] Bump github/codeql-action from 4.30.8 to 4.30.9 (#1900) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.30.8 to 4.30.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/f443b600d91635bebf5b0d9ebc620189c0d6fba5...16140ae1a102900babc80a33c44059580f687047) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.30.9 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 772ad77a0..3d151dd3f 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@f443b600d91635bebf5b0d9ebc620189c0d6fba5 + uses: github/codeql-action/init@16140ae1a102900babc80a33c44059580f687047 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@f443b600d91635bebf5b0d9ebc620189c0d6fba5 + uses: github/codeql-action/autobuild@16140ae1a102900babc80a33c44059580f687047 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f443b600d91635bebf5b0d9ebc620189c0d6fba5 + uses: github/codeql-action/analyze@16140ae1a102900babc80a33c44059580f687047 From 6100f21785662e99ff7d05e0dc2eadcba0daffea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Oct 2025 09:01:46 -0400 Subject: [PATCH 414/456] Bump github.com/klauspost/compress from 1.18.0 to 1.18.1 (#1899) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.18.0 to 1.18.1. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.18.0...v1.18.1) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-version: 1.18.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index f14b8f35a..b88b5da66 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.18.0 + github.com/klauspost/compress v1.18.1 github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 github.com/lestrrat-go/strftime v1.1.1 github.com/mattn/go-isatty v0.0.20 diff --git a/go.sum b/go.sum index fb08be032..2f8dcd6f1 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= -github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= +github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48= github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU= github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY= From aea74327ff2712e31b8d85aa65f08af79bae94e7 Mon Sep 17 00:00:00 2001 From: 
"dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 09:36:08 -0400 Subject: [PATCH 415/456] Bump actions/upload-artifact from 4.6.2 to 5.0.0 (#1901) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.6.2 to 5.0.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/ea165f8d65b6e75b540449e92b4886f43607fa02...330a01c490aca151604b8cf639adc76d48f6c5d4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-version: 5.0.0 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a1d2d7f55..f211d0710 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 9149fd0d3409854e8ebfe8e2d8b103880fa90093 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 09:37:21 -0400 Subject: [PATCH 416/456] Bump github/codeql-action from 4.30.9 to 4.31.0 (#1902) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.30.9 to 4.31.0. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/16140ae1a102900babc80a33c44059580f687047...4e94bd11f71e507f7f87df81788dff88d1dacbfb) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 3d151dd3f..8f28f84d1 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@16140ae1a102900babc80a33c44059580f687047 + uses: github/codeql-action/init@4e94bd11f71e507f7f87df81788dff88d1dacbfb with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@16140ae1a102900babc80a33c44059580f687047 + uses: github/codeql-action/autobuild@4e94bd11f71e507f7f87df81788dff88d1dacbfb # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@16140ae1a102900babc80a33c44059580f687047 + uses: github/codeql-action/analyze@4e94bd11f71e507f7f87df81788dff88d1dacbfb From bc9c718cf9bb7f2286d27a98156699b1bedf9347 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 09:24:19 -0400 Subject: [PATCH 417/456] Bump github/codeql-action from 4.31.0 to 4.31.1 (#1903) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.0 to 4.31.1. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/4e94bd11f71e507f7f87df81788dff88d1dacbfb...5fe9434cd24fe243e33e7f3305f8a5b519b70280) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 8f28f84d1..7d7a068dc 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4e94bd11f71e507f7f87df81788dff88d1dacbfb + uses: github/codeql-action/init@5fe9434cd24fe243e33e7f3305f8a5b519b70280 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4e94bd11f71e507f7f87df81788dff88d1dacbfb + uses: github/codeql-action/autobuild@5fe9434cd24fe243e33e7f3305f8a5b519b70280 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4e94bd11f71e507f7f87df81788dff88d1dacbfb + uses: github/codeql-action/analyze@5fe9434cd24fe243e33e7f3305f8a5b519b70280 From 2a78d165aebbbd573371b4d02d76978bdc24cee6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 10:28:52 -0400 Subject: [PATCH 418/456] Bump github/codeql-action from 4.31.1 to 4.31.2 (#1904) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.1 to 4.31.2. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/5fe9434cd24fe243e33e7f3305f8a5b519b70280...0499de31b99561a6d14a36a5f662c2a54f91beee) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7d7a068dc..e43173751 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@5fe9434cd24fe243e33e7f3305f8a5b519b70280 + uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@5fe9434cd24fe243e33e7f3305f8a5b519b70280 + uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@5fe9434cd24fe243e33e7f3305f8a5b519b70280 + uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee From df8e979b66295261c56a45ff86ba9f48171298ee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 09:36:06 -0500 Subject: [PATCH 419/456] Bump codespell-project/actions-codespell from 2.1 to 2.2 (#1906) Bumps [codespell-project/actions-codespell](https://github.com/codespell-project/actions-codespell) from 2.1 to 2.2. - [Release notes](https://github.com/codespell-project/actions-codespell/releases) - [Commits](https://github.com/codespell-project/actions-codespell/compare/406322ec52dd7b488e48c1c4b82e2a8b3a1bf630...8f01853be192eb0f849a5c7d721450e7a467c579) --- updated-dependencies: - dependency-name: codespell-project/actions-codespell dependency-version: '2.2' dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codespell.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 0f641b76c..1f0d7a2cd 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -29,7 +29,7 @@ jobs: # Run linter against code base # https://github.com/codespell-project/codespell - name: Codespell - uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630 + uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 with: check_filenames: true ignore_words_file: .codespellignore From 6351f51eeb7e33ee5ab4e52a412c8770fe7a371f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 10:20:11 -0500 Subject: [PATCH 420/456] Bump golang.org/x/sys from 0.37.0 to 0.38.0 (#1907) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.37.0 to 0.38.0. - [Commits](https://github.com/golang/sys/compare/v0.37.0...v0.38.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.38.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b88b5da66..74020aeb4 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 - golang.org/x/sys v0.37.0 + golang.org/x/sys v0.38.0 golang.org/x/term v0.36.0 golang.org/x/text v0.30.0 ) diff --git a/go.sum b/go.sum index 2f8dcd6f1..ba8f0aa16 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= -golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= From a66e45539dfb899d9828d7600d595af04c8d2719 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 08:57:25 -0500 Subject: [PATCH 421/456] Bump golang.org/x/text from 0.30.0 to 0.31.0 (#1908) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.30.0 to 0.31.0. 
- [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.30.0...v0.31.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.31.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 74020aeb4..94ffc2fc9 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.38.0 golang.org/x/term v0.36.0 - golang.org/x/text v0.30.0 + golang.org/x/text v0.31.0 ) require ( @@ -39,7 +39,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/tools v0.38.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index ba8f0aa16..a7a443e22 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= -golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= -golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= -golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod 
h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From e5218ed8e73783c38479d58715c4fee598860c33 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 09:43:26 -0500 Subject: [PATCH 422/456] Bump github/codeql-action from 4.31.2 to 4.31.3 (#1910) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.2 to 4.31.3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/0499de31b99561a6d14a36a5f662c2a54f91beee...014f16e7ab1402f30e7c3329d33797e7948572db) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index e43173751..8804f421e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee + uses: github/codeql-action/init@014f16e7ab1402f30e7c3329d33797e7948572db with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee + uses: github/codeql-action/autobuild@014f16e7ab1402f30e7c3329d33797e7948572db # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee + uses: github/codeql-action/analyze@014f16e7ab1402f30e7c3329d33797e7948572db From 2aa664bfeaa03469d2c6c88aece2e3d4dd3f9010 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:17:48 -0500 Subject: [PATCH 423/456] Bump github/codeql-action from 4.31.3 to 4.31.4 (#1911) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.3 to 4.31.4. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/014f16e7ab1402f30e7c3329d33797e7948572db...e12f0178983d466f2f6028f5cc7a6d786fd97f4b) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.4 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 8804f421e..ae91b9f75 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@014f16e7ab1402f30e7c3329d33797e7948572db + uses: github/codeql-action/init@e12f0178983d466f2f6028f5cc7a6d786fd97f4b with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@014f16e7ab1402f30e7c3329d33797e7948572db + uses: github/codeql-action/autobuild@e12f0178983d466f2f6028f5cc7a6d786fd97f4b # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@014f16e7ab1402f30e7c3329d33797e7948572db + uses: github/codeql-action/analyze@e12f0178983d466f2f6028f5cc7a6d786fd97f4b From efb7b55da50264b73637378e66349c778359eddd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 09:00:05 -0500 Subject: [PATCH 424/456] Bump actions/setup-go from 6.0.0 to 6.1.0 (#1912) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.0.0 to 6.1.0. 
- [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/44694675825211faa026b3c33043df3e48a5fa00...4dc6199c7b1a012772edbd06daecab0f50c9053c) --- updated-dependencies: - dependency-name: actions/setup-go dependency-version: 6.1.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index f211d0710..80722c7a4 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - name: Set up Go - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 + uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c with: go-version: 1.24 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 92604cf1f..bf05d6759 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - name: Set up Go - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 + uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c with: go-version: ${{ env.GO_VERSION }} id: go From 439c4a206111d350fdabefad56055561eeded262 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Nov 2025 08:51:40 -0500 Subject: [PATCH 425/456] Bump actions/checkout from 5 to 6 (#1913) Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6. 
- [Release notes](https://github.com/actions/checkout/releases) - [Commits](https://github.com/actions/checkout/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test-snap-can-build.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ae91b9f75..805791428 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -36,7 +36,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 1f0d7a2cd..990706a06 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,7 +21,7 @@ jobs: steps: # Check out the code base - name: Check out code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 80722c7a4..11450e9b4 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 - name: Set up Go uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bf05d6759..c6627f074 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: id: go - name: Check out code into the Go module directory - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 with: fetch-depth: 0 diff --git a/.github/workflows/test-snap-can-build.yml b/.github/workflows/test-snap-can-build.yml index c139c9b49..c6c197de9 100644 --- a/.github/workflows/test-snap-can-build.yml +++ b/.github/workflows/test-snap-can-build.yml @@ -14,7 +14,7 @@ jobs: node-version: [20.x] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: snapcore/action-build@v1 id: build From df74ffe40d651f9442d35335ae4d492b73241cf6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:59:00 -0500 Subject: [PATCH 
426/456] Bump github/codeql-action from 4.31.4 to 4.31.5 (#1915) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.4 to 4.31.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/e12f0178983d466f2f6028f5cc7a6d786fd97f4b...fdbfb4d2750291e159f0156def62b853c2798ca2) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.5 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 805791428..99d3a8cd9 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@e12f0178983d466f2f6028f5cc7a6d786fd97f4b + uses: github/codeql-action/init@fdbfb4d2750291e159f0156def62b853c2798ca2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@e12f0178983d466f2f6028f5cc7a6d786fd97f4b + uses: github/codeql-action/autobuild@fdbfb4d2750291e159f0156def62b853c2798ca2 # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@e12f0178983d466f2f6028f5cc7a6d786fd97f4b + uses: github/codeql-action/analyze@fdbfb4d2750291e159f0156def62b853c2798ca2 From 93be5051ff6027621a3dc00f3b243d87c7c6ca6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:18:20 -0500 Subject: [PATCH 427/456] Bump github.com/klauspost/compress from 1.18.1 to 1.18.2 (#1917) Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.18.1 to 1.18.2. - [Release notes](https://github.com/klauspost/compress/releases) - [Commits](https://github.com/klauspost/compress/compare/v1.18.1...v1.18.2) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-version: 1.18.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 94ffc2fc9..15900b532 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/johnkerl/lumin v1.0.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 - github.com/klauspost/compress v1.18.1 + github.com/klauspost/compress v1.18.2 github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 github.com/lestrrat-go/strftime v1.1.1 github.com/mattn/go-isatty v0.0.20 diff --git a/go.sum b/go.sum index a7a443e22..cf0fc7717 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 
h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= -github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= -github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= +github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= +github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48= github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU= github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY= From 2f46fec72da8725a580c5dacd0ba0fcb56ada510 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:29:17 -0500 Subject: [PATCH 428/456] Bump github/codeql-action from 4.31.5 to 4.31.6 (#1916) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.5 to 4.31.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/fdbfb4d2750291e159f0156def62b853c2798ca2...fe4161a26a8629af62121b670040955b330f9af2) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.6 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 99d3a8cd9..22f5df079 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@fdbfb4d2750291e159f0156def62b853c2798ca2 + uses: github/codeql-action/init@fe4161a26a8629af62121b670040955b330f9af2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@fdbfb4d2750291e159f0156def62b853c2798ca2 + uses: github/codeql-action/autobuild@fe4161a26a8629af62121b670040955b330f9af2 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@fdbfb4d2750291e159f0156def62b853c2798ca2 + uses: github/codeql-action/analyze@fe4161a26a8629af62121b670040955b330f9af2 From 155227cb4c38681fb893c9621d5f2c1066fa0ecf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 09:39:53 -0500 Subject: [PATCH 429/456] Bump github/codeql-action from 4.31.6 to 4.31.7 (#1918) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.6 to 4.31.7. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/fe4161a26a8629af62121b670040955b330f9af2...cf1bb45a277cb3c205638b2cd5c984db1c46a412) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.7 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 22f5df079..ddb67d45c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@fe4161a26a8629af62121b670040955b330f9af2 + uses: github/codeql-action/init@cf1bb45a277cb3c205638b2cd5c984db1c46a412 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@fe4161a26a8629af62121b670040955b330f9af2 + uses: github/codeql-action/autobuild@cf1bb45a277cb3c205638b2cd5c984db1c46a412 # โ„น๏ธ Command-line programs to run using the OS shell. 
# ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@fe4161a26a8629af62121b670040955b330f9af2 + uses: github/codeql-action/analyze@cf1bb45a277cb3c205638b2cd5c984db1c46a412 From 1279a9b4a7696e544e8ad24a02a355d6d900f91d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 10:10:19 -0500 Subject: [PATCH 430/456] Bump golang.org/x/sys from 0.38.0 to 0.39.0 (#1920) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.38.0 to 0.39.0. - [Commits](https://github.com/golang/sys/compare/v0.38.0...v0.39.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-version: 0.39.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 15900b532..dfd5fb238 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4 github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 - golang.org/x/sys v0.38.0 + golang.org/x/sys v0.39.0 golang.org/x/term v0.36.0 golang.org/x/text v0.31.0 ) diff --git a/go.sum b/go.sum index cf0fc7717..092307e02 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod 
h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= From 9920e28b91bceeb521f88fdb258d09905fc6c134 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 10:11:14 -0500 Subject: [PATCH 431/456] Bump golang.org/x/text from 0.31.0 to 0.32.0 (#1919) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.31.0 to 0.32.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.31.0...v0.32.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-version: 0.32.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index dfd5fb238..9e4ff0f38 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.39.0 golang.org/x/term v0.36.0 - golang.org/x/text v0.31.0 + golang.org/x/text v0.32.0 ) require ( @@ -39,7 +39,7 @@ require ( github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/tools v0.38.0 // indirect + golang.org/x/tools v0.39.0 // indirect gonum.org/v1/gonum v0.16.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 092307e02..98b4396da 100644 --- a/go.sum +++ b/go.sum @@ -47,10 +47,10 @@ golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= -golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= -golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= -golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= -golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= 
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 34b1f0d4e9072f7206bb97b980fef1ca588ca9e6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 10:21:08 -0500 Subject: [PATCH 432/456] Bump golang.org/x/term from 0.36.0 to 0.37.0 (#1909) * `mlr sort -b` feature * mlr regtest -p test/cases/cli-help && make dev * Bump golang.org/x/term from 0.36.0 to 0.37.0 Bumps [golang.org/x/term](https://github.com/golang/term) from 0.36.0 to 0.37.0. - [Commits](https://github.com/golang/term/compare/v0.36.0...v0.37.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.37.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: John Kerl Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: John Kerl --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 9e4ff0f38..c8af66e99 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.39.0 - golang.org/x/term v0.36.0 + golang.org/x/term v0.37.0 golang.org/x/text v0.32.0 ) diff --git a/go.sum b/go.sum index 98b4396da..541a2d3b8 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= -golang.org/x/term v0.36.0/go.mod 
h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= From c078c8036108a8492ad26339fdfe8b77d26bc7ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Dec 2025 11:11:28 -0500 Subject: [PATCH 433/456] Bump golang.org/x/term from 0.37.0 to 0.38.0 (#1921) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.37.0 to 0.38.0. - [Commits](https://github.com/golang/term/compare/v0.37.0...v0.38.0) --- updated-dependencies: - dependency-name: golang.org/x/term dependency-version: 0.38.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c8af66e99..cc5bbc4bb 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pkg/profile v1.7.0 github.com/stretchr/testify v1.11.1 golang.org/x/sys v0.39.0 - golang.org/x/term v0.37.0 + golang.org/x/term v0.38.0 golang.org/x/text v0.32.0 ) diff --git a/go.sum b/go.sum index 541a2d3b8..5c32a5bf5 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= -golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= +golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= +golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= From fe6c8d57bce1c02377fba6721ae70ed2c0326540 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Dec 2025 09:39:28 -0500 Subject: [PATCH 434/456] Bump github/codeql-action from 4.31.7 to 4.31.8 (#1923) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.7 to 4.31.8. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/cf1bb45a277cb3c205638b2cd5c984db1c46a412...1b168cd39490f61582a9beae412bb7057a6b2c4e) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.8 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ddb67d45c..8eed3119c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@cf1bb45a277cb3c205638b2cd5c984db1c46a412 + uses: github/codeql-action/init@1b168cd39490f61582a9beae412bb7057a6b2c4e with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@cf1bb45a277cb3c205638b2cd5c984db1c46a412 + uses: github/codeql-action/autobuild@1b168cd39490f61582a9beae412bb7057a6b2c4e # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@cf1bb45a277cb3c205638b2cd5c984db1c46a412 + uses: github/codeql-action/analyze@1b168cd39490f61582a9beae412bb7057a6b2c4e From e5d65fd28cf879a43556d4b03e82d20b55cf5465 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Dec 2025 09:39:39 -0500 Subject: [PATCH 435/456] Bump actions/cache from 4.3.0 to 5.0.0 (#1922) Bumps [actions/cache](https://github.com/actions/cache) from 4.3.0 to 5.0.0. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/0057852bfaa89a56745cba8c7296529d2fc39830...a7833574556fa59680c1b7cb190c1735db73ebf0) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 5.0.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c6627f074..a20e8bb02 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 + uses: actions/cache@a7833574556fa59680c1b7cb190c1735db73ebf0 with: path: | ~/.cache/go-build From 8f1e327b4eed6a69327c21e429980bfb827c0187 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:55:39 -0500 Subject: [PATCH 436/456] Bump actions/upload-artifact from 5.0.0 to 6.0.0 (#1925) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 5.0.0 to 6.0.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/330a01c490aca151604b8cf639adc76d48f6c5d4...b7c566a772e6b6bfb58ed0dc250532a479d7789f) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-version: 6.0.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 11450e9b4..ea34158e6 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -41,7 +41,7 @@ jobs: if: matrix.os == 'windows-latest' run: mkdir -p bin/${{matrix.os}} && cp mlr.exe bin/${{matrix.os}} - - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f with: name: mlr-${{matrix.os}} path: bin/${{matrix.os}}/* From 38e9ff212b7e8f0c9b0c87df901f2a92cc124177 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:55:46 -0500 Subject: [PATCH 437/456] Bump actions/cache from 5.0.0 to 5.0.1 (#1924) Bumps [actions/cache](https://github.com/actions/cache) from 5.0.0 to 5.0.1. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/a7833574556fa59680c1b7cb190c1735db73ebf0...9255dc7a253b0ccc959486e2bca901246202afeb) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 5.0.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a20e8bb02..ad6c9c8c0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: # https://github.com/marketplace/actions/cache - name: Cache Go modules - uses: actions/cache@a7833574556fa59680c1b7cb190c1735db73ebf0 + uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb with: path: | ~/.cache/go-build From dc9105a92299d3e8a9375ae65660a60cd5b3fd4f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:40:33 -0500 Subject: [PATCH 438/456] Bump github/codeql-action from 4.31.8 to 4.31.9 (#1926) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.8 to 4.31.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/1b168cd39490f61582a9beae412bb7057a6b2c4e...5d4e8d1aca955e8d8589aabd499c5cae939e33c7) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.31.9 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 8eed3119c..16cf0f6a4 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@1b168cd39490f61582a9beae412bb7057a6b2c4e + uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@1b168cd39490f61582a9beae412bb7057a6b2c4e + uses: github/codeql-action/autobuild@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # โ„น๏ธ Command-line programs to run using the OS shell. # ๐Ÿ“š https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@1b168cd39490f61582a9beae412bb7057a6b2c4e + uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 From 0b8da34b4af2a51b2ed193bd4adb51d51815b580 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 2 Jan 2026 11:02:02 -0500 Subject: [PATCH 439/456] Use `snap` name `mlr`, not `miller` (#1872) --- snap/snapcraft.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index c88b8adc1..fe3222340 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,5 +1,5 @@ -name: miller -adopt-info: miller +name: mlr +adopt-info: mlr summary: Miller is like awk, sed, cut, join and sort description: | Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, JSON, JSON Lines, and positionally-indexed. 
@@ -32,13 +32,13 @@ platforms: build-for: [ppc64el] apps: - miller: + mlr: command: usr/local/bin/mlr plugs: - home parts: - miller: + mlr: source: https://github.com/johnkerl/miller source-type: git plugin: make From ac30743242d0f0f6c50a51f82422d331c746a77d Mon Sep 17 00:00:00 2001 From: kz6fittycent Date: Fri, 2 Jan 2026 10:52:52 -0600 Subject: [PATCH 440/456] Fixed README (#1871) * initial snap commit * Needs network interface Network interface added - should correct connectivity issue. Added workflow badge, too * not needed * README updates * One more indentation/gap fixed * Changed name to mlr from miller No alias needed with this name change. --------- Co-authored-by: John Kerl --- README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab988a4f8..ae0d77fe8 100644 --- a/README.md +++ b/README.md @@ -52,17 +52,12 @@ There's a good chance you can get Miller pre-built for your system: [![Fedora](https://img.shields.io/badge/distros-fedora-173b70.svg)](https://packages.fedoraproject.org/pkgs/miller/miller/) [![Debian](https://img.shields.io/badge/distros-debian-c70036.svg)](https://packages.debian.org/stable/miller) [![Gentoo](https://img.shields.io/badge/distros-gentoo-4e4371.svg)](https://packages.gentoo.org/packages/sys-apps/miller) - [![Pro-Linux](https://img.shields.io/badge/distros-prolinux-3a679d.svg)](http://www.pro-linux.de/cgi-bin/DBApp/check.cgi?ShowApp..20427.100) [![Arch Linux](https://img.shields.io/badge/distros-archlinux-1792d0.svg)](https://aur.archlinux.org/packages/miller-git) - [![NetBSD](https://img.shields.io/badge/distros-netbsd-f26711.svg)](http://pkgsrc.se/textproc/miller) [![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/) - [![Anaconda](https://img.shields.io/badge/distros-anaconda-63ad41.svg)](https://anaconda.org/conda-forge/miller/) - -[![Snap](https://snapcraft.io/)](https://snapcraft.io/) - 
+[![Snap](https://snapcraft.io/miller)](https://snapcraft.io/miller) [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller) [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller) [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller) @@ -96,6 +91,7 @@ See also [building from source](https://miller.readthedocs.io/en/latest/build.ht [![Multi-platform build status](https://github.com/johnkerl/miller/actions/workflows/go.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/go.yml) [![CodeQL status](https://github.com/johnkerl/miller/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/codeql-analysis.yml) [![Codespell status](https://github.com/johnkerl/miller/actions/workflows/codespell.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/codespell.yml) +[![๐Ÿงช Snap Builds](https://github.com/johnkerl/miller/actions/workflows/test-snap-can-build.yml/badge.svg)](https://github.com/johnkerl/miller/actions/workflows/test-snap-can-build.yml) From 7b8822e2efe7aa6eecd02cee9926396962f78e1c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 2 Jan 2026 11:58:49 -0500 Subject: [PATCH 441/456] Snap name is not `mlr` but `miller` (#1928) --- snap/snapcraft.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index fe3222340..8e7046349 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,6 +1,6 @@ -name: mlr -adopt-info: mlr -summary: Miller is like awk, sed, cut, join and sort +name: miller +adopt-info: miller +summary: Miller is like awk, sed, cut, join and sort description: | Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, JSON, JSON Lines, and positionally-indexed. 
@@ -32,19 +32,19 @@ platforms: build-for: [ppc64el] apps: - mlr: + miller: command: usr/local/bin/mlr plugs: - home -parts: - mlr: +parts: + miller: source: https://github.com/johnkerl/miller source-type: git plugin: make build-snaps: - go - + override-pull: | craftctl default craftctl set version="$(git describe --tags | sed 's/^v//' | cut -d "-" -f1)" From 5b6f64669a95c0a2e905518f7b3dcc1510f11352 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 2 Jan 2026 13:29:41 -0500 Subject: [PATCH 442/456] Snap notes (#1929) --- .github/workflows/release-snap.yaml | 26 +++++ snap/README.md | 150 ++++++++++++++++++++++++++++ snap/snapcraft.yaml | 12 ++- 3 files changed, 184 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/release-snap.yaml create mode 100644 snap/README.md diff --git a/.github/workflows/release-snap.yaml b/.github/workflows/release-snap.yaml new file mode 100644 index 000000000..b1daa053c --- /dev/null +++ b/.github/workflows/release-snap.yaml @@ -0,0 +1,26 @@ +name: Release +on: + push: + tags: + - v* + workflow_dispatch: + +jobs: + snap: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build snap + uses: snapcore/action-build@v1 + id: build + + - name: Publish to Snap Store + uses: snapcore/action-publish@v1 + env: + SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_TOKEN }} + with: + snap: ${{ steps.build.outputs.snap }} + # release: stable # or edge, beta, candidate + release: stable diff --git a/snap/README.md b/snap/README.md new file mode 100644 index 000000000..2af316410 --- /dev/null +++ b/snap/README.md @@ -0,0 +1,150 @@ +# Failed attempts to create a snap interactively + +2026-01-02 I used an Ubuntu 24.04 EC2 instance. I followed https://documentation.ubuntu.com/snapcraft/stable/. Error messages said things like + +``` +A network related operation failed in a context of no network access. 
+Recommended resolution: Verify that the environment has internet connectivity; see https://canonical-craft-providers.readthedocs-hosted.com/en/latest/explanation/ for further reference. +Full execution log: '/home/ubuntu/.local/state/snapcraft/log/snapcraft-20260102-170252.488632.log' +``` + +when there was in fact no network problem. I remained confused. + +``` +$ sudo snapcraft pack + +$ lxc list + +$ snapcraft pack --destructive-mode + +$ snapcraft pack --use-multipass + +$ sudo snap install multipass + +$ snapcraft pack --use-multipass + +$ sudo lxd init --auto + +$ lxc network list + +$ sudo snapcraft pack + +$ sudo snap set snapcraft provider=multipass + +$ sudo snapcraft pack --destructive-mode + +[This created miller_6.15.0_arm64.snap] + +$ snapcraft upload --release=stable *.snap +No keyring found to store or retrieve credentials from. +Recommended resolution: Ensure the keyring is working or SNAPCRAFT_STORE_CREDENTIALS is correctly exported into the environment +For more information, check out: https://documentation.ubuntu.com/snapcraft/stable/how-to/publishing/authenticate +Full execution log: '/home/ubuntu/.local/state/snapcraft/log/snapcraft-20260102-172357.599171.log' + +$ ll *.snap +-rw-r--r-- 1 root root 8994816 Jan 2 17:22 miller_6.15.0_arm64.snap + +$ snap install *.snap +error: access denied (try with sudo) + +$ sudo snap install *.snap +error: cannot find signatures with metadata for snap/component "miller_6.15.0_arm64.snap" +``` + +Conclusion: + +* I got cryptic error messages with various permutations. +* Through trial and error I got a `.snap` file with `sudo` and `multipass` and `--destructive-mode`. +* Even then, I got a `.snap` file only for the current machine's arch, and the resulting `.snap` file was not locally installable. +* This led me to try a GitHub Action. + +# Info from Claude about auto-releasing + +Here's how to set up automatic Snap publishing from GitHub releases: + +## 1. 
Create snapcraft.yaml + +First, ensure you have a proper `snapcraft.yaml` in your repo root (or in a `snap/` directory): + +```yaml +name: your-app-name +base: core22 # or core24 for Ubuntu 24.04 +version: git # automatically uses git tags +summary: Single-line summary +description: | + Longer description of your application + +grade: stable # or devel +confinement: strict # or classic, devmode + +apps: + your-app-name: + command: bin/your-binary + plugs: + - home + - network + +parts: + your-app: + plugin: nil # change based on your build system (go, python, etc.) + source: . + # Add build steps as needed +``` + +## 2. Get Snapcraft credentials + +Export your Snapcraft login credentials: + +```bash +snapcraft export-login --snaps=miller --channels=stable,candidate,beta,edge snapcraft-token.txt +``` + +This creates a token file with limited permissions for just your snap. + +## 3. Add token to GitHub Secrets + +1. Go to your GitHub repo โ†’ Settings โ†’ Secrets and variables โ†’ Actions +2. Click "New repository secret" +3. Name: `SNAPCRAFT_TOKEN` +4. Value: Paste the entire contents of `snapcraft-token.txt` + +## 4. 
Create GitHub Action workflow + +Create `.github/workflows/release.yml`: + +```yaml +name: Release to Snap Store + +on: + release: + types: [published] + +jobs: + snap: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build snap + uses: snapcore/action-build@v1 + id: build + + - name: Publish to Snap Store + uses: snapcore/action-publish@v1 + env: + SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_TOKEN }} + with: + snap: ${{ steps.build.outputs.snap }} + # release: stable # or edge, beta, candidate + release: edge +``` + +## Tips + +- **Version handling**: Using `version: git` in snapcraft.yaml automatically uses your git tag as the version +- **Channels**: Start with `edge` channel for testing, then promote to `stable` once confident +- **Multiple architectures**: Add a build matrix if you need to support arm64, etc. +- **Testing before stable**: Consider publishing to `candidate` or `beta` first, then manually promote to `stable` after testing + +Now when you create a GitHub release with a tag (e.g., `v1.0.0`), the workflow will automatically build and publish your snap! diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 8e7046349..d374a84b0 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,17 +1,21 @@ name: miller -adopt-info: miller +base: core24 +version: git summary: Miller is like awk, sed, cut, join and sort description: | Miller is like awk, sed, cut, join, and sort for data formats such as CSV, TSV, JSON, JSON Lines, and positionally-indexed. 
+grade: stable +confinement: strict + +adopt-info: miller + +website: https://github.com/johnkerl/miller/issues contact: https://github.com/johnkerl/miller/issues issues: https://github.com/johnkerl/miller/issues source-code: https://github.com/johnkerl/miller license: BSD-2-Clause -base: core24 -grade: stable -confinement: strict compression: lzo platforms: From b8db798a2f2334bb173ff7baf3d91bc2c61968ec Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 2 Jan 2026 13:57:59 -0500 Subject: [PATCH 443/456] Miller 6.16.0 (#1930) * Miller 6.16.0 * make dev --- docs/src/data-diving-examples.md | 4 ++-- docs/src/date-time-examples.md | 2 +- docs/src/manpage.md | 4 ++-- docs/src/manpage.txt | 4 ++-- docs/src/reference-verbs.md | 4 ++-- man/manpage.txt | 4 ++-- man/mlr.1 | 6 +++--- miller.spec | 5 ++++- pkg/version/version.go | 2 +- 9 files changed, 19 insertions(+), 16 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 100716ec2..297eca211 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -26,7 +26,7 @@ Vertical-tabular format is good for a quick look at CSV data layout -- seeing wh wc -l data/flins.csv
-   36635 data/flins.csv
+36635 data/flins.csv
 
@@ -227,7 +227,7 @@ Peek at the data:
 wc -l data/colored-shapes.dkvp
 
-   10078 data/colored-shapes.dkvp
+10078 data/colored-shapes.dkvp
 
diff --git a/docs/src/date-time-examples.md b/docs/src/date-time-examples.md
index 5bcbdac01..cab74de3c 100644
--- a/docs/src/date-time-examples.md
+++ b/docs/src/date-time-examples.md
@@ -68,7 +68,7 @@ date,qoh
 wc -l data/miss-date.csv
 
-    1372 data/miss-date.csv
+1372 data/miss-date.csv
 
Since there are 1372 lines in the data file, some automation is called for. To find the missing dates, you can convert the dates to seconds since the epoch using `strptime`, then compute adjacent differences (the `cat -n` simply inserts record-counters): diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 14cc7ce9c..39203a0c9 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -48,7 +48,7 @@ This is simply a copy of what you should see on running `man mlr` at a command p insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.15.0-dev. + manpage documents mlr 6.16.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3759,5 +3759,5 @@ This is simply a copy of what you should see on running `man mlr` at a command p MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-20 4mMILLER24m(1) + 2026-01-02 4mMILLER24m(1)
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 4e12b811c..90bff3293 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -27,7 +27,7 @@ insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.15.0-dev. + manpage documents mlr 6.16.0. 1mEXAMPLES0m mlr --icsv --opprint cat example.csv @@ -3738,4 +3738,4 @@ MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2025-08-20 4mMILLER24m(1) + 2026-01-02 4mMILLER24m(1) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index f33e8b488..b50c97d7d 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -4134,7 +4134,7 @@ There are two main ways to use `mlr uniq`: the first way is with `-g` to specify wc -l data/colored-shapes.csv
-   10079 data/colored-shapes.csv
+10079 data/colored-shapes.csv
 
@@ -4291,7 +4291,7 @@ color=purple,shape=square,flag=0
 wc -l data/repeats.dkvp
 
-      57 data/repeats.dkvp
+57 data/repeats.dkvp
 
diff --git a/man/manpage.txt b/man/manpage.txt
index 4e12b811c..90bff3293 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -27,7 +27,7 @@
        insertion-ordered hash map.  This encompasses a variety of data
        formats, including but not limited to the familiar CSV, TSV, and JSON.
        (Miller can handle positionally-indexed data as a special case.) This
-       manpage documents mlr 6.15.0-dev.
+       manpage documents mlr 6.16.0.
 
 1mEXAMPLES0m
        mlr --icsv --opprint cat example.csv
@@ -3738,4 +3738,4 @@
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-                                  2025-08-20                         4mMILLER24m(1)
+                                  2026-01-02                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 24a9ea61a..f36d5e2f0 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2025-08-20
+.\"      Date: 2026-01-02
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2025-08-20" "\ \&" "\ \&"
+.TH "MILLER" "1" "2026-01-02" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -47,7 +47,7 @@ on integer-indexed fields: if the natural data structure for the latter is the
 array, then Miller's natural data structure is the insertion-ordered hash map.
 This encompasses a variety of data formats, including but not limited to the
 familiar CSV, TSV, and JSON.  (Miller can handle positionally-indexed data as
-a special case.) This manpage documents mlr 6.15.0-dev.
+a special case.) This manpage documents mlr 6.16.0.
 .SH "EXAMPLES"
 .sp
 
diff --git a/miller.spec b/miller.spec
index 9211e0c25..166cb35e0 100644
--- a/miller.spec
+++ b/miller.spec
@@ -1,6 +1,6 @@
 Summary: Name-indexed data processing tool
 Name: miller
-Version: 6.15.0
+Version: 6.16.0
 Release: 1%{?dist}
 License: BSD
 Source: https://github.com/johnkerl/miller/releases/download/%{version}/miller-%{version}.tar.gz
@@ -36,6 +36,9 @@ make install
 %{_mandir}/man1/mlr.1*
 
 %changelog
+* Fri Jan 2 2026 John Kerl  - 6.16.0-1
+- 6.16.0 release
+
 * Thu Aug 14 2025 John Kerl  - 6.15.0-1
 - 6.15.0 release
 
diff --git a/pkg/version/version.go b/pkg/version/version.go
index 98dd71c33..ec9c7208a 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -4,4 +4,4 @@ package version
 // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between.
 // This makes it clear that a given build is on the main dev branch, not a
 // particular snapshot tag.
-var STRING string = "6.15.0-dev"
+var STRING string = "6.16.0"

From 421042833abb43540de1232877fb80197efc6c6b Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Fri, 2 Jan 2026 14:24:51 -0500
Subject: [PATCH 444/456] README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ae0d77fe8..7c131fd41 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ There's a good chance you can get Miller pre-built for your system:
 [![NetBSD](https://img.shields.io/badge/distros-netbsd-f26711.svg)](http://pkgsrc.se/textproc/miller)
 [![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/)
 [![Anaconda](https://img.shields.io/badge/distros-anaconda-63ad41.svg)](https://anaconda.org/conda-forge/miller/)
-[![Snap](https://snapcraft.io/miller)](https://snapcraft.io/miller)
+[![Snap](https://snapcraft.io/miller)
 [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller)
 [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller)
 [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller)

From cee04c07474339882451f6488caa90c753432e5c Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Fri, 2 Jan 2026 14:50:20 -0500
Subject: [PATCH 445/456] Fix Snap link

---
 README.md                        | 2 +-
 docs/src/installing-miller.md    | 1 +
 docs/src/installing-miller.md.in | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7c131fd41..73d788982 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ There's a good chance you can get Miller pre-built for your system:
 [![NetBSD](https://img.shields.io/badge/distros-netbsd-f26711.svg)](http://pkgsrc.se/textproc/miller)
 [![FreeBSD](https://img.shields.io/badge/distros-freebsd-8c0707.svg)](https://www.freshports.org/textproc/miller/)
 [![Anaconda](https://img.shields.io/badge/distros-anaconda-63ad41.svg)](https://anaconda.org/conda-forge/miller/)
-[![Snap](https://snapcraft.io/miller)
+[![Snap](https://img.shields.io/badge/distros-snap-d85f33.svg)](https://snapcraft.io/miller)
 [![Homebrew/MacOSX](https://img.shields.io/badge/distros-homebrew-ba832b.svg)](https://formulae.brew.sh/formula/miller)
 [![MacPorts/MacOSX](https://img.shields.io/badge/distros-macports-1376ec.svg)](https://www.macports.org/ports.php?by=name&substr=miller)
 [![Chocolatey](https://img.shields.io/badge/distros-chocolatey-red.svg)](https://chocolatey.org/packages/miller)
diff --git a/docs/src/installing-miller.md b/docs/src/installing-miller.md
index d50b70d31..314cb2e11 100644
--- a/docs/src/installing-miller.md
+++ b/docs/src/installing-miller.md
@@ -30,6 +30,7 @@ Using a package manager:
 * MacOS: `brew update` and `brew install miller`, or `sudo port selfupdate` and `sudo port install miller`, depending on your preference of [Homebrew](https://brew.sh) or [MacPorts](https://macports.org).
 * Windows: `choco install miller` using [Chocolatey](https://chocolatey.org).
 * Note: Miller 6 was released 2022-01-09; [several platforms](https://github.com/johnkerl/miller/blob/main/README-versions.md) may have Miller 5 available.
+* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`.
 
 See also:
 
diff --git a/docs/src/installing-miller.md.in b/docs/src/installing-miller.md.in
index b735be725..2e56a683b 100644
--- a/docs/src/installing-miller.md.in
+++ b/docs/src/installing-miller.md.in
@@ -14,6 +14,7 @@ Using a package manager:
 * MacOS: `brew update` and `brew install miller`, or `sudo port selfupdate` and `sudo port install miller`, depending on your preference of [Homebrew](https://brew.sh) or [MacPorts](https://macports.org).
 * Windows: `choco install miller` using [Chocolatey](https://chocolatey.org).
 * Note: Miller 6 was released 2022-01-09; [several platforms](https://github.com/johnkerl/miller/blob/main/README-versions.md) may have Miller 5 available.
+* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`.
 
 See also:
 

From a504e16b9315aee1dc6adf2107c0237ce4484593 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Fri, 2 Jan 2026 15:19:34 -0500
Subject: [PATCH 446/456] Try to build for Ubuntu arm64 (#1931)

* Fix Snap link

* Try to build for Ubuntu arm64
---
 .github/workflows/release-snap.yaml | 7 +++++--
 .github/workflows/release.yml       | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/release-snap.yaml b/.github/workflows/release-snap.yaml
index b1daa053c..951b6b961 100644
--- a/.github/workflows/release-snap.yaml
+++ b/.github/workflows/release-snap.yaml
@@ -1,4 +1,4 @@
-name: Release
+name: Release for Snap
 on: 
   push:
     tags:
@@ -7,7 +7,10 @@ on:
 
 jobs:
   snap:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, ubuntu-24.04-arm]
+    runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ad6c9c8c0..094a87706 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,4 +1,4 @@
-name: Release
+name: Release for GitHub
 on: 
   push:
     tags:

From 1cc17e27b0bc2a2315cace7ce4e83186a0c5132f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Jan 2026 09:37:08 -0500
Subject: [PATCH 447/456] Bump actions/checkout from 4 to 6 (#1932)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 6.
- [Release notes](https://github.com/actions/checkout/releases)
- [Commits](https://github.com/actions/checkout/compare/v4...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/codespell.yml       | 2 +-
 .github/workflows/go.yml              | 2 +-
 .github/workflows/release-snap.yaml   | 2 +-
 .github/workflows/release.yml         | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 16cf0f6a4..e04d9caf9 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 990706a06..839eeb43f 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       # Check out the code base
       - name: Check out code
-        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
         with:
           # Full git history is needed to get a proper list of changed files within `super-linter`
           fetch-depth: 0
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index ea34158e6..a41d4b19a 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
 
     - name: Set up Go
       uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c
diff --git a/.github/workflows/release-snap.yaml b/.github/workflows/release-snap.yaml
index 951b6b961..d0dfada19 100644
--- a/.github/workflows/release-snap.yaml
+++ b/.github/workflows/release-snap.yaml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Build snap
         uses: snapcore/action-build@v1
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 094a87706..1903ccecc 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,7 +23,7 @@ jobs:
         id: go
 
       - name: Check out code into the Go module directory
-        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
         with:
           fetch-depth: 0
 

From e08e3ca80c73548f8eb726d248f2c41cf6fbd82a Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Wed, 7 Jan 2026 13:45:36 -0500
Subject: [PATCH 448/456] Add snapcraft.io link to install instructions

---
 docs/src/installing-miller.md    | 2 +-
 docs/src/installing-miller.md.in | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/installing-miller.md b/docs/src/installing-miller.md
index 314cb2e11..9de4558ff 100644
--- a/docs/src/installing-miller.md
+++ b/docs/src/installing-miller.md
@@ -30,7 +30,7 @@ Using a package manager:
 * MacOS: `brew update` and `brew install miller`, or `sudo port selfupdate` and `sudo port install miller`, depending on your preference of [Homebrew](https://brew.sh) or [MacPorts](https://macports.org).
 * Windows: `choco install miller` using [Chocolatey](https://chocolatey.org).
 * Note: Miller 6 was released 2022-01-09; [several platforms](https://github.com/johnkerl/miller/blob/main/README-versions.md) may have Miller 5 available.
-* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`.
+* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`. See also [https://snapcraft.io/miller](https://snapcraft.io/miller).
 
 See also:
 
diff --git a/docs/src/installing-miller.md.in b/docs/src/installing-miller.md.in
index 2e56a683b..74e5c9f53 100644
--- a/docs/src/installing-miller.md.in
+++ b/docs/src/installing-miller.md.in
@@ -14,7 +14,7 @@ Using a package manager:
 * MacOS: `brew update` and `brew install miller`, or `sudo port selfupdate` and `sudo port install miller`, depending on your preference of [Homebrew](https://brew.sh) or [MacPorts](https://macports.org).
 * Windows: `choco install miller` using [Chocolatey](https://chocolatey.org).
 * Note: Miller 6 was released 2022-01-09; [several platforms](https://github.com/johnkerl/miller/blob/main/README-versions.md) may have Miller 5 available.
-* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`.
+* As of Miller 6.16.0, you can do `snap install miller`. Note however that the executable is named `miller`, _not_ `mlr`. See also [https://snapcraft.io/miller](https://snapcraft.io/miller).
 
 See also:
 

From 4ce21e998bfa3e29d61847885bdd1dc537fce230 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 9 Jan 2026 09:54:19 -0500
Subject: [PATCH 449/456] Bump golang.org/x/sys from 0.39.0 to 0.40.0 (#1933)

Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.39.0 to 0.40.0.
- [Commits](https://github.com/golang/sys/compare/v0.39.0...v0.40.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.40.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index cc5bbc4bb..d9d14b201 100644
--- a/go.mod
+++ b/go.mod
@@ -27,7 +27,7 @@ require (
 	github.com/nine-lives-later/go-windows-terminal-sequences v1.0.4
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.11.1
-	golang.org/x/sys v0.39.0
+	golang.org/x/sys v0.40.0
 	golang.org/x/term v0.38.0
 	golang.org/x/text v0.32.0
 )
diff --git a/go.sum b/go.sum
index 5c32a5bf5..467f71fab 100644
--- a/go.sum
+++ b/go.sum
@@ -43,8 +43,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
-golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
+golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
 golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
 golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=

From eb972e19ebde9e81cf076af9a3b3094a5c02f79f Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 10 Jan 2026 16:48:53 -0500
Subject: [PATCH 450/456] Use GOCC fork for performance improvement (#1934)

* Use GOCC fork for performance improvement

* fix versions
---
 README-dev.md                         |  3 ++-
 cmd/experiments/dsl_parser/one/build  |  6 +++---
 cmd/experiments/dsl_parser/one/go.mod |  4 ++--
 cmd/experiments/dsl_parser/one/go.sum | 26 --------------------------
 cmd/experiments/dsl_parser/two/build  |  6 +++---
 cmd/experiments/dsl_parser/two/go.mod |  4 ++--
 cmd/experiments/dsl_parser/two/go.sum | 26 --------------------------
 go.mod                                |  2 ++
 go.sum                                |  4 ++++
 pkg/parsing/mlr.bnf                   |  2 +-
 tools/build-dsl                       |  4 ++--
 11 files changed, 21 insertions(+), 66 deletions(-)

diff --git a/README-dev.md b/README-dev.md
index 0e363db5c..6dd708f95 100644
--- a/README-dev.md
+++ b/README-dev.md
@@ -95,13 +95,14 @@ So, in broad overview, the key packages are:
 
 * Miller dependencies are all in the Go standard library, except two:
   * GOCC lexer/parser code-generator from [github.com/goccmack/gocc](https://github.com/goccmack/gocc):
+    * Forked at [github.com/johnkerl/gocc](https://github.com/johnkerl/gocc).
     * This package defines the grammar for Miller's domain-specific language (DSL) for the Miller `put` and `filter` verbs. And, GOCC is a joy to use. :)
     * It is used on the terms of its open-source license.
   * [golang.org/x/term](https://pkg.go.dev/golang.org/x/term):
     * Just a one-line Miller callsite for is-a-terminal checking for the [Miller REPL](./pkg/terminals/repl/README.md).
     * It is used on the terms of its open-source license.
 * See also [./go.mod](go.mod). Setup:
-  * `go get github.com/goccmack/gocc`
+  * `go get github.com/johnkerl/gocc`
   * `go get golang.org/x/term`
 
 ### Miller per se
diff --git a/cmd/experiments/dsl_parser/one/build b/cmd/experiments/dsl_parser/one/build
index 373184a92..b43d4bc26 100755
--- a/cmd/experiments/dsl_parser/one/build
+++ b/cmd/experiments/dsl_parser/one/build
@@ -28,9 +28,9 @@ mkdir -p $dir
 # ----------------------------------------------------------------
 # Run the parser-generator
 
-# Build the bin/gocc executable:
-go get github.com/goccmack/gocc
-#go get github.com/johnkerl/gocc
+# Build the bin/gocc executable (use my fork for performance):
+# go get github.com/goccmack/gocc
+go get github.com/johnkerl/gocc
 bingocc="$GOPATH/bin/gocc"
 
 if [ ! -x "$bingocc" ]; then
diff --git a/cmd/experiments/dsl_parser/one/go.mod b/cmd/experiments/dsl_parser/one/go.mod
index e4f49daf8..4e81172d6 100644
--- a/cmd/experiments/dsl_parser/one/go.mod
+++ b/cmd/experiments/dsl_parser/one/go.mod
@@ -1,5 +1,5 @@
 module one
 
-go 1.16
+go 1.24
 
-require github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808 // indirect
+toolchain go1.24.5
diff --git a/cmd/experiments/dsl_parser/one/go.sum b/cmd/experiments/dsl_parser/one/go.sum
index dfc52feaf..e69de29bb 100644
--- a/cmd/experiments/dsl_parser/one/go.sum
+++ b/cmd/experiments/dsl_parser/one/go.sum
@@ -1,26 +0,0 @@
-github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808 h1:MBgZdx/wBJWTR2Q79mQfP6c8uXdQiu5JowfEz3KhFac=
-github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808/go.mod h1:dWhnuKE5wcnGTExA2DH6Iicu21YnWwOPMrc/GyhtbCk=
-github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
-golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/cmd/experiments/dsl_parser/two/build b/cmd/experiments/dsl_parser/two/build
index 2cb7893d3..1ea06c916 100755
--- a/cmd/experiments/dsl_parser/two/build
+++ b/cmd/experiments/dsl_parser/two/build
@@ -28,9 +28,9 @@ mkdir -p $dir
 # ----------------------------------------------------------------
 # Run the parser-generator
 
-# Build the bin/gocc executable:
-go get github.com/goccmack/gocc
-#go get github.com/johnkerl/gocc
+# Build the bin/gocc executable (use my fork for performance):
+# go get github.com/goccmack/gocc
+go get github.com/johnkerl/gocc
 bingocc="$GOPATH/bin/gocc"
 if [ ! -x "$bingocc" ]; then
   exit 1
diff --git a/cmd/experiments/dsl_parser/two/go.mod b/cmd/experiments/dsl_parser/two/go.mod
index be38de9a3..81c05ea5e 100644
--- a/cmd/experiments/dsl_parser/two/go.mod
+++ b/cmd/experiments/dsl_parser/two/go.mod
@@ -1,5 +1,5 @@
 module two
 
-go 1.16
+go 1.24
 
-require github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808 // indirect
+toolchain go1.24.5
diff --git a/cmd/experiments/dsl_parser/two/go.sum b/cmd/experiments/dsl_parser/two/go.sum
index dfc52feaf..e69de29bb 100644
--- a/cmd/experiments/dsl_parser/two/go.sum
+++ b/cmd/experiments/dsl_parser/two/go.sum
@@ -1,26 +0,0 @@
-github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808 h1:MBgZdx/wBJWTR2Q79mQfP6c8uXdQiu5JowfEz3KhFac=
-github.com/goccmack/gocc v0.0.0-20210322175033-34358ebe5808/go.mod h1:dWhnuKE5wcnGTExA2DH6Iicu21YnWwOPMrc/GyhtbCk=
-github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
-golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/go.mod b/go.mod
index d9d14b201..a1c42dda7 100644
--- a/go.mod
+++ b/go.mod
@@ -37,8 +37,10 @@ require (
 	github.com/felixge/fgprof v0.9.3 // indirect
 	github.com/golang/snappy v1.0.0 // indirect
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
+	github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b // indirect
 	github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	golang.org/x/mod v0.30.0 // indirect
 	golang.org/x/tools v0.39.0 // indirect
 	gonum.org/v1/gonum v0.16.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index 467f71fab..979fd84eb 100644
--- a/go.sum
+++ b/go.sum
@@ -13,6 +13,8 @@ github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
 github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y=
 github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
 github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
+github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b h1:YjgYl7wZucuGLa3y7QLGpyZeWwzFQtx8xy6EDa6a+6M=
+github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b/go.mod h1:PdhK7K05ZpbKpVgSHtJbJJRQkEIG/aElMtVjAFAIn6U=
 github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
@@ -41,6 +43,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
+golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
diff --git a/pkg/parsing/mlr.bnf b/pkg/parsing/mlr.bnf
index 5903cf419..bd9602f81 100644
--- a/pkg/parsing/mlr.bnf
+++ b/pkg/parsing/mlr.bnf
@@ -7,7 +7,7 @@
 // GRAMMAR FOR THE MILLER DOMAIN-SPECIFIC LANGUAGE
 //
 // This is the Miller DSL's BNF grammar, using the awesome GOCC tool framework
-// from https://github.com/goccmack/gocc.
+// from https://github.com/goccmack/gocc (forked at https://github.com/johnkerl/gocc).
 //
 // The first section is lexical elements and the second section is syntactical
 // elements. These are the analogs of lex and yacc, respectively, using a
diff --git a/tools/build-dsl b/tools/build-dsl
index e2a6186d2..4cf70cbf5 100755
--- a/tools/build-dsl
+++ b/tools/build-dsl
@@ -27,8 +27,8 @@ if [ $# -eq 1 ]; then
   fi
 fi
 
-# Build the bin/gocc executable:
-go install github.com/goccmack/gocc
+# Build the bin/gocc executable (use my fork for performance):
+go install github.com/johnkerl/gocc
 go mod tidy
 bingocc="$HOME/go/bin/gocc"
 if [ ! -x "$bingocc" ]; then

From 49869ba8e4f5efec3d75d1be65b1630622349041 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 12 Jan 2026 10:49:50 -0500
Subject: [PATCH 451/456] Bump golang.org/x/text from 0.32.0 to 0.33.0 (#1937)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.32.0 to 0.33.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.32.0...v0.33.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-version: 0.33.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod |  6 ++----
 go.sum | 12 ++++--------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/go.mod b/go.mod
index a1c42dda7..5e3a39085 100644
--- a/go.mod
+++ b/go.mod
@@ -29,7 +29,7 @@ require (
 	github.com/stretchr/testify v1.11.1
 	golang.org/x/sys v0.40.0
 	golang.org/x/term v0.38.0
-	golang.org/x/text v0.32.0
+	golang.org/x/text v0.33.0
 )
 
 require (
@@ -37,11 +37,9 @@ require (
 	github.com/felixge/fgprof v0.9.3 // indirect
 	github.com/golang/snappy v1.0.0 // indirect
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
-	github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b // indirect
 	github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	golang.org/x/mod v0.30.0 // indirect
-	golang.org/x/tools v0.39.0 // indirect
+	golang.org/x/tools v0.40.0 // indirect
 	gonum.org/v1/gonum v0.16.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index 979fd84eb..e7639ddee 100644
--- a/go.sum
+++ b/go.sum
@@ -13,8 +13,6 @@ github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
 github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y=
 github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
 github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
-github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b h1:YjgYl7wZucuGLa3y7QLGpyZeWwzFQtx8xy6EDa6a+6M=
-github.com/johnkerl/gocc v0.0.0-20260110202143-38efff71563b/go.mod h1:PdhK7K05ZpbKpVgSHtJbJJRQkEIG/aElMtVjAFAIn6U=
 github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
@@ -43,18 +41,16 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
-golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
 golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
 golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
-golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
-golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
-golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ=
-golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
+golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
+golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
+golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
+golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
 gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
 gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

From 888d27acdb8d09965465e82fe80e1ba3ffcd97f7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 12 Jan 2026 10:55:11 -0500
Subject: [PATCH 452/456] Bump golang.org/x/term from 0.38.0 to 0.39.0 (#1936)

Bumps [golang.org/x/term](https://github.com/golang/term) from 0.38.0 to 0.39.0.
- [Commits](https://github.com/golang/term/compare/v0.38.0...v0.39.0)

---
updated-dependencies:
- dependency-name: golang.org/x/term
  dependency-version: 0.39.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 5e3a39085..53e5f69ac 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/pkg/profile v1.7.0
 	github.com/stretchr/testify v1.11.1
 	golang.org/x/sys v0.40.0
-	golang.org/x/term v0.38.0
+	golang.org/x/term v0.39.0
 	golang.org/x/text v0.33.0
 )
 
diff --git a/go.sum b/go.sum
index e7639ddee..61405a42c 100644
--- a/go.sum
+++ b/go.sum
@@ -45,8 +45,8 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
 golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
-golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
-golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
+golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY=
+golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww=
 golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
 golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
 golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=

From 8ec8de61e3b72bab452d687845dde2672d5690e3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 13 Jan 2026 10:33:32 -0500
Subject: [PATCH 453/456] Bump github/codeql-action from 4.31.9 to 4.31.10
 (#1939)

Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.9 to 4.31.10.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/5d4e8d1aca955e8d8589aabd499c5cae939e33c7...cdefb33c0f6224e58673d9004f47f7cb3e328b89)

---
updated-dependencies:
- dependency-name: github/codeql-action
  dependency-version: 4.31.10
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index e04d9caf9..ca5ad7259 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -40,7 +40,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7
+      uses: github/codeql-action/init@cdefb33c0f6224e58673d9004f47f7cb3e328b89
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -51,7 +51,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@5d4e8d1aca955e8d8589aabd499c5cae939e33c7
+      uses: github/codeql-action/autobuild@cdefb33c0f6224e58673d9004f47f7cb3e328b89
 
     # โ„น๏ธ Command-line programs to run using the OS shell.
     # ๐Ÿ“š https://git.io/JvXDl
@@ -65,4 +65,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7
+      uses: github/codeql-action/analyze@cdefb33c0f6224e58673d9004f47f7cb3e328b89

From b13037c84f97a3c2f333c857a67d67a643db7cbd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 13 Jan 2026 10:38:30 -0500
Subject: [PATCH 454/456] Bump actions/setup-go from 6.1.0 to 6.2.0 (#1938)

Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.1.0 to 6.2.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/4dc6199c7b1a012772edbd06daecab0f50c9053c...7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: 6.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/go.yml      | 2 +-
 .github/workflows/release.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index a41d4b19a..98d170d1d 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -18,7 +18,7 @@ jobs:
     - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
 
     - name: Set up Go
-      uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c
+      uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5
       with:
         go-version: 1.24
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1903ccecc..bc657fda1 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ${{ matrix.platform }}
     steps:
       - name: Set up Go
-        uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c
+        uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5
         with:
           go-version: ${{ env.GO_VERSION }}
         id: go

From 09083a0d25ba49b3918adb81a274a7d4c3d86df0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 16 Jan 2026 09:52:25 -0500
Subject: [PATCH 455/456] Bump github.com/klauspost/compress from 1.18.2 to
 1.18.3 (#1940)

Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.18.2 to 1.18.3.
- [Release notes](https://github.com/klauspost/compress/releases)
- [Commits](https://github.com/klauspost/compress/compare/v1.18.2...v1.18.3)

---
updated-dependencies:
- dependency-name: github.com/klauspost/compress
  dependency-version: 1.18.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 53e5f69ac..10b971673 100644
--- a/go.mod
+++ b/go.mod
@@ -20,7 +20,7 @@ require (
 	github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
 	github.com/johnkerl/lumin v1.0.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
-	github.com/klauspost/compress v1.18.2
+	github.com/klauspost/compress v1.18.3
 	github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1
 	github.com/lestrrat-go/strftime v1.1.1
 	github.com/mattn/go-isatty v0.0.20
diff --git a/go.sum b/go.sum
index 61405a42c..0a7bba556 100644
--- a/go.sum
+++ b/go.sum
@@ -17,8 +17,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
-github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
-github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
+github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
+github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
 github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb h1:Z5BVHFk/DLOIUAd2NycF0mLtKfhl7ynm4Uy5+AFhT48=
 github.com/kshedden/dstream v0.0.0-20190512025041-c4c410631beb/go.mod h1:+U+6yzfITr4/teU2YhxWhdyw6YzednT/16/UBMjlDrU=
 github.com/kshedden/statmodel v0.0.0-20210519035403-ee97d3e48df1 h1:UyIQ1VTQq/0CS/wLYjf3DV6uRKTd1xcsng3BccM4XCY=

From f98a35bb057e97576c490a52f5259ffdf67852e9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 09:20:41 -0500
Subject: [PATCH 456/456] Bump actions/cache from 5.0.1 to 5.0.2 (#1941)

Bumps [actions/cache](https://github.com/actions/cache) from 5.0.1 to 5.0.2.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/9255dc7a253b0ccc959486e2bca901246202afeb...8b402f58fbc84540c8b491a91e594a4576fec3d7)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-version: 5.0.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bc657fda1..fa2b59ec5 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
 
       # https://github.com/marketplace/actions/cache
       - name: Cache Go modules
-        uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb
+        uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7
         with:
           path: |
             ~/.cache/go-build