From dffaee0328d052ef1c366d9343544778ff445581 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 23 Aug 2021 22:36:34 -0400 Subject: [PATCH] More docs6 proofreads --- docs6/README.md | 8 +- docs6/docs/10-1.sh | 2 - docs6/docs/10-2.sh | 4 - docs6/docs/contributing.md | 17 +- docs6/docs/contributing.md.in | 17 +- docs6/docs/csv-with-and-without-headers.md.in | 20 +- docs6/docs/data/begin-end-example-1.sh | 5 - docs6/docs/data/begin-end-example-2.sh | 4 - docs6/docs/data/begin-end-example-3.sh | 4 - docs6/docs/data/begin-end-example-4.sh | 8 - docs6/docs/data/begin-end-example-5.sh | 1 - docs6/docs/data/begin-end-example-6.sh | 8 - docs6/docs/data/begin-end-example-6a.sh | 7 - docs6/docs/data/begin-end-example-7.sh | 1 - docs6/docs/data/begin-end-example-8.sh | 14 -- docs6/docs/data/emit-lashed.sh | 10 - docs6/docs/data/factorial-example.sh | 14 -- docs6/docs/data/fe-example-1.sh | 1 - docs6/docs/data/fe-example-2.sh | 1 - docs6/docs/data/for-oosvar-example-0b.sh | 10 - docs6/docs/data/for-oosvar-example-0c.sh | 16 -- docs6/docs/data/for-oosvar-example-0d.sh | 17 -- docs6/docs/data/for-oosvar-example-0e.sh | 17 -- docs6/docs/data/for-srec-example-1.sh | 11 - docs6/docs/data/for-srec-example-2.sh | 10 - docs6/docs/data/for-srec-example-3.sh | 9 - docs6/docs/data/full-reorg.sh | 15 -- docs6/docs/data/iqr1.sh | 3 - docs6/docs/data/iqrn.sh | 7 - docs6/docs/data/keyed-mean-with-oosvars.sh | 10 - docs6/docs/data/keyed-min-max-with-oosvars.sh | 7 - docs6/docs/data/local-example-1.sh | 16 -- docs6/docs/data/map-literal-example-1.sh | 7 - docs6/docs/data/map-literal-example-2.sh | 7 - docs6/docs/data/map-literal-example-3.sh | 19 -- docs6/docs/data/mean-with-oosvars.sh | 8 - docs6/docs/data/miss-date-1.sh | 5 - docs6/docs/data/miss-date-2.sh | 5 - docs6/docs/data/previous-to-current.sh | 5 - docs6/docs/data/put-multiline-example.txt | 7 - docs6/docs/data/ragged-csv-2.sh | 6 - docs6/docs/data/ragged-csv.sh | 8 - docs6/docs/data/rect.sh | 10 - 
docs6/docs/data/single-for-example-1.sh | 8 - docs6/docs/data/single-for-example-2.sh | 8 - docs6/docs/data/subr-example.sh | 17 -- docs6/docs/data/trailing-commas.sh | 17 -- docs6/docs/data/triple-for-example-1.sh | 7 - docs6/docs/data/triple-for-example-2.sh | 10 - docs6/docs/data/weighted-mean.sh | 24 -- docs6/docs/data/while-example-1.sh | 6 - docs6/docs/data/while-example-2.sh | 9 - ...tes-and-times.md => date-time-examples.md} | 2 +- ...d-times.md.in => date-time-examples.md.in} | 18 +- docs6/docs/dsl-example-multiline.sh | 4 - docs6/docs/gz-example.csv.gz | Bin 0 -> 258 bytes docs6/docs/log-processing-examples.md.in | 12 +- docs6/docs/manpage.md | 2 +- docs6/docs/manpage.md.in | 2 +- docs6/docs/misc-examples.md.in | 8 +- docs6/docs/new-in-miller-6.md | 4 +- docs6/docs/new-in-miller-6.md.in | 4 +- ...nce-main-online-help.md => online-help.md} | 2 +- ...in-online-help.md.in => online-help.md.in} | 2 +- docs6/docs/oosvar-example-ewma.sh | 5 - docs6/docs/oosvar-example-sum-grouped.sh | 6 - docs6/docs/oosvar-example-sum.sh | 6 - docs6/docs/operating-on-all-fields.md.in | 18 +- docs6/docs/output-colorization.md | 2 +- docs6/docs/output-colorization.md.in | 2 +- docs6/docs/programming-language.md.in | 7 +- docs6/docs/proofreads.txt | 35 ++- docs6/docs/purple.csv.gz | Bin 0 -> 132 bytes .../{joins.md => questions-about-joins.md} | 2 +- ...oins.md.in => questions-about-joins.md.in} | 2 +- ...ng.md => questions-about-then-chaining.md} | 2 +- ...in => questions-about-then-chaining.md.in} | 2 +- docs6/docs/red.csv.gz | Bin 0 -> 143 bytes .../docs/reference-dsl-control-structures.md | 107 ++++---- .../reference-dsl-control-structures.md.in | 237 ++++++++++++++++-- docs6/docs/reference-dsl-differences.md | 157 ++++++++++++ docs6/docs/reference-dsl-differences.md.in | 131 ++++++++++ docs6/docs/reference-dsl-maps.md | 4 + docs6/docs/reference-dsl-maps.md.in | 3 + .../reference-dsl-output-statements.md.in | 13 +- docs6/docs/reference-dsl-syntax.md.in | 38 ++- 
...reference-dsl-user-defined-functions.md.in | 37 ++- docs6/docs/reference-dsl-variables.md.in | 103 +++++++- docs6/docs/reference-dsl.md | 4 +- docs6/docs/reference-dsl.md.in | 4 +- docs6/docs/reference-main-compressed-data.md | 133 ++++++++++ .../docs/reference-main-compressed-data.md.in | 96 +++++++ docs6/docs/reference-main-io-options.md | 32 +-- docs6/docs/reference-main-io-options.md.in | 32 +-- docs6/docs/reference-main-overview.md | 17 +- docs6/docs/reference-main-overview.md.in | 12 +- docs6/docs/repl.md | 2 +- docs6/docs/repl.md.in | 2 +- docs6/docs/shapes-of-data.md.in | 13 +- docs6/docs/statistics-examples.md.in | 43 +++- docs6/docs/two-pass-algorithms.md.in | 64 ++++- docs6/docs/verb-example-ewma.sh | 1 - docs6/docs/yellow.csv.gz | Bin 0 -> 127 bytes docs6/mkdocs.yml | 21 +- go/src/auxents/help/entry.go | 2 +- go/src/auxents/repl/prompt.go | 2 +- go/src/auxents/repl/session.go | 3 + go/src/auxents/repl/verbs.go | 4 +- go/src/cli/mlrcli.go | 2 +- go/src/lib/paragraph.go | 2 +- go/todo.txt | 8 +- man6/manpage.txt | 11 +- man6/mlr6.1 | 13 +- 113 files changed, 1283 insertions(+), 706 deletions(-) delete mode 100755 docs6/docs/10-1.sh delete mode 100755 docs6/docs/10-2.sh delete mode 100644 docs6/docs/data/begin-end-example-1.sh delete mode 100644 docs6/docs/data/begin-end-example-2.sh delete mode 100644 docs6/docs/data/begin-end-example-3.sh delete mode 100644 docs6/docs/data/begin-end-example-4.sh delete mode 100644 docs6/docs/data/begin-end-example-5.sh delete mode 100644 docs6/docs/data/begin-end-example-6.sh delete mode 100644 docs6/docs/data/begin-end-example-6a.sh delete mode 100644 docs6/docs/data/begin-end-example-7.sh delete mode 100644 docs6/docs/data/begin-end-example-8.sh delete mode 100644 docs6/docs/data/emit-lashed.sh delete mode 100644 docs6/docs/data/factorial-example.sh delete mode 100644 docs6/docs/data/fe-example-1.sh delete mode 100644 docs6/docs/data/fe-example-2.sh delete mode 100644 docs6/docs/data/for-oosvar-example-0b.sh delete 
mode 100644 docs6/docs/data/for-oosvar-example-0c.sh delete mode 100644 docs6/docs/data/for-oosvar-example-0d.sh delete mode 100644 docs6/docs/data/for-oosvar-example-0e.sh delete mode 100644 docs6/docs/data/for-srec-example-1.sh delete mode 100644 docs6/docs/data/for-srec-example-2.sh delete mode 100644 docs6/docs/data/for-srec-example-3.sh delete mode 100644 docs6/docs/data/full-reorg.sh delete mode 100644 docs6/docs/data/iqr1.sh delete mode 100644 docs6/docs/data/iqrn.sh delete mode 100644 docs6/docs/data/keyed-mean-with-oosvars.sh delete mode 100644 docs6/docs/data/keyed-min-max-with-oosvars.sh delete mode 100755 docs6/docs/data/local-example-1.sh delete mode 100755 docs6/docs/data/map-literal-example-1.sh delete mode 100755 docs6/docs/data/map-literal-example-2.sh delete mode 100755 docs6/docs/data/map-literal-example-3.sh delete mode 100644 docs6/docs/data/mean-with-oosvars.sh delete mode 100644 docs6/docs/data/miss-date-1.sh delete mode 100644 docs6/docs/data/miss-date-2.sh delete mode 100644 docs6/docs/data/previous-to-current.sh delete mode 100644 docs6/docs/data/put-multiline-example.txt delete mode 100644 docs6/docs/data/ragged-csv-2.sh delete mode 100755 docs6/docs/data/ragged-csv.sh delete mode 100644 docs6/docs/data/rect.sh delete mode 100755 docs6/docs/data/single-for-example-1.sh delete mode 100755 docs6/docs/data/single-for-example-2.sh delete mode 100644 docs6/docs/data/subr-example.sh delete mode 100755 docs6/docs/data/trailing-commas.sh delete mode 100644 docs6/docs/data/triple-for-example-1.sh delete mode 100644 docs6/docs/data/triple-for-example-2.sh delete mode 100644 docs6/docs/data/weighted-mean.sh delete mode 100644 docs6/docs/data/while-example-1.sh delete mode 100644 docs6/docs/data/while-example-2.sh rename docs6/docs/{dates-and-times.md => date-time-examples.md} (99%) rename docs6/docs/{dates-and-times.md.in => date-time-examples.md.in} (79%) delete mode 100644 docs6/docs/dsl-example-multiline.sh create mode 100644 
docs6/docs/gz-example.csv.gz rename docs6/docs/{reference-main-online-help.md => online-help.md} (98%) rename docs6/docs/{reference-main-online-help.md.in => online-help.md.in} (95%) delete mode 100644 docs6/docs/oosvar-example-ewma.sh delete mode 100644 docs6/docs/oosvar-example-sum-grouped.sh delete mode 100644 docs6/docs/oosvar-example-sum.sh create mode 100644 docs6/docs/purple.csv.gz rename docs6/docs/{joins.md => questions-about-joins.md} (99%) rename docs6/docs/{joins.md.in => questions-about-joins.md.in} (99%) rename docs6/docs/{then-chaining.md => questions-about-then-chaining.md} (99%) rename docs6/docs/{then-chaining.md.in => questions-about-then-chaining.md.in} (98%) create mode 100644 docs6/docs/red.csv.gz create mode 100644 docs6/docs/reference-dsl-differences.md create mode 100644 docs6/docs/reference-dsl-differences.md.in create mode 100644 docs6/docs/reference-dsl-maps.md create mode 100644 docs6/docs/reference-dsl-maps.md.in create mode 100644 docs6/docs/reference-main-compressed-data.md create mode 100644 docs6/docs/reference-main-compressed-data.md.in delete mode 100644 docs6/docs/verb-example-ewma.sh create mode 100644 docs6/docs/yellow.csv.gz diff --git a/docs6/README.md b/docs6/README.md index 2d68b28a8..a452fd0ac 100644 --- a/docs6/README.md +++ b/docs6/README.md @@ -11,15 +11,15 @@ * You need `pip install mkdocs` (or `pip3 install mkdocs`). * The docs include lots of live code examples which will be invoked using `mlr` which must be somewhere in your `$PATH`. -* Clone https://github.com/johnkerl/miller and cd into `docs/` within your clone. +* Clone https://github.com/johnkerl/miller and cd into `docs6/` within your clone. * Quick-editing loop: * In one terminal, cd to this directory and leave `mkdocs serve` running. - * In another terminal, cd to the `docs` subdirectory and edit `*.md.in`. + * In another terminal, cd to the `docs` subdirectory of `docs6` and edit `*.md.in`. 
* Run `genmds` to re-create all the `*.md` files, or `genmds foo.md.in` to just re-create the `foo.md.in` file you just edited. * In your browser, visit http://127.0.0.1:8000 * Alternate editing loop: * Leave one terminal open as a place you will run `mkdocs build` - * In one terminal, cd to the `docs` subdirectory and edit `*.md.in`. + * In one terminal, cd to the `docs` subdirectory of `docs6` and edit `*.md.in`. * Generate `docs/*.md` from `docs/*.md.in`, and then from that generate the `site/*/*.html`: * Run `genmds` to re-create all the `*.md` files, or `genmds foo.md.in` to just re-create the `foo.md.in` file you just edited. * In the first terminal, run `mkdocs build` which will populate the `site` directory. @@ -34,7 +34,7 @@ ## Notes * CSS: - * I used the Mkdocs Readthedocs theme which I like a lot. I customized `docs/extra.css` for Miller coloring/branding. + * I used the Mkdocs Readthedocs theme which I like a lot. I customized `docs6/docs/extra.css` for Miller coloring/branding. * Live code: * I didn't find a way to include non-Python live-code examples within Mkdocs so I adapted the pre-Mkdocs Miller-doc strategy which is to have a generator script read a template file (here, `foo.md.in`), run the marked lines, and generate the output file (`foo.md`). This is `genmds`. * Edit the `*.md.in` files, not `*.md` directly. 
diff --git a/docs6/docs/10-1.sh b/docs6/docs/10-1.sh deleted file mode 100755 index 749a161c7..000000000 --- a/docs6/docs/10-1.sh +++ /dev/null @@ -1,2 +0,0 @@ -grep op=cache log.txt \ - | mlr --idkvp --opprint stats1 -a mean -f hit -g type then sort -f type diff --git a/docs6/docs/10-2.sh b/docs6/docs/10-2.sh deleted file mode 100755 index b7f6cd779..000000000 --- a/docs6/docs/10-2.sh +++ /dev/null @@ -1,4 +0,0 @@ -mlr --from log.txt --opprint \ - filter 'is_present($batch_size)' \ - then step -a delta -f time,num_filtered \ - then sec2gmt time diff --git a/docs6/docs/contributing.md b/docs6/docs/contributing.md index f01219c15..9b8b6a93c 100644 --- a/docs6/docs/contributing.md +++ b/docs6/docs/contributing.md @@ -9,27 +9,28 @@ You can ask questions -- or answer them! -- following the links on the [Communit Pre-release Miller documentation is at [https://github.com/johnkerl/miller/tree/main/docs6](https://github.com/johnkerl/miller/tree/main/docs6). -Clone [https://github.com/johnkerl/miller](https://github.com/johnkerl/miller) and `cd` into `docs6`. +Instructions for modifying, viewing, and submitting PRs for these are in the [docs6/README.md](https://github.com/johnkerl/miller/blob/main/docs6/README.md). -After `sudo pip install sphinx` (or `pip3`) you should be able to do `make html`. +While Miller 6 is in pre-release, these docs are not viewable at +[https://miller.readthedocs.io](https://miller.readthedocs.io) which shows Miller 5 docs. +For now, I'll push Miller-6 docs to my ISP space at +[https://johnkerl.org/miller6](https://johnkerl.org/miller6) after your PR is merged. -Edit `*.md.in` files, then `make html` to generate `*.md`, then run the Sphinx document-generator. - -Open `_build/html/index.html` in your browser, e.g. `file:////Users/yourname/git/miller/docs6/_build/html/contributing.html`, to verify. - -PRs are welcome at [https://github.com/johnkerl/miller](https://github.com/johnkerl/miller). 
+ ## Testing As of Miller-6's current pre-release status, the best way to test is to either build from source via [Building from source](build.md), or by getting a recent binary at [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), then click latest build, then *Artifacts*. Then simply use Miller for whatever you do, and create an issue at [https://github.com/johnkerl/miller/issues](https://github.com/johnkerl/miller/issues). -Do note that as of 2021-06-17 a few things have not been ported to Miller 6 -- most notably, including localtime DSL functions and other issues. +Do note that as of mid-2021 a few things have not been ported to Miller 6 -- most notably, including localtime DSL functions and other issues. ## Feature development diff --git a/docs6/docs/contributing.md.in b/docs6/docs/contributing.md.in index 0d1213103..68a204ba8 100644 --- a/docs6/docs/contributing.md.in +++ b/docs6/docs/contributing.md.in @@ -8,27 +8,28 @@ You can ask questions -- or answer them! -- following the links on the [Communit Pre-release Miller documentation is at [https://github.com/johnkerl/miller/tree/main/docs6](https://github.com/johnkerl/miller/tree/main/docs6). -Clone [https://github.com/johnkerl/miller](https://github.com/johnkerl/miller) and `cd` into `docs6`. +Instructions for modifying, viewing, and submitting PRs for these are in the [docs6/README.md](https://github.com/johnkerl/miller/blob/main/docs6/README.md). -After `sudo pip install sphinx` (or `pip3`) you should be able to do `make html`. +While Miller 6 is in pre-release, these docs are not viewable at +[https://miller.readthedocs.io](https://miller.readthedocs.io) which shows Miller 5 docs. +For now, I'll push Miller-6 docs to my ISP space at +[https://johnkerl.org/miller6](https://johnkerl.org/miller6) after your PR is merged. -Edit `*.md.in` files, then `make html` to generate `*.md`, then run the Sphinx document-generator. 
- -Open `_build/html/index.html` in your browser, e.g. `file:////Users/yourname/git/miller/docs6/_build/html/contributing.html`, to verify. - -PRs are welcome at [https://github.com/johnkerl/miller](https://github.com/johnkerl/miller). + ## Testing As of Miller-6's current pre-release status, the best way to test is to either build from source via [Building from source](build.md), or by getting a recent binary at [https://github.com/johnkerl/miller/actions](https://github.com/johnkerl/miller/actions), then click latest build, then *Artifacts*. Then simply use Miller for whatever you do, and create an issue at [https://github.com/johnkerl/miller/issues](https://github.com/johnkerl/miller/issues). -Do note that as of 2021-06-17 a few things have not been ported to Miller 6 -- most notably, including localtime DSL functions and other issues. +Do note that as of mid-2021 a few things have not been ported to Miller 6 -- most notably, including localtime DSL functions and other issues. ## Feature development diff --git a/docs6/docs/csv-with-and-without-headers.md.in b/docs6/docs/csv-with-and-without-headers.md.in index b62fad564..2ce41b08e 100644 --- a/docs6/docs/csv-with-and-without-headers.md.in +++ b/docs6/docs/csv-with-and-without-headers.md.in @@ -61,8 +61,24 @@ GENMD_RUN_COMMAND cat data/ragged.csv GENMD_EOF -GENMD_INCLUDE_AND_RUN_ESCAPED(data/ragged-csv.sh) +GENMD_RUN_COMMAND +mlr --from data/ragged.csv --fs comma --nidx put ' + @maxnf = max(@maxnf, NF); + @nf = NF; + while(@nf < @maxnf) { + @nf += 1; + $[@nf] = "" + } +' +GENMD_EOF or, more simply, -GENMD_INCLUDE_AND_RUN_ESCAPED(data/ragged-csv-2.sh) +GENMD_RUN_COMMAND +mlr --from data/ragged.csv --fs comma --nidx put ' + @maxnf = max(@maxnf, NF); + while(NF < @maxnf) { + $[NF+1] = ""; + } +' +GENMD_EOF diff --git a/docs6/docs/data/begin-end-example-1.sh b/docs6/docs/data/begin-end-example-1.sh deleted file mode 100644 index 781a65b1b..000000000 --- a/docs6/docs/data/begin-end-example-1.sh +++ /dev/null @@ -1,5 
+0,0 @@ -mlr put ' - begin { @sum = 0 }; - @x_sum += $x; - end { emit @x_sum } -' ./data/small diff --git a/docs6/docs/data/begin-end-example-2.sh b/docs6/docs/data/begin-end-example-2.sh deleted file mode 100644 index 8e291eb94..000000000 --- a/docs6/docs/data/begin-end-example-2.sh +++ /dev/null @@ -1,4 +0,0 @@ -mlr put ' - @x_sum += $x; - end { emit @x_sum } -' ./data/small diff --git a/docs6/docs/data/begin-end-example-3.sh b/docs6/docs/data/begin-end-example-3.sh deleted file mode 100644 index 60441db2b..000000000 --- a/docs6/docs/data/begin-end-example-3.sh +++ /dev/null @@ -1,4 +0,0 @@ -mlr put -q ' - @x_sum += $x; - end { emit @x_sum } -' ./data/small diff --git a/docs6/docs/data/begin-end-example-4.sh b/docs6/docs/data/begin-end-example-4.sh deleted file mode 100644 index a1564006a..000000000 --- a/docs6/docs/data/begin-end-example-4.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr put -q ' - @x_count += 1; - @x_sum += $x; - end { - emit @x_count; - emit @x_sum; - } -' ./data/small diff --git a/docs6/docs/data/begin-end-example-5.sh b/docs6/docs/data/begin-end-example-5.sh deleted file mode 100644 index 9f2c966b0..000000000 --- a/docs6/docs/data/begin-end-example-5.sh +++ /dev/null @@ -1 +0,0 @@ -mlr stats1 -a count,sum -f x ./data/small diff --git a/docs6/docs/data/begin-end-example-6.sh b/docs6/docs/data/begin-end-example-6.sh deleted file mode 100644 index 382e3302f..000000000 --- a/docs6/docs/data/begin-end-example-6.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr put -q ' - @x_count[$a] += 1; - @x_sum[$a] += $x; - end { - emit @x_count, "a"; - emit @x_sum, "a"; - } -' ./data/small diff --git a/docs6/docs/data/begin-end-example-6a.sh b/docs6/docs/data/begin-end-example-6a.sh deleted file mode 100644 index 6b9e46bea..000000000 --- a/docs6/docs/data/begin-end-example-6a.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --from data/medium put -q ' - @x_count[$a][$b] += 1; - @x_sum[$a][$b] += $x; - end { - emit (@x_count, @x_sum), "a", "b"; - } -' diff --git 
a/docs6/docs/data/begin-end-example-7.sh b/docs6/docs/data/begin-end-example-7.sh deleted file mode 100644 index da048ac5d..000000000 --- a/docs6/docs/data/begin-end-example-7.sh +++ /dev/null @@ -1 +0,0 @@ -mlr stats1 -a count,sum -f x -g a ./data/small diff --git a/docs6/docs/data/begin-end-example-8.sh b/docs6/docs/data/begin-end-example-8.sh deleted file mode 100644 index a5b6fd3a2..000000000 --- a/docs6/docs/data/begin-end-example-8.sh +++ /dev/null @@ -1,14 +0,0 @@ -mlr put ' - begin { - @num_total = 0; - @num_positive = 0; - }; - @num_total += 1; - $x > 0.0 { - @num_positive += 1; - $y = log10($x); $z = sqrt($y) - }; - end { - emitf @num_total, @num_positive - } -' data/put-gating-example-1.dkvp diff --git a/docs6/docs/data/emit-lashed.sh b/docs6/docs/data/emit-lashed.sh deleted file mode 100644 index 29dd4787c..000000000 --- a/docs6/docs/data/emit-lashed.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr --from data/medium --opprint put -q ' - @x_count[$a][$b] += 1; - @x_sum[$a][$b] += $x; - end { - for ((a, b), _ in @x_count) { - @x_mean[a][b] = @x_sum[a][b] / @x_count[a][b] - } - emit (@x_sum, @x_count, @x_mean), "a", "b" - } -' diff --git a/docs6/docs/data/factorial-example.sh b/docs6/docs/data/factorial-example.sh deleted file mode 100644 index c3984d568..000000000 --- a/docs6/docs/data/factorial-example.sh +++ /dev/null @@ -1,14 +0,0 @@ -mlr --opprint --from data/small put ' - func f(n) { - if (is_numeric(n)) { - if (n > 0) { - return n * f(n-1); - } else { - return 1; - } - } - # implicitly return absent-null if non-numeric - } - $ox = f($x + NR); - $oi = f($i); -' diff --git a/docs6/docs/data/fe-example-1.sh b/docs6/docs/data/fe-example-1.sh deleted file mode 100644 index 9575e4cf9..000000000 --- a/docs6/docs/data/fe-example-1.sh +++ /dev/null @@ -1 +0,0 @@ -mlr --from data/small put '$xy = sqrt($x**2 + $y**2)' diff --git a/docs6/docs/data/fe-example-2.sh b/docs6/docs/data/fe-example-2.sh deleted file mode 100644 index 048c3672e..000000000 --- 
a/docs6/docs/data/fe-example-2.sh +++ /dev/null @@ -1 +0,0 @@ -mlr --from data/small put 'func f(a, b) { return sqrt(a**2 + b**2) } $xy = f($x, $y)' diff --git a/docs6/docs/data/for-oosvar-example-0b.sh b/docs6/docs/data/for-oosvar-example-0b.sh deleted file mode 100644 index 538a8f58d..000000000 --- a/docs6/docs/data/for-oosvar-example-0b.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr -n put --jknquoteint -q ' - begin { - @myvar = { - 1: 2, - 3: { 4 : 5 }, - 6: { 7: { 8: 9 } } - } - } - end { dump } -' diff --git a/docs6/docs/data/for-oosvar-example-0c.sh b/docs6/docs/data/for-oosvar-example-0c.sh deleted file mode 100644 index d70455ccc..000000000 --- a/docs6/docs/data/for-oosvar-example-0c.sh +++ /dev/null @@ -1,16 +0,0 @@ -mlr -n put --jknquoteint -q ' - begin { - @myvar = { - 1: 2, - 3: { 4 : 5 }, - 6: { 7: { 8: 9 } } - } - } - end { - for (k, v in @myvar) { - print - "key=" . k . - ",valuetype=" . typeof(v); - } - } -' diff --git a/docs6/docs/data/for-oosvar-example-0d.sh b/docs6/docs/data/for-oosvar-example-0d.sh deleted file mode 100644 index b03441e75..000000000 --- a/docs6/docs/data/for-oosvar-example-0d.sh +++ /dev/null @@ -1,17 +0,0 @@ -mlr -n put --jknquoteint -q ' - begin { - @myvar = { - 1: 2, - 3: { 4 : 5 }, - 6: { 7: { 8: 9 } } - } - } - end { - for ((k1, k2), v in @myvar) { - print - "key1=" . k1 . - ",key2=" . k2 . - ",valuetype=" . typeof(v); - } - } -' diff --git a/docs6/docs/data/for-oosvar-example-0e.sh b/docs6/docs/data/for-oosvar-example-0e.sh deleted file mode 100644 index 1c0ae6238..000000000 --- a/docs6/docs/data/for-oosvar-example-0e.sh +++ /dev/null @@ -1,17 +0,0 @@ -mlr -n put --jknquoteint -q ' - begin { - @myvar = { - 1: 2, - 3: { 4 : 5 }, - 6: { 7: { 8: 9 } } - } - } - end { - for ((k1, k2), v in @myvar[6]) { - print - "key1=" . k1 . - ",key2=" . k2 . - ",valuetype=" . 
typeof(v); - } - } -' diff --git a/docs6/docs/data/for-srec-example-1.sh b/docs6/docs/data/for-srec-example-1.sh deleted file mode 100644 index 960ea65c9..000000000 --- a/docs6/docs/data/for-srec-example-1.sh +++ /dev/null @@ -1,11 +0,0 @@ -mlr --pprint --from data/for-srec-example.tbl put ' - $sum1 = $f1 + $f2 + $f3; - $sum2 = 0; - $sum3 = 0; - for (key, value in $*) { - if (key =~ "^f[0-9]+") { - $sum2 += value; - $sum3 += $[key]; - } - } -' diff --git a/docs6/docs/data/for-srec-example-2.sh b/docs6/docs/data/for-srec-example-2.sh deleted file mode 100644 index ffefb4ad7..000000000 --- a/docs6/docs/data/for-srec-example-2.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr --from data/small --opprint put ' - $sum1 = 0; - $sum2 = 0; - for (k,v in $*) { - if (is_numeric(v)) { - $sum1 +=v; - $sum2 += $[k]; - } - } -' diff --git a/docs6/docs/data/for-srec-example-3.sh b/docs6/docs/data/for-srec-example-3.sh deleted file mode 100644 index a6cc45ccb..000000000 --- a/docs6/docs/data/for-srec-example-3.sh +++ /dev/null @@ -1,9 +0,0 @@ -mlr --from data/small --opprint put ' - sum = 0; - for (k,v in $*) { - if (is_numeric(v)) { - sum += $[k]; - } - } - $sum = sum -' diff --git a/docs6/docs/data/full-reorg.sh b/docs6/docs/data/full-reorg.sh deleted file mode 100644 index fa56418a5..000000000 --- a/docs6/docs/data/full-reorg.sh +++ /dev/null @@ -1,15 +0,0 @@ -mlr put ' - begin { - @i_cumu = 0; - } - - @i_cumu += $i; - $* = { - "z": $x + y, - "KEYFIELD": $a, - "i": @i_cumu, - "b": $b, - "y": $x, - "x": $y, - }; -' data/small diff --git a/docs6/docs/data/iqr1.sh b/docs6/docs/data/iqr1.sh deleted file mode 100644 index d462c2ab3..000000000 --- a/docs6/docs/data/iqr1.sh +++ /dev/null @@ -1,3 +0,0 @@ -mlr --oxtab stats1 -f x -a p25,p75 \ - then put '$x_iqr = $x_p75 - $x_p25' \ - data/medium diff --git a/docs6/docs/data/iqrn.sh b/docs6/docs/data/iqrn.sh deleted file mode 100644 index c3f2ce503..000000000 --- a/docs6/docs/data/iqrn.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --oxtab stats1 --fr 
'[i-z]' -a p25,p75 \ - then put 'for (k,v in $*) { - if (k =~ "(.*)_p25") { - $["\1_iqr"] = $["\1_p75"] - $["\1_p25"] - } - }' \ - data/medium diff --git a/docs6/docs/data/keyed-mean-with-oosvars.sh b/docs6/docs/data/keyed-mean-with-oosvars.sh deleted file mode 100644 index aeba10665..000000000 --- a/docs6/docs/data/keyed-mean-with-oosvars.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr --opprint put -q ' - @x_sum[$a][$b] += $x; - @x_count[$a][$b] += 1; - end{ - for ((a, b), v in @x_sum) { - @x_mean[a][b] = @x_sum[a][b] / @x_count[a][b]; - } - emit @x_mean, "a", "b" - } -' data/medium diff --git a/docs6/docs/data/keyed-min-max-with-oosvars.sh b/docs6/docs/data/keyed-min-max-with-oosvars.sh deleted file mode 100644 index 48e9e6d5c..000000000 --- a/docs6/docs/data/keyed-min-max-with-oosvars.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --opprint --from data/medium put -q ' - @min[$a] = min(@min[$a], $x); - @max[$a] = max(@max[$a], $x); - end{ - emit (@min, @max), "a"; - } -' diff --git a/docs6/docs/data/local-example-1.sh b/docs6/docs/data/local-example-1.sh deleted file mode 100755 index cc4d7bb82..000000000 --- a/docs6/docs/data/local-example-1.sh +++ /dev/null @@ -1,16 +0,0 @@ -# Here I'm using a specified random-number seed so this example always -# produces the same output for this web document: in everyday practice we -# would leave off the --seed 12345 part. 
-mlr --seed 12345 seqgen --start 1 --stop 10 then put ' - func f(a, b) { # function arguments a and b - r = 0.0; # local r scoped to the function - for (int i = 0; i < 6; i += 1) { # local i scoped to the for-loop - num u = urand(); # local u scoped to the for-loop - r += u; # updates r from the enclosing scope - } - r /= 6; - return a + (b - a) * r; - } - num o = f(10, 20); # local to the top-level scope - $o = o; -' diff --git a/docs6/docs/data/map-literal-example-1.sh b/docs6/docs/data/map-literal-example-1.sh deleted file mode 100755 index 90d04a9be..000000000 --- a/docs6/docs/data/map-literal-example-1.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --opprint put ' - $* = { - "a": $i, - "i": $a, - "y": $y * 10, - } -' data/small diff --git a/docs6/docs/data/map-literal-example-2.sh b/docs6/docs/data/map-literal-example-2.sh deleted file mode 100755 index 41f17c7b8..000000000 --- a/docs6/docs/data/map-literal-example-2.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --from data/small put ' - func f(map m): map { - m["x"] *= 200; - return m; - } - $* = f({"a": $a, "x": $x}); -' diff --git a/docs6/docs/data/map-literal-example-3.sh b/docs6/docs/data/map-literal-example-3.sh deleted file mode 100755 index 2561ce741..000000000 --- a/docs6/docs/data/map-literal-example-3.sh +++ /dev/null @@ -1,19 +0,0 @@ -mlr --from data/small put -q ' - begin { - @o = { - "nrec": 0, - "nkey": {"numeric":0, "non-numeric":0}, - }; - } - @o["nrec"] += 1; - for (k, v in $*) { - if (is_numeric(v)) { - @o["nkey"]["numeric"] += 1; - } else { - @o["nkey"]["non-numeric"] += 1; - } - } - end { - dump @o; - } -' diff --git a/docs6/docs/data/mean-with-oosvars.sh b/docs6/docs/data/mean-with-oosvars.sh deleted file mode 100644 index b948eee3d..000000000 --- a/docs6/docs/data/mean-with-oosvars.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr --opprint put -q ' - @x_sum += $x; - @x_count += 1; - end { - @x_mean = @x_sum / @x_count; - emit @x_mean - } -' data/medium diff --git a/docs6/docs/data/miss-date-1.sh 
b/docs6/docs/data/miss-date-1.sh deleted file mode 100644 index 1701f7280..000000000 --- a/docs6/docs/data/miss-date-1.sh +++ /dev/null @@ -1,5 +0,0 @@ -mlr --from data/miss-date.csv --icsv \ - cat -n \ - then put '$datestamp = strptime($date, "%Y-%m-%d")' \ - then step -a delta -f datestamp \ -| head diff --git a/docs6/docs/data/miss-date-2.sh b/docs6/docs/data/miss-date-2.sh deleted file mode 100644 index 7430b40cf..000000000 --- a/docs6/docs/data/miss-date-2.sh +++ /dev/null @@ -1,5 +0,0 @@ -mlr --from data/miss-date.csv --icsv \ - cat -n \ - then put '$datestamp = strptime($date, "%Y-%m-%d")' \ - then step -a delta -f datestamp \ - then filter '$datestamp_delta != 86400 && $n != 1' diff --git a/docs6/docs/data/previous-to-current.sh b/docs6/docs/data/previous-to-current.sh deleted file mode 100644 index d7480d26c..000000000 --- a/docs6/docs/data/previous-to-current.sh +++ /dev/null @@ -1,5 +0,0 @@ -mlr --icsv --opprint \ - join -j color --ul --ur -f data/prevtemp.csv \ - then unsparsify --fill-with 0 \ - then put '$count_delta = $current_count - $previous_count' \ - data/currtemp.csv diff --git a/docs6/docs/data/put-multiline-example.txt b/docs6/docs/data/put-multiline-example.txt deleted file mode 100644 index 7e1f9e213..000000000 --- a/docs6/docs/data/put-multiline-example.txt +++ /dev/null @@ -1,7 +0,0 @@ -mlr --opprint put ' - $nf = NF; - $nr = NR; - $fnr = FNR; - $filenum = FILENUM; - $filename = FILENAME -' data/small data/small2 diff --git a/docs6/docs/data/ragged-csv-2.sh b/docs6/docs/data/ragged-csv-2.sh deleted file mode 100644 index a2664d8d7..000000000 --- a/docs6/docs/data/ragged-csv-2.sh +++ /dev/null @@ -1,6 +0,0 @@ -mlr --from data/ragged.csv --fs comma --nidx put ' - @maxnf = max(@maxnf, NF); - while(NF < @maxnf) { - $[NF+1] = ""; - } -' diff --git a/docs6/docs/data/ragged-csv.sh b/docs6/docs/data/ragged-csv.sh deleted file mode 100755 index b53812050..000000000 --- a/docs6/docs/data/ragged-csv.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr --from 
data/ragged.csv --fs comma --nidx put ' - @maxnf = max(@maxnf, NF); - @nf = NF; - while(@nf < @maxnf) { - @nf += 1; - $[@nf] = "" - } -' diff --git a/docs6/docs/data/rect.sh b/docs6/docs/data/rect.sh deleted file mode 100644 index aa36b2861..000000000 --- a/docs6/docs/data/rect.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr --from data/rect.txt put -q ' - is_present($outer) { - unset @r - } - for (k, v in $*) { - @r[k] = v - } - is_present($inner1) { - emit @r - }' diff --git a/docs6/docs/data/single-for-example-1.sh b/docs6/docs/data/single-for-example-1.sh deleted file mode 100755 index b0b0735aa..000000000 --- a/docs6/docs/data/single-for-example-1.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr --from data/small put ' - print "NR = ".NR; - for (key in $*) { - value = $[key]; - print " key:" . key . " value:".value; - } - -' diff --git a/docs6/docs/data/single-for-example-2.sh b/docs6/docs/data/single-for-example-2.sh deleted file mode 100755 index 1fa863209..000000000 --- a/docs6/docs/data/single-for-example-2.sh +++ /dev/null @@ -1,8 +0,0 @@ -mlr -n put ' - end { - o = {1:2, 3:{4:5}}; - for (key in o) { - print " key:" . key . " valuetype:" . typeof(o[key]); - } - } -' diff --git a/docs6/docs/data/subr-example.sh b/docs6/docs/data/subr-example.sh deleted file mode 100644 index 07141a065..000000000 --- a/docs6/docs/data/subr-example.sh +++ /dev/null @@ -1,17 +0,0 @@ -mlr --opprint --from data/small put -q ' - begin { - @call_count = 0; - } - subr s(n) { - @call_count += 1; - if (is_numeric(n)) { - if (n > 1) { - call s(n-1); - } else { - print "numcalls=" . @call_count; - } - } - } - print "NR=" . 
NR; - call s(NR); -' diff --git a/docs6/docs/data/trailing-commas.sh b/docs6/docs/data/trailing-commas.sh deleted file mode 100755 index 0f655cd89..000000000 --- a/docs6/docs/data/trailing-commas.sh +++ /dev/null @@ -1,17 +0,0 @@ -mlr --csvlite --from data/a.csv put ' - func f( - num a, - num b, - ): num { - return a**2 + b**2; - } - $* = { - "s": $a + $b, - "t": $a - $b, - "u": f( - $a, - $b, - ), - "v": NR, - } -' diff --git a/docs6/docs/data/triple-for-example-1.sh b/docs6/docs/data/triple-for-example-1.sh deleted file mode 100644 index d92979fc3..000000000 --- a/docs6/docs/data/triple-for-example-1.sh +++ /dev/null @@ -1,7 +0,0 @@ -mlr --from data/small --opprint put ' - num suma = 0; - for (a = 1; a <= NR; a += 1) { - suma += a; - } - $suma = suma; -' diff --git a/docs6/docs/data/triple-for-example-2.sh b/docs6/docs/data/triple-for-example-2.sh deleted file mode 100644 index 928dddc59..000000000 --- a/docs6/docs/data/triple-for-example-2.sh +++ /dev/null @@ -1,10 +0,0 @@ -mlr --from data/small --opprint put ' - num suma = 0; - num sumb = 0; - for (num a = 1, num b = 1; a <= NR; a += 1, b *= 2) { - suma += a; - sumb += b; - } - $suma = suma; - $sumb = sumb; -' diff --git a/docs6/docs/data/weighted-mean.sh b/docs6/docs/data/weighted-mean.sh deleted file mode 100644 index e4797cf4f..000000000 --- a/docs6/docs/data/weighted-mean.sh +++ /dev/null @@ -1,24 +0,0 @@ -mlr --from data/medium put -q ' - # Using the y field for weighting in this example - weight = $y; - - # Using the a field for weighted aggregation in this example - @sumwx[$a] += weight * $i; - @sumw[$a] += weight; - - @sumx[$a] += $i; - @sumn[$a] += 1; - - end { - map wmean = {}; - map mean = {}; - for (a in @sumwx) { - wmean[a] = @sumwx[a] / @sumw[a] - } - for (a in @sumx) { - mean[a] = @sumx[a] / @sumn[a] - } - #emit wmean, "a"; - #emit mean, "a"; - emit (wmean, mean), "a"; - }' diff --git a/docs6/docs/data/while-example-1.sh b/docs6/docs/data/while-example-1.sh deleted file mode 100644 index 
16575ef86..000000000 --- a/docs6/docs/data/while-example-1.sh +++ /dev/null @@ -1,6 +0,0 @@ -echo x=1,y=2 | mlr put ' - while (NF < 10) { - $[NF+1] = "" - } - $foo = "bar" -' diff --git a/docs6/docs/data/while-example-2.sh b/docs6/docs/data/while-example-2.sh deleted file mode 100644 index ffbb78d9e..000000000 --- a/docs6/docs/data/while-example-2.sh +++ /dev/null @@ -1,9 +0,0 @@ -echo x=1,y=2 | mlr put ' - do { - $[NF+1] = ""; - if (NF == 5) { - break - } - } while (NF < 10); - $foo = "bar" -' diff --git a/docs6/docs/dates-and-times.md b/docs6/docs/date-time-examples.md similarity index 99% rename from docs6/docs/dates-and-times.md rename to docs6/docs/date-time-examples.md index ef6e84aae..3431aee4c 100644 --- a/docs6/docs/dates-and-times.md +++ b/docs6/docs/date-time-examples.md @@ -1,5 +1,5 @@ -# Dates and times +# Date/time examples ## How can I filter by date? diff --git a/docs6/docs/dates-and-times.md.in b/docs6/docs/date-time-examples.md.in similarity index 79% rename from docs6/docs/dates-and-times.md.in rename to docs6/docs/date-time-examples.md.in index cf959cb09..14ac00fb8 100644 --- a/docs6/docs/dates-and-times.md.in +++ b/docs6/docs/date-time-examples.md.in @@ -1,4 +1,4 @@ -# Dates and times +# Date/time examples ## How can I filter by date? @@ -32,11 +32,23 @@ GENMD_EOF Since there are 1372 lines in the data file, some automation is called for. 
To find the missing dates, you can convert the dates to seconds since the epoch using `strptime`, then compute adjacent differences (the `cat -n` simply inserts record-counters): -GENMD_INCLUDE_AND_RUN_ESCAPED(data/miss-date-1.sh) +GENMD_RUN_COMMAND +mlr --from data/miss-date.csv --icsv \ + cat -n \ + then put '$datestamp = strptime($date, "%Y-%m-%d")' \ + then step -a delta -f datestamp \ +| head +GENMD_EOF Then, filter for adjacent difference not being 86400 (the number of seconds in a day): -GENMD_INCLUDE_AND_RUN_ESCAPED(data/miss-date-2.sh) +GENMD_RUN_COMMAND +mlr --from data/miss-date.csv --icsv \ + cat -n \ + then put '$datestamp = strptime($date, "%Y-%m-%d")' \ + then step -a delta -f datestamp \ + then filter '$datestamp_delta != 86400 && $n != 1' +GENMD_EOF Given this, it's now easy to see where the gaps are: diff --git a/docs6/docs/dsl-example-multiline.sh b/docs6/docs/dsl-example-multiline.sh deleted file mode 100644 index ef467ffd1..000000000 --- a/docs6/docs/dsl-example-multiline.sh +++ /dev/null @@ -1,4 +0,0 @@ -mlr --c2p put ' - $cost = $quantity * $rate; - $index *= 100 -' example.csv diff --git a/docs6/docs/gz-example.csv.gz b/docs6/docs/gz-example.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9850a07c90d4c6615a8161cc32825c91f0bb2229 GIT binary patch literal 258 zcmV+d0sa0TiwFoZ0wZAn17~_IWq4t2aBO8RV{>)@b&x@B!!Qg4?|I9D5JgF(o?|#l zYSge@*+!e+S80->0n!@}%h@6Od7LZOw{9sojolgV7^Y+S!pGiC+pt|wyR959WgO>E zY<1|S^H|!j7ciI+gi^?cOlIm1wH&dw-FnY>aD-;^ss=}n<(BD(+CMk~PGBKpC-9UV zKXFdoxE2HigDS-sJd{FeWy@ZdAIRP6mW+rDWAZ*b7{&Ywmyp60$2oIzkHY`q6e+?S zad3@H=7W2@ # Manual page -This is simply a copy of what you should see on running **man mlr** at a command prompt, once Miller is installed on your system. +This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
 MILLER(1)							     MILLER(1)
diff --git a/docs6/docs/manpage.md.in b/docs6/docs/manpage.md.in
index 2d783f508..05b2fc97a 100644
--- a/docs6/docs/manpage.md.in
+++ b/docs6/docs/manpage.md.in
@@ -1,5 +1,5 @@
 # Manual page
 
-This is simply a copy of what you should see on running **man mlr** at a command prompt, once Miller is installed on your system.
+This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
 
 GENMD_INCLUDE_ESCAPED(manpage.txt)
diff --git a/docs6/docs/misc-examples.md.in b/docs6/docs/misc-examples.md.in
index 5efe1d74f..2b9581dd6 100644
--- a/docs6/docs/misc-examples.md.in
+++ b/docs6/docs/misc-examples.md.in
@@ -137,7 +137,13 @@ GENMD_EOF
 
 Then, join on the key field(s), and use unsparsify to zero-fill counters absent on one side but present on the other. Use `--ul` and `--ur` to emit unpaired records (namely, purple on the left and yellow on the right):
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/previous-to-current.sh)
+GENMD_RUN_COMMAND
+mlr --icsv --opprint \
+  join -j color --ul --ur -f data/prevtemp.csv \
+  then unsparsify --fill-with 0 \
+  then put '$count_delta = $current_count - $previous_count' \
+  data/currtemp.csv
+GENMD_EOF
 
 ## Memoization with out-of-stream variables
 
diff --git a/docs6/docs/new-in-miller-6.md b/docs6/docs/new-in-miller-6.md
index 8b588f521..58aff06f2 100644
--- a/docs6/docs/new-in-miller-6.md
+++ b/docs6/docs/new-in-miller-6.md
@@ -5,7 +5,7 @@ See also the [list of issues tagged with go-port](https://github.com/johnkerl/mi
 
 ## Documentation improvements
 
-Documentation (what you're reading here) and on-line help (`mlr --help`) have been completely reworked.
+Documentation (what you're reading here) and online help (`mlr --help`) have been completely reworked.
 
 In the initial release, the focus was convincing users already familiar with
 `awk`/`grep`/`cut` that Miller was a viable alternative -- but over time it's
@@ -45,7 +45,7 @@ Binaries are reliably available using GitHub Actions: see also [Installation](in
 
 ## In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see section [TODO:linkify] for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ## Output colorization
 
diff --git a/docs6/docs/new-in-miller-6.md.in b/docs6/docs/new-in-miller-6.md.in
index faff11713..96a0decdc 100644
--- a/docs6/docs/new-in-miller-6.md.in
+++ b/docs6/docs/new-in-miller-6.md.in
@@ -4,7 +4,7 @@ See also the [list of issues tagged with go-port](https://github.com/johnkerl/mi
 
 ## Documentation improvements
 
-Documentation (what you're reading here) and on-line help (`mlr --help`) have been completely reworked.
+Documentation (what you're reading here) and online help (`mlr --help`) have been completely reworked.
 
 In the initial release, the focus was convincing users already familiar with
 `awk`/`grep`/`cut` that Miller was a viable alternative -- but over time it's
@@ -44,7 +44,7 @@ Binaries are reliably available using GitHub Actions: see also [Installation](in
 
 ## In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see section [TODO:linkify] for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ## Output colorization
 
diff --git a/docs6/docs/reference-main-online-help.md b/docs6/docs/online-help.md
similarity index 98%
rename from docs6/docs/reference-main-online-help.md
rename to docs6/docs/online-help.md
index 818c71152..6d23ec902 100644
--- a/docs6/docs/reference-main-online-help.md
+++ b/docs6/docs/online-help.md
@@ -221,4 +221,4 @@ Options:
 ## Manual page
 
 If you've gotten Miller from a package installer, you should have `man mlr` producing a traditional manual page.
-If not, no worries -- the manual page is a concatenated listing of the same information also available by each of the topics in `mlr help topics`.
+If not, no worries -- the manual page is a concatenated listing of the same information also available by each of the topics in `mlr help topics`. See also the [Manual page](manpage.md), which is an online copy.
diff --git a/docs6/docs/reference-main-online-help.md.in b/docs6/docs/online-help.md.in
similarity index 95%
rename from docs6/docs/reference-main-online-help.md.in
rename to docs6/docs/online-help.md.in
index 9a83f4c98..fc3858505 100644
--- a/docs6/docs/reference-main-online-help.md.in
+++ b/docs6/docs/online-help.md.in
@@ -83,4 +83,4 @@ GENMD_EOF
 ## Manual page
 
 If you've gotten Miller from a package installer, you should have `man mlr` producing a traditional manual page.
-If not, no worries -- the manual page is a concatenated listing of the same information also available by each of the topics in `mlr help topics`.
+If not, no worries -- the manual page is a concatenated listing of the same information also available by each of the topics in `mlr help topics`. See also the [Manual page](manpage.md), which is an online copy.
diff --git a/docs6/docs/oosvar-example-ewma.sh b/docs6/docs/oosvar-example-ewma.sh
deleted file mode 100644
index b2f1f8665..000000000
--- a/docs6/docs/oosvar-example-ewma.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-mlr --opprint put '
-  begin{ @a=0.1 };
-  $e = NR==1 ? $x : @a * $x + (1 - @a) * @e;
-  @e=$e
-' data/small
diff --git a/docs6/docs/oosvar-example-sum-grouped.sh b/docs6/docs/oosvar-example-sum-grouped.sh
deleted file mode 100644
index 5229a1d98..000000000
--- a/docs6/docs/oosvar-example-sum-grouped.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-mlr --opprint put -q '
-  @x_sum[$b] += $x;
-  end {
-    emit @x_sum, "b"
-  }
-' data/medium
diff --git a/docs6/docs/oosvar-example-sum.sh b/docs6/docs/oosvar-example-sum.sh
deleted file mode 100644
index f31642c86..000000000
--- a/docs6/docs/oosvar-example-sum.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-mlr --oxtab put -q '
-  @x_sum += $x;
-  end {
-    emit @x_sum
-  }
-' data/medium
diff --git a/docs6/docs/operating-on-all-fields.md.in b/docs6/docs/operating-on-all-fields.md.in
index 8a6eb09f2..fdb064271 100644
--- a/docs6/docs/operating-on-all-fields.md.in
+++ b/docs6/docs/operating-on-all-fields.md.in
@@ -52,4 +52,20 @@ GENMD_RUN_COMMAND
 cat data/small
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/full-reorg.sh)
+GENMD_RUN_COMMAND
+mlr put '
+  begin {
+    @i_cumu = 0;
+  }
+
+  @i_cumu += $i;
+  $* = {
+    "z": $x + $y,
+    "KEYFIELD": $a,
+    "i": @i_cumu,
+    "b": $b,
+    "y": $x,
+    "x": $y,
+  };
+' data/small
+GENMD_EOF
diff --git a/docs6/docs/output-colorization.md b/docs6/docs/output-colorization.md
index c7ee2b440..470e19377 100644
--- a/docs6/docs/output-colorization.md
+++ b/docs6/docs/output-colorization.md
@@ -10,7 +10,7 @@ Things having colors:
 * Keys in CSV header lines, JSON keys, etc
 * Values in CSV data lines, JSON scalar values, etc
 * "PASS" and "FAIL" in regression-test output
-* Some online-help strings
+* Some [online-help](online-help.md) strings
 
 Rules for colorization:
 
diff --git a/docs6/docs/output-colorization.md.in b/docs6/docs/output-colorization.md.in
index 5467fe5ba..61bf11769 100644
--- a/docs6/docs/output-colorization.md.in
+++ b/docs6/docs/output-colorization.md.in
@@ -9,7 +9,7 @@ Things having colors:
 * Keys in CSV header lines, JSON keys, etc
 * Values in CSV data lines, JSON scalar values, etc
 * "PASS" and "FAIL" in regression-test output
-* Some online-help strings
+* Some [online-help](online-help.md) strings
 
 Rules for colorization:
 
diff --git a/docs6/docs/programming-language.md.in b/docs6/docs/programming-language.md.in
index b7f5db13a..7cbf9e152 100644
--- a/docs6/docs/programming-language.md.in
+++ b/docs6/docs/programming-language.md.in
@@ -29,7 +29,12 @@ GENMD_RUN_COMMAND
 mlr --c2p put '$cost = $quantity * $rate; $index = $index * 100'  example.csv
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(dsl-example-multiline.sh)
+GENMD_RUN_COMMAND
+mlr --c2p put '
+  $cost = $quantity * $rate;
+  $index *= 100
+' example.csv
+GENMD_EOF
 
 One of Miller's key features is the ability to express data-transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`:
 
diff --git a/docs6/docs/proofreads.txt b/docs6/docs/proofreads.txt
index 70fe98a34..7f7433851 100644
--- a/docs6/docs/proofreads.txt
+++ b/docs6/docs/proofreads.txt
@@ -1,31 +1,29 @@
 ----------------------------------------------------------------
 ALL:
-* unvisited links are still blue -- ?!?
 * GENMD_INCLUDE_AND_RUN_ESCAPED -> remove and replace with GENMD_RUN_COMMAND
 * example.csv rename index to something else, and add i column, update any code samples which use index
 * csv to csv,tsv throughout
 * rid of explicitly passing around os.Stdout in all various help functions, annoying
 * het.dkvp > het.json in more places
 * check each page for adequate h2 coverage
+* hash-map / hashmap -> map everywhere
 
 ----------------------------------------------------------------
-w compression page: make one! :)
-w flatten/unflatten page: make one! :)
-w flesh out arrays page!
+E flatten/unflatten page: make one! :)
+E memory/streaming page
+E flesh out data-types page!
+E flesh out arrays page!
+E new maps page!
+  - insertion order ...
+  - for-k-v etc
+
+e reduce #digits in data/small
+e mcvt pass
 
 c GOMAXPROCS -- up it? separate page maybe -- ?
   - note one goroutine for in, out, & each verb
   - check and respect env-var
 
-* move aux-cmds down lower. maybe some other reorders as well.
-
-* new different-from-other-languages page
-  - no ++
-  - 1-up arrays
-    for (i = 1; i <= n; i += 1) { ... }
-  - hash-maps are order-preserving
-  - single-for over array: var is value; over map: var is key
-
 ----------------------------------------------------------------
 
 index:
@@ -56,12 +54,8 @@ record-heterogeneity:
 
 new-in-miller-6:
 w flatten/unflatten -- needs a new separate page
-l gzin/bz2in linkify
 ? TODO marks
-
-contributing:
-L add a pre-release note about https://johnkerl.org/miller6 & why no double RTD
-E update for sphinx -> mkdocs. and/or link to r.md.
+* ?? operator
 
 csv-with-and-without-headers:
 ? Headerless CSV with duplicate field values -> typo-fix -- duplicate keys actually -- ?!?
@@ -120,6 +114,7 @@ statistics-examples:
 two-pass-algorithms:
 l link to "new" verbs x everywhere possible
 l Of course, Miller verbs such as sort, tac, etc. all must ... -> linkify to new memory/streaming page
+x this (or wherever ...) maybe get rid of some of the too-many examples. pick some survivors; x the rest.
 
 misc examples:
 ? Program timing & subsequents -> another page
@@ -195,5 +190,9 @@ E Keep in mind that out-of-stream variables are a nested, multi-level hashmap (d
   o 2 examples not 3?
   o why not '--oflatsep /' respected?
 
+reference-dsl-differences.md:
+l check for linkify opportunities
+
 manpage:
 ? [NEEDS READ-THROUGH]
+? 'Kerl .' and 'Veith .'
diff --git a/docs6/docs/purple.csv.gz b/docs6/docs/purple.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..01ca5a94512a9b7dd63081ef311ca9df927e848a
GIT binary patch
literal 132
zcmV-~0DJ!*iwFSWS|nir19i>84#FT1gyDOiWkPl-2t3Y4X(bYhKofm?x#-Q?9KJtu
zJ@V1e?@}YSEIS@ZE2{Z!{Tksb{M>VzeY>pJS8UBP=Y#v(h>cyt0Amk94?L
mcv5U=M#^jRlYKt5uVX@BIIUK<&Om9i(((Z~bCN!!0001Sr#Ynn

literal 0
HcmV?d00001

diff --git a/docs6/docs/joins.md b/docs6/docs/questions-about-joins.md
similarity index 99%
rename from docs6/docs/joins.md
rename to docs6/docs/questions-about-joins.md
index a859aeeba..87f33805a 100644
--- a/docs6/docs/joins.md
+++ b/docs6/docs/questions-about-joins.md
@@ -1,5 +1,5 @@
 
-# Joins
+# Questions about joins
 
 ## Why am I not seeing all possible joins occur?
 
diff --git a/docs6/docs/joins.md.in b/docs6/docs/questions-about-joins.md.in
similarity index 99%
rename from docs6/docs/joins.md.in
rename to docs6/docs/questions-about-joins.md.in
index 63ef05878..c3f91faa6 100644
--- a/docs6/docs/joins.md.in
+++ b/docs6/docs/questions-about-joins.md.in
@@ -1,4 +1,4 @@
-# Joins
+# Questions about joins
 
 ## Why am I not seeing all possible joins occur?
 
diff --git a/docs6/docs/then-chaining.md b/docs6/docs/questions-about-then-chaining.md
similarity index 99%
rename from docs6/docs/then-chaining.md
rename to docs6/docs/questions-about-then-chaining.md
index 01b950338..4a6d5b902 100644
--- a/docs6/docs/then-chaining.md
+++ b/docs6/docs/questions-about-then-chaining.md
@@ -1,5 +1,5 @@
 
-# Then-chaining
+# Questions about then-chaining
 
 ## How do I examine then-chaining?
 
diff --git a/docs6/docs/then-chaining.md.in b/docs6/docs/questions-about-then-chaining.md.in
similarity index 98%
rename from docs6/docs/then-chaining.md.in
rename to docs6/docs/questions-about-then-chaining.md.in
index d0fa80d7d..c014e2838 100644
--- a/docs6/docs/then-chaining.md.in
+++ b/docs6/docs/questions-about-then-chaining.md.in
@@ -1,4 +1,4 @@
-# Then-chaining
+# Questions about then-chaining
 
 ## How do I examine then-chaining?
 
diff --git a/docs6/docs/red.csv.gz b/docs6/docs/red.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5f55d225cb74663d3e7e9f387ffc7925855de841
GIT binary patch
literal 143
zcmV;A0C4{wiwFSWS|nir18vO93c@fDM&Z5B@-t*|o91ysJJn!lHBCX^-YPC!xIKKF
zajk2^cF8xzT=N-Mlx5Nf?t5N(=})xW_0)92cBH20ZC6-ufF!aa0}L5<=3~av#`-l5
x9KntTHV&4OVe|W$bKMjket-yt;4OrFls_mU`ZwZ~7>R;APA{DpVfeWK007FjLKFZ1

literal 0
HcmV?d00001

diff --git a/docs6/docs/reference-dsl-control-structures.md b/docs6/docs/reference-dsl-control-structures.md
index a280ac32f..4ecea076d 100644
--- a/docs6/docs/reference-dsl-control-structures.md
+++ b/docs6/docs/reference-dsl-control-structures.md
@@ -157,78 +157,95 @@ While Miller's `while` and `do-while` statements are much as in many other langu
 
 As with `while` and `do-while`, a `break` or `continue` within nested control structures will propagate to the innermost loop enclosing them, if any, and a `break` or `continue` outside a loop is a syntax error that will be flagged as soon as the expression is parsed, before any input records are ingested.
 
-### Key-only for-loops
+### Single-variable for-loops
 
-The `key` variable is always bound to the *key* of key-value pairs:
+For [maps](reference-dsl-maps.md), the single variable is always bound to the *key* of key-value pairs:
 
 
-mlr --from data/small put '
+mlr --from data/small put -q '
   print "NR = ".NR;
-  for (key in $*) {
-    value = $[key];
-    print "  key:" . key . "  value:".value;
+  for (e in $*) {
+    print "  key:", e, "value:", $[e];
   }
-
 '
 
 NR = 1
-  key:a  value:pan
-  key:b  value:pan
-  key:i  value:1
-  key:x  value:0.3467901443380824
-  key:y  value:0.7268028627434533
-a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
+  key: a value: pan
+  key: b value: pan
+  key: i value: 1
+  key: x value: 0.3467901443380824
+  key: y value: 0.7268028627434533
 NR = 2
-  key:a  value:eks
-  key:b  value:pan
-  key:i  value:2
-  key:x  value:0.7586799647899636
-  key:y  value:0.5221511083334797
-a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
+  key: a value: eks
+  key: b value: pan
+  key: i value: 2
+  key: x value: 0.7586799647899636
+  key: y value: 0.5221511083334797
 NR = 3
-  key:a  value:wye
-  key:b  value:wye
-  key:i  value:3
-  key:x  value:0.20460330576630303
-  key:y  value:0.33831852551664776
-a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
+  key: a value: wye
+  key: b value: wye
+  key: i value: 3
+  key: x value: 0.20460330576630303
+  key: y value: 0.33831852551664776
 NR = 4
-  key:a  value:eks
-  key:b  value:wye
-  key:i  value:4
-  key:x  value:0.38139939387114097
-  key:y  value:0.13418874328430463
-a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463
+  key: a value: eks
+  key: b value: wye
+  key: i value: 4
+  key: x value: 0.38139939387114097
+  key: y value: 0.13418874328430463
 NR = 5
-  key:a  value:wye
-  key:b  value:pan
-  key:i  value:5
-  key:x  value:0.5732889198020006
-  key:y  value:0.8636244699032729
-a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729
+  key: a value: wye
+  key: b value: pan
+  key: i value: 5
+  key: x value: 0.5732889198020006
+  key: y value: 0.8636244699032729
 
-mlr -n put '
+mlr -n put -q '
   end {
-    o = {1:2, 3:{4:5}};
-    for (key in o) {
-      print "  key:" . key . "  valuetype:" . typeof(o[key]);
+    o = {"a":1, "b":{"c":3}};
+    for (e in o) {
+      print "key:", e, "valuetype:", typeof(o[e]);
     }
   }
 '
 
-  key:1  valuetype:int
-  key:3  valuetype:map
+key: a valuetype: int
+key: b valuetype: map
 
-Note that the value corresponding to a given key may be gotten as through a **computed field name** using square brackets as in `$[key]` for stream records, or by indexing the looped-over variable using square brackets. +Note that the value corresponding to a given key may be gotten as through a **computed field name** using square brackets as in `$[e]` for stream records, or by indexing the looped-over variable using square brackets. + +For [arrays](reference-dsl-arrays.md), the single variable is always bound to the *value* (not the array index): + +
+mlr -n put -q '
+  end {
+    o = [10, "20", {}, "four", true];
+    for (e in o) {
+      print "value:", e, "valuetype:", typeof(e);
+    }
+  }
+'
+
+
+value: 10 valuetype: int
+value: 20 valuetype: string
+value: {} valuetype: map
+value: four valuetype: string
+value: true valuetype: bool
+
### Key-value for-loops -Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on. The `v` variable will be bound to to a scalar value (a string or a number) if the map stops at that level, or to a map-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed. +For [maps](reference-dsl-maps.md), the first loop variable is the key and the +second is the value; for [arrays](reference-dsl-arrays.md), the first loop +variable is the (1-up) array index and the second is the value. + +Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on. The `v` variable will be bound to to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
 cat data/for-srec-example.tbl
diff --git a/docs6/docs/reference-dsl-control-structures.md.in b/docs6/docs/reference-dsl-control-structures.md.in
index fa1158ba7..700cdadc6 100644
--- a/docs6/docs/reference-dsl-control-structures.md.in
+++ b/docs6/docs/reference-dsl-control-structures.md.in
@@ -70,9 +70,26 @@ GENMD_EOF
 
 Miller's `while` and `do-while` are unsurprising in comparison to various languages, as are `break` and `continue`:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/while-example-1.sh)
+GENMD_RUN_COMMAND
+echo x=1,y=2 | mlr put '
+  while (NF < 10) {
+    $[NF+1] = ""
+  }
+  $foo = "bar"
+'
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/while-example-2.sh)
+GENMD_RUN_COMMAND
+echo x=1,y=2 | mlr put '
+  do {
+    $[NF+1] = "";
+    if (NF == 5) {
+      break
+    }
+  } while (NF < 10);
+  $foo = "bar"
+'
+GENMD_EOF
 
 A `break` or `continue` within nested conditional blocks or if-statements will,
 of course, propagate to the innermost loop enclosing them, if any. A `break` or
@@ -97,25 +114,70 @@ While Miller's `while` and `do-while` statements are much as in many other langu
 
 As with `while` and `do-while`, a `break` or `continue` within nested control structures will propagate to the innermost loop enclosing them, if any, and a `break` or `continue` outside a loop is a syntax error that will be flagged as soon as the expression is parsed, before any input records are ingested.
 
-### Key-only for-loops
+### Single-variable for-loops
 
-The `key` variable is always bound to the *key* of key-value pairs:
+For [maps](reference-dsl-maps.md), the single variable is always bound to the *key* of key-value pairs:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/single-for-example-1.sh)
+GENMD_RUN_COMMAND
+mlr --from data/small put -q '
+  print "NR = ".NR;
+  for (e in $*) {
+    print "  key:", e, "value:", $[e];
+  }
+'
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/single-for-example-2.sh)
+GENMD_RUN_COMMAND
+mlr -n put -q '
+  end {
+    o = {"a":1, "b":{"c":3}};
+    for (e in o) {
+      print "key:", e, "valuetype:", typeof(o[e]);
+    }
+  }
+'
+GENMD_EOF
 
-Note that the value corresponding to a given key may be gotten as through a **computed field name** using square brackets as in `$[key]` for stream records, or by indexing the looped-over variable using square brackets.
+Note that the value corresponding to a given key may be gotten through a **computed field name** using square brackets, as in `$[e]` for stream records, or by indexing the looped-over variable using square brackets.
+
+For [arrays](reference-dsl-arrays.md), the single variable is always bound to the *value* (not the array index):
+
+GENMD_RUN_COMMAND
+mlr -n put -q '
+  end {
+    o = [10, "20", {}, "four", true];
+    for (e in o) {
+      print "value:", e, "valuetype:", typeof(e);
+    }
+  }
+'
+GENMD_EOF
 
 ### Key-value for-loops
 
-Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to to a scalar value (a string or a number) if the map stops at that level, or to a map-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
+For [maps](reference-dsl-maps.md), the first loop variable is the key and the
+second is the value; for [arrays](reference-dsl-arrays.md), the first loop
+variable is the (1-up) array index and the second is the value.
+
+Single-level keys may be gotten at using either `for(k,v)` or `for((k),v)`; multi-level keys may be gotten at using `for((k1,k2,k3),v)` and so on.  The `v` variable will be bound to a scalar value (non-array/non-map) if the map stops at that level, or to a map-valued or array-valued variable if the map goes deeper. If the map isn't deep enough then the loop body won't be executed.
 
 GENMD_RUN_COMMAND
 cat data/for-srec-example.tbl
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-srec-example-1.sh)
+GENMD_RUN_COMMAND
+mlr --pprint --from data/for-srec-example.tbl put '
+  $sum1 = $f1 + $f2 + $f3;
+  $sum2 = 0;
+  $sum3 = 0;
+  for (key, value in $*) {
+    if (key =~ "^f[0-9]+") {
+      $sum2 += value;
+      $sum3 += $[key];
+    }
+  }
+'
+GENMD_EOF
 
 GENMD_RUN_COMMAND
 mlr --from data/small --opprint put 'for (k,v in $*) { $[k."_type"] = typeof(v) }'
@@ -125,11 +187,32 @@ Note that the value of the current field in the for-loop can be gotten either us
 
 Important note: to avoid inconsistent looping behavior in case you're setting new fields (and/or unsetting existing ones) while looping over the record, **Miller makes a copy of the record before the loop: loop variables are bound from the copy and all other reads/writes involve the record itself**:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-srec-example-2.sh)
+GENMD_RUN_COMMAND
+mlr --from data/small --opprint put '
+  $sum1 = 0;
+  $sum2 = 0;
+  for (k,v in $*) {
+    if (is_numeric(v)) {
+      $sum1 +=v;
+      $sum2 += $[k];
+    }
+  }
+'
+GENMD_EOF
 
 It can be confusing to modify the stream record while iterating over a copy of it, so instead you might find it simpler to use a local variable in the loop and only update the stream record after the loop:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-srec-example-3.sh)
+GENMD_RUN_COMMAND
+mlr --from data/small --opprint put '
+  sum = 0;
+  for (k,v in $*) {
+    if (is_numeric(v)) {
+      sum += $[k];
+    }
+  }
+  $sum = sum
+'
+GENMD_EOF
 
 You can also start iterating on sub-hashmaps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables.  The bound variables are bound to a copy of the sub-hashmap as it was before the loop started.  The sub-hashmap is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key-variables. The terminal values are bound to the loop value-variable whenever the keys are not too shallow. The value-variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows:
 
@@ -137,23 +220,106 @@ GENMD_INCLUDE_ESCAPED(data/for-oosvar-example-0a.txt)
 
 That's confusing in the abstract, so a concrete example is in order. Suppose the out-of-stream variable `@myvar` is populated as follows:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-oosvar-example-0b.sh)
+GENMD_RUN_COMMAND
+mlr -n put --jknquoteint -q '
+  begin {
+    @myvar = {
+      1: 2,
+      3: { 4 : 5 },
+      6: { 7: { 8: 9 } }
+    }
+  }
+  end { dump }
+'
+GENMD_EOF
 
 Then we can get at various values as follows:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-oosvar-example-0c.sh)
+GENMD_RUN_COMMAND
+mlr -n put --jknquoteint -q '
+  begin {
+    @myvar = {
+      1: 2,
+      3: { 4 : 5 },
+      6: { 7: { 8: 9 } }
+    }
+  }
+  end {
+    for (k, v in @myvar) {
+      print
+        "key=" . k .
+        ",valuetype=" . typeof(v);
+    }
+  }
+'
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-oosvar-example-0d.sh)
+GENMD_RUN_COMMAND
+mlr -n put --jknquoteint -q '
+  begin {
+    @myvar = {
+      1: 2,
+      3: { 4 : 5 },
+      6: { 7: { 8: 9 } }
+    }
+  }
+  end {
+    for ((k1, k2), v in @myvar) {
+      print
+        "key1=" . k1 .
+        ",key2=" . k2 .
+        ",valuetype=" . typeof(v);
+    }
+  }
+'
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/for-oosvar-example-0e.sh)
+GENMD_RUN_COMMAND
+mlr -n put --jknquoteint -q '
+  begin {
+    @myvar = {
+      1: 2,
+      3: { 4 : 5 },
+      6: { 7: { 8: 9 } }
+    }
+  }
+  end {
+    for ((k1, k2), v in @myvar[6]) {
+      print
+        "key1=" . k1 .
+        ",key2=" . k2 .
+        ",valuetype=" . typeof(v);
+    }
+  }
+'
+GENMD_EOF
 
 ### C-style triple-for loops
 
 These are supported as follows:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/triple-for-example-1.sh)
+GENMD_RUN_COMMAND
+mlr --from data/small --opprint put '
+  num suma = 0;
+  for (a = 1; a <= NR; a += 1) {
+    suma += a;
+  }
+  $suma = suma;
+'
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/triple-for-example-2.sh)
+GENMD_RUN_COMMAND
+mlr --from data/small --opprint put '
+  num suma = 0;
+  num sumb = 0;
+  for (num a = 1, num b = 1; a <= NR; a += 1, b *= 2) {
+    suma += a;
+    sumb += b;
+  }
+  $suma = suma;
+  $sumb = sumb;
+'
+GENMD_EOF
 
 Notes:
 
@@ -171,23 +337,50 @@ Notes:
 
 Miller supports an `awk`-like `begin/end` syntax.  The statements in the `begin` block are executed before any input records are read; the statements in the `end` block are executed after the last input record is read.  (If you want to execute some statement at the start of each file, not at the start of the first file as with `begin`, you might use a pattern/action block of the form `FNR == 1 { ... }`.) All statements outside of `begin` or `end` are, of course, executed on every input record. Semicolons separate statements inside or outside of begin/end blocks; semicolons are required between begin/end block bodies and any subsequent statement.  For example:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-1.sh)
+GENMD_RUN_COMMAND
+mlr put '
+  begin { @x_sum = 0 };
+  @x_sum += $x;
+  end { emit @x_sum }
+' ./data/small
+GENMD_EOF
 
 Since uninitialized out-of-stream variables default to 0 for addition/subtraction and 1 for multiplication when they appear on expression right-hand sides (not quite as in `awk`, where they'd default to 0 either way), the above can be written more succinctly as
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-2.sh)
+GENMD_RUN_COMMAND
+mlr put '
+  @x_sum += $x;
+  end { emit @x_sum }
+' ./data/small
+GENMD_EOF
 
 The **put -q** option suppresses printing of each output record, with only `emit` statements being output. So to get only summary outputs, you could write
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-3.sh)
+GENMD_RUN_COMMAND
+mlr put -q '
+  @x_sum += $x;
+  end { emit @x_sum }
+' ./data/small
+GENMD_EOF
 
 We can do similarly with multiple out-of-stream variables:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-4.sh)
+GENMD_RUN_COMMAND
+mlr put -q '
+  @x_count += 1;
+  @x_sum += $x;
+  end {
+    emit @x_count;
+    emit @x_sum;
+  }
+' ./data/small
+GENMD_EOF
 
 This is of course (see also [here](reference-dsl.md#verbs-compared-to-dsl)) not much different than
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-5.sh)
+GENMD_RUN_COMMAND
+mlr stats1 -a count,sum -f x ./data/small
+GENMD_EOF
 
 Note that it's a syntax error for begin/end blocks to refer to field names (beginning with `$`), since begin/end blocks execute outside the context of input records.
 
diff --git a/docs6/docs/reference-dsl-differences.md b/docs6/docs/reference-dsl-differences.md
new file mode 100644
index 000000000..12fd0f63c
--- /dev/null
+++ b/docs6/docs/reference-dsl-differences.md
@@ -0,0 +1,157 @@
+
+# Differences from other programming languages
+
+The Miller programming language is intended to be straightforward and familiar,
+as well as [not overly complex](reference-dsl-complexity.md). It doesn't try to
+break new ground in terms of syntax; there are no classes or closures, and so
+on.
+
+While the [Principle of Least
+Surprise](https://en.wikipedia.org/wiki/Principle_of_least_astonishment) is
+often held to, nonetheless the following may be surprising.
+
+## No ++ or --
+
+There is no `++` or `--` [operator](reference-dsl-operators.md). To increment
+`x`, use `x = x+1` or `x += 1`, and similarly for decrement.
+
+## Semicolons as delimiters
+
+You don't need a semicolon to end expressions, only to separate them. This
+was done intentionally from the very start of Miller: you should be able to do
+simple things like `mlr put '$z = $x * $y' myfile.dat` without needing a
+semicolon.
+
+Note that since you also don't need a semicolon before or after closing curly
+braces (such as `begin`/`end` blocks, `if`-statements, `for`-loops, etc.) it's
+easy to key in several semicolon-free statements, and then to forget a
+semicolon where one is needed. The parser tries to remind you about semicolons
+whenever there's a chance a missing semicolon might be involved in a parse
+error.
+
+## No autoconvert to boolean
+
+Boolean tests in `if`/`while`/`for`/etc must always take a boolean expression:
+`if (1) {...}` results in the parse error
+`Miller: conditional expression did not evaluate to boolean.`,
+and likewise for `if (x) {...}`, unless `x` is a variable of boolean type.
+Please use `if (x != 0) {...}`, etc.
+
+## Integer-preserving arithmetic
+
+As discussed on the [arithmetic page](reference-main-arithmetic.md) the sum, difference, and product of two integers is again an integer, unless overflow occurs -- in which case Miller tries to convert to float in the least obtrusive way possible.
+
+Likewise, while quotient and remainder are generally pythonic, the quotient and exponentiation of two integers is an integer when possible.
+
+
+$ mlr repl -q
+
+
+[mlr] 6/2
+3
+
+[mlr] typeof(6/2)
+int
+
+[mlr] 6/5
+1.2
+
+[mlr] typeof(6/5)
+float
+
+[mlr] typeof(7**8)
+int
+
+[mlr] typeof(7**80)
+float
+
+ +## 1-up array indices + +Arrays are indexed starting with 1, not 0. This is discussed in detail on the [arrays page](reference-dsl-arrays.md). + +
+mlr --csv --from data/short.csv cat
+
+
+word,value
+apple,37
+ball,28
+cat,54
+
+ +
+mlr --csv --from data/short.csv put -q '
+  @records[NR] = $*;
+  end {
+    for (i = 1; i <= NR; i += 1) {
+      print "Record", i, "has word", @records[i]["word"];
+    }
+  }
+'
+
+
+Record 1 has word apple
+Record 2 has word ball
+Record 3 has word cat
+
+ +## Print adds spaces around multiple arguments + +As seen in the previous example, +[`print`](reference-dsl-output-statements.md#print-statements) with multiple +comma-delimited arguments fills in intervening spaces for you. If you want to +avoid this, use the dot operator for string-concatenation instead. + +
+mlr -n put -q '
+  end {
+    print "[", "a", "b", "c", "]";
+    print "[" . "a" . "b" . "c" . "]";
+  }
+'
+
+
+[ a b c ]
+[abc]
+
+ +Similarly, a final newline is printed for you; use [`printn`](reference-dsl-output-statements.md#print-statements) to avoid this. + +## Insertion-order-preserving hashmaps + +Miller's hashmaps [TODO:linkify] (as in many modern languages) preserve insertion order. If you set `x["foo"]=1` and then `x["bar"]=2`, then you are guaranteed that any looping over `x` will retrieve the `"foo"` key-value pair first, and the `"bar"` key-value pair second. + +
+mlr -n put -q 'end {
+  x["foo"] = 1;
+  x["bar"] = 2;
+  dump x;
+  for (k,v in x) {
+    print "key", k, "value", v
+  }
+}'
+
+
+{
+  "foo": 1,
+  "bar": 2
+}
+key foo value 1
+key bar value 2
+
+ +## Two-variable for-loops + +Miller has a [key-value loop flavor](reference-dsl-control-structures.md#key-value-for-loops): whether `x` is a map or array, in `for (k,v in x) { ... }` the `k` will be bound to successive map keys (for maps) or 1-up array indices (for arrays), and the `v` will be bound to successive map values. + +## Semantics for one-variable for-loops + +Miller also has a [single-variable loop flavor](reference-dsl-control-structures.md#single-variable-for-loops). If `x` is a map then `for (e in x) { ... }` binds `e` to successive map _keys_ (not values as in PHP). But if `x` is an array then `for e in x) { ... }` binds `e` to successive array _values_ (not indices). + +## Absent-null + +Miller has a somewhat novel flavor of null data called _absent_: if a record +has a field `x` then `$y=$x` creates a field `y`, but if it doesn't then the assignment +is skipped. See the [null-data page](reference-main-null-data.md) for more +information. diff --git a/docs6/docs/reference-dsl-differences.md.in b/docs6/docs/reference-dsl-differences.md.in new file mode 100644 index 000000000..afed15459 --- /dev/null +++ b/docs6/docs/reference-dsl-differences.md.in @@ -0,0 +1,131 @@ +# Differences from other programming languages + +The Miller programming language is intended to be straightforward and familiar, +as well as [not overly complex](reference-dsl-complexity.md). It doesn't try to +break new ground in terms of syntax; there are no classes or closures, and so +on. + +While the [Principle of Least +Surprise](https://en.wikipedia.org/wiki/Principle_of_least_astonishment) is +often held to, nonetheless the following may be surprising. + +## No ++ or -- + +There is no `++` or `--` [operator](reference-dsl-operators.md). To increment +`x`, use `x = x+1` or `x += 1`, and similarly for decrement. + +## Semicolons as delimiters + +You don't need a semicolon to end expressions, only to separate them. 
This +was done intentionally from the very start of Miller: you should be able to do +simple things like `mlr put '$z = $x * $y' myfile.dat` without needing a +semicolon. + +Note that since you also don't need a semicolon before or after closing curly +braces (such as `begin`/`end` blocks, `if`-statements, `for`-loops, etc.) it's +easy to key in several semicolon-free statements, and then to forget a +semicolon where one is needed . The parser tries to remind you about semicolons +whenever there's a chance a missing semicolon might be involved in a parse +error. + +## No autoconvert to boolean + +Boolean tests in `if`/`while`/`for`/etc must always take a boolean expression: +`if (1) {...}` results in the parse error +`Miller: conditional expression did not evaluate to boolean.`, +Likewise `if (x) {...}`, unless `x` is a variable of boolean type. +Please use `if (x != 0) {...}`, etc. + +## Integer-preserving arithmetic + +As discussed on the [arithmetic page](reference-main-arithmetic.md) the sum, difference, and product of two integers is again an integer, unless overflow occurs -- in which case Miller tries to convert to float in the least obtrusive way possible. + +Likewise, while quotient and remainder are generally pythonic, the quotient and exponentiation of two integers is an integer when possible. + +GENMD_CARDIFY_HIGHLIGHT_ONE +$ mlr repl -q +[mlr] 6/2 +3 + +[mlr] typeof(6/2) +int + +[mlr] 6/5 +1.2 + +[mlr] typeof(6/5) +float + +[mlr] typeof(7**8) +int + +[mlr] typeof(7**80) +float +GENMD_EOF + +## 1-up array indices + +Arrays are indexed starting with 1, not 0. This is discussed in detail on the [arrays page](reference-dsl-arrays.md). 
+ +GENMD_RUN_COMMAND +mlr --csv --from data/short.csv cat +GENMD_EOF + +GENMD_RUN_COMMAND +mlr --csv --from data/short.csv put -q ' + @records[NR] = $*; + end { + for (i = 1; i <= NR; i += 1) { + print "Record", i, "has word", @records[i]["word"]; + } + } +' +GENMD_EOF + +## Print adds spaces around multiple arguments + +As seen in the previous example, +[`print`](reference-dsl-output-statements.md#print-statements) with multiple +comma-delimited arguments fills in intervening spaces for you. If you want to +avoid this, use the dot operator for string-concatenation instead. + +GENMD_RUN_COMMAND +mlr -n put -q ' + end { + print "[", "a", "b", "c", "]"; + print "[" . "a" . "b" . "c" . "]"; + } +' +GENMD_EOF + +Similarly, a final newline is printed for you; use [`printn`](reference-dsl-output-statements.md#print-statements) to avoid this. + +## Insertion-order-preserving hashmaps + +Miller's hashmaps [TODO:linkify] (as in many modern languages) preserve insertion order. If you set `x["foo"]=1` and then `x["bar"]=2`, then you are guaranteed that any looping over `x` will retrieve the `"foo"` key-value pair first, and the `"bar"` key-value pair second. + +GENMD_RUN_COMMAND +mlr -n put -q 'end { + x["foo"] = 1; + x["bar"] = 2; + dump x; + for (k,v in x) { + print "key", k, "value", v + } +}' +GENMD_EOF + +## Two-variable for-loops + +Miller has a [key-value loop flavor](reference-dsl-control-structures.md#key-value-for-loops): whether `x` is a map or array, in `for (k,v in x) { ... }` the `k` will be bound to successive map keys (for maps) or 1-up array indices (for arrays), and the `v` will be bound to successive map values. + +## Semantics for one-variable for-loops + +Miller also has a [single-variable loop flavor](reference-dsl-control-structures.md#single-variable-for-loops). If `x` is a map then `for (e in x) { ... }` binds `e` to successive map _keys_ (not values as in PHP). But if `x` is an array then `for e in x) { ... 
}` binds `e` to successive array _values_ (not indices). + +## Absent-null + +Miller has a somewhat novel flavor of null data called _absent_: if a record +has a field `x` then `$y=$x` creates a field `y`, but if it doesn't then the assignment +is skipped. See the [null-data page](reference-main-null-data.md) for more +information. diff --git a/docs6/docs/reference-dsl-maps.md b/docs6/docs/reference-dsl-maps.md new file mode 100644 index 000000000..05f965488 --- /dev/null +++ b/docs6/docs/reference-dsl-maps.md @@ -0,0 +1,4 @@ + +# Maps + +TODO diff --git a/docs6/docs/reference-dsl-maps.md.in b/docs6/docs/reference-dsl-maps.md.in new file mode 100644 index 000000000..16c007f54 --- /dev/null +++ b/docs6/docs/reference-dsl-maps.md.in @@ -0,0 +1,3 @@ +# Maps + +TODO diff --git a/docs6/docs/reference-dsl-output-statements.md.in b/docs6/docs/reference-dsl-output-statements.md.in index c09313018..c3779baba 100644 --- a/docs6/docs/reference-dsl-output-statements.md.in +++ b/docs6/docs/reference-dsl-output-statements.md.in @@ -213,7 +213,18 @@ GENMD_EOF You can emit **multiple map-valued expressions side-by-side** by including their names in parentheses: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/emit-lashed.sh) +GENMD_RUN_COMMAND +mlr --from data/medium --opprint put -q ' + @x_count[$a][$b] += 1; + @x_sum[$a][$b] += $x; + end { + for ((a, b), _ in @x_count) { + @x_mean[a][b] = @x_sum[a][b] / @x_count[a][b] + } + emit (@x_sum, @x_count, @x_mean), "a", "b" + } +' +GENMD_EOF What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g. `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**. 
diff --git a/docs6/docs/reference-dsl-syntax.md.in b/docs6/docs/reference-dsl-syntax.md.in index ec112c7fe..f2299757b 100644 --- a/docs6/docs/reference-dsl-syntax.md.in +++ b/docs6/docs/reference-dsl-syntax.md.in @@ -10,7 +10,15 @@ GENMD_EOF Newlines within the expression are ignored, which can help increase legibility of complex expressions: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/put-multiline-example.txt) +GENMD_RUN_COMMAND +mlr --opprint put ' + $nf = NF; + $nr = NR; + $fnr = FNR; + $filenum = FILENUM; + $filename = FILENAME +' data/small data/small2 +GENMD_EOF GENMD_RUN_COMMAND mlr --opprint filter '($x > 0.5 && $y < 0.5) || ($x < 0.5 && $y > 0.5)' \ @@ -22,9 +30,13 @@ GENMD_EOF The simplest way to enter expressions for `put` and `filter` is between single quotes on the command line (see also [here](miller-on-windows.md) for Windows). For example: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/fe-example-1.sh) +GENMD_RUN_COMMAND +mlr --from data/small put '$xy = sqrt($x**2 + $y**2)' +GENMD_EOF -GENMD_INCLUDE_AND_RUN_ESCAPED(data/fe-example-2.sh) +GENMD_RUN_COMMAND +mlr --from data/small put 'func f(a, b) { return sqrt(a**2 + b**2) } $xy = f($x, $y)' +GENMD_EOF You may, though, find it convenient to put expressions into files for reuse, and read them **using the -f option**. For example: @@ -75,7 +87,25 @@ GENMD_INCLUDE_ESCAPED(data/newline-example.txt) **Trailing commas** are allowed in function/subroutine definitions, function/subroutine callsites, and map literals. 
This is intended for (although not restricted to) the multi-line case: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/trailing-commas.sh) +GENMD_RUN_COMMAND +mlr --csvlite --from data/a.csv put ' + func f( + num a, + num b, + ): num { + return a**2 + b**2; + } + $* = { + "s": $a + $b, + "t": $a - $b, + "u": f( + $a, + $b, + ), + "v": NR, + } +' +GENMD_EOF Bodies for all compound statements must be enclosed in **curly braces**, even if the body is a single statement: diff --git a/docs6/docs/reference-dsl-user-defined-functions.md.in b/docs6/docs/reference-dsl-user-defined-functions.md.in index 1223ba093..a8f8a8c20 100644 --- a/docs6/docs/reference-dsl-user-defined-functions.md.in +++ b/docs6/docs/reference-dsl-user-defined-functions.md.in @@ -6,7 +6,22 @@ As of Miller 5.0.0 you can define your own functions, as well as subroutines. Here's the obligatory example of a recursive function to compute the factorial function: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/factorial-example.sh) +GENMD_RUN_COMMAND +mlr --opprint --from data/small put ' + func f(n) { + if (is_numeric(n)) { + if (n > 0) { + return n * f(n-1); + } else { + return 1; + } + } + # implicitly return absent-null if non-numeric + } + $ox = f($x + NR); + $oi = f($i); +' +GENMD_EOF Properties of user-defined functions: @@ -30,7 +45,25 @@ Properties of user-defined functions: Example: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/subr-example.sh) +GENMD_RUN_COMMAND +mlr --opprint --from data/small put -q ' + begin { + @call_count = 0; + } + subr s(n) { + @call_count += 1; + if (is_numeric(n)) { + if (n > 1) { + call s(n-1); + } else { + print "numcalls=" . @call_count; + } + } + } + print "NR=" . 
NR; + call s(NR); +' +GENMD_EOF Properties of user-defined subroutines: diff --git a/docs6/docs/reference-dsl-variables.md.in b/docs6/docs/reference-dsl-variables.md.in index eda56a831..3dd97d5bf 100644 --- a/docs6/docs/reference-dsl-variables.md.in +++ b/docs6/docs/reference-dsl-variables.md.in @@ -144,19 +144,53 @@ Out-of-stream variables are **read-write**: you can do `$sum=@sum`, `@sum=$sum`, Using an index on the `@count` and `@sum` variables, we get the benefit of the `-g` (group-by) option which `mlr stats1` and various other Miller commands have: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-6.sh) +GENMD_RUN_COMMAND +mlr put -q ' + @x_count[$a] += 1; + @x_sum[$a] += $x; + end { + emit @x_count, "a"; + emit @x_sum, "a"; + } +' ./data/small +GENMD_EOF -GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-7.sh) +GENMD_RUN_COMMAND +mlr stats1 -a count,sum -f x -g a ./data/small +GENMD_EOF Indices can be arbitrarily deep -- here there are two or more of them: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-6a.sh) +GENMD_RUN_COMMAND +mlr --from data/medium put -q ' + @x_count[$a][$b] += 1; + @x_sum[$a][$b] += $x; + end { + emit (@x_count, @x_sum), "a", "b"; + } +' +GENMD_EOF The idea is that `stats1`, and other Miller verbs, encapsulate frequently-used patterns with a minimum of keystroking (and run a little faster), whereas using out-of-stream variables you have more flexibility and control in what you do. Begin/end blocks can be mixed with pattern/action blocks. 
For example: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/begin-end-example-8.sh) +GENMD_RUN_COMMAND +mlr put ' + begin { + @num_total = 0; + @num_positive = 0; + }; + @num_total += 1; + $x > 0.0 { + @num_positive += 1; + $y = log10($x); $z = sqrt($y) + }; + end { + emitf @num_total, @num_positive + } +' data/put-gating-example-1.dkvp +GENMD_EOF ## Local variables @@ -164,7 +198,24 @@ Local variables are similar to out-of-stream variables, except that their extent For example: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/local-example-1.sh) +GENMD_RUN_COMMAND +# Here I'm using a specified random-number seed so this example always +# produces the same output for this web document: in everyday practice we +# would leave off the --seed 12345 part. +mlr --seed 12345 seqgen --start 1 --stop 10 then put ' + func f(a, b) { # function arguments a and b + r = 0.0; # local r scoped to the function + for (int i = 0; i < 6; i += 1) { # local i scoped to the for-loop + num u = urand(); # local u scoped to the for-loop + r += u; # updates r from the enclosing scope + } + r /= 6; + return a + (b - a) * r; + } + num o = f(10, 20); # local to the top-level scope + $o = o; +' +GENMD_EOF Things which are completely unsurprising, resembling many other languages: @@ -216,15 +267,51 @@ Miller's `put`/`filter` DSL has four kinds of hashmaps. 
**Stream records** are ( For example, the following swaps the input stream's `a` and `i` fields, modifies `y`, and drops the rest: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/map-literal-example-1.sh) +GENMD_RUN_COMMAND +mlr --opprint put ' + $* = { + "a": $i, + "i": $a, + "y": $y * 10, + } +' data/small +GENMD_EOF Likewise, you can assign map literals to out-of-stream variables or local variables; pass them as arguments to user-defined functions, return them from functions, and so on: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/map-literal-example-2.sh) +GENMD_RUN_COMMAND +mlr --from data/small put ' + func f(map m): map { + m["x"] *= 200; + return m; + } + $* = f({"a": $a, "x": $x}); +' +GENMD_EOF Like out-of-stream and local variables, map literals can be multi-level: -GENMD_INCLUDE_AND_RUN_ESCAPED(data/map-literal-example-3.sh) +GENMD_RUN_COMMAND +mlr --from data/small put -q ' + begin { + @o = { + "nrec": 0, + "nkey": {"numeric":0, "non-numeric":0}, + }; + } + @o["nrec"] += 1; + for (k, v in $*) { + if (is_numeric(v)) { + @o["nkey"]["numeric"] += 1; + } else { + @o["nkey"]["non-numeric"] += 1; + } + } + end { + dump @o; + } +' +GENMD_EOF ## Type-checking diff --git a/docs6/docs/reference-dsl.md b/docs6/docs/reference-dsl.md index 494337fc5..799470171 100644 --- a/docs6/docs/reference-dsl.md +++ b/docs6/docs/reference-dsl.md @@ -59,7 +59,7 @@ the body of the loop. (You can, if you like, use the per-record statements to grow a list of records, then loop over them all in an `end` block. This is described in the page on -[operating over all records](operating-over-all-records.md)). +[operating on all records](operating-on-all-records.md)). To see this in action, let's take a look at the [data/short.csv](./data/short.csv) file: @@ -105,7 +105,7 @@ statement on each loop iteration. For almost all simple uses of the Miller programming language, this implicit looping over records is probably all you will need. 
(For more involved cases you -can see the pages on [operating over all records](operating-on-all-records.md), +can see the pages on [operating on all records](operating-on-all-records.md), [out-of-stream variables](reference-dsl-variables.md#out-of-stream-variables), and [two-pass algorithms](two-pass-algorithms.md).) diff --git a/docs6/docs/reference-dsl.md.in b/docs6/docs/reference-dsl.md.in index cb3b099a7..2c75b5202 100644 --- a/docs6/docs/reference-dsl.md.in +++ b/docs6/docs/reference-dsl.md.in @@ -48,7 +48,7 @@ the body of the loop. (You can, if you like, use the per-record statements to grow a list of records, then loop over them all in an `end` block. This is described in the page on -[operating over all records](operating-over-all-records.md)). +[operating on all records](operating-on-all-records.md)). To see this in action, let's take a look at the [data/short.csv](./data/short.csv) file: @@ -80,7 +80,7 @@ statement on each loop iteration. For almost all simple uses of the Miller programming language, this implicit looping over records is probably all you will need. (For more involved cases you -can see the pages on [operating over all records](operating-on-all-records.md), +can see the pages on [operating on all records](operating-on-all-records.md), [out-of-stream variables](reference-dsl-variables.md#out-of-stream-variables), and [two-pass algorithms](two-pass-algorithms.md).) diff --git a/docs6/docs/reference-main-compressed-data.md b/docs6/docs/reference-main-compressed-data.md new file mode 100644 index 000000000..da89fadc3 --- /dev/null +++ b/docs6/docs/reference-main-compressed-data.md @@ -0,0 +1,133 @@ + +# Compressed data + +As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and +ZLIB formats transparently, and in-process. And (as before Miller 6) you have a +more general `--prepipe` option to support other decompression programs. 
+ +## Automatic detection on input + +If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension: + +
+file gz-example.csv.gz
+
+
+gz-example.csv.gz: gzip compressed data, was "gz-example.csv", last modified: Mon Aug 23 02:04:34 2021, from Unix, original size modulo 2^32 429
+
+ +
+mlr --csv sort -f color gz-example.csv.gz
+
+
+color,shape,flag,k,index,quantity,rate
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+purple,square,false,10,91,72.3735,8.2430
+red,square,true,2,15,79.2778,0.0130
+red,circle,true,3,16,13.8103,2.9010
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+yellow,triangle,true,1,11,43.6498,9.8870
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+
+ +This will decompress the input data on the fly, while leaving the disk file unmodified. This helps you save disk space, at the cost of some additional runtime CPU usage to decompress the data. + +## Manual detection on input + +If the filename doesn't end in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know: + +
+mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
+
+ +## External decompressors on input + +Using the `--prepipe` flag, you can provide the name of any decompression +program in your `$PATH` and Miller will run it on each input file, effectively +piping the standard output of that program to Miller's standard input. + +You can, of course, already do without this for single input files, for example: + +
+gunzip < gz-example.csv.gz | mlr --csv sort -f color
+
+
+color,shape,flag,k,index,quantity,rate
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+purple,square,false,10,91,72.3735,8.2430
+red,square,true,2,15,79.2778,0.0130
+red,circle,true,3,16,13.8103,2.9010
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+yellow,triangle,true,1,11,43.6498,9.8870
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+
+ +The benefit of `--prepipe` is that Miller will run the specified program once per +file, respecting file boundaries. + +The prepipe command can be anything which reads from standard input and produces +data acceptable to Miller. Nominally this allows you to use whichever +decompression utilities you have installed on your system, on a per-file basis. + +If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`. + +Note that this feature is quite general and is not limited to decompression +utilities. You can use it to apply per-file filters of your choice: e.g. `mlr +--prepipe head -n 10 ...`, if you like. + +There is a `--prepipe` and a `--prepipex`: + +* If the command normally runs with `nameofprogram < filename.ext` (such as `gunzip` or `zcat -cf` or `xz -cd`) then use `--prepipe`. +* If the command normally runs with `nameofprogram filename.ext` (such as `unzip -qc`) then use `--prepipex`. + +Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller +command line, it replaces any autodetect decisions that might have been made +based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if +`--prepipe` or `--prepipex` is also specified. + +## Compressed output + +Everything said so far on this page has to do with compressed input. + +For compressed output: + +* Normally Miller output is to stdout, so you can pipe the output: `mlr sort -n quantity foo.csv | gzip > sorted.csv.gz`. + +* For [`tee` statements](reference-dsl-output-statements.md#tee-statements), which write output to files rather than stdout, use `tee`'s redirect syntax: + +
+mlr --from example.csv --csv put -q '
+  filename = $color.".csv.gz";
+  tee | "gzip > ".filename, $*
+'
+
+ +
+file red.csv.gz purple.csv.gz yellow.csv.gz
+
+
+red.csv.gz:    gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 185
+purple.csv.gz: gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 164
+yellow.csv.gz: gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 158
+
+ +
+mlr --csv cat yellow.csv.gz
+
+
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+
+ +* Using the [in-place flag](reference-main-io-options.md#in-place-mode) `-I`, +as of August 2021 the overwritten file will _not_ be compressed as it was when it was read: +e.g. `mlr -I --csv cat gz-example.csv.gz` will write `gz-example.csv.gz` which contains +a plain, uncompressed CSV contents. This is a bug and will be fixed. diff --git a/docs6/docs/reference-main-compressed-data.md.in b/docs6/docs/reference-main-compressed-data.md.in new file mode 100644 index 000000000..ea7d9279e --- /dev/null +++ b/docs6/docs/reference-main-compressed-data.md.in @@ -0,0 +1,96 @@ +# Compressed data + +As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and +ZLIB formats transparently, and in-process. And (as before Miller 6) you have a +more general `--prepipe` option to support other decompression programs. + +## Automatic detection on input + +If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension: + +GENMD_CARDIFY_HIGHLIGHT_ONE +file gz-example.csv.gz +gz-example.csv.gz: gzip compressed data, was "gz-example.csv", last modified: Mon Aug 23 02:04:34 2021, from Unix, original size modulo 2^32 429 +GENMD_EOF + +GENMD_RUN_COMMAND +mlr --csv sort -f color gz-example.csv.gz +GENMD_EOF + +This will decompress the input data on the fly, while leaving the disk file unmodified. This helps you save disk space, at the cost of some additional runtime CPU usage to decompress the data. + +## Manual detection on input + +If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know: + +GENMD_CARDIFY_HIGHLIGHT_ONE +mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents +GENMD_EOF + +## External decompressors on input + +Using the `--prepipe` flag, you can provide the name of any decompression +program in your `$PATH` and Miller will run it on each input file, effectively +piping the standard output of that program to Miller's standard input. 
+ +You can, of course, already do without this for single input files, for example: + +GENMD_RUN_COMMAND +gunzip < gz-example.csv.gz | mlr --csv sort -f color +GENMD_EOF + +The benefit of `--prepipe` is that Miller will run the specified program once per +file, respecting file boundaries. + +The prepipe command can be anything which reads from standard input and produces +data acceptable to Miller. Nominally this allows you to use whichever +decompression utilities you have installed on your system, on a per-file basis. + +If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`. + +Note that this feature is quite general and is not limited to decompression +utilities. You can use it to apply per-file filters of your choice: e.g. `mlr +--prepipe head -n 10 ...`, if you like. + +There is a `--prepipe` and a `--prepipex`: + +* If the command normally runs with `nameofprogram < filename.ext` (such as `gunzip` or `zcat -cf` or `xz -cd`) then use `--prepipe`. +* If the command normally runs with `nameofprogram filename.ext` (such as `unzip -qc`) then use `--prepipex`. + +Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller +command line, it replaces any autodetect decisions that might have been made +based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if +`--prepipe` or `--prepipex` is also specified. + +## Compressed output + +Everything said so far on this page has to do with compressed input. + +For compressed output: + +* Normally Miller output is to stdout, so you can pipe the output: `mlr sort -n quantity foo.csv | gzip > sorted.csv.gz`. 
+ +* For [`tee` statements](reference-dsl-output-statements.md#tee-statements), which write output to files rather than stdout, use `tee`'s redirect syntax: + +GENMD_RUN_COMMAND +mlr --from example.csv --csv put -q ' + filename = $color.".csv.gz"; + tee | "gzip > ".filename, $* +' +GENMD_EOF + +GENMD_CARDIFY_HIGHLIGHT_ONE +file red.csv.gz purple.csv.gz yellow.csv.gz +red.csv.gz: gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 185 +purple.csv.gz: gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 164 +yellow.csv.gz: gzip compressed data, last modified: Mon Aug 23 02:34:05 2021, from Unix, original size modulo 2^32 158 +GENMD_EOF + +GENMD_RUN_COMMAND +mlr --csv cat yellow.csv.gz +GENMD_EOF + +* Using the [in-place flag](reference-main-io-options.md#in-place-mode) `-I`, +as of August 2021 the overwritten file will _not_ be compressed as it was when it was read: +e.g. `mlr -I --csv cat gz-example.csv.gz` will write `gz-example.csv.gz` which contains +a plain, uncompressed CSV contents. This is a bug and will be fixed. diff --git a/docs6/docs/reference-main-io-options.md b/docs6/docs/reference-main-io-options.md index 626f53b02..b6e4e1639 100644 --- a/docs6/docs/reference-main-io-options.md +++ b/docs6/docs/reference-main-io-options.md @@ -61,37 +61,7 @@ Please see [Choices for printing to files](10min.md#choices-for-printing-to-file ## Compression -Options: - -
---prepipe {command}
-
- - -The prepipe command is anything which reads from standard input and produces data acceptable to Miller. Nominally this allows you to use whichever decompression utilities you have installed on your system, on a per-file basis. If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`. Examples: - -
-# These two produce the same output:
-$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime
-$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz
-# With multiple input files you need --prepipe:
-$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz myfile2.csv.gz
-$ mlr --prepipe gunzip --idkvp --oxtab cut -f hostname,uptime myfile1.dat.gz myfile2.dat.gz
-
- -
-# Similar to the above, but with compressed output as well as input:
-$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime | gzip > outfile.csv.gz
-$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz | gzip > outfile.csv.gz
-$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz myfile2.csv.gz | gzip > outfile.csv.gz
-
- -
-# Similar to the above, but with different compression tools for input and output:
-$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime | xz -z > outfile.csv.xz
-$ xz -cd < myfile1.csv.xz | mlr cut -f hostname,uptime | gzip > outfile.csv.xz
-$ mlr --prepipe 'xz -cd' cut -f hostname,uptime myfile1.csv.xz myfile2.csv.xz | xz -z > outfile.csv.xz
-
+See the separate page on [Compressed data](reference-main-compressed-data.md). ## Record/field/pair separators diff --git a/docs6/docs/reference-main-io-options.md.in b/docs6/docs/reference-main-io-options.md.in index ac806f589..bddebcc11 100644 --- a/docs6/docs/reference-main-io-options.md.in +++ b/docs6/docs/reference-main-io-options.md.in @@ -44,37 +44,7 @@ Please see [Choices for printing to files](10min.md#choices-for-printing-to-file ## Compression -Options: - -GENMD_CARDIFY ---prepipe {command} -GENMD_EOF - - -The prepipe command is anything which reads from standard input and produces data acceptable to Miller. Nominally this allows you to use whichever decompression utilities you have installed on your system, on a per-file basis. If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`. Examples: - -GENMD_CARDIFY -# These two produce the same output: -$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime -$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz -# With multiple input files you need --prepipe: -$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz myfile2.csv.gz -$ mlr --prepipe gunzip --idkvp --oxtab cut -f hostname,uptime myfile1.dat.gz myfile2.dat.gz -GENMD_EOF - -GENMD_CARDIFY -# Similar to the above, but with compressed output as well as input: -$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime | gzip > outfile.csv.gz -$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz | gzip > outfile.csv.gz -$ mlr --prepipe gunzip cut -f hostname,uptime myfile1.csv.gz myfile2.csv.gz | gzip > outfile.csv.gz -GENMD_EOF - -GENMD_CARDIFY -# Similar to the above, but with different compression tools for input and output: -$ gunzip < myfile1.csv.gz | mlr cut -f hostname,uptime | xz -z > outfile.csv.xz -$ xz -cd < myfile1.csv.xz | mlr cut -f hostname,uptime | gzip > outfile.csv.xz -$ mlr --prepipe 'xz -cd' cut -f hostname,uptime myfile1.csv.xz myfile2.csv.xz | xz -z > outfile.csv.xz -GENMD_EOF +See the separate page 
on [Compressed data](reference-main-compressed-data.md). ## Record/field/pair separators diff --git a/docs6/docs/reference-main-overview.md b/docs6/docs/reference-main-overview.md index 526f8177d..9bb85a75c 100644 --- a/docs6/docs/reference-main-overview.md +++ b/docs6/docs/reference-main-overview.md @@ -3,12 +3,12 @@ ## Overview -The outline of an invocation of Miller is +The outline of an invocation of Miller is: -* `mlr` +* The program name `mlr`. * Options controlling input/output formatting, etc. (See [I/O options](reference-main-io-options.md)). * One or more verbs -- such as `cut`, `sort`, etc. (see [Verbs Reference](reference-verbs.md)) -- chained together using [then](reference-main-then-chaining.md). You use these to transform your data. -* Zero or more filenames, with input taken from standard input if there are no filenames present. +* Zero or more filenames, with input taken from standard input if there are no filenames present. (You can place the filenames up front using `--from` or `--mfrom` as described on the [keystroke-savers page](keystroke-savers.md#file-names-up-front-including-from).) For example, reading from a file: @@ -21,6 +21,15 @@ red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870
+
+mlr --from example.csv --icsv --opprint head -n 2 then sort -f shape
+
+
+color  shape    flag k index quantity rate
+red    square   true 2 15    79.2778  0.0130
+yellow triangle true 1 11    43.6498  9.8870
+
+ Reading from standard input:
@@ -38,7 +47,7 @@ The rest of this reference section gives you full information on each of these p
 
 When you type `mlr {something} myfile.dat`, the `{something}` part is called a **verb**. It specifies how you want to transform your data. Most of the verbs are counterparts of built-in system tools like `cut` and `sort` -- but with file-format awareness, and giving you the ability to refer to fields by name.
 
-The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found at [DSL reference](reference-dsl.md).
+The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found on the [Intro to Miller's programming language page](programming-language.md); see also [DSL reference](reference-dsl.md) for more details.
 
 Here's a comparison of verbs and `put`/`filter` DSL expressions:
 
diff --git a/docs6/docs/reference-main-overview.md.in b/docs6/docs/reference-main-overview.md.in
index 3ad62abfd..18285cc1e 100644
--- a/docs6/docs/reference-main-overview.md.in
+++ b/docs6/docs/reference-main-overview.md.in
@@ -2,12 +2,12 @@
 
 ## Overview
 
-The outline of an invocation of Miller is
+The outline of an invocation of Miller is:
 
-* `mlr`
+* The program name `mlr`.
 * Options controlling input/output formatting, etc. (See [I/O options](reference-main-io-options.md)).
 * One or more verbs -- such as `cut`, `sort`, etc. (see [Verbs Reference](reference-verbs.md)) -- chained together using [then](reference-main-then-chaining.md). You use these to transform your data.
-* Zero or more filenames, with input taken from standard input if there are no filenames present.
+* Zero or more filenames, with input taken from standard input if there are no filenames present. (You can place the filenames up front using `--from` or `--mfrom` as described on the [keystroke-savers page](keystroke-savers.md#file-names-up-front-including-from).)
 
 For example, reading from a file:
 
@@ -15,6 +15,10 @@ GENMD_RUN_COMMAND
 mlr --icsv --opprint head -n 2 then sort -f shape example.csv
 GENMD_EOF
 
+GENMD_RUN_COMMAND
+mlr --from example.csv --icsv --opprint head -n 2 then sort -f shape
+GENMD_EOF
+
 Reading from standard input:
 
 GENMD_RUN_COMMAND
@@ -27,7 +31,7 @@ The rest of this reference section gives you full information on each of these p
 
 When you type `mlr {something} myfile.dat`, the `{something}` part is called a **verb**. It specifies how you want to transform your data. Most of the verbs are counterparts of built-in system tools like `cut` and `sort` -- but with file-format awareness, and giving you the ability to refer to fields by name.
 
-The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found at [DSL reference](reference-dsl.md).
+The verbs `put` and `filter` are special in that they have a rich expression language (domain-specific language, or "DSL"). More information about them can be found on the [Intro to Miller's programming language page](programming-language.md); see also [DSL reference](reference-dsl.md) for more details.
 
 Here's a comparison of verbs and `put`/`filter` DSL expressions:
 
diff --git a/docs6/docs/repl.md b/docs6/docs/repl.md
index c265caff2..52dea4f05 100644
--- a/docs6/docs/repl.md
+++ b/docs6/docs/repl.md
@@ -179,6 +179,6 @@ etc. depending on your platform.
 
 Suggestion: `alias mrpl='rlwrap mlr repl'` in your shell's startup file.
 
-## On-line help
+## Online help
 
 After `mlr repl`, type `:help` to see more about your options. In particular, `:help examples`.
diff --git a/docs6/docs/repl.md.in b/docs6/docs/repl.md.in
index 9c601a6b0..e2312104b 100644
--- a/docs6/docs/repl.md.in
+++ b/docs6/docs/repl.md.in
@@ -146,6 +146,6 @@ etc. depending on your platform.
 
 Suggestion: `alias mrpl='rlwrap mlr repl'` in your shell's startup file.
 
-## On-line help
+## Online help
 
 After `mlr repl`, type `:help` to see more about your options. In particular, `:help examples`.
diff --git a/docs6/docs/shapes-of-data.md.in b/docs6/docs/shapes-of-data.md.in
index 409698469..9472d3d93 100644
--- a/docs6/docs/shapes-of-data.md.in
+++ b/docs6/docs/shapes-of-data.md.in
@@ -244,4 +244,15 @@ GENMD_INCLUDE_ESCAPED(data/rect.txt)
 
 The idea here is that middles starting with a 1 belong to the outer value of 1, and so on.  (For example, the outer values might be account IDs, the middle values might be invoice IDs, and the inner values might be invoice line-items.) If you want all the middle and inner lines to have the context of which outers they belong to, you can modify your software to pass all those through your methods. Alternatively, don't refactor your code just to handle some ad-hoc log-data formatting -- instead, use the following to rectangularize the data.  The idea is to use an out-of-stream variable to accumulate fields across records. Clear that variable when you see an outer ID; accumulate fields; emit output when you see the inner IDs.
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/rect.sh)
+GENMD_RUN_COMMAND
+mlr --from data/rect.txt put -q '
+  is_present($outer) {
+    unset @r
+  }
+  for (k, v in $*) {
+    @r[k] = v
+  }
+  is_present($inner1) {
+    emit @r
+  }'
+GENMD_EOF
diff --git a/docs6/docs/statistics-examples.md.in b/docs6/docs/statistics-examples.md.in
index 382f75775..7ab879c9f 100644
--- a/docs6/docs/statistics-examples.md.in
+++ b/docs6/docs/statistics-examples.md.in
@@ -4,14 +4,51 @@
 
 For one or more specified field names, simply compute p25 and p75, then write the IQR as the difference of p75 and p25:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/iqr1.sh)
+GENMD_RUN_COMMAND
+mlr --oxtab stats1 -f x -a p25,p75 \
+    then put '$x_iqr = $x_p75 - $x_p25' \
+    data/medium 
+GENMD_EOF
 
 For wildcarded field names, first compute p25 and p75, then loop over field names with `p25` in them:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/iqrn.sh)
+GENMD_RUN_COMMAND
+mlr --oxtab stats1 --fr '[i-z]' -a p25,p75 \
+    then put 'for (k,v in $*) {
+      if (k =~ "(.*)_p25") {
+        $["\1_iqr"] = $["\1_p75"] - $["\1_p25"]
+      }
+    }' \
+    data/medium 
+GENMD_EOF
 
 ## Computing weighted means
 
 This might be more elegantly implemented as an option within the `stats1` verb. Meanwhile, it's expressible within the DSL:
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/weighted-mean.sh)
+GENMD_RUN_COMMAND
+mlr --from data/medium put -q '
+  # Using the y field for weighting in this example
+  weight = $y;
+
+  # Using the a field for weighted aggregation in this example
+  @sumwx[$a] += weight * $i;
+  @sumw[$a] += weight;
+
+  @sumx[$a] += $i;
+  @sumn[$a] += 1;
+
+  end {
+    map wmean = {};
+    map mean  = {};
+    for (a in @sumwx) {
+      wmean[a] = @sumwx[a] / @sumw[a]
+    }
+    for (a in @sumx) {
+      mean[a] = @sumx[a] / @sumn[a]
+    }
+    #emit wmean, "a";
+    #emit mean, "a";
+    emit (wmean, mean), "a";
+  }'
+GENMD_EOF
diff --git a/docs6/docs/two-pass-algorithms.md.in b/docs6/docs/two-pass-algorithms.md.in
index a4112a788..90d2d729a 100644
--- a/docs6/docs/two-pass-algorithms.md.in
+++ b/docs6/docs/two-pass-algorithms.md.in
@@ -32,11 +32,25 @@ GENMD_EOF
 
 rather than the more tedious
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(oosvar-example-sum.sh)
+GENMD_RUN_COMMAND
+mlr --oxtab put -q '
+  @x_sum += $x;
+  end {
+    emit @x_sum
+  }
+' data/medium
+GENMD_EOF
 
 or
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(oosvar-example-sum-grouped.sh)
+GENMD_RUN_COMMAND
+mlr --opprint put -q '
+  @x_sum[$b] += $x;
+  end {
+    emit @x_sum, "b"
+  }
+' data/medium
+GENMD_EOF
 
 The former (`mlr stats1` et al.) has the advantages of being easier to type, being less error-prone to type, and running faster.
 
@@ -169,7 +183,16 @@ GENMD_RUN_COMMAND
 mlr --opprint stats1 -a mean -f x data/medium
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/mean-with-oosvars.sh)
+GENMD_RUN_COMMAND
+mlr --opprint put -q '
+  @x_sum += $x;
+  @x_count += 1;
+  end {
+    @x_mean = @x_sum / @x_count;
+    emit @x_mean
+  }
+' data/medium
+GENMD_EOF
 
 ## Keyed mean without/with oosvars
 
@@ -177,7 +200,18 @@ GENMD_RUN_COMMAND
 mlr --opprint stats1 -a mean -f x -g a,b data/medium
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/keyed-mean-with-oosvars.sh)
+GENMD_RUN_COMMAND
+mlr --opprint put -q '
+  @x_sum[$a][$b] += $x;
+  @x_count[$a][$b] += 1;
+  end{
+    for ((a, b), v in @x_sum) {
+      @x_mean[a][b] = @x_sum[a][b] / @x_count[a][b];
+    }
+    emit @x_mean, "a", "b"
+  }
+' data/medium
+GENMD_EOF
 
 ## Variance and standard deviation without/with oosvars
 
@@ -215,7 +249,15 @@ GENMD_RUN_COMMAND
 mlr --opprint stats1 -a min,max -f x -g a data/medium
 GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(data/keyed-min-max-with-oosvars.sh)
+GENMD_RUN_COMMAND
+mlr --opprint --from data/medium put -q '
+  @min[$a] = min(@min[$a], $x);
+  @max[$a] = max(@max[$a], $x);
+  end{
+    emit (@min, @max), "a";
+  }
+'
+GENMD_EOF
 
 ## Delta without/with oosvars
 
@@ -245,6 +287,14 @@ GENMD_EOF
 
 ## Exponentially weighted moving averages without/with oosvars
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(verb-example-ewma.sh)
+GENMD_RUN_COMMAND
+mlr --opprint step -a ewma -d 0.1 -f x data/small
+GENMD_EOF
 
-GENMD_INCLUDE_AND_RUN_ESCAPED(oosvar-example-ewma.sh)
+GENMD_RUN_COMMAND
+mlr --opprint put '
+  begin{ @a=0.1 };
+  $e = NR==1 ? $x : @a * $x + (1 - @a) * @e;
+  @e=$e
+' data/small
+GENMD_EOF
diff --git a/docs6/docs/verb-example-ewma.sh b/docs6/docs/verb-example-ewma.sh
deleted file mode 100644
index e990b52b4..000000000
--- a/docs6/docs/verb-example-ewma.sh
+++ /dev/null
@@ -1 +0,0 @@
-mlr --opprint step -a ewma -d 0.1 -f x data/small
diff --git a/docs6/docs/yellow.csv.gz b/docs6/docs/yellow.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d781f0c99f00b92bcb08d1db2e4910611b4193ac
GIT binary patch
literal 127
zcmV-_0D%7=iwFSWS|nir18vGd4#FT11<-xZ@y+3$05@urL+%5&#^8=cB}#eV_-?mHZX!>_WFz|_jERf
hC(NXRX9ue;^uGnLKbq5y!HUS~=?BE(YZ{&a007uCJyHMw

literal 0
HcmV?d00001

diff --git a/docs6/mkdocs.yml b/docs6/mkdocs.yml
index d187dff5f..4d9a68150 100644
--- a/docs6/mkdocs.yml
+++ b/docs6/mkdocs.yml
@@ -24,26 +24,27 @@ nav:
     - "Output colorization": "output-colorization.md"
     - "Customization: .mlrrc": "customization.md"
     - "The REPL": "repl.md"
+    - "Online help": "online-help.md"
     - "What's new in Miller 6": "new-in-miller-6.md"
     - "How to contribute": "contributing.md"
-  - 'FAQs and recipes':
+  - 'FAQs and examples':
     - "CSV, with and without headers": "csv-with-and-without-headers.md"
     - "Shapes of data": "shapes-of-data.md"
     - "Operating on all fields": "operating-on-all-fields.md"
     - "Operating on all records": "operating-on-all-records.md"
+    - "Questions about then-chaining": "questions-about-then-chaining.md"
+    - "Questions about joins": "questions-about-joins.md"
+    - "Date/time examples": "date-time-examples.md"
     - "Special symbols and formatting": "special-symbols-and-formatting.md"
-    - "Dates and times": "dates-and-times.md"
-    - "Then-chaining": "then-chaining.md"
-    - "Joins": "joins.md"
     - "Running shell commands": "shell-commands.md"
+    - "Data-cleaning examples": "data-cleaning-examples.md"
     - "Data-diving examples": "data-diving-examples.md"
     - "Log-processing examples": "log-processing-examples.md"
     - "SQL examples": "sql-examples.md"
-    - "Data-cleaning examples": "data-cleaning-examples.md"
+    - "DKVP I/O examples": "dkvp-examples.md"
     - "Statistics examples": "statistics-examples.md"
     - "Randomizing examples": "randomizing-examples.md"
     - "Two-pass algorithms": "two-pass-algorithms.md"
-    - "DKVP I/O examples": "dkvp-examples.md"
     - "Programming-language examples": "programming-examples.md"
     - "Miscellaneous examples": "misc-examples.md"
   - 'Background':
@@ -58,13 +59,13 @@ nav:
       - "I/O options": "reference-main-io-options.md"
       - "List of verbs": "reference-verbs.md"
       - "Data types": "reference-main-data-types.md"
+      - "Arithmetic": "reference-main-arithmetic.md"
+      - "Maps": "reference-dsl-maps.md"
       - "Arrays": "reference-dsl-arrays.md"
       - "Null data": "reference-main-null-data.md"
-      - "Arithmetic": "reference-main-arithmetic.md"
       - "Regular expressions": "reference-main-regular-expressions.md"
+      - "Compressed data": "reference-main-compressed-data.md"
       - "Miller environment variables": "reference-main-env-vars.md"
-      - "Online help": "reference-main-online-help.md"
-      - "Auxiliary commands": "reference-main-auxiliary-commands.md"
       - "Documents for previous releases": "release-docs.md"
     - 'DSL reference':
       - "DSL overview": "reference-dsl.md"
@@ -78,8 +79,10 @@ nav:
       - "DSL output statements": "reference-dsl-output-statements.md"
       - "DSL unset statements": "reference-dsl-unset-statements.md"
       - "DSL errors and transparency": "reference-dsl-errors.md"
+      - "Differences from other programming languages": "reference-dsl-differences.md"
       - "A note on the complexity of Miller's expression language": "reference-dsl-complexity.md"
     - 'Misc. reference':
+      - "Auxiliary commands": "reference-main-auxiliary-commands.md"
       - "Manual page": "manpage.md"
       - "Installation": "installation.md"
       - "Building from source": "build.md"
diff --git a/go/src/auxents/help/entry.go b/go/src/auxents/help/entry.go
index 955273456..e2c55dcbf 100644
--- a/go/src/auxents/help/entry.go
+++ b/go/src/auxents/help/entry.go
@@ -1,5 +1,5 @@
 // ================================================================
-// On-line help
+// Online help
 // ================================================================
 
 package help
diff --git a/go/src/auxents/repl/prompt.go b/go/src/auxents/repl/prompt.go
index d9677c5b5..31529296f 100644
--- a/go/src/auxents/repl/prompt.go
+++ b/go/src/auxents/repl/prompt.go
@@ -52,7 +52,7 @@ func (repl *Repl) printStartupBanner() {
 	if repl.inputIsTerminal {
 		fmt.Printf("Miller %s REPL for %s:%s:%s\n", version.STRING, runtime.GOOS, runtime.GOARCH, runtime.Version())
 		fmt.Printf("Pre-release docs for Miller 6: %s\n", lib.DOC_URL)
-		fmt.Printf("Type ':h' or ':help' for on-line help; ':q' or ':quit' to quit.\n")
+		fmt.Printf("Type ':h' or ':help' for online help; ':q' or ':quit' to quit.\n")
 	}
 }
 
diff --git a/go/src/auxents/repl/session.go b/go/src/auxents/repl/session.go
index 10d350586..b823388c3 100644
--- a/go/src/auxents/repl/session.go
+++ b/go/src/auxents/repl/session.go
@@ -154,6 +154,9 @@ func (repl *Repl) handleSession(istream *os.File) {
 
 		line, err := lineReader.ReadString('\n')
 		if err == io.EOF {
+			if repl.inputIsTerminal {
+				fmt.Println()
+			}
 			break
 		}
 
diff --git a/go/src/auxents/repl/verbs.go b/go/src/auxents/repl/verbs.go
index a80fdc46b..e79e0a7c9 100644
--- a/go/src/auxents/repl/verbs.go
+++ b/go/src/auxents/repl/verbs.go
@@ -770,7 +770,7 @@ func usageQuit(repl *Repl) {
 }
 
 // The :quit command is handled outside this file; we have a help function,
-// though, to expose it for on-line help.
+// though, to expose it for online help.
 
 // ----------------------------------------------------------------
 func usageHelp(repl *Repl) {
@@ -957,7 +957,7 @@ delight. You may need 'brew install rlwrap', 'sudo apt-get install rlwrap',
 etc. depending on your platform.`)
 	fmt.Println()
 
-	fmt.Println(colorizer.MaybeColorizeHelp("On-line help:", true))
+	fmt.Println(colorizer.MaybeColorizeHelp("Online help:", true))
 	fmt.Println("Type ':h' or ':help' to see more about your options. In particular, ':help examples'.")
 }
 
diff --git a/go/src/cli/mlrcli.go b/go/src/cli/mlrcli.go
index 146ab8d98..8ef69a436 100644
--- a/go/src/cli/mlrcli.go
+++ b/go/src/cli/mlrcli.go
@@ -49,7 +49,7 @@ package cli
 //	return singleton_pdesc_to_chars_map;
 //}
 
-//// For displaying the default separators in on-line help
+//// For displaying the default separators in online help
 //static char* rebackslash(char* sep) {
 //	if sep == "\r"))
 //		return "\\r";
diff --git a/go/src/lib/paragraph.go b/go/src/lib/paragraph.go
index 122526cd6..289f59486 100644
--- a/go/src/lib/paragraph.go
+++ b/go/src/lib/paragraph.go
@@ -5,7 +5,7 @@ import (
 	"os"
 )
 
-// For on-line help contexts like printing all the built-in DSL functions, or
+// For online help contexts like printing all the built-in DSL functions, or
 // the list of all verbs.
 func PrintWordsAsParagraph(words []string, o *os.File) {
 	separator := " "
diff --git a/go/todo.txt b/go/todo.txt
index 7bcc03da6..b08caf0a5 100644
--- a/go/todo.txt
+++ b/go/todo.txt
@@ -17,7 +17,10 @@ TOP OF LIST:
 * GOMAXPROCS env-override, else default 8 with comment
 
 ----------------------------------------------------------------
-indexing bug:
+* mlr repl (w/o mrpl) doesn't print final newline on EOF
+* also: feature/shorthand for repl newline before prompt
+
+* slice-indexing bug:
 
   μεταμόρφωσις: x=[1,2,3,4,5]
   μεταμόρφωσις: x[:2]
@@ -31,6 +34,9 @@ implicit r:
 
 ----------------------------------------------------------------
 docs:
+
+* src/man/doc hygiene @ build script
+
 w discussion re docs6 ...
 * single cheatsheet page -- put out RFH?
 
diff --git a/man6/manpage.txt b/man6/manpage.txt
index 2ef1fea52..b15e1854d 100644
--- a/man6/manpage.txt
+++ b/man6/manpage.txt
@@ -219,7 +219,7 @@ OPTIONS
                      --jsonx --ojsonx  Keystroke-savers for --json --jvstack
                      --jsonx --ojsonx  and --ojson --jvstack, respectively.
                            --jlistwrap Wrap JSON output in outermost [ ].
-                   --oflatsep {string} Separator for flattening multi-level JSON keys,
+                   --flatsep {string} Separator for flattening multi-level JSON keys,
                                        e.g. '{"a":{"b":3}}' becomes a:b => 3 for
                                        non-JSON formats. Defaults to ..\n",
 
@@ -700,7 +700,7 @@ VERBS
        and value '{"b": { "c": 4 }}' becomes name 'a.b.c' and value 4.
        Options:
        -f Comma-separated list of field names to flatten (default all).
-       -s Separator, defaulting to mlr --oflatsep value.
+       -s Separator, defaulting to mlr --flatsep value.
        -h|--help Show this message.
 
    format-values
@@ -793,7 +793,8 @@ VERBS
 
    group-like
        Usage: mlr group-like [options]
-       Outputs records in batches having identical field names.Options:
+       Outputs records in batches having identical field names.
+       Options:
        -h|--help Show this message.
 
    having-fields
@@ -1465,7 +1466,7 @@ VERBS
        becomes name 'a' and value '{"b": { "c": 4 }}'.
        Options:
        -f {a,b,c} Comma-separated list of field names to unflatten (default all).
-       -s {string} Separator, defaulting to mlr --oflatsep value.
+       -s {string} Separator, defaulting to mlr --flatsep value.
        -h|--help Show this message.
 
    uniq
@@ -2520,4 +2521,4 @@ SEE ALSO
 
 
 
-                                  2021-07-08                         MILLER(1)
+                                  2021-08-23                         MILLER(1)
diff --git a/man6/mlr6.1 b/man6/mlr6.1
index f4be6c767..560536201 100644
--- a/man6/mlr6.1
+++ b/man6/mlr6.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2021-07-08
+.\"      Date: 2021-08-23
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2021-07-08" "\ \&" "\ \&"
+.TH "MILLER" "1" "2021-08-23" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -274,7 +274,7 @@ output separator to the given value.
               --jsonx --ojsonx  Keystroke-savers for --json --jvstack
               --jsonx --ojsonx  and --ojson --jvstack, respectively.
                     --jlistwrap Wrap JSON output in outermost [ ].
-            --oflatsep {string} Separator for flattening multi-level JSON keys,
+            --flatsep {string} Separator for flattening multi-level JSON keys,
                                 e.g. '{"a":{"b":3}}' becomes a:b => 3 for
                                 non-JSON formats. Defaults to ..\en",
 
@@ -919,7 +919,7 @@ Flattens multi-level maps to single-level ones. Example: field with name 'a'
 and value '{"b": { "c": 4 }}' becomes name 'a.b.c' and value 4.
 Options:
 -f Comma-separated list of field names to flatten (default all).
--s Separator, defaulting to mlr --oflatsep value.
+-s Separator, defaulting to mlr --flatsep value.
 -h|--help Show this message.
 .fi
 .if n \{\
@@ -1048,7 +1048,8 @@ Outputs records in batches having identical values at specified field names.Opti
 .\}
 .nf
 Usage: mlr group-like [options]
-Outputs records in batches having identical field names.Options:
+Outputs records in batches having identical field names.
+Options:
 -h|--help Show this message.
 .fi
 .if n \{\
@@ -1936,7 +1937,7 @@ Reverses flatten. Example: field with name 'a.b.c' and value 4
 becomes name 'a' and value '{"b": { "c": 4 }}'.
 Options:
 -f {a,b,c} Comma-separated list of field names to unflatten (default all).
--s {string} Separator, defaulting to mlr --oflatsep value.
+-s {string} Separator, defaulting to mlr --flatsep value.
 -h|--help Show this message.
 .fi
 .if n \{\