From 0424320199d43182dd8810e17fe6dd5a7ced9453 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 18 Feb 2024 13:54:42 -0500 Subject: [PATCH] make dev artifacts for sparsify --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/manpage.md | 27 ++++++++++++------- docs/src/manpage.txt | 27 ++++++++++++------- docs/src/two-pass-algorithms.md | 4 +-- man/manpage.txt | 27 ++++++++++++------- man/mlr.1 | 27 +++++++++++++++---- 6 files changed, 98 insertions(+), 60 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193..100716ec2 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 442f006dc..2d7935bd6 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -19,9 +19,7 @@ Quick links: This is simply a copy of what you should see on running `man mlr` at a command prompt, once Miller is installed on your system.
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -199,9 +197,9 @@ MILLER(1)                                                            MILLER(1)
        json-parse json-stringify join label latin1-to-utf8 least-frequent
        merge-fields most-frequent nest nothing put regularize remove-empty-columns
        rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-       unsparsify
+       skip-trivial-records sort sort-within-records sparsify split ssub stats1
+       stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+       uniq unspace unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
@@ -810,7 +808,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1840,6 +1838,17 @@ MILLER(1)                                                            MILLER(1)
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   1msparsify0m
+       Usage: mlr sparsify [options]
+       Unsets fields for which the value is the empty string (or, optionally, another
+       specified value). Only makes sense with output format not being CSV or TSV.
+       Options:
+       -s {filler string} What values to remove. Defaults to the empty string.
+       -f {a,b,c} Specify field names to be operated on; any other fields won't be
+                  modified. The default is to modify all fields.
+       -h|--help  Show this message.
+       Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+
    1msplit0m
        Usage: mlr split [options] {filename}
        Options:
@@ -3690,7 +3699,5 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-01-23                         MILLER(1)
+                                  2024-02-18                         4mMILLER24m(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index bc525f8de..151b0fc33 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -1,6 +1,4 @@ -MILLER(1) MILLER(1) - - +4mMILLER24m(1) 4mMILLER24m(1) 1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such @@ -178,9 +176,9 @@ MILLER(1) MILLER(1) json-parse json-stringify join label latin1-to-utf8 least-frequent merge-fields most-frequent nest nothing put regularize remove-empty-columns rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle - skip-trivial-records sort sort-within-records split ssub stats1 stats2 step - sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace - unsparsify + skip-trivial-records sort sort-within-records sparsify split ssub stats1 + stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten + uniq unspace unsparsify 1mFUNCTION LIST0m abs acos acosh antimode any append apply arrayify asin asinh asserting_absent @@ -789,7 +787,7 @@ MILLER(1) MILLER(1) markdown " " N/A "\n" nidx " " N/A "\n" pprint " " N/A "\n" - tsv " " N/A "\n" + tsv " " N/A "\n" xtab "\n" " " "\n\n" --fs {string} Specify FS for input and output. @@ -1819,6 +1817,17 @@ MILLER(1) MILLER(1) -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. + 1msparsify0m + Usage: mlr sparsify [options] + Unsets fields for which the value is the empty string (or, optionally, another + specified value). Only makes sense with output format not being CSV or TSV. + Options: + -s {filler string} What values to remove. Defaults to the empty string. + -f {a,b,c} Specify field names to be operated on; any other fields won't be + modified. The default is to modify all fields. + -h|--help Show this message. + Example: if input is a=1,b=,c=3 then output is a=1,c=3. 
+ 1msplit0m Usage: mlr split [options] {filename} Options: @@ -3669,6 +3678,4 @@ MILLER(1) MILLER(1) MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - - - 2024-01-23 MILLER(1) + 2024-02-18 4mMILLER24m(1) diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..e475aebf3 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/man/manpage.txt b/man/manpage.txt
index bc525f8de..151b0fc33 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -1,6 +1,4 @@
-MILLER(1)                                                            MILLER(1)
-
-
+4mMILLER24m(1)                                                            4mMILLER24m(1)
 
 1mNAME0m
        Miller -- like awk, sed, cut, join, and sort for name-indexed data such
@@ -178,9 +176,9 @@ MILLER(1)                                                            MILLER(1)
        json-parse json-stringify join label latin1-to-utf8 least-frequent
        merge-fields most-frequent nest nothing put regularize remove-empty-columns
        rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-       unsparsify
+       skip-trivial-records sort sort-within-records sparsify split ssub stats1
+       stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+       uniq unspace unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh antimode any append apply arrayify asin asinh asserting_absent
@@ -789,7 +787,7 @@ MILLER(1)                                                            MILLER(1)
                markdown " "    N/A    "\n"
                nidx     " "    N/A    "\n"
                pprint   " "    N/A    "\n"
-               tsv      "  "    N/A    "\n"
+               tsv      "     "    N/A    "\n"
                xtab     "\n"   " "    "\n\n"
 
        --fs {string}            Specify FS for input and output.
@@ -1819,6 +1817,17 @@ MILLER(1)                                                            MILLER(1)
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   1msparsify0m
+       Usage: mlr sparsify [options]
+       Unsets fields for which the value is the empty string (or, optionally, another
+       specified value). Only makes sense with output format not being CSV or TSV.
+       Options:
+       -s {filler string} What values to remove. Defaults to the empty string.
+       -f {a,b,c} Specify field names to be operated on; any other fields won't be
+                  modified. The default is to modify all fields.
+       -h|--help  Show this message.
+       Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+
    1msplit0m
        Usage: mlr split [options] {filename}
        Options:
@@ -3669,6 +3678,4 @@ MILLER(1)                                                            MILLER(1)
        MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite
        https://miller.readthedocs.io
 
-
-
-                                  2024-01-23                         MILLER(1)
+                                  2024-02-18                         4mMILLER24m(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 9a5cb0487..3d5c75b4e 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2024-01-23
+.\"      Date: 2024-02-18
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2024-01-23" "\ \&" "\ \&"
+.TH "MILLER" "1" "2024-02-18" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -219,9 +219,9 @@ fraction gap grep group-by group-like gsub having-fields head histogram
 json-parse json-stringify join label latin1-to-utf8 least-frequent
 merge-fields most-frequent nest nothing put regularize remove-empty-columns
 rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
-skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
-sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
-unsparsify
+skip-trivial-records sort sort-within-records sparsify split ssub stats1
+stats2 step sub summary tac tail tee template top utf8-to-latin1 unflatten
+uniq unspace unsparsify
 .fi
 .if n \{\
 .RE
@@ -2298,6 +2298,23 @@ Options:
 .fi
 .if n \{\
 .RE
+.SS "sparsify"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr sparsify [options]
+Unsets fields for which the value is the empty string (or, optionally, another
+specified value). Only makes sense with output format not being CSV or TSV.
+Options:
+-s {filler string} What values to remove. Defaults to the empty string.
+-f {a,b,c} Specify field names to be operated on; any other fields won't be
+           modified. The default is to modify all fields.
+-h|--help  Show this message.
+Example: if input is a=1,b=,c=3 then output is a=1,c=3.
+.fi
+.if n \{\
+.RE
 .SS "split"
 .if n \{\
 .RS 0