mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
mlr --help split up (#582)
* Remove leading "$ " from code examples, now that highlighting is in place * avoid package dependency cycle between auxents and cli * transforming/transformers package merge * pivotable lib.DOC_URL * unexpose auxent usage funcs * major refactor
This commit is contained in:
parent
cd42669a07
commit
4fce7a8079
119 changed files with 6300 additions and 5900 deletions
|
|
@ -25,4 +25,4 @@ of this software, even if advised of the possibility of such damage.
|
|||
|
||||
I am providing code in this repository to you under an open-source license.
|
||||
Because this is my personal repository, the license you receive to my code is
|
||||
from me and not from my employer (Facebook).
|
||||
from me and not from my employer.
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ Suppose you have this CSV data file (`example.csv <./example.csv>`_):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat example.csv
|
||||
cat example.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
yellow,triangle,true,11,43.6498,9.8870
|
||||
red,square,true,15,79.2778,0.0130
|
||||
|
|
@ -30,7 +30,7 @@ Suppose you have this CSV data file (`example.csv <./example.csv>`_):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv cat example.csv
|
||||
mlr --csv cat example.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
yellow,triangle,true,11,43.6498,9.8870
|
||||
red,square,true,15,79.2778,0.0130
|
||||
|
|
@ -48,7 +48,7 @@ but it can also do format conversion (here, you can pretty-print in tabular form
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint cat example.csv
|
||||
mlr --icsv --opprint cat example.csv
|
||||
color shape flag index quantity rate
|
||||
yellow triangle true 11 43.6498 9.8870
|
||||
red square true 15 79.2778 0.0130
|
||||
|
|
@ -66,7 +66,7 @@ but it can also do format conversion (here, you can pretty-print in tabular form
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv head -n 4 example.csv
|
||||
mlr --csv head -n 4 example.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
yellow,triangle,true,11,43.6498,9.8870
|
||||
red,square,true,15,79.2778,0.0130
|
||||
|
|
@ -76,7 +76,7 @@ but it can also do format conversion (here, you can pretty-print in tabular form
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv tail -n 4 example.csv
|
||||
mlr --csv tail -n 4 example.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
purple,triangle,false,65,80.1405,5.8240
|
||||
yellow,circle,true,73,63.9785,4.2370
|
||||
|
|
@ -88,7 +88,7 @@ You can sort primarily alphabetically on one field, then secondarily numerically
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint sort -f shape -nr index example.csv
|
||||
mlr --icsv --opprint sort -f shape -nr index example.csv
|
||||
color shape flag index quantity rate
|
||||
yellow circle true 87 63.5058 8.3350
|
||||
yellow circle true 73 63.9785 4.2370
|
||||
|
|
@ -106,7 +106,7 @@ You can use ``cut`` to retain only specified fields, in the same order they appe
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint cut -f flag,shape example.csv
|
||||
mlr --icsv --opprint cut -f flag,shape example.csv
|
||||
shape flag
|
||||
triangle true
|
||||
square true
|
||||
|
|
@ -124,7 +124,7 @@ You can also use ``cut -o`` to retain only specified fields in your preferred or
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint cut -o -f flag,shape example.csv
|
||||
mlr --icsv --opprint cut -o -f flag,shape example.csv
|
||||
flag shape
|
||||
true triangle
|
||||
true square
|
||||
|
|
@ -142,7 +142,7 @@ You can use ``cut -x`` to omit fields you don't care about:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint cut -x -f flag,shape example.csv
|
||||
mlr --icsv --opprint cut -x -f flag,shape example.csv
|
||||
color index quantity rate
|
||||
yellow 11 43.6498 9.8870
|
||||
red 15 79.2778 0.0130
|
||||
|
|
@ -160,7 +160,7 @@ You can use ``filter`` to keep only records you care about:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint filter '$color == "red"' example.csv
|
||||
mlr --icsv --opprint filter '$color == "red"' example.csv
|
||||
color shape flag index quantity rate
|
||||
red square true 15 79.2778 0.0130
|
||||
red circle true 16 13.8103 2.9010
|
||||
|
|
@ -170,14 +170,14 @@ You can use ``filter`` to keep only records you care about:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint filter '$color == "red" && $flag == 1' example.csv
|
||||
mlr --icsv --opprint filter '$color == "red" && $flag == 1' example.csv
|
||||
|
||||
You can use ``put`` to create new fields which are computed from other fields:
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$ratio = $quantity / $rate; $color_shape = $color . "_" . $shape' example.csv
|
||||
mlr --icsv --opprint put '$ratio = $quantity / $rate; $color_shape = $color . "_" . $shape' example.csv
|
||||
color shape flag index quantity rate ratio color_shape
|
||||
yellow triangle true 11 43.6498 9.8870 4.414868 yellow_triangle
|
||||
red square true 15 79.2778 0.0130 6098.292308 red_square
|
||||
|
|
@ -195,7 +195,7 @@ Even though Miller's main selling point is name-indexing, sometimes you really w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$[[3]] = "NEW"' example.csv
|
||||
mlr --icsv --opprint put '$[[3]] = "NEW"' example.csv
|
||||
color shape NEW index quantity rate
|
||||
yellow triangle true 11 43.6498 9.8870
|
||||
red square true 15 79.2778 0.0130
|
||||
|
|
@ -211,7 +211,7 @@ Even though Miller's main selling point is name-indexing, sometimes you really w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$[[[3]]] = "NEW"' example.csv
|
||||
mlr --icsv --opprint put '$[[[3]]] = "NEW"' example.csv
|
||||
color shape flag index quantity rate
|
||||
yellow triangle NEW 11 43.6498 9.8870
|
||||
red square NEW 15 79.2778 0.0130
|
||||
|
|
@ -232,7 +232,7 @@ OK, CSV and pretty-print are fine. But Miller can also convert between a few oth
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --ojson put '$ratio = $quantity/$rate; $shape = toupper($shape)' example.csv
|
||||
mlr --icsv --ojson put '$ratio = $quantity/$rate; $shape = toupper($shape)' example.csv
|
||||
{ "color": "yellow", "shape": "TRIANGLE", "flag": true, "index": 11, "quantity": 43.6498, "rate": 9.8870, "ratio": 4.414868 }
|
||||
{ "color": "red", "shape": "SQUARE", "flag": true, "index": 15, "quantity": 79.2778, "rate": 0.0130, "ratio": 6098.292308 }
|
||||
{ "color": "red", "shape": "CIRCLE", "flag": true, "index": 16, "quantity": 13.8103, "rate": 2.9010, "ratio": 4.760531 }
|
||||
|
|
@ -249,7 +249,7 @@ Or, JSON output with vertical-formatting flags:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --ojsonx tail -n 2 example.csv
|
||||
mlr --icsv --ojsonx tail -n 2 example.csv
|
||||
{
|
||||
"color": "yellow",
|
||||
"shape": "circle",
|
||||
|
|
@ -277,7 +277,7 @@ Here are the records with the top three ``index`` values:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint sort -f shape -nr index then head -n 3 example.csv
|
||||
mlr --icsv --opprint sort -f shape -nr index then head -n 3 example.csv
|
||||
color shape flag index quantity rate
|
||||
yellow circle true 87 63.5058 8.3350
|
||||
yellow circle true 73 63.9785 4.2370
|
||||
|
|
@ -288,7 +288,7 @@ Lots of Miller commands take a ``-g`` option for group-by: here, ``head -n 1 -g
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint sort -f shape -nr index then head -n 1 -g shape example.csv
|
||||
mlr --icsv --opprint sort -f shape -nr index then head -n 1 -g shape example.csv
|
||||
color shape flag index quantity rate
|
||||
yellow circle true 87 63.5058 8.3350
|
||||
purple square false 91 72.3735 8.2430
|
||||
|
|
@ -299,7 +299,7 @@ Statistics can be computed with or without group-by field(s):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint --from example.csv stats1 -a count,min,mean,max -f quantity -g shape
|
||||
mlr --icsv --opprint --from example.csv stats1 -a count,min,mean,max -f quantity -g shape
|
||||
shape quantity_count quantity_min quantity_mean quantity_max
|
||||
triangle 3 43.649800 68.339767 81.229000
|
||||
square 4 72.373500 76.601150 79.277800
|
||||
|
|
@ -308,7 +308,7 @@ Statistics can be computed with or without group-by field(s):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint --from example.csv stats1 -a count,min,mean,max -f quantity -g shape,color
|
||||
mlr --icsv --opprint --from example.csv stats1 -a count,min,mean,max -f quantity -g shape,color
|
||||
shape color quantity_count quantity_min quantity_mean quantity_max
|
||||
triangle yellow 1 43.649800 43.649800 43.649800
|
||||
square red 3 77.199100 78.010367 79.277800
|
||||
|
|
@ -322,7 +322,7 @@ If your output has a lot of columns, you can use XTAB format to line things up v
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --oxtab --from example.csv stats1 -a p0,p10,p25,p50,p75,p90,p99,p100 -f rate
|
||||
mlr --icsv --oxtab --from example.csv stats1 -a p0,p10,p25,p50,p75,p90,p99,p100 -f rate
|
||||
rate_p0 0.013000
|
||||
rate_p10 2.901000
|
||||
rate_p25 4.237000
|
||||
|
|
@ -422,12 +422,12 @@ Lastly, using ``tee`` within ``put``, you can split your input data into separat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'
|
||||
mlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat circle.csv
|
||||
cat circle.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
red,circle,true,16,13.8103,2.9010
|
||||
yellow,circle,true,73,63.9785,4.2370
|
||||
|
|
@ -436,7 +436,7 @@ Lastly, using ``tee`` within ``put``, you can split your input data into separat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat square.csv
|
||||
cat square.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
red,square,true,15,79.2778,0.0130
|
||||
red,square,false,48,77.5542,7.4670
|
||||
|
|
@ -446,7 +446,7 @@ Lastly, using ``tee`` within ``put``, you can split your input data into separat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat triangle.csv
|
||||
cat triangle.csv
|
||||
color,shape,flag,index,quantity,rate
|
||||
yellow,triangle,true,11,43.6498,9.8870
|
||||
purple,triangle,false,51,81.2290,8.5910
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ Sometimes we get CSV files which lack a header. For example (`data/headerless.cs
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/headerless.csv
|
||||
cat data/headerless.csv
|
||||
John,23,present
|
||||
Fred,34,present
|
||||
Alice,56,missing
|
||||
|
|
@ -23,7 +23,7 @@ You can use Miller to add a header. The ``--implicit-csv-header`` applies positi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv --implicit-csv-header cat data/headerless.csv
|
||||
mlr --csv --implicit-csv-header cat data/headerless.csv
|
||||
1,2,3
|
||||
John,23,present
|
||||
Fred,34,present
|
||||
|
|
@ -35,7 +35,7 @@ Following that, you can rename the positionally indexed labels to names with mea
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv --implicit-csv-header label name,age,status data/headerless.csv
|
||||
mlr --csv --implicit-csv-header label name,age,status data/headerless.csv
|
||||
name,age,status
|
||||
John,23,present
|
||||
Fred,34,present
|
||||
|
|
@ -47,7 +47,7 @@ Likewise, if you need to produce CSV which is lacking its header, you can pipe M
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -5 data/colored-shapes.dkvp | mlr --ocsv cat
|
||||
head -5 data/colored-shapes.dkvp | mlr --ocsv cat
|
||||
color,shape,flag,i,u,v,w,x
|
||||
yellow,triangle,1,11,0.6321695890307647,0.9887207810889004,0.4364983936735774,5.7981881667050565
|
||||
red,square,1,15,0.21966833570651523,0.001257332190235938,0.7927778364718627,2.944117399716207
|
||||
|
|
@ -58,7 +58,7 @@ Likewise, if you need to produce CSV which is lacking its header, you can pipe M
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -5 data/colored-shapes.dkvp | mlr --ocsv --headerless-csv-output cat
|
||||
head -5 data/colored-shapes.dkvp | mlr --ocsv --headerless-csv-output cat
|
||||
yellow,triangle,1,11,0.6321695890307647,0.9887207810889004,0.4364983936735774,5.7981881667050565
|
||||
red,square,1,15,0.21966833570651523,0.001257332190235938,0.7927778364718627,2.944117399716207
|
||||
red,circle,1,16,0.20901671281497636,0.29005231936593445,0.13810280912907674,5.065034003400998
|
||||
|
|
@ -70,7 +70,7 @@ Lastly, often we say "CSV" or "TSV" when we have positionally indexed data in co
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --inidx --ifs comma --oxtab cut -f 1,3 data/headerless.csv
|
||||
mlr --inidx --ifs comma --oxtab cut -f 1,3 data/headerless.csv
|
||||
1 John
|
||||
3 present
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ Suppose we have the following data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat multi-join/input.csv
|
||||
cat multi-join/input.csv
|
||||
id,task
|
||||
10,chop
|
||||
20,puree
|
||||
|
|
@ -107,7 +107,7 @@ And we want to augment the ``id`` column with lookups from the following data fi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat multi-join/name-lookup.csv
|
||||
cat multi-join/name-lookup.csv
|
||||
id,name
|
||||
30,Alice
|
||||
10,Bob
|
||||
|
|
@ -116,7 +116,7 @@ And we want to augment the ``id`` column with lookups from the following data fi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat multi-join/status-lookup.csv
|
||||
cat multi-join/status-lookup.csv
|
||||
id,status
|
||||
30,occupied
|
||||
10,idle
|
||||
|
|
@ -127,7 +127,7 @@ We can run the input file through multiple ``join`` commands in a ``then``-chain
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint join -f multi-join/name-lookup.csv -j id then join -f multi-join/status-lookup.csv -j id multi-join/input.csv
|
||||
mlr --icsv --opprint join -f multi-join/name-lookup.csv -j id then join -f multi-join/status-lookup.csv -j id multi-join/input.csv
|
||||
id status name task
|
||||
10 idle Bob chop
|
||||
20 idle Carol puree
|
||||
|
|
@ -146,7 +146,7 @@ Suppose you want to replace spaces with underscores in your column names:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/spaces.csv
|
||||
cat data/spaces.csv
|
||||
a b c,def,g h i
|
||||
123,4567,890
|
||||
2468,1357,3579
|
||||
|
|
@ -157,7 +157,7 @@ The simplest way is to use ``mlr rename`` with ``-g`` (for global replace, not j
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv rename -g -r ' ,_' data/spaces.csv
|
||||
mlr --csv rename -g -r ' ,_' data/spaces.csv
|
||||
a_b_c,def,g_h_i
|
||||
123,4567,890
|
||||
2468,1357,3579
|
||||
|
|
@ -166,7 +166,7 @@ The simplest way is to use ``mlr rename`` with ``-g`` (for global replace, not j
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv --opprint rename -g -r ' ,_' data/spaces.csv
|
||||
mlr --csv --opprint rename -g -r ' ,_' data/spaces.csv
|
||||
a_b_c def g_h_i
|
||||
123 4567 890
|
||||
2468 1357 3579
|
||||
|
|
@ -177,7 +177,7 @@ You can also do this with a for-loop:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/bulk-rename-for-loop.mlr
|
||||
cat data/bulk-rename-for-loop.mlr
|
||||
map newrec = {};
|
||||
for (oldk, v in $*) {
|
||||
newrec[gsub(oldk, " ", "_")] = v;
|
||||
|
|
@ -187,7 +187,7 @@ You can also do this with a for-loop:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put -f data/bulk-rename-for-loop.mlr data/spaces.csv
|
||||
mlr --icsv --opprint put -f data/bulk-rename-for-loop.mlr data/spaces.csv
|
||||
a_b_c def g_h_i
|
||||
123 4567 890
|
||||
2468 1357 3579
|
||||
|
|
@ -201,7 +201,7 @@ How to do ``$name = gsub($name, "old", "new")`` for all fields?
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/sar.csv
|
||||
cat data/sar.csv
|
||||
a,b,c
|
||||
the quick,brown fox,jumped
|
||||
over,the,lazy dogs
|
||||
|
|
@ -209,7 +209,7 @@ How to do ``$name = gsub($name, "old", "new")`` for all fields?
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/sar.mlr
|
||||
cat data/sar.mlr
|
||||
for (k in $*) {
|
||||
$[k] = gsub($[k], "e", "X");
|
||||
}
|
||||
|
|
@ -217,7 +217,7 @@ How to do ``$name = gsub($name, "old", "new")`` for all fields?
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv put -f data/sar.mlr data/sar.csv
|
||||
mlr --csv put -f data/sar.mlr data/sar.csv
|
||||
a,b,c
|
||||
thX quick,brown fox,jumpXd
|
||||
ovXr,thX,lazy dogs
|
||||
|
|
@ -230,7 +230,7 @@ Using Miller 5.0.0's map literals and assigning to ``$*``, you can fully general
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -240,7 +240,7 @@ Using Miller 5.0.0's map literals and assigning to ``$*``, you can fully general
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '
|
||||
mlr put '
|
||||
begin {
|
||||
@i_cumu = 0;
|
||||
}
|
||||
|
|
@ -269,7 +269,7 @@ The ``awk``-like built-in variable ``NR`` is incremented for each input record:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -279,7 +279,7 @@ The ``awk``-like built-in variable ``NR`` is incremented for each input record:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$nr = NR' data/small
|
||||
mlr put '$nr = NR' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,nr=1
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,nr=2
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,nr=3
|
||||
|
|
@ -291,7 +291,7 @@ However, this is the record number within the original input stream -- not after
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$a == "wye"' then put '$nr = NR' data/small
|
||||
mlr filter '$a == "wye"' then put '$nr = NR' data/small
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,nr=3
|
||||
a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,nr=5
|
||||
|
||||
|
|
@ -300,7 +300,7 @@ There are two good options here. One is to use the ``cat`` verb with ``-n``:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$a == "wye"' then cat -n data/small
|
||||
mlr filter '$a == "wye"' then cat -n data/small
|
||||
n=1,a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
n=2,a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729
|
||||
|
||||
|
|
@ -309,7 +309,7 @@ The other is to keep your own counter within the ``put`` DSL:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$a == "wye"' then put 'begin {@n = 1} $n = @n; @n += 1' data/small
|
||||
mlr filter '$a == "wye"' then put 'begin {@n = 1} $n = @n; @n += 1' data/small
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,n=1
|
||||
a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,n=2
|
||||
|
||||
|
|
@ -332,7 +332,7 @@ Here are some ways to use the type-checking options as described in :ref:`refere
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/het-bool.csv
|
||||
cat data/het-bool.csv
|
||||
name,reachable
|
||||
barney,false
|
||||
betty,true
|
||||
|
|
@ -344,7 +344,7 @@ One option is to coerce everything to boolean, or integer:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$reachable = boolean($reachable)' data/het-bool.csv
|
||||
mlr --icsv --opprint put '$reachable = boolean($reachable)' data/het-bool.csv
|
||||
name reachable
|
||||
barney false
|
||||
betty true
|
||||
|
|
@ -354,7 +354,7 @@ One option is to coerce everything to boolean, or integer:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$reachable = int(boolean($reachable))' data/het-bool.csv
|
||||
mlr --icsv --opprint put '$reachable = int(boolean($reachable))' data/het-bool.csv
|
||||
name reachable
|
||||
barney 0
|
||||
betty 1
|
||||
|
|
@ -366,7 +366,7 @@ A second option is to flag badly formatted data within the output stream:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put '$format_ok = is_string($reachable)' data/het-bool.csv
|
||||
mlr --icsv --opprint put '$format_ok = is_string($reachable)' data/het-bool.csv
|
||||
name reachable format_ok
|
||||
barney false true
|
||||
betty true true
|
||||
|
|
@ -378,7 +378,7 @@ Or perhaps to flag badly formatted data outside the output stream:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint put 'if (!is_string($reachable)) {eprint "Malformed at NR=".NR} ' data/het-bool.csv
|
||||
mlr --icsv --opprint put 'if (!is_string($reachable)) {eprint "Malformed at NR=".NR} ' data/het-bool.csv
|
||||
Malformed at NR=4
|
||||
name reachable
|
||||
barney false
|
||||
|
|
@ -391,7 +391,7 @@ A third way is to abort the process on first instance of bad data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv put '$reachable = asserting_string($reachable)' data/het-bool.csv
|
||||
mlr --csv put '$reachable = asserting_string($reachable)' data/het-bool.csv
|
||||
mlr: string type-assertion failed at NR=4 FNR=4 FILENAME=data/het-bool.csv
|
||||
name,reachable
|
||||
barney,false
|
||||
|
|
@ -414,7 +414,7 @@ The simplest option is to use :ref:`mlr nest <reference-verbs-nest>`:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --tsv nest --explode --values --across-records -f b --nested-fs : data/nested.tsv
|
||||
mlr --tsv nest --explode --values --across-records -f b --nested-fs : data/nested.tsv
|
||||
a b
|
||||
x z
|
||||
s u
|
||||
|
|
@ -424,7 +424,7 @@ The simplest option is to use :ref:`mlr nest <reference-verbs-nest>`:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --tsv nest --explode --values --across-fields -f b --nested-fs : data/nested.tsv
|
||||
mlr --tsv nest --explode --values --across-fields -f b --nested-fs : data/nested.tsv
|
||||
a b_1
|
||||
x z
|
||||
|
||||
|
|
@ -438,7 +438,7 @@ One option to split out the colon-delimited values in the ``b`` column is to use
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/nested.tsv --itsv --oxtab put 'o=splitnv($b, ":"); for (k,v in o) {$["p".k]=v}'
|
||||
mlr --from data/nested.tsv --itsv --oxtab put 'o=splitnv($b, ":"); for (k,v in o) {$["p".k]=v}'
|
||||
a x
|
||||
b z
|
||||
p1 z
|
||||
|
|
@ -454,7 +454,7 @@ while another is to loop over the same map from ``splitnv`` and use it (with ``p
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/nested.tsv --itsv --oxtab put -q 'o=splitnv($b, ":"); for (k,v in o) {emit mapsum($*, {"b":v})}'
|
||||
mlr --from data/nested.tsv --itsv --oxtab put -q 'o=splitnv($b, ":"); for (k,v in o) {emit mapsum($*, {"b":v})}'
|
||||
a x
|
||||
b z
|
||||
|
||||
|
|
@ -470,7 +470,7 @@ while another is to loop over the same map from ``splitnv`` and use it (with ``p
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/nested.tsv --tsv put -q 'o=splitnv($b, ":"); for (k,v in o) {emit mapsum($*, {"b":v})}'
|
||||
mlr --from data/nested.tsv --tsv put -q 'o=splitnv($b, ":"); for (k,v in o) {emit mapsum($*, {"b":v})}'
|
||||
a b
|
||||
x z
|
||||
s u
|
||||
|
|
@ -485,7 +485,7 @@ Suppose you have a database query which you run at one point in time, producing
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/previous_counters.csv
|
||||
cat data/previous_counters.csv
|
||||
color,count
|
||||
red,3472
|
||||
blue,6838
|
||||
|
|
@ -495,7 +495,7 @@ Suppose you have a database query which you run at one point in time, producing
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/current_counters.csv
|
||||
cat data/current_counters.csv
|
||||
color,count
|
||||
red,3467
|
||||
orange,670
|
||||
|
|
@ -509,12 +509,12 @@ First, rename counter columns to make them distinct:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv rename count,previous_count data/previous_counters.csv > data/prevtemp.csv
|
||||
mlr --csv rename count,previous_count data/previous_counters.csv > data/prevtemp.csv
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/prevtemp.csv
|
||||
cat data/prevtemp.csv
|
||||
color,previous_count
|
||||
red,3472
|
||||
blue,6838
|
||||
|
|
@ -524,12 +524,12 @@ First, rename counter columns to make them distinct:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv rename count,current_count data/current_counters.csv > data/currtemp.csv
|
||||
mlr --csv rename count,current_count data/current_counters.csv > data/currtemp.csv
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/currtemp.csv
|
||||
cat data/currtemp.csv
|
||||
color,current_count
|
||||
red,3467
|
||||
orange,670
|
||||
|
|
@ -541,7 +541,7 @@ Then, join on the key field(s), and use unsparsify to zero-fill counters absent
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint \
|
||||
mlr --icsv --opprint \
|
||||
join -j color --ul --ur -f data/prevtemp.csv \
|
||||
then unsparsify --fill-with 0 \
|
||||
then put '$count_delta = $current_count - $previous_count' \
|
||||
|
|
@ -561,7 +561,7 @@ Suppose you have some date-stamped data which may (or may not) be missing entrie
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -n 10 data/miss-date.csv
|
||||
head -n 10 data/miss-date.csv
|
||||
date,qoh
|
||||
2012-03-05,10055
|
||||
2012-03-06,10486
|
||||
|
|
@ -576,7 +576,7 @@ Suppose you have some date-stamped data which may (or may not) be missing entrie
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ wc -l data/miss-date.csv
|
||||
wc -l data/miss-date.csv
|
||||
1372 data/miss-date.csv
|
||||
|
||||
Since there are 1372 lines in the data file, some automation is called for. To find the missing dates, you can convert the dates to seconds since the epoch using ``strptime``, then compute adjacent differences (the ``cat -n`` simply inserts record-counters):
|
||||
|
|
@ -584,7 +584,7 @@ Since there are 1372 lines in the data file, some automation is called for. To f
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/miss-date.csv --icsv \
|
||||
mlr --from data/miss-date.csv --icsv \
|
||||
cat -n \
|
||||
then put '$datestamp = strptime($date, "%Y-%m-%d")' \
|
||||
then step -a delta -f datestamp \
|
||||
|
|
@ -605,7 +605,7 @@ Then, filter for adjacent difference not being 86400 (the number of seconds in a
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/miss-date.csv --icsv \
|
||||
mlr --from data/miss-date.csv --icsv \
|
||||
cat -n \
|
||||
then put '$datestamp = strptime($date, "%Y-%m-%d")' \
|
||||
then step -a delta -f datestamp \
|
||||
|
|
@ -618,7 +618,7 @@ Given this, it's now easy to see where the gaps are:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat -n then filter '$n >= 770 && $n <= 780' data/miss-date.csv
|
||||
mlr cat -n then filter '$n >= 770 && $n <= 780' data/miss-date.csv
|
||||
n=770,1=2014-04-12,2=129435
|
||||
n=771,1=2014-04-13,2=129868
|
||||
n=772,1=2014-04-14,2=129797
|
||||
|
|
@ -634,7 +634,7 @@ Given this, it's now easy to see where the gaps are:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat -n then filter '$n >= 1115 && $n <= 1125' data/miss-date.csv
|
||||
mlr cat -n then filter '$n >= 1115 && $n <= 1125' data/miss-date.csv
|
||||
n=1115,1=2015-03-25,2=181006
|
||||
n=1116,1=2015-03-26,2=180995
|
||||
n=1117,1=2015-03-27,2=181043
|
||||
|
|
@ -660,7 +660,7 @@ For example, mapping numeric values down a column to the percentage between thei
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put -q '
|
||||
mlr --from data/small --opprint put -q '
|
||||
# These are executed once per record, which is the first pass.
|
||||
# The key is to use NR to index an out-of-stream variable to
|
||||
# retain all the x-field values.
|
||||
|
|
@ -691,7 +691,7 @@ Similarly, finding the total record count requires first reading through all the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --from data/small put -q '
|
||||
mlr --opprint --from data/small put -q '
|
||||
@records[NR] = $*;
|
||||
end {
|
||||
for((I,k),v in @records) {
|
||||
|
|
@ -717,7 +717,7 @@ The idea is to retain records having the largest value of ``n`` in the following
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --itsv --opprint cat data/maxrows.tsv
|
||||
mlr --itsv --opprint cat data/maxrows.tsv
|
||||
a b n score
|
||||
purple red 5 0.743231
|
||||
blue purple 2 0.093710
|
||||
|
|
@ -756,7 +756,7 @@ Of course, the largest value of ``n`` isn't known until after all data have been
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/maxrows.mlr
|
||||
cat data/maxrows.mlr
|
||||
# Retain all records
|
||||
@records[NR] = $*;
|
||||
# Track max value of n
|
||||
|
|
@ -776,7 +776,7 @@ Of course, the largest value of ``n`` isn't known until after all data have been
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --itsv --opprint put -q -f data/maxrows.mlr data/maxrows.tsv
|
||||
mlr --itsv --opprint put -q -f data/maxrows.mlr data/maxrows.tsv
|
||||
a b n score
|
||||
purple red 5 0.743231
|
||||
purple red 5 0.389055
|
||||
|
|
@ -850,7 +850,7 @@ The idea here is that middles starting with a 1 belong to the outer value of 1,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/rect.txt put -q '
|
||||
mlr --from data/rect.txt put -q '
|
||||
is_present($outer) {
|
||||
unset @r
|
||||
}
|
||||
|
|
@ -876,7 +876,7 @@ Miller handles compliant CSV: in particular, it's an error if the number of data
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/ragged.csv
|
||||
cat data/ragged.csv
|
||||
a,b,c
|
||||
1,2,3
|
||||
4,5
|
||||
|
|
@ -885,7 +885,7 @@ Miller handles compliant CSV: in particular, it's an error if the number of data
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/ragged.csv --fs comma --nidx put '
|
||||
mlr --from data/ragged.csv --fs comma --nidx put '
|
||||
@maxnf = max(@maxnf, NF);
|
||||
@nf = NF;
|
||||
while(@nf < @maxnf) {
|
||||
|
|
@ -903,7 +903,7 @@ or, more simply,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/ragged.csv --fs comma --nidx put '
|
||||
mlr --from data/ragged.csv --fs comma --nidx put '
|
||||
@maxnf = max(@maxnf, NF);
|
||||
while(NF < @maxnf) {
|
||||
$[NF+1] = "";
|
||||
|
|
@ -957,7 +957,7 @@ Then
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json put -q -f data/feature-count.mlr data/features.json
|
||||
mlr --json put -q -f data/feature-count.mlr data/features.json
|
||||
{ "record_count": 12 }
|
||||
{ "key": "qoh", "key_counts": 8 }
|
||||
{ "key": "rate", "key_counts": 8 }
|
||||
|
|
@ -975,7 +975,7 @@ Then
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --opprint put -q -f data/feature-count.mlr data/features.json
|
||||
mlr --ijson --opprint put -q -f data/feature-count.mlr data/features.json
|
||||
record_count
|
||||
12
|
||||
|
||||
|
|
@ -1005,7 +1005,7 @@ For example, suppose you have JSON input like this:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/sparse.json
|
||||
cat data/sparse.json
|
||||
{"a":1,"b":2,"v":3}
|
||||
{"u":1,"b":2}
|
||||
{"a":1,"v":2,"x":3}
|
||||
|
|
@ -1016,7 +1016,7 @@ There are field names ``a``, ``b``, ``v``, ``u``, ``x``, ``w`` in the data -- bu
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/unsparsify.mlr
|
||||
cat data/unsparsify.mlr
|
||||
# First pass:
|
||||
# Remember all unique key names:
|
||||
for (k in $*) {
|
||||
|
|
@ -1047,7 +1047,7 @@ There are field names ``a``, ``b``, ``v``, ``u``, ``x``, ``w`` in the data -- bu
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json put -q -f data/unsparsify.mlr data/sparse.json
|
||||
mlr --json put -q -f data/unsparsify.mlr data/sparse.json
|
||||
{ "a": 1, "b": 2, "v": 3, "u": "", "x": "", "w": "" }
|
||||
{ "a": "", "b": 2, "v": "", "u": 1, "x": "", "w": "" }
|
||||
{ "a": 1, "b": "", "v": 2, "u": "", "x": 3, "w": "" }
|
||||
|
|
@ -1056,7 +1056,7 @@ There are field names ``a``, ``b``, ``v``, ``u``, ``x``, ``w`` in the data -- bu
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --ocsv put -q -f data/unsparsify.mlr data/sparse.json
|
||||
mlr --ijson --ocsv put -q -f data/unsparsify.mlr data/sparse.json
|
||||
a,b,v,u,x,w
|
||||
1,2,3,,,
|
||||
,2,,1,,
|
||||
|
|
@ -1066,7 +1066,7 @@ There are field names ``a``, ``b``, ``v``, ``u``, ``x``, ``w`` in the data -- bu
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --opprint put -q -f data/unsparsify.mlr data/sparse.json
|
||||
mlr --ijson --opprint put -q -f data/unsparsify.mlr data/sparse.json
|
||||
a b v u x w
|
||||
1 2 3 - - -
|
||||
- 2 - 1 - -
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ For one or more specified field names, simply compute p25 and p75, then write th
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 -f x -a p25,p75 \
|
||||
mlr --oxtab stats1 -f x -a p25,p75 \
|
||||
then put '$x_iqr = $x_p75 - $x_p25' \
|
||||
data/medium
|
||||
x_p25 0.246670
|
||||
|
|
@ -124,7 +124,7 @@ For wildcarded field names, first compute p25 and p75, then loop over field name
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 --fr '[i-z]' -a p25,p75 \
|
||||
mlr --oxtab stats1 --fr '[i-z]' -a p25,p75 \
|
||||
then put 'for (k,v in $*) {
|
||||
if (k =~ "(.*)_p25") {
|
||||
$["\1_iqr"] = $["\1_p75"] - $["\1_p25"]
|
||||
|
|
@ -149,7 +149,7 @@ This might be more elegantly implemented as an option within the ``stats1`` verb
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/medium put -q '
|
||||
mlr --from data/medium put -q '
|
||||
# Using the y field for weighting in this example
|
||||
weight = $y;
|
||||
|
||||
|
|
@ -187,7 +187,7 @@ Here we can chain together a few simple building blocks:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat expo-sample.sh
|
||||
cat expo-sample.sh
|
||||
# Generate 100,000 pairs of independent and identically distributed
|
||||
# exponentially distributed random variables with the same rate parameter
|
||||
# (namely, 2.5). Then compute histograms of one of them, along with
|
||||
|
|
@ -230,7 +230,7 @@ The output is as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ sh expo-sample.sh
|
||||
sh expo-sample.sh
|
||||
bin_lo bin_hi u_count s_count p_count
|
||||
0.000000 0.040000 [78]*******************#[9497] [353]#...................[3732] [20]*******************#[39755]
|
||||
0.040000 0.080000 [78]******************..[9497] [353]*****...............[3732] [20]*******.............[39755]
|
||||
|
|
@ -291,7 +291,7 @@ The `Sieve of Eratosthenes <http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes>`
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat programs/sieve.mlr
|
||||
cat programs/sieve.mlr
|
||||
# ================================================================
|
||||
# Sieve of Eratosthenes: simple example of Miller DSL as programming language.
|
||||
# ================================================================
|
||||
|
|
@ -328,7 +328,7 @@ The `Sieve of Eratosthenes <http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes>`
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put -f programs/sieve.mlr
|
||||
mlr -n put -f programs/sieve.mlr
|
||||
2
|
||||
3
|
||||
5
|
||||
|
|
@ -365,7 +365,7 @@ The (approximate) computation of points in the complex plane which are and aren'
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat programs/mand.mlr
|
||||
cat programs/mand.mlr
|
||||
# Mandelbrot set generator: simple example of Miller DSL as programming language.
|
||||
begin {
|
||||
# Set defaults
|
||||
|
|
@ -472,7 +472,7 @@ At standard resolution this makes a nice little ASCII plot:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put -f ./programs/mand.mlr
|
||||
mlr -n put -f ./programs/mand.mlr
|
||||
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ One of Miller's strengths is its compact notation: for example, given input of t
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -n 5 ../data/medium
|
||||
head -n 5 ../data/medium
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -24,7 +24,7 @@ you can simply do
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 -a sum -f x ../data/medium
|
||||
mlr --oxtab stats1 -a sum -f x ../data/medium
|
||||
x_sum 4986.019682
|
||||
|
||||
or
|
||||
|
|
@ -32,7 +32,7 @@ or
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a sum -f x -g b ../data/medium
|
||||
mlr --opprint stats1 -a sum -f x -g b ../data/medium
|
||||
b x_sum
|
||||
pan 965.763670
|
||||
wye 1023.548470
|
||||
|
|
@ -45,7 +45,7 @@ rather than the more tedious
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put -q '
|
||||
mlr --oxtab put -q '
|
||||
@x_sum += $x;
|
||||
end {
|
||||
emit @x_sum
|
||||
|
|
@ -58,7 +58,7 @@ or
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put -q '
|
||||
mlr --opprint put -q '
|
||||
@x_sum[$b] += $x;
|
||||
end {
|
||||
emit @x_sum, "b"
|
||||
|
|
@ -83,14 +83,14 @@ Mean without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a mean -f x data/medium
|
||||
mlr --opprint stats1 -a mean -f x data/medium
|
||||
x_mean
|
||||
0.498602
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put -q '
|
||||
mlr --opprint put -q '
|
||||
@x_sum += $x;
|
||||
@x_count += 1;
|
||||
end {
|
||||
|
|
@ -107,7 +107,7 @@ Keyed mean without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a mean -f x -g a,b data/medium
|
||||
mlr --opprint stats1 -a mean -f x -g a,b data/medium
|
||||
a b x_mean
|
||||
pan pan 0.513314
|
||||
eks pan 0.485076
|
||||
|
|
@ -138,7 +138,7 @@ Keyed mean without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put -q '
|
||||
mlr --opprint put -q '
|
||||
@x_sum[$a][$b] += $x;
|
||||
@x_count[$a][$b] += 1;
|
||||
end{
|
||||
|
|
@ -181,7 +181,7 @@ Variance and standard deviation without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 -a count,sum,mean,var,stddev -f x data/medium
|
||||
mlr --oxtab stats1 -a count,sum,mean,var,stddev -f x data/medium
|
||||
x_count 10000
|
||||
x_sum 4986.019682
|
||||
x_mean 0.498602
|
||||
|
|
@ -191,7 +191,7 @@ Variance and standard deviation without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat variance.mlr
|
||||
cat variance.mlr
|
||||
@n += 1;
|
||||
@sumx += $x;
|
||||
@sumx2 += $x**2;
|
||||
|
|
@ -205,7 +205,7 @@ Variance and standard deviation without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put -q -f variance.mlr data/medium
|
||||
mlr --oxtab put -q -f variance.mlr data/medium
|
||||
n 10000
|
||||
sumx 4986.019682
|
||||
sumx2 3328.652400
|
||||
|
|
@ -221,14 +221,14 @@ Min/max without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 -a min,max -f x data/medium
|
||||
mlr --oxtab stats1 -a min,max -f x data/medium
|
||||
x_min 0.000045
|
||||
x_max 0.999953
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put -q '@x_min = min(@x_min, $x); @x_max = max(@x_max, $x); end{emitf @x_min, @x_max}' data/medium
|
||||
mlr --oxtab put -q '@x_min = min(@x_min, $x); @x_max = max(@x_max, $x); end{emitf @x_min, @x_max}' data/medium
|
||||
x_min 0.000045
|
||||
x_max 0.999953
|
||||
|
||||
|
|
@ -238,7 +238,7 @@ Keyed min/max without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a min,max -f x -g a data/medium
|
||||
mlr --opprint stats1 -a min,max -f x -g a data/medium
|
||||
a x_min x_max
|
||||
pan 0.000204 0.999403
|
||||
eks 0.000692 0.998811
|
||||
|
|
@ -249,7 +249,7 @@ Keyed min/max without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --from data/medium put -q '
|
||||
mlr --opprint --from data/medium put -q '
|
||||
@min[$a] = min(@min[$a], $x);
|
||||
@max[$a] = max(@max[$a], $x);
|
||||
end{
|
||||
|
|
@ -269,7 +269,7 @@ Delta without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint step -a delta -f x data/small
|
||||
mlr --opprint step -a delta -f x data/small
|
||||
a b i x y x_delta
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 0
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 0.411890
|
||||
|
|
@ -280,7 +280,7 @@ Delta without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '$x_delta = is_present(@last) ? $x - @last : 0; @last = $x' data/small
|
||||
mlr --opprint put '$x_delta = is_present(@last) ? $x - @last : 0; @last = $x' data/small
|
||||
a b i x y x_delta
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 0
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 0.411890
|
||||
|
|
@ -294,7 +294,7 @@ Keyed delta without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint step -a delta -f x -g a data/small
|
||||
mlr --opprint step -a delta -f x -g a data/small
|
||||
a b i x y x_delta
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 0
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 0
|
||||
|
|
@ -305,7 +305,7 @@ Keyed delta without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '$x_delta = is_present(@last[$a]) ? $x - @last[$a] : 0; @last[$a]=$x' data/small
|
||||
mlr --opprint put '$x_delta = is_present(@last[$a]) ? $x - @last[$a] : 0; @last[$a]=$x' data/small
|
||||
a b i x y x_delta
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 0
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 0
|
||||
|
|
@ -319,7 +319,7 @@ Exponentially weighted moving averages without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint step -a ewma -d 0.1 -f x data/small
|
||||
mlr --opprint step -a ewma -d 0.1 -f x data/small
|
||||
a b i x y x_ewma_0.1
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 0.346790
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 0.387979
|
||||
|
|
@ -330,7 +330,7 @@ Exponentially weighted moving averages without/with oosvars
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '
|
||||
mlr --opprint put '
|
||||
begin{ @a=0.1 };
|
||||
$e = NR==1 ? $x : @a * $x + (1 - @a) * @e;
|
||||
@e=$e
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Vertical-tabular format is good for a quick look at CSV data layout -- seeing wh
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -n 2 data/flins.csv | mlr --icsv --oxtab cat
|
||||
head -n 2 data/flins.csv | mlr --icsv --oxtab cat
|
||||
county Seminole
|
||||
tiv_2011 22890.55
|
||||
tiv_2012 20848.71
|
||||
|
|
@ -25,7 +25,7 @@ A few simple queries:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint count-distinct -f county | head
|
||||
mlr --from data/flins.csv --icsv --opprint count-distinct -f county | head
|
||||
county count
|
||||
Seminole 1
|
||||
Miami Dade 2
|
||||
|
|
@ -37,26 +37,26 @@ A few simple queries:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint count-distinct -f construction,line
|
||||
mlr --from data/flins.csv --icsv --opprint count-distinct -f construction,line
|
||||
|
||||
Categorization of total insured value:
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint stats1 -a min,mean,max -f tiv_2012
|
||||
mlr --from data/flins.csv --icsv --opprint stats1 -a min,mean,max -f tiv_2012
|
||||
tiv_2012_min tiv_2012_mean tiv_2012_max
|
||||
19757.910000 1061531.463750 2785551.630000
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint stats1 -a min,mean,max -f tiv_2012 -g construction,line
|
||||
mlr --from data/flins.csv --icsv --opprint stats1 -a min,mean,max -f tiv_2012 -g construction,line
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --oxtab stats1 -a p0,p10,p50,p90,p95,p99,p100 -f hu_site_deductible
|
||||
mlr --from data/flins.csv --icsv --oxtab stats1 -a p0,p10,p50,p90,p95,p99,p100 -f hu_site_deductible
|
||||
hu_site_deductible_p0
|
||||
hu_site_deductible_p10
|
||||
hu_site_deductible_p50
|
||||
|
|
@ -68,7 +68,7 @@ Categorization of total insured value:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint stats1 -a p95,p99,p100 -f hu_site_deductible -g county then sort -f county | head
|
||||
mlr --from data/flins.csv --icsv --opprint stats1 -a p95,p99,p100 -f hu_site_deductible -g county then sort -f county | head
|
||||
county hu_site_deductible_p95 hu_site_deductible_p99 hu_site_deductible_p100
|
||||
Duval - - -
|
||||
Highlands - - -
|
||||
|
|
@ -80,7 +80,7 @@ Categorization of total insured value:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --oxtab stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
|
||||
mlr --from data/flins.csv --icsv --oxtab stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
|
||||
tiv_2011_tiv_2012_corr 0.935363
|
||||
tiv_2011_tiv_2012_ols_m 1.089091
|
||||
tiv_2011_tiv_2012_ols_b 103095.523356
|
||||
|
|
@ -90,7 +90,7 @@ Categorization of total insured value:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/flins.csv --icsv --opprint stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012 -g county
|
||||
mlr --from data/flins.csv --icsv --opprint stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012 -g county
|
||||
county tiv_2011_tiv_2012_corr tiv_2011_tiv_2012_ols_m tiv_2011_tiv_2012_ols_b tiv_2011_tiv_2012_ols_n tiv_2011_tiv_2012_r2
|
||||
Seminole - - - 1 -
|
||||
Miami Dade 1.000000 0.930643 -2311.154328 2 1.000000
|
||||
|
|
@ -117,13 +117,13 @@ Peek at the data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ wc -l data/colored-shapes.dkvp
|
||||
wc -l data/colored-shapes.dkvp
|
||||
10078 data/colored-shapes.dkvp
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ head -n 6 data/colored-shapes.dkvp | mlr --opprint cat
|
||||
head -n 6 data/colored-shapes.dkvp | mlr --opprint cat
|
||||
color shape flag i u v w x
|
||||
yellow triangle 1 11 0.6321695890307647 0.9887207810889004 0.4364983936735774 5.7981881667050565
|
||||
red square 1 15 0.21966833570651523 0.001257332190235938 0.7927778364718627 2.944117399716207
|
||||
|
|
@ -139,7 +139,7 @@ Here it looks reasonable that ``u`` is unit-uniform; something's up with ``v`` b
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab stats1 -a min,mean,max -f flag,u,v data/colored-shapes.dkvp | creach 3
|
||||
mlr --oxtab stats1 -a min,mean,max -f flag,u,v data/colored-shapes.dkvp | creach 3
|
||||
flag_min 0
|
||||
flag_mean 0.398889
|
||||
flag_max 1
|
||||
|
|
@ -157,7 +157,7 @@ The histogram shows the different distribution of 0/1 flags:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint histogram -f flag,u,v --lo -0.1 --hi 1.1 --nbins 12 data/colored-shapes.dkvp
|
||||
mlr --opprint histogram -f flag,u,v --lo -0.1 --hi 1.1 --nbins 12 data/colored-shapes.dkvp
|
||||
bin_lo bin_hi flag_count u_count v_count
|
||||
-0.100000 0.000000 6058 0 36
|
||||
0.000000 0.100000 0 1062 988
|
||||
|
|
@ -177,7 +177,7 @@ Look at univariate stats by color and shape. In particular, color-dependent flag
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a min,mean,max -f flag,u,v -g color then sort -f color data/colored-shapes.dkvp
|
||||
mlr --opprint stats1 -a min,mean,max -f flag,u,v -g color then sort -f color data/colored-shapes.dkvp
|
||||
color flag_min flag_mean flag_max u_min u_mean u_max v_min v_mean v_max
|
||||
blue 0 0.584354 1 0.000044 0.517717 0.999969 0.001489 0.491056 0.999576
|
||||
green 0 0.209197 1 0.000488 0.504861 0.999936 0.000501 0.499085 0.999676
|
||||
|
|
@ -189,7 +189,7 @@ Look at univariate stats by color and shape. In particular, color-dependent flag
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint stats1 -a min,mean,max -f flag,u,v -g shape then sort -f shape data/colored-shapes.dkvp
|
||||
mlr --opprint stats1 -a min,mean,max -f flag,u,v -g shape then sort -f shape data/colored-shapes.dkvp
|
||||
shape flag_min flag_mean flag_max u_min u_mean u_max v_min v_mean v_max
|
||||
circle 0 0.399846 1 0.000044 0.498555 0.999923 -0.092709 0.495524 1.072500
|
||||
square 0 0.396112 1 0.000188 0.499385 0.999969 0.000089 0.496538 0.999975
|
||||
|
|
@ -200,14 +200,14 @@ Look at bivariate stats by color and shape. In particular, ``u,v`` pairwise corr
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --right stats2 -a corr -f u,v,w,x data/colored-shapes.dkvp
|
||||
mlr --opprint --right stats2 -a corr -f u,v,w,x data/colored-shapes.dkvp
|
||||
u_v_corr w_x_corr
|
||||
0.133418 -0.011320
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --right stats2 -a corr -f u,v,w,x -g color,shape then sort -nr u_v_corr data/colored-shapes.dkvp
|
||||
mlr --opprint --right stats2 -a corr -f u,v,w,x -g color,shape then sort -nr u_v_corr data/colored-shapes.dkvp
|
||||
color shape u_v_corr w_x_corr
|
||||
red circle 0.980798 -0.018565
|
||||
orange square 0.176858 -0.071044
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ And here is an example using them:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat polyglot-dkvp-io/example.py
|
||||
cat polyglot-dkvp-io/example.py
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
|
|
@ -118,7 +118,7 @@ Run as-is:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ python polyglot-dkvp-io/example.py < data/small
|
||||
python polyglot-dkvp-io/example.py < data/small
|
||||
a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
||||
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
||||
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
|
||||
|
|
@ -130,7 +130,7 @@ Run as-is, then pipe to Miller for pretty-printing:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
|
||||
python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
|
||||
a b i y ab iy ta tb ti ty tab tiy
|
||||
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 str str int float str float
|
||||
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 str str int float str float
|
||||
|
|
@ -203,7 +203,7 @@ And here is an example using them:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat polyglot-dkvp-io/example.rb
|
||||
cat polyglot-dkvp-io/example.rb
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require 'dkvp_io'
|
||||
|
|
@ -234,7 +234,7 @@ Run as-is:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
|
||||
ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
|
||||
a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
||||
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
||||
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
|
||||
|
|
@ -246,7 +246,7 @@ Run as-is, then pipe to Miller for pretty-printing:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
|
||||
ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
|
||||
a b i y ab iy ta tb ti ty tab tiy
|
||||
pan pan 1 0.7268028627434533 panpan 1.7268028627434533 String String Integer Float String Float
|
||||
eks pan 2 0.5221511083334797 ekspan 2.5221511083334796 String String Integer Float String Float
|
||||
|
|
@ -272,7 +272,7 @@ The :ref:`reference-dsl-system` DSL function allows you to run a specific shell
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '$o = system("echo hello world")' data/small
|
||||
mlr --opprint put '$o = system("echo hello world")' data/small
|
||||
a b i x y o
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 hello world
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 hello world
|
||||
|
|
@ -283,7 +283,7 @@ The :ref:`reference-dsl-system` DSL function allows you to run a specific shell
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '$o = system("echo {" . NR . "}")' data/small
|
||||
mlr --opprint put '$o = system("echo {" . NR . "}")' data/small
|
||||
a b i x y o
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 {1}
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 {2}
|
||||
|
|
@ -294,7 +294,7 @@ The :ref:`reference-dsl-system` DSL function allows you to run a specific shell
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '$o = system("echo -n ".$a."| sha1sum")' data/small
|
||||
mlr --opprint put '$o = system("echo -n ".$a."| sha1sum")' data/small
|
||||
a b i x y o
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 f29c748220331c273ef16d5115f6ecd799947f13 -
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 456d988ecb3bf1b75f057fc6e9fe70db464e9388 -
|
||||
|
|
|
|||
90
docs/faq.rst
90
docs/faq.rst
|
|
@ -105,7 +105,7 @@ Within ``mlr put`` and ``mlr filter``, the default behavior for scanning input r
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/scan-example-1.tbl
|
||||
cat data/scan-example-1.tbl
|
||||
value
|
||||
1
|
||||
2.0
|
||||
|
|
@ -115,7 +115,7 @@ Within ``mlr put`` and ``mlr filter``, the default behavior for scanning input r
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --pprint put '$copy = $value; $type = typeof($value)' data/scan-example-1.tbl
|
||||
mlr --pprint put '$copy = $value; $type = typeof($value)' data/scan-example-1.tbl
|
||||
value copy type
|
||||
1 1 int
|
||||
2.0 2.000000 float
|
||||
|
|
@ -135,7 +135,7 @@ But now suppose you have data like these:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/scan-example-2.tbl
|
||||
cat data/scan-example-2.tbl
|
||||
value
|
||||
0001
|
||||
0002
|
||||
|
|
@ -151,7 +151,7 @@ But now suppose you have data like these:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --pprint put '$copy = $value; $type = typeof($value)' data/scan-example-2.tbl
|
||||
mlr --pprint put '$copy = $value; $type = typeof($value)' data/scan-example-2.tbl
|
||||
value copy type
|
||||
0001 1 int
|
||||
0002 2 int
|
||||
|
|
@ -181,7 +181,7 @@ The solution is to **use the -S flag** for ``mlr put`` and/or ``mlr filter``. Th
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --pprint put -S '$copy = $value; $type = typeof($value)' data/scan-example-2.tbl
|
||||
mlr --pprint put -S '$copy = $value; $type = typeof($value)' data/scan-example-2.tbl
|
||||
value copy type
|
||||
0001 0001 string
|
||||
0002 0002 string
|
||||
|
|
@ -204,7 +204,7 @@ First, look at the input data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/then-example.csv
|
||||
cat data/then-example.csv
|
||||
Status,Payment_Type,Amount
|
||||
paid,cash,10.00
|
||||
pending,debit,20.00
|
||||
|
|
@ -217,7 +217,7 @@ Next, run the first step of your command, omitting anything from the first ``the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint count-distinct -f Status,Payment_Type data/then-example.csv
|
||||
mlr --icsv --opprint count-distinct -f Status,Payment_Type data/then-example.csv
|
||||
Status Payment_Type count
|
||||
paid cash 2
|
||||
pending debit 1
|
||||
|
|
@ -229,7 +229,7 @@ After that, run it with the next ``then`` step included:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --opprint count-distinct -f Status,Payment_Type then sort -nr count data/then-example.csv
|
||||
mlr --icsv --opprint count-distinct -f Status,Payment_Type then sort -nr count data/then-example.csv
|
||||
Status Payment_Type count
|
||||
paid cash 2
|
||||
pending debit 1
|
||||
|
|
@ -243,7 +243,7 @@ Note, by the way, that you'll get the same results using pipes:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv count-distinct -f Status,Payment_Type data/then-example.csv | mlr --icsv --opprint sort -nr count
|
||||
mlr --csv count-distinct -f Status,Payment_Type data/then-example.csv | mlr --icsv --opprint sort -nr count
|
||||
Status Payment_Type count
|
||||
paid cash 2
|
||||
pending debit 1
|
||||
|
|
@ -258,44 +258,44 @@ Miller records are ordered lists of key-value pairs. For NIDX format, DKVP forma
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --dkvp cat
|
||||
echo x,y,z | mlr --dkvp cat
|
||||
1=x,2=y,3=z
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --dkvp put '$6="a";$4="b";$55="cde"'
|
||||
echo x,y,z | mlr --dkvp put '$6="a";$4="b";$55="cde"'
|
||||
1=x,2=y,3=z,6=a,4=b,55=cde
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --nidx cat
|
||||
echo x,y,z | mlr --nidx cat
|
||||
x,y,z
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --csv --implicit-csv-header cat
|
||||
echo x,y,z | mlr --csv --implicit-csv-header cat
|
||||
1,2,3
|
||||
x,y,z
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --dkvp rename 2,999
|
||||
echo x,y,z | mlr --dkvp rename 2,999
|
||||
1=x,999=y,3=z
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --dkvp rename 2,newname
|
||||
echo x,y,z | mlr --dkvp rename 2,newname
|
||||
1=x,newname=y,3=z
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x,y,z | mlr --csv --implicit-csv-header reorder -f 3,1,2
|
||||
echo x,y,z | mlr --csv --implicit-csv-header reorder -f 3,1,2
|
||||
3,1,2
|
||||
z,x,y
|
||||
|
||||
|
|
@ -307,7 +307,7 @@ Given input like
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat dates.csv
|
||||
cat dates.csv
|
||||
date,event
|
||||
2018-02-03,initialization
|
||||
2018-03-07,discovery
|
||||
|
|
@ -318,7 +318,7 @@ we can use ``strptime`` to parse the date field into seconds-since-epoch and the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv filter 'strptime($date, "%Y-%m-%d") > strptime("2018-03-03", "%Y-%m-%d")' dates.csv
|
||||
mlr --csv filter 'strptime($date, "%Y-%m-%d") > strptime("2018-03-03", "%Y-%m-%d")' dates.csv
|
||||
date,event
|
||||
2018-03-07,discovery
|
||||
|
||||
|
|
@ -332,7 +332,7 @@ How can I handle commas-as-data in various formats?
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat commas.csv
|
||||
cat commas.csv
|
||||
Name,Role
|
||||
"Xiao, Lin",administrator
|
||||
"Khavari, Darius",tester
|
||||
|
|
@ -342,7 +342,7 @@ Likewise :ref:`file-formats-json`:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --ojson cat commas.csv
|
||||
mlr --icsv --ojson cat commas.csv
|
||||
{ "Name": "Xiao, Lin", "Role": "administrator" }
|
||||
{ "Name": "Khavari, Darius", "Role": "tester" }
|
||||
|
||||
|
|
@ -351,7 +351,7 @@ For Miller's :ref:`vertical-tabular format <file-formats-xtab>` there is no esca
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --oxtab cat commas.csv
|
||||
mlr --icsv --oxtab cat commas.csv
|
||||
Name Xiao, Lin
|
||||
Role administrator
|
||||
|
||||
|
|
@ -363,7 +363,7 @@ But for :ref:`Key-value_pairs <file-formats-dkvp>` and :ref:`index-numbered <fil
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --odkvp cat commas.csv
|
||||
mlr --icsv --odkvp cat commas.csv
|
||||
Name=Xiao, Lin,Role=administrator
|
||||
Name=Khavari, Darius,Role=tester
|
||||
|
||||
|
|
@ -372,7 +372,7 @@ One solution is to use a different delimiter, such as a pipe character:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --odkvp --ofs pipe cat commas.csv
|
||||
mlr --icsv --odkvp --ofs pipe cat commas.csv
|
||||
Name=Xiao, Lin|Role=administrator
|
||||
Name=Khavari, Darius|Role=tester
|
||||
|
||||
|
|
@ -382,7 +382,7 @@ characters as delimiters -- here, control-A:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --odkvp --ofs '\001' cat commas.csv | cat -v
|
||||
mlr --icsv --odkvp --ofs '\001' cat commas.csv | cat -v
|
||||
Name=Xiao, Lin^ARole=administrator
|
||||
Name=Khavari, Darius^ARole=tester
|
||||
|
||||
|
|
@ -394,7 +394,7 @@ Simply surround the field names with curly braces:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x.a=3,y:b=4,z/c=5' | mlr put '${product.all} = ${x.a} * ${y:b} * ${z/c}'
|
||||
echo 'x.a=3,y:b=4,z/c=5' | mlr put '${product.all} = ${x.a} * ${y:b} * ${z/c}'
|
||||
x.a=3,y:b=4,z/c=5,product.all=60
|
||||
|
||||
How to escape '?' in regexes?
|
||||
|
|
@ -405,19 +405,19 @@ One way is to use square brackets; an alternative is to use simple string-substi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/question.dat
|
||||
cat data/question.dat
|
||||
a=is it?,b=it is!
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put '$c = gsub($a, "[?]"," ...")' data/question.dat
|
||||
mlr --oxtab put '$c = gsub($a, "[?]"," ...")' data/question.dat
|
||||
a is it?
|
||||
b it is!
|
||||
c is it ...
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put '$c = ssub($a, "?"," ...")' data/question.dat
|
||||
mlr --oxtab put '$c = ssub($a, "?"," ...")' data/question.dat
|
||||
a is it?
|
||||
b it is!
|
||||
c is it ...
|
||||
|
|
@ -436,7 +436,7 @@ This is a little tricky due to the shell's handling of quotes. For simplicity, l
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo a=bcd | mlr put -f data/single-quote-example.mlr
|
||||
echo a=bcd | mlr put -f data/single-quote-example.mlr
|
||||
a=It's OK, I said, then 'for now'.
|
||||
|
||||
So, it's simple: Miller's DSL uses double quotes for strings, and you can put single quotes (or backslash-escaped double-quotes) inside strings, no problem.
|
||||
|
|
@ -446,7 +446,7 @@ Without putting the update expression in a file, it's messier:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo a=bcd | mlr put '$a="It'\''s OK, I said, '\''for now'\''."'
|
||||
echo a=bcd | mlr put '$a="It'\''s OK, I said, '\''for now'\''."'
|
||||
a=It's OK, I said, 'for now'.
|
||||
|
||||
The idea is that the outermost single-quotes are to protect the ``put`` expression from the shell, and the double quotes within them are for Miller. To get a single quote in the middle there, you need to actually put it *outside* the single-quoting for the shell. The pieces are the following, all concatenated together:
|
||||
|
|
@ -467,7 +467,7 @@ Example: columns ``x,i,a`` were requested but they appear here in the order ``a,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -477,7 +477,7 @@ Example: columns ``x,i,a`` were requested but they appear here in the order ``a,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cut -f x,i,a data/small
|
||||
mlr cut -f x,i,a data/small
|
||||
a=pan,i=1,x=0.3467901443380824
|
||||
a=eks,i=2,x=0.7586799647899636
|
||||
a=wye,i=3,x=0.20460330576630303
|
||||
|
|
@ -491,7 +491,7 @@ The solution is to use the ``-o`` option:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cut -o -f x,i,a data/small
|
||||
mlr cut -o -f x,i,a data/small
|
||||
x=0.3467901443380824,i=1,a=pan
|
||||
x=0.7586799647899636,i=2,a=eks
|
||||
x=0.20460330576630303,i=3,a=wye
|
||||
|
|
@ -506,7 +506,7 @@ Given this input data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -518,7 +518,7 @@ why don't I see ``NR=1`` and ``NR=2`` here??
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$x > 0.5' then put '$NR = NR' data/small
|
||||
mlr filter '$x > 0.5' then put '$NR = NR' data/small
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,NR=2
|
||||
a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,NR=5
|
||||
|
||||
|
|
@ -527,7 +527,7 @@ The reason is that ``NR`` is computed for the original input records and isn't d
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x=1,y=2,z=3 | mlr put '$nf1 = NF; $u = 4; $nf2 = NF; unset $x,$y,$z; $nf3 = NF'
|
||||
echo x=1,y=2,z=3 | mlr put '$nf1 = NF; $u = 4; $nf2 = NF; unset $x,$y,$z; $nf3 = NF'
|
||||
nf1=3,u=4,nf2=5,nf3=3
|
||||
|
||||
``NR``, by contrast (and ``FNR`` as well), retains the value from the original input stream, and records may be dropped by a ``filter`` within a ``then``-chain. To recover consecutive record numbers, you can use out-of-stream variables as follows:
|
||||
|
|
@ -535,7 +535,7 @@ The reason is that ``NR`` is computed for the original input records and isn't d
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --from data/small put '
|
||||
mlr --opprint --from data/small put '
|
||||
begin{ @nr1 = 0 }
|
||||
@nr1 += 1;
|
||||
$nr1 = @nr1
|
||||
|
|
@ -555,7 +555,7 @@ Or, simply use ``mlr cat -n``:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$x > 0.5' then cat -n data/small
|
||||
mlr filter '$x > 0.5' then cat -n data/small
|
||||
n=1,a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
n=2,a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729
|
||||
|
||||
|
|
@ -569,7 +569,7 @@ For example, the right file here has nine records, and the left file should add
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsvlite --opprint cat data/join-u-left.csv
|
||||
mlr --icsvlite --opprint cat data/join-u-left.csv
|
||||
hostname ipaddr
|
||||
nadir.east.our.org 10.3.1.18
|
||||
zenith.west.our.org 10.3.1.27
|
||||
|
|
@ -578,7 +578,7 @@ For example, the right file here has nine records, and the left file should add
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsvlite --opprint cat data/join-u-right.csv
|
||||
mlr --icsvlite --opprint cat data/join-u-right.csv
|
||||
ipaddr timestamp bytes
|
||||
10.3.1.27 1448762579 4568
|
||||
10.3.1.18 1448762578 8729
|
||||
|
|
@ -593,7 +593,7 @@ For example, the right file here has nine records, and the left file should add
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsvlite --opprint join -s -j ipaddr -f data/join-u-left.csv data/join-u-right.csv
|
||||
mlr --icsvlite --opprint join -s -j ipaddr -f data/join-u-left.csv data/join-u-right.csv
|
||||
ipaddr hostname timestamp bytes
|
||||
10.3.1.27 zenith.west.our.org 1448762579 4568
|
||||
10.4.5.94 apoapsis.east.our.org 1448762579 17445
|
||||
|
|
@ -607,7 +607,7 @@ The solution (besides pre-sorting the input files on the join keys) is to simply
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsvlite --opprint join -u -j ipaddr -f data/join-u-left.csv data/join-u-right.csv
|
||||
mlr --icsvlite --opprint join -u -j ipaddr -f data/join-u-left.csv data/join-u-right.csv
|
||||
ipaddr hostname timestamp bytes
|
||||
10.3.1.27 zenith.west.our.org 1448762579 4568
|
||||
10.3.1.18 nadir.east.our.org 1448762578 8729
|
||||
|
|
@ -644,7 +644,7 @@ Joining on color the results are as expected:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv join -j id -f data/color-codes.csv data/color-names.csv
|
||||
mlr --csv join -j id -f data/color-codes.csv data/color-names.csv
|
||||
id,code,color
|
||||
4,ff0000,red
|
||||
2,00ff00,green
|
||||
|
|
@ -654,7 +654,7 @@ However, if we ask for left-unpaireds, since there's no ``color`` column, we get
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv join --ul -j id -f data/color-codes.csv data/color-names.csv
|
||||
mlr --csv join --ul -j id -f data/color-codes.csv data/color-names.csv
|
||||
id,code,color
|
||||
4,ff0000,red
|
||||
2,00ff00,green
|
||||
|
|
@ -667,7 +667,7 @@ To fix this, we can use **unsparsify**:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv join --ul -j id -f data/color-codes.csv then unsparsify --fill-with "" data/color-names.csv
|
||||
mlr --csv join --ul -j id -f data/color-codes.csv then unsparsify --fill-with "" data/color-names.csv
|
||||
id,code,color
|
||||
4,ff0000,red
|
||||
2,00ff00,green
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Miller respects CSV headers. If you do ``mlr --csv cat *.csv`` then the header l
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/a.csv
|
||||
cat data/a.csv
|
||||
a,b,c
|
||||
1,2,3
|
||||
4,5,6
|
||||
|
|
@ -22,14 +22,14 @@ Miller respects CSV headers. If you do ``mlr --csv cat *.csv`` then the header l
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/b.csv
|
||||
cat data/b.csv
|
||||
a,b,c
|
||||
7,8,9
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv cat data/a.csv data/b.csv
|
||||
mlr --csv cat data/a.csv data/b.csv
|
||||
a,b,c
|
||||
1,2,3
|
||||
4,5,6
|
||||
|
|
@ -38,7 +38,7 @@ Miller respects CSV headers. If you do ``mlr --csv cat *.csv`` then the header l
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv sort -nr b data/a.csv data/b.csv
|
||||
mlr --csv sort -nr b data/a.csv data/b.csv
|
||||
a,b,c
|
||||
7,8,9
|
||||
4,5,6
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ Examples
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --usage-data-format-examples
|
||||
mlr --usage-data-format-examples
|
||||
DKVP: delimited key-value pairs (Miller default format)
|
||||
+---------------------+
|
||||
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
|
|
@ -121,7 +121,7 @@ Miller's default file format is DKVP, for **delimited key-value pairs**. Example
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/small
|
||||
mlr cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -175,7 +175,7 @@ Example with index-numbered output:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -185,7 +185,7 @@ Example with index-numbered output:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --onidx --ofs ' ' cat data/small
|
||||
mlr --onidx --ofs ' ' cat data/small
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797
|
||||
wye wye 3 0.20460330576630303 0.33831852551664776
|
||||
|
|
@ -197,7 +197,7 @@ Example with index-numbered input:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/mydata.txt
|
||||
cat data/mydata.txt
|
||||
oh say can you see
|
||||
by the dawn's
|
||||
early light
|
||||
|
|
@ -205,7 +205,7 @@ Example with index-numbered input:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --inidx --ifs ' ' --odkvp cat data/mydata.txt
|
||||
mlr --inidx --ifs ' ' --odkvp cat data/mydata.txt
|
||||
1=oh,2=say,3=can,4=you,5=see
|
||||
1=by,2=the,3=dawn's
|
||||
1=early,2=light
|
||||
|
|
@ -215,7 +215,7 @@ Example with index-numbered input and output:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/mydata.txt
|
||||
cat data/mydata.txt
|
||||
oh say can you see
|
||||
by the dawn's
|
||||
early light
|
||||
|
|
@ -223,7 +223,7 @@ Example with index-numbered input and output:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --nidx --fs ' ' --repifs cut -f 2,3 data/mydata.txt
|
||||
mlr --nidx --fs ' ' --repifs cut -f 2,3 data/mydata.txt
|
||||
say can
|
||||
the dawn's
|
||||
light
|
||||
|
|
@ -245,14 +245,14 @@ An **array of single-level objects** is, quite simply, **a table**:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json head -n 2 then cut -f color,shape data/json-example-1.json
|
||||
mlr --json head -n 2 then cut -f color,shape data/json-example-1.json
|
||||
{ "color": "yellow", "shape": "triangle" }
|
||||
{ "color": "red", "shape": "square" }
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json --jvstack head -n 2 then cut -f color,u,v data/json-example-1.json
|
||||
mlr --json --jvstack head -n 2 then cut -f color,u,v data/json-example-1.json
|
||||
{
|
||||
"color": "yellow",
|
||||
"u": 0.6321695890307647,
|
||||
|
|
@ -267,7 +267,7 @@ An **array of single-level objects** is, quite simply, **a table**:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --opprint stats1 -a mean,stddev,count -f u -g shape data/json-example-1.json
|
||||
mlr --ijson --opprint stats1 -a mean,stddev,count -f u -g shape data/json-example-1.json
|
||||
shape u_mean u_stddev u_count
|
||||
triangle 0.583995 0.131184 3
|
||||
square 0.409355 0.365428 4
|
||||
|
|
@ -281,7 +281,7 @@ Additionally, Miller can **tabularize nested objects by concatenating keys**:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json --jvstack head -n 2 data/json-example-2.json
|
||||
mlr --json --jvstack head -n 2 data/json-example-2.json
|
||||
{
|
||||
"flag": 1,
|
||||
"i": 11,
|
||||
|
|
@ -314,7 +314,7 @@ Additionally, Miller can **tabularize nested objects by concatenating keys**:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --opprint head -n 4 data/json-example-2.json
|
||||
mlr --ijson --opprint head -n 4 data/json-example-2.json
|
||||
flag i attributes:color attributes:shape values:u values:v values:w values:x
|
||||
1 11 yellow triangle 0.632170 0.988721 0.436498 5.798188
|
||||
1 15 red square 0.219668 0.001257 0.792778 2.944117
|
||||
|
|
@ -326,7 +326,7 @@ Note in particular that as far as Miller's ``put`` and ``filter``, as well as ot
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json --jvstack head -n 1 then put '${values:uv} = ${values:u} * ${values:v}' data/json-example-2.json
|
||||
mlr --json --jvstack head -n 1 then put '${values:uv} = ${values:u} * ${values:v}' data/json-example-2.json
|
||||
{
|
||||
"flag": 1,
|
||||
"i": 11,
|
||||
|
|
@ -353,7 +353,7 @@ Suppose we have arrays like this in our input data:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/json-example-3.json
|
||||
cat data/json-example-3.json
|
||||
{
|
||||
"label": "orange",
|
||||
"values": [12.2, 13.8, 17.2]
|
||||
|
|
@ -368,7 +368,7 @@ Then integer indices (starting from 0 and counting up) are used as map keys:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ijson --oxtab cat data/json-example-3.json
|
||||
mlr --ijson --oxtab cat data/json-example-3.json
|
||||
label orange
|
||||
values:0 12.2
|
||||
values:1 13.8
|
||||
|
|
@ -383,7 +383,7 @@ When the data are written back out as JSON, field names are re-expanded as above
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --json --jvstack cat data/json-example-3.json
|
||||
mlr --json --jvstack cat data/json-example-3.json
|
||||
{
|
||||
"label": "orange",
|
||||
"values": {
|
||||
|
|
@ -440,7 +440,7 @@ Miller's pretty-print format is like CSV, but column-aligned. For example, comp
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ocsv cat data/small
|
||||
mlr --ocsv cat data/small
|
||||
a,b,i,x,y
|
||||
pan,pan,1,0.3467901443380824,0.7268028627434533
|
||||
eks,pan,2,0.7586799647899636,0.5221511083334797
|
||||
|
|
@ -451,7 +451,7 @@ Miller's pretty-print format is like CSV, but column-aligned. For example, comp
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint cat data/small
|
||||
mlr --opprint cat data/small
|
||||
a b i x y
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797
|
||||
|
|
@ -468,7 +468,7 @@ For output only (this isn't supported in the input-scanner as of 5.0.0) you can
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --barred cat data/small
|
||||
mlr --opprint --barred cat data/small
|
||||
+-----+-----+---+---------------------+---------------------+
|
||||
| a | b | i | x | y |
|
||||
+-----+-----+---+---------------------+---------------------+
|
||||
|
|
@ -550,7 +550,7 @@ Markdown format looks like this:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --omd cat data/small
|
||||
mlr --omd cat data/small
|
||||
| a | b | i | x | y |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| pan | pan | 1 | 0.3467901443380824 | 0.7268028627434533 |
|
||||
|
|
@ -573,7 +573,7 @@ While you can do format conversion using ``mlr --icsv --ojson cat myfile.csv``,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --usage-format-conversion-keystroke-saver-options
|
||||
mlr --usage-format-conversion-keystroke-saver-options
|
||||
As keystroke-savers for format-conversion you may use the following:
|
||||
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
|
||||
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
|
||||
|
|
@ -609,7 +609,7 @@ You can include comments within your data files, and either have them ignored, o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --usage-comments-in-data
|
||||
mlr --usage-comments-in-data
|
||||
--skip-comments Ignore commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
|
|
@ -632,7 +632,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/budget.csv
|
||||
cat data/budget.csv
|
||||
# Asana -- here are the budget figures you asked for!
|
||||
type,quantity
|
||||
purple,456.78
|
||||
|
|
@ -642,7 +642,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --skip-comments --icsv --opprint sort -nr quantity data/budget.csv
|
||||
mlr --skip-comments --icsv --opprint sort -nr quantity data/budget.csv
|
||||
type quantity
|
||||
green 678.12
|
||||
purple 456.78
|
||||
|
|
@ -651,7 +651,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --pass-comments --icsv --opprint sort -nr quantity data/budget.csv
|
||||
mlr --pass-comments --icsv --opprint sort -nr quantity data/budget.csv
|
||||
# Asana -- here are the budget figures you asked for!
|
||||
type quantity
|
||||
green 678.12
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ Suppose your program has printed something like this (`log.txt <./log.txt>`_):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat log.txt
|
||||
cat log.txt
|
||||
op=enter,time=1472819681
|
||||
op=cache,type=A9,hit=0
|
||||
op=cache,type=A4,hit=1
|
||||
|
|
@ -65,7 +65,7 @@ Each print statement simply contains local information: the current timestamp, w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ grep op=cache log.txt \
|
||||
grep op=cache log.txt \
|
||||
| mlr --idkvp --opprint stats1 -a mean -f hit -g type then sort -f type
|
||||
type hit_mean
|
||||
A1 0.857143
|
||||
|
|
@ -75,7 +75,7 @@ Each print statement simply contains local information: the current timestamp, w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from log.txt --opprint \
|
||||
mlr --from log.txt --opprint \
|
||||
filter 'is_present($batch_size)' \
|
||||
then step -a delta -f time,num_filtered \
|
||||
then sec2gmt time
|
||||
|
|
@ -92,7 +92,7 @@ Alternatively, we can simply group the similar data for a better look:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint group-like log.txt
|
||||
mlr --opprint group-like log.txt
|
||||
op time
|
||||
enter 1472819681
|
||||
|
||||
|
|
@ -145,7 +145,7 @@ Alternatively, we can simply group the similar data for a better look:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint group-like then sec2gmt time log.txt
|
||||
mlr --opprint group-like then sec2gmt time log.txt
|
||||
op time
|
||||
enter 2016-09-02T12:34:41Z
|
||||
|
||||
|
|
|
|||
|
|
@ -68,14 +68,13 @@ def run_command(cmd, output_handle)
|
|||
end
|
||||
# The command can be multi-line
|
||||
cmd_lines = cmd.split(/\n/)
|
||||
cmd_line_1 = cmd_lines.shift
|
||||
write_card(true, ['$ '+cmd_line_1] + cmd_lines + cmd_output.split(/\n/), output_handle)
|
||||
write_card(true, cmd_lines + cmd_output.split(/\n/), output_handle)
|
||||
end
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
def run_command_tolerating_error(cmd, output_handle)
|
||||
cmd_output = `#{cmd} 2>&1`
|
||||
write_card(true, ['$ '+cmd] + cmd_output.split(/\n/), output_handle)
|
||||
write_card(true, [cmd] + cmd_output.split(/\n/), output_handle)
|
||||
end
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Miller simply prints a newline and a new header when there is a schema change. W
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/het.dkvp
|
||||
cat data/het.dkvp
|
||||
resource=/path/to/file,loadsec=0.45,ok=true
|
||||
record_count=100,resource=/path/to/file
|
||||
resource=/path/to/second/file,loadsec=0.32,ok=true
|
||||
|
|
@ -29,7 +29,7 @@ Miller simply prints a newline and a new header when there is a schema change. W
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ocsvlite cat data/het.dkvp
|
||||
mlr --ocsvlite cat data/het.dkvp
|
||||
resource,loadsec,ok
|
||||
/path/to/file,0.45,true
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ Miller simply prints a newline and a new header when there is a schema change. W
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint cat data/het.dkvp
|
||||
mlr --opprint cat data/het.dkvp
|
||||
resource loadsec ok
|
||||
/path/to/file 0.45 true
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ Miller handles explicit header changes as just shown. If your CSV input contains
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/ragged.csv
|
||||
cat data/ragged.csv
|
||||
a,b,c
|
||||
1,2,3
|
||||
4,5
|
||||
|
|
@ -78,7 +78,7 @@ Miller handles explicit header changes as just shown. If your CSV input contains
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --icsv --oxtab --allow-ragged-csv-input cat data/ragged.csv
|
||||
mlr --icsv --oxtab --allow-ragged-csv-input cat data/ragged.csv
|
||||
a 1
|
||||
b 2
|
||||
c 3
|
||||
|
|
@ -97,7 +97,7 @@ You may also find Miller's ``group-like`` feature handy (see also :doc:`referenc
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --ocsvlite group-like data/het.dkvp
|
||||
mlr --ocsvlite group-like data/het.dkvp
|
||||
resource,loadsec,ok
|
||||
/path/to/file,0.45,true
|
||||
/path/to/second/file,0.32,true
|
||||
|
|
@ -110,7 +110,7 @@ You may also find Miller's ``group-like`` feature handy (see also :doc:`referenc
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint group-like data/het.dkvp
|
||||
mlr --opprint group-like data/het.dkvp
|
||||
resource loadsec ok
|
||||
/path/to/file 0.45 true
|
||||
/path/to/second/file 0.32 true
|
||||
|
|
@ -128,7 +128,7 @@ For these formats, record-heterogeneity comes naturally:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/het.dkvp
|
||||
cat data/het.dkvp
|
||||
resource=/path/to/file,loadsec=0.45,ok=true
|
||||
record_count=100,resource=/path/to/file
|
||||
resource=/path/to/second/file,loadsec=0.32,ok=true
|
||||
|
|
@ -138,7 +138,7 @@ For these formats, record-heterogeneity comes naturally:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --onidx --ofs ' ' cat data/het.dkvp
|
||||
mlr --onidx --ofs ' ' cat data/het.dkvp
|
||||
/path/to/file 0.45 true
|
||||
100 /path/to/file
|
||||
/path/to/second/file 0.32 true
|
||||
|
|
@ -148,7 +148,7 @@ For these formats, record-heterogeneity comes naturally:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab cat data/het.dkvp
|
||||
mlr --oxtab cat data/het.dkvp
|
||||
resource /path/to/file
|
||||
loadsec 0.45
|
||||
ok true
|
||||
|
|
@ -170,7 +170,7 @@ For these formats, record-heterogeneity comes naturally:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab group-like data/het.dkvp
|
||||
mlr --oxtab group-like data/het.dkvp
|
||||
resource /path/to/file
|
||||
loadsec 0.45
|
||||
ok true
|
||||
|
|
@ -197,7 +197,7 @@ Miller operates on specified fields and takes the rest along: for example, if yo
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/sort-het.dkvp
|
||||
cat data/sort-het.dkvp
|
||||
count=500,color=green
|
||||
count=600
|
||||
status=ok,count=250,hours=0.22
|
||||
|
|
@ -209,7 +209,7 @@ Miller operates on specified fields and takes the rest along: for example, if yo
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr sort -n count data/sort-het.dkvp
|
||||
mlr sort -n count data/sort-het.dkvp
|
||||
count=100,color=green
|
||||
status=ok,count=200,hours=3.4
|
||||
status=ok,count=250,hours=0.22
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr stats1 -a sum -f x -g a data/small
|
||||
mlr stats1 -a sum -f x -g a data/small
|
||||
a=pan,x_sum=0.346790
|
||||
a=eks,x_sum=1.140079
|
||||
a=wye,x_sum=0.777892
|
||||
|
|
@ -30,7 +30,7 @@ Example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@x_sum[$a] += $x; end{emit @x_sum, "a"}' data/small
|
||||
mlr put -q '@x_sum[$a] += $x; end{emit @x_sum, "a"}' data/small
|
||||
a=pan,x_sum=0.346790
|
||||
a=eks,x_sum=1.140079
|
||||
a=wye,x_sum=0.777892
|
||||
|
|
@ -48,7 +48,7 @@ The essential usages of ``mlr filter`` and ``mlr put`` are for record-selection
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -60,7 +60,7 @@ you might retain only the records whose ``a`` field has value ``eks``:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$a == "eks"' data/small
|
||||
mlr filter '$a == "eks"' data/small
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ or you might add a new field which is a function of existing fields:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$ab = $a . "_" . $b ' data/small
|
||||
mlr put '$ab = $a . "_" . $b ' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,ab=pan_pan
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,ab=eks_pan
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,ab=wye_wye
|
||||
|
|
@ -99,7 +99,7 @@ Multiple expressions may be given, separated by semicolons, and each may refer t
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ ruby -e '10.times{|i|puts "i=#{i}"}' | mlr --opprint put '$j = $i + 1; $k = $i +$j'
|
||||
ruby -e '10.times{|i|puts "i=#{i}"}' | mlr --opprint put '$j = $i + 1; $k = $i +$j'
|
||||
i j k
|
||||
0 1 1
|
||||
1 2 3
|
||||
|
|
@ -117,7 +117,7 @@ Newlines within the expression are ignored, which can help increase legibility o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '
|
||||
mlr --opprint put '
|
||||
$nf = NF;
|
||||
$nr = NR;
|
||||
$fnr = FNR;
|
||||
|
|
@ -139,7 +139,7 @@ Newlines within the expression are ignored, which can help increase legibility o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint filter '($x > 0.5 && $y < 0.5) || ($x < 0.5 && $y > 0.5)' then stats2 -a corr -f x,y data/medium
|
||||
mlr --opprint filter '($x > 0.5 && $y < 0.5) || ($x < 0.5 && $y > 0.5)' then stats2 -a corr -f x,y data/medium
|
||||
x_y_corr
|
||||
-0.747994
|
||||
|
||||
|
|
@ -153,7 +153,7 @@ The simplest way to enter expressions for ``put`` and ``filter`` is between sing
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put '$xy = sqrt($x**2 + $y**2)'
|
||||
mlr --from data/small put '$xy = sqrt($x**2 + $y**2)'
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,xy=0.805299
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,xy=0.920998
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,xy=0.395376
|
||||
|
|
@ -163,7 +163,7 @@ The simplest way to enter expressions for ``put`` and ``filter`` is between sing
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put 'func f(a, b) { return sqrt(a**2 + b**2) } $xy = f($x, $y)'
|
||||
mlr --from data/small put 'func f(a, b) { return sqrt(a**2 + b**2) } $xy = f($x, $y)'
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,xy=0.805299
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,xy=0.920998
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,xy=0.395376
|
||||
|
|
@ -176,7 +176,7 @@ You may, though, find it convenient to put expressions into files for reuse, and
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/fe-example-3.mlr
|
||||
cat data/fe-example-3.mlr
|
||||
func f(a, b) {
|
||||
return sqrt(a**2 + b**2)
|
||||
}
|
||||
|
|
@ -185,7 +185,7 @@ You may, though, find it convenient to put expressions into files for reuse, and
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put -f data/fe-example-3.mlr
|
||||
mlr --from data/small put -f data/fe-example-3.mlr
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,xy=0.805299
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,xy=0.920998
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,xy=0.395376
|
||||
|
|
@ -197,7 +197,7 @@ If you have some of the logic in a file and you want to write the rest on the co
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/fe-example-4.mlr
|
||||
cat data/fe-example-4.mlr
|
||||
func f(a, b) {
|
||||
return sqrt(a**2 + b**2)
|
||||
}
|
||||
|
|
@ -205,7 +205,7 @@ If you have some of the logic in a file and you want to write the rest on the co
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put -f data/fe-example-4.mlr -e '$xy = f($x, $y)'
|
||||
mlr --from data/small put -f data/fe-example-4.mlr -e '$xy = f($x, $y)'
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,xy=0.805299
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,xy=0.920998
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,xy=0.395376
|
||||
|
|
@ -235,13 +235,13 @@ Semicolons are optional after closing curly braces (which close conditionals and
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x=1,y=2 | mlr put 'while (NF < 10) { $[NF+1] = ""} $foo = "bar"'
|
||||
echo x=1,y=2 | mlr put 'while (NF < 10) { $[NF+1] = ""} $foo = "bar"'
|
||||
x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x=1,y=2 | mlr put 'while (NF < 10) { $[NF+1] = ""}; $foo = "bar"'
|
||||
echo x=1,y=2 | mlr put 'while (NF < 10) { $[NF+1] = ""}; $foo = "bar"'
|
||||
x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
|
||||
|
||||
Semicolons are required between statements even if those statements are on separate lines. **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines:
|
||||
|
|
@ -263,7 +263,7 @@ Semicolons are required between statements even if those statements are on separ
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csvlite --from data/a.csv put '
|
||||
mlr --csvlite --from data/a.csv put '
|
||||
func f(
|
||||
num a,
|
||||
num b,
|
||||
|
|
@ -325,7 +325,7 @@ Namely, Miller supports the following five built-in variables for :doc:`filter a
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter 'FNR == 2' data/small*
|
||||
mlr filter 'FNR == 2' data/small*
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
1=pan,2=pan,3=1,4=0.3467901443380824,5=0.7268028627434533
|
||||
a=wye,b=eks,i=10000,x=0.734806020620654365,y=0.884788571337605134
|
||||
|
|
@ -333,7 +333,7 @@ Namely, Miller supports the following five built-in variables for :doc:`filter a
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$fnr = FNR' data/small*
|
||||
mlr put '$fnr = FNR' data/small*
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,fnr=1
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,fnr=2
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,fnr=3
|
||||
|
|
@ -358,7 +358,7 @@ Their **scope is global**: you can refer to them in any ``filter`` or ``put`` st
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv put '$nr = NR' data/a.csv
|
||||
mlr --csv put '$nr = NR' data/a.csv
|
||||
a,b,c,nr
|
||||
1,2,3,1
|
||||
4,5,6,2
|
||||
|
|
@ -366,7 +366,7 @@ Their **scope is global**: you can refer to them in any ``filter`` or ``put`` st
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --csv repeat -n 3 then put '$nr = NR' data/a.csv
|
||||
mlr --csv repeat -n 3 then put '$nr = NR' data/a.csv
|
||||
a,b,c,nr
|
||||
1,2,3,1
|
||||
1,2,3,1
|
||||
|
|
@ -391,12 +391,12 @@ You may also use a **computed field name** in square brackets, e.g.
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo a=3,b=4 | mlr filter '$["x"] < 0.5'
|
||||
echo a=3,b=4 | mlr filter '$["x"] < 0.5'
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo s=green,t=blue,a=3,b=4 | mlr put '$[$s."_".$t] = $a * $b'
|
||||
echo s=green,t=blue,a=3,b=4 | mlr put '$[$s."_".$t] = $a * $b'
|
||||
s=green,t=blue,a=3,b=4,green_blue=12
|
||||
|
||||
Notes:
|
||||
|
|
@ -421,7 +421,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/small
|
||||
mlr cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -431,7 +431,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$[[3]] = "NEW"' data/small
|
||||
mlr put '$[[3]] = "NEW"' data/small
|
||||
a=pan,b=pan,NEW=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,NEW=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,NEW=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -441,7 +441,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$[[[3]]] = "NEW"' data/small
|
||||
mlr put '$[[[3]]] = "NEW"' data/small
|
||||
a=pan,b=pan,i=NEW,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=NEW,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=NEW,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -451,7 +451,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$NEW = $[[NR]]' data/small
|
||||
mlr put '$NEW = $[[NR]]' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,NEW=a
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,NEW=b
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,NEW=i
|
||||
|
|
@ -461,7 +461,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$NEW = $[[[NR]]]' data/small
|
||||
mlr put '$NEW = $[[[NR]]]' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,NEW=pan
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,NEW=pan
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,NEW=3
|
||||
|
|
@ -471,7 +471,7 @@ Then using a computed field name, ``$[ $[[3]] ]`` is the value in the third fiel
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$[[[NR]]] = "NEW"' data/small
|
||||
mlr put '$[[[NR]]] = "NEW"' data/small
|
||||
a=NEW,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=NEW,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=NEW,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -483,7 +483,7 @@ Right-hand side accesses to non-existent fields -- i.e. with index less than 1 o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$[[6]] = "NEW"' data/small
|
||||
mlr put '$[[6]] = "NEW"' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -493,7 +493,7 @@ Right-hand side accesses to non-existent fields -- i.e. with index less than 1 o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$[[[6]]] = "NEW"' data/small
|
||||
mlr put '$[[[6]]] = "NEW"' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -514,7 +514,7 @@ You may use a **computed key** in square brackets, e.g.
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo s=green,t=blue,a=3,b=4 | mlr put -q '@[$s."_".$t] = $a * $b; emit all'
|
||||
echo s=green,t=blue,a=3,b=4 | mlr put -q '@[$s."_".$t] = $a * $b; emit all'
|
||||
green_blue=12
|
||||
|
||||
Out-of-stream variables are **scoped** to the ``put`` command in which they appear. In particular, if you have two or more ``put`` commands separated by ``then``, each put will have its own set of out-of-stream variables:
|
||||
|
|
@ -522,14 +522,14 @@ Out-of-stream variables are **scoped** to the ``put`` command in which they appe
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/a.dkvp
|
||||
cat data/a.dkvp
|
||||
a=1,b=2,c=3
|
||||
a=4,b=5,c=6
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '@sum += $a; end {emit @sum}' then put 'is_present($a) {$a=10*$a; @sum += $a}; end {emit @sum}' data/a.dkvp
|
||||
mlr put '@sum += $a; end {emit @sum}' then put 'is_present($a) {$a=10*$a; @sum += $a}; end {emit @sum}' data/a.dkvp
|
||||
a=10,b=2,c=3
|
||||
a=40,b=5,c=6
|
||||
sum=5
|
||||
|
|
@ -547,7 +547,7 @@ Using an index on the ``@count`` and ``@sum`` variables, we get the benefit of t
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '
|
||||
mlr put -q '
|
||||
@x_count[$a] += 1;
|
||||
@x_sum[$a] += $x;
|
||||
end {
|
||||
|
|
@ -569,7 +569,7 @@ Using an index on the ``@count`` and ``@sum`` variables, we get the benefit of t
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr stats1 -a count,sum -f x -g a ../data/small
|
||||
mlr stats1 -a count,sum -f x -g a ../data/small
|
||||
a=pan,x_count=2,x_sum=0.849416
|
||||
a=eks,x_count=3,x_sum=1.751863
|
||||
a=wye,x_count=2,x_sum=0.777892
|
||||
|
|
@ -581,7 +581,7 @@ Indices can be arbitrarily deep -- here there are two or more of them:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/medium put -q '
|
||||
mlr --from data/medium put -q '
|
||||
@x_count[$a][$b] += 1;
|
||||
@x_sum[$a][$b] += $x;
|
||||
end {
|
||||
|
|
@ -621,7 +621,7 @@ Begin/end blocks can be mixed with pattern/action blocks. For example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '
|
||||
mlr put '
|
||||
begin {
|
||||
@num_total = 0;
|
||||
@num_positive = 0;
|
||||
|
|
@ -654,7 +654,7 @@ For example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ # Here I'm using a specified random-number seed so this example always
|
||||
# Here I'm using a specified random-number seed so this example always
|
||||
# produces the same output for this web document: in everyday practice we
|
||||
# would leave off the --seed 12345 part.
|
||||
mlr --seed 12345 seqgen --start 1 --stop 10 then put '
|
||||
|
|
@ -710,7 +710,7 @@ The following example demonstrates the scope rules:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/scope-example.mlr
|
||||
cat data/scope-example.mlr
|
||||
func f(a) { # argument is local to the function
|
||||
var b = 100; # local to the function
|
||||
c = 100; # local to the function; does not overwrite outer c
|
||||
|
|
@ -738,7 +738,7 @@ The following example demonstrates the scope rules:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/scope-example.dat
|
||||
cat data/scope-example.dat
|
||||
n=1,x=123
|
||||
n=2,x=456
|
||||
n=3,x=789
|
||||
|
|
@ -746,7 +746,7 @@ The following example demonstrates the scope rules:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab --from data/scope-example.dat put -f data/scope-example.mlr
|
||||
mlr --oxtab --from data/scope-example.dat put -f data/scope-example.mlr
|
||||
n 1
|
||||
x 123
|
||||
outer_a 10
|
||||
|
|
@ -774,7 +774,7 @@ And this example demonstrates the type-declaration rules:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/type-decl-example.mlr
|
||||
cat data/type-decl-example.mlr
|
||||
subr s(a, str b, int c) { # a is implicitly var (untyped).
|
||||
# b is explicitly str.
|
||||
# c is explicitly int.
|
||||
|
|
@ -823,7 +823,7 @@ For example, the following swaps the input stream's ``a`` and ``i`` fields, modi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put '
|
||||
mlr --opprint put '
|
||||
$* = {
|
||||
"a": $i,
|
||||
"i": $a,
|
||||
|
|
@ -842,7 +842,7 @@ Likewise, you can assign map literals to out-of-stream variables or local variab
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put '
|
||||
mlr --from data/small put '
|
||||
func f(map m): map {
|
||||
m["x"] *= 200;
|
||||
return m;
|
||||
|
|
@ -860,7 +860,7 @@ Like out-of-stream and local variables, map literals can be multi-level:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put -q '
|
||||
mlr --from data/small put -q '
|
||||
begin {
|
||||
@o = {
|
||||
"nrec": 0,
|
||||
|
|
@ -908,7 +908,7 @@ The following ``is...`` functions take a value and return a boolean indicating w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -F | grep ^is
|
||||
mlr -F | grep ^is
|
||||
is_absent
|
||||
is_bool
|
||||
is_boolean
|
||||
|
|
@ -929,7 +929,7 @@ The following ``is...`` functions take a value and return a boolean indicating w
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -F | grep ^assert
|
||||
mlr -F | grep ^assert
|
||||
asserting_absent
|
||||
asserting_bool
|
||||
asserting_boolean
|
||||
|
|
@ -1040,7 +1040,7 @@ Example recursive copy of out-of-stream variables:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put -q '@v["sum"] += $x; @v["count"] += 1; end{dump; @w = @v; dump}' data/small
|
||||
mlr --opprint put -q '@v["sum"] += $x; @v["count"] += 1; end{dump; @w = @v; dump}' data/small
|
||||
{
|
||||
"v": {
|
||||
"sum": 2.264762,
|
||||
|
|
@ -1063,7 +1063,7 @@ Example of out-of-stream variable assigned to full stream record, where the 2nd
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put 'NR == 2 {@keep = $*}; NR == 4 {$* = @keep}' data/small
|
||||
mlr put 'NR == 2 {@keep = $*}; NR == 4 {$* = @keep}' data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -1075,7 +1075,7 @@ Example of full stream record assigned to an out-of-stream variable, finding the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -1085,7 +1085,7 @@ Example of full stream record assigned to an out-of-stream variable, finding the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint put -q 'is_null(@xmax) || $x > @xmax {@xmax=$x; @recmax=$*}; end {emit @recmax}' data/small
|
||||
mlr --opprint put -q 'is_null(@xmax) || $x > @xmax {@xmax=$x; @recmax=$*}; end {emit @recmax}' data/small
|
||||
a b i x y
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797
|
||||
|
||||
|
|
@ -1095,7 +1095,7 @@ Keywords for filter and put
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-all-keywords
|
||||
mlr --help-all-keywords
|
||||
all: used in "emit", "emitp", and "unset" as a synonym for @*
|
||||
|
||||
begin: defines a block of statements to be executed before input records
|
||||
|
|
@ -1440,7 +1440,7 @@ These are reminiscent of ``awk`` syntax. They can be used to allow assignments
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/put-gating-example-1.dkvp
|
||||
mlr cat data/put-gating-example-1.dkvp
|
||||
x=-1
|
||||
x=0
|
||||
x=1
|
||||
|
|
@ -1450,7 +1450,7 @@ These are reminiscent of ``awk`` syntax. They can be used to allow assignments
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$x > 0.0 { $y = log10($x); $z = sqrt($y) }' data/put-gating-example-1.dkvp
|
||||
mlr put '$x > 0.0 { $y = log10($x); $z = sqrt($y) }' data/put-gating-example-1.dkvp
|
||||
x=-1
|
||||
x=0
|
||||
x=1,y=0.000000,z=0.000000
|
||||
|
|
@ -1460,7 +1460,7 @@ These are reminiscent of ``awk`` syntax. They can be used to allow assignments
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/put-gating-example-2.dkvp
|
||||
mlr cat data/put-gating-example-2.dkvp
|
||||
a=abc_123
|
||||
a=some other name
|
||||
a=xyz_789
|
||||
|
|
@ -1468,7 +1468,7 @@ These are reminiscent of ``awk`` syntax. They can be used to allow assignments
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$a =~ "([a-z]+)_([0-9]+)" { $b = "left_\1"; $c = "right_\2" }' data/put-gating-example-2.dkvp
|
||||
mlr put '$a =~ "([a-z]+)_([0-9]+)" { $b = "left_\1"; $c = "right_\2" }' data/put-gating-example-2.dkvp
|
||||
a=abc_123,b=left_abc,c=right_123
|
||||
a=some other name
|
||||
a=xyz_789,b=left_xyz,c=right_789
|
||||
|
|
@ -1478,7 +1478,7 @@ This produces heterogeneous output which Miller, of course, has no problems with
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$x > 0.0; $y = log10($x); $z = sqrt($y)' data/put-gating-example-1.dkvp
|
||||
mlr put '$x > 0.0; $y = log10($x); $z = sqrt($y)' data/put-gating-example-1.dkvp
|
||||
x=-1,y=nan,z=nan
|
||||
x=0,y=-inf,z=nan
|
||||
x=1,y=0.000000,z=0.000000
|
||||
|
|
@ -1488,7 +1488,7 @@ This produces heterogeneous output which Miller, of course, has no problems with
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$a =~ "([a-z]+)_([0-9]+)"; $b = "left_\1"; $c = "right_\2"' data/put-gating-example-2.dkvp
|
||||
mlr put '$a =~ "([a-z]+)_([0-9]+)"; $b = "left_\1"; $c = "right_\2"' data/put-gating-example-2.dkvp
|
||||
a=abc_123,b=left_abc,c=right_123
|
||||
a=some other name,b=left_,c=right_
|
||||
a=xyz_789,b=left_xyz,c=right_789
|
||||
|
|
@ -1530,7 +1530,7 @@ Miller's ``while`` and ``do-while`` are unsurprising in comparison to various la
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x=1,y=2 | mlr put '
|
||||
echo x=1,y=2 | mlr put '
|
||||
while (NF < 10) {
|
||||
$[NF+1] = ""
|
||||
}
|
||||
|
|
@ -1541,7 +1541,7 @@ Miller's ``while`` and ``do-while`` are unsurprising in comparison to various la
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo x=1,y=2 | mlr put '
|
||||
echo x=1,y=2 | mlr put '
|
||||
do {
|
||||
$[NF+1] = "";
|
||||
if (NF == 5) {
|
||||
|
|
@ -1570,7 +1570,7 @@ The ``key`` variable is always bound to the *key* of key-value pairs:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small put '
|
||||
mlr --from data/small put '
|
||||
print "NR = ".NR;
|
||||
for (key in $*) {
|
||||
value = $[key];
|
||||
|
|
@ -1617,7 +1617,7 @@ The ``key`` variable is always bound to the *key* of key-value pairs:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put '
|
||||
mlr -n put '
|
||||
end {
|
||||
o = {1:2, 3:{4:5}};
|
||||
for (key in o) {
|
||||
|
|
@ -1638,7 +1638,7 @@ Single-level keys may be gotten at using either ``for(k,v)`` or ``for((k),v)``;
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/for-srec-example.tbl
|
||||
cat data/for-srec-example.tbl
|
||||
label1 label2 f1 f2 f3
|
||||
blue green 100 240 350
|
||||
red green 120 11 195
|
||||
|
|
@ -1647,7 +1647,7 @@ Single-level keys may be gotten at using either ``for(k,v)`` or ``for((k),v)``;
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --pprint --from data/for-srec-example.tbl put '
|
||||
mlr --pprint --from data/for-srec-example.tbl put '
|
||||
$sum1 = $f1 + $f2 + $f3;
|
||||
$sum2 = 0;
|
||||
$sum3 = 0;
|
||||
|
|
@ -1666,7 +1666,7 @@ Single-level keys may be gotten at using either ``for(k,v)`` or ``for((k),v)``;
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put 'for (k,v in $*) { $[k."_type"] = typeof(v) }'
|
||||
mlr --from data/small --opprint put 'for (k,v in $*) { $[k."_type"] = typeof(v) }'
|
||||
a b i x y a_type b_type i_type x_type y_type
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533 string string int float float
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797 string string int float float
|
||||
|
|
@ -1681,7 +1681,7 @@ Important note: to avoid inconsistent looping behavior in case you're setting ne
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put '
|
||||
mlr --from data/small --opprint put '
|
||||
$sum1 = 0;
|
||||
$sum2 = 0;
|
||||
for (k,v in $*) {
|
||||
|
|
@ -1703,7 +1703,7 @@ It can be confusing to modify the stream record while iterating over a copy of i
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put '
|
||||
mlr --from data/small --opprint put '
|
||||
sum = 0;
|
||||
for (k,v in $*) {
|
||||
if (is_numeric(v)) {
|
||||
|
|
@ -1737,7 +1737,7 @@ That's confusing in the abstract, so a concrete example is in order. Suppose the
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put --jknquoteint -q '
|
||||
mlr -n put --jknquoteint -q '
|
||||
begin {
|
||||
@myvar = {
|
||||
1: 2,
|
||||
|
|
@ -1766,7 +1766,7 @@ Then we can get at various values as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put --jknquoteint -q '
|
||||
mlr -n put --jknquoteint -q '
|
||||
begin {
|
||||
@myvar = {
|
||||
1: 2,
|
||||
|
|
@ -1789,7 +1789,7 @@ Then we can get at various values as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put --jknquoteint -q '
|
||||
mlr -n put --jknquoteint -q '
|
||||
begin {
|
||||
@myvar = {
|
||||
1: 2,
|
||||
|
|
@ -1812,7 +1812,7 @@ Then we can get at various values as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr -n put --jknquoteint -q '
|
||||
mlr -n put --jknquoteint -q '
|
||||
begin {
|
||||
@myvar = {
|
||||
1: 2,
|
||||
|
|
@ -1839,7 +1839,7 @@ These are supported as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put '
|
||||
mlr --from data/small --opprint put '
|
||||
num suma = 0;
|
||||
for (a = 1; a <= NR; a += 1) {
|
||||
suma += a;
|
||||
|
|
@ -1856,7 +1856,7 @@ These are supported as follows:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put '
|
||||
mlr --from data/small --opprint put '
|
||||
num suma = 0;
|
||||
num sumb = 0;
|
||||
for (num a = 1, num b = 1; a <= NR; a += 1, b *= 2) {
|
||||
|
|
@ -1893,7 +1893,7 @@ Miller supports an ``awk``-like ``begin/end`` syntax. The statements in the ``b
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '
|
||||
mlr put '
|
||||
begin { @sum = 0 };
|
||||
@x_sum += $x;
|
||||
end { emit @x_sum }
|
||||
|
|
@ -1915,7 +1915,7 @@ Since uninitialized out-of-stream variables default to 0 for addition/substracti
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '
|
||||
mlr put '
|
||||
@x_sum += $x;
|
||||
end { emit @x_sum }
|
||||
' ../data/small
|
||||
|
|
@ -1936,7 +1936,7 @@ The **put -q** option is a shorthand which suppresses printing of each output re
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '
|
||||
mlr put -q '
|
||||
@x_sum += $x;
|
||||
end { emit @x_sum }
|
||||
' ../data/small
|
||||
|
|
@ -1947,7 +1947,7 @@ We can do similarly with multiple out-of-stream variables:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '
|
||||
mlr put -q '
|
||||
@x_count += 1;
|
||||
@x_sum += $x;
|
||||
end {
|
||||
|
|
@ -1963,7 +1963,7 @@ This is of course not much different than
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr stats1 -a count,sum -f x ../data/small
|
||||
mlr stats1 -a count,sum -f x ../data/small
|
||||
x_count=10,x_sum=4.536294
|
||||
|
||||
Note that it's a syntax error for begin/end blocks to refer to field names (beginning with ``$``), since these execute outside the context of input records.
|
||||
|
|
@ -2046,7 +2046,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword print
|
||||
mlr --help-keyword print
|
||||
print: prints expression immediately to stdout.
|
||||
Example: mlr --from f.dat put -q 'print "The sum of x and y is ".($x+$y)'
|
||||
Example: mlr --from f.dat put -q 'for (k, v in $*) { print k . " => " . v }'
|
||||
|
|
@ -2055,7 +2055,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword dump
|
||||
mlr --help-keyword dump
|
||||
dump: prints all currently defined out-of-stream variables immediately
|
||||
to stdout as JSON.
|
||||
|
||||
|
|
@ -2079,7 +2079,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword tee
|
||||
mlr --help-keyword tee
|
||||
tee: prints the current record to specified file.
|
||||
This is an immediate print to the specified file (except for pprint format
|
||||
which of course waits until the end of the input stream to format all output).
|
||||
|
|
@ -2110,7 +2110,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword emitf
|
||||
mlr --help-keyword emitf
|
||||
emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the
|
||||
output record stream.
|
||||
|
||||
|
|
@ -2141,7 +2141,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword emitp
|
||||
mlr --help-keyword emitp
|
||||
emitp: inserts an out-of-stream variable into the output record stream.
|
||||
Hashmap indices present in the data but not slotted by emitp arguments are
|
||||
output concatenated with ":".
|
||||
|
|
@ -2174,7 +2174,7 @@ Details:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help-keyword emit
|
||||
mlr --help-keyword emit
|
||||
emit: inserts an out-of-stream variable into the output record stream. Hashmap
|
||||
indices present in the data but not slotted by emit arguments are not output.
|
||||
|
||||
|
|
@ -2217,7 +2217,7 @@ Use **emitf** to output several out-of-stream variables side-by-side in the same
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@count += 1; @x_sum += $x; @y_sum += $y; end { emitf @count, @x_sum, @y_sum}' data/small
|
||||
mlr put -q '@count += 1; @x_sum += $x; @y_sum += $y; end { emitf @count, @x_sum, @y_sum}' data/small
|
||||
count=5,x_sum=2.264762,y_sum=2.585086
|
||||
|
||||
Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get a simple key-value pair:
|
||||
|
|
@ -2225,7 +2225,7 @@ Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -2235,7 +2235,7 @@ Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum += $x; end { dump }' data/small
|
||||
mlr put -q '@sum += $x; end { dump }' data/small
|
||||
{
|
||||
"sum": 2.264762
|
||||
}
|
||||
|
|
@ -2243,7 +2243,7 @@ Use **emit** to output an out-of-stream variable. If it's non-indexed you'll get
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum += $x; end { emit @sum }' data/small
|
||||
mlr put -q '@sum += $x; end { emit @sum }' data/small
|
||||
sum=2.264762
|
||||
|
||||
If it's indexed then use as many names after ``emit`` as there are indices:
|
||||
|
|
@ -2251,7 +2251,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a] += $x; end { dump }' data/small
|
||||
mlr put -q '@sum[$a] += $x; end { dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": 0.346790,
|
||||
|
|
@ -2263,7 +2263,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a] += $x; end { emit @sum, "a" }' data/small
|
||||
mlr put -q '@sum[$a] += $x; end { emit @sum, "a" }' data/small
|
||||
a=pan,sum=0.346790
|
||||
a=eks,sum=1.140079
|
||||
a=wye,sum=0.777892
|
||||
|
|
@ -2271,7 +2271,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { dump }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": {
|
||||
|
|
@ -2291,7 +2291,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { emit @sum, "a", "b" }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { emit @sum, "a", "b" }' data/small
|
||||
a=pan,b=pan,sum=0.346790
|
||||
a=eks,b=pan,sum=0.758680
|
||||
a=eks,b=wye,sum=0.381399
|
||||
|
|
@ -2301,7 +2301,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b][$i] += $x; end { dump }' data/small
|
||||
mlr put -q '@sum[$a][$b][$i] += $x; end { dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": {
|
||||
|
|
@ -2331,7 +2331,7 @@ If it's indexed then use as many names after ``emit`` as there are indices:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b][$i] += $x; end { emit @sum, "a", "b", "i" }' data/small
|
||||
mlr put -q '@sum[$a][$b][$i] += $x; end { emit @sum, "a", "b", "i" }' data/small
|
||||
a=pan,b=pan,i=1,sum=0.346790
|
||||
a=eks,b=pan,i=2,sum=0.758680
|
||||
a=eks,b=wye,i=4,sum=0.381399
|
||||
|
|
@ -2343,7 +2343,7 @@ Now for **emitp**: if you have as many names following ``emit`` as there are lev
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { dump }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": {
|
||||
|
|
@ -2363,7 +2363,7 @@ Now for **emitp**: if you have as many names following ``emit`` as there are lev
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { emit @sum, "a" }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { emit @sum, "a" }' data/small
|
||||
a=pan,pan=0.346790
|
||||
a=eks,pan=0.758680,wye=0.381399
|
||||
a=wye,wye=0.204603,pan=0.573289
|
||||
|
|
@ -2371,7 +2371,7 @@ Now for **emitp**: if you have as many names following ``emit`` as there are lev
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { emit @sum }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { emit @sum }' data/small
|
||||
pan=0.346790
|
||||
pan=0.758680,wye=0.381399
|
||||
wye=0.204603,pan=0.573289
|
||||
|
|
@ -2379,7 +2379,7 @@ Now for **emitp**: if you have as many names following ``emit`` as there are lev
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
|
||||
a=pan,sum:pan=0.346790
|
||||
a=eks,sum:pan=0.758680,sum:wye=0.381399
|
||||
a=wye,sum:wye=0.204603,sum:pan=0.573289
|
||||
|
|
@ -2387,13 +2387,13 @@ Now for **emitp**: if you have as many names following ``emit`` as there are lev
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
sum:pan:pan=0.346790,sum:eks:pan=0.758680,sum:eks:wye=0.381399,sum:wye:wye=0.204603,sum:wye:pan=0.573289
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put -q '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
mlr --oxtab put -q '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
sum:pan:pan 0.346790
|
||||
sum:eks:pan 0.758680
|
||||
sum:eks:wye 0.381399
|
||||
|
|
@ -2406,7 +2406,7 @@ keys for ``emitp`` (it defaults to a colon):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
|
||||
mlr put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum, "a" }' data/small
|
||||
a=pan,sum/pan=0.346790
|
||||
a=eks,sum/pan=0.758680,sum/wye=0.381399
|
||||
a=wye,sum/wye=0.204603,sum/pan=0.573289
|
||||
|
|
@ -2414,13 +2414,13 @@ keys for ``emitp`` (it defaults to a colon):
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
mlr put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
sum/pan/pan=0.346790,sum/eks/pan=0.758680,sum/eks/wye=0.381399,sum/wye/wye=0.204603,sum/wye/pan=0.573289
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --oxtab put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
mlr --oxtab put -q --oflatsep / '@sum[$a][$b] += $x; end { emitp @sum }' data/small
|
||||
sum/pan/pan 0.346790
|
||||
sum/eks/pan 0.758680
|
||||
sum/eks/wye 0.381399
|
||||
|
|
@ -2436,7 +2436,7 @@ including their names in parentheses:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/medium --opprint put -q '
|
||||
mlr --from data/medium --opprint put -q '
|
||||
@x_count[$a][$b] += 1;
|
||||
@x_sum[$a][$b] += $x;
|
||||
end {
|
||||
|
|
@ -2483,7 +2483,7 @@ Use **emit all** (or ``emit @*`` which is synonymous) to output all out-of-strea
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put -q '@v[$a][$b]["sum"] += $x; @v[$a][$b]["count"] += 1; end{emit @*,"a","b"}'
|
||||
mlr --from data/small --opprint put -q '@v[$a][$b]["sum"] += $x; @v[$a][$b]["count"] += 1; end{emit @*,"a","b"}'
|
||||
a b sum count
|
||||
pan pan 0.346790 1
|
||||
eks pan 0.758680 1
|
||||
|
|
@ -2494,7 +2494,7 @@ Use **emit all** (or ``emit @*`` which is synonymous) to output all out-of-strea
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'
|
||||
mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'
|
||||
a b sum
|
||||
pan pan 0.346790
|
||||
eks pan 0.758680
|
||||
|
|
@ -2512,7 +2512,7 @@ Use **emit all** (or ``emit @*`` which is synonymous) to output all out-of-strea
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'
|
||||
mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'
|
||||
a b sum count
|
||||
pan pan 0.346790 1
|
||||
eks pan 0.758680 1
|
||||
|
|
@ -2528,7 +2528,7 @@ You can clear a map key by assigning the empty string as its value: ``$x=""`` or
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/small
|
||||
cat data/small
|
||||
a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
|
@ -2538,7 +2538,7 @@ You can clear a map key by assigning the empty string as its value: ``$x=""`` or
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put 'unset $x, $a' data/small
|
||||
mlr put 'unset $x, $a' data/small
|
||||
b=pan,i=1,y=0.7268028627434533
|
||||
b=pan,i=2,y=0.5221511083334797
|
||||
b=wye,i=3,y=0.33831852551664776
|
||||
|
|
@ -2550,7 +2550,7 @@ This can also be done, of course, using ``mlr cut -x``. You can also clear out-o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { dump; unset @sum; dump }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { dump; unset @sum; dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": {
|
||||
|
|
@ -2572,7 +2572,7 @@ This can also be done, of course, using ``mlr cut -x``. You can also clear out-o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put -q '@sum[$a][$b] += $x; end { dump; unset @sum["eks"]; dump }' data/small
|
||||
mlr put -q '@sum[$a][$b] += $x; end { dump; unset @sum["eks"]; dump }' data/small
|
||||
{
|
||||
"sum": {
|
||||
"pan": {
|
||||
|
|
@ -2610,14 +2610,14 @@ You can use ``filter`` within ``put``. In fact, the following two are synonymous
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter 'NR==2 || NR==3' data/small
|
||||
mlr filter 'NR==2 || NR==3' data/small
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put 'filter NR==2 || NR==3' data/small
|
||||
mlr put 'filter NR==2 || NR==3' data/small
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
|
||||
|
|
@ -2626,7 +2626,7 @@ The former, of course, is much easier to type. But the latter allows you to defi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '@running_sum += $x; filter @running_sum > 1.3' data/small
|
||||
mlr put '@running_sum += $x; filter @running_sum > 1.3' data/small
|
||||
a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463
|
||||
a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729
|
||||
|
|
@ -2634,7 +2634,7 @@ The former, of course, is much easier to type. But the latter allows you to defi
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$z = $x * $y; filter $z > 0.3' data/small
|
||||
mlr put '$z = $x * $y; filter $z > 0.3' data/small
|
||||
a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,z=0.396146
|
||||
a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,z=0.495106
|
||||
|
||||
|
|
@ -4917,7 +4917,7 @@ Here's the obligatory example of a recursive function to compute the factorial f
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --from data/small put '
|
||||
mlr --opprint --from data/small put '
|
||||
func f(n) {
|
||||
if (is_numeric(n)) {
|
||||
if (n > 0) {
|
||||
|
|
@ -4966,7 +4966,7 @@ Example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --from data/small put -q '
|
||||
mlr --opprint --from data/small put -q '
|
||||
begin {
|
||||
@call_count = 0;
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -54,7 +54,7 @@ These are as discussed in :doc:`file-formats`, with the exception of ``--right``
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint cat data/small
|
||||
mlr --opprint cat data/small
|
||||
a b i x y
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797
|
||||
|
|
@ -65,7 +65,7 @@ These are as discussed in :doc:`file-formats`, with the exception of ``--right``
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --opprint --right cat data/small
|
||||
mlr --opprint --right cat data/small
|
||||
a b i x y
|
||||
pan pan 1 0.3467901443380824 0.7268028627434533
|
||||
eks pan 2 0.7586799647899636 0.5221511083334797
|
||||
|
|
@ -168,13 +168,13 @@ To apply formatting to a single field, overriding the global ``ofmt``, use ``fmt
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=3.1,y=4.3' | mlr put '$z=fmtnum($x*$y,"%08lf")'
|
||||
echo 'x=3.1,y=4.3' | mlr put '$z=fmtnum($x*$y,"%08lf")'
|
||||
x=3.1,y=4.3,z=13.330000
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=0xffff,y=0xff' | mlr put '$z=fmtnum(int($x*$y),"%08llx")'
|
||||
echo 'x=0xffff,y=0xff' | mlr put '$z=fmtnum(int($x*$y),"%08llx")'
|
||||
x=0xffff,y=0xff,z=00feff01
|
||||
|
||||
Input conversion from hexadecimal is done automatically on fields handled by ``mlr put`` and ``mlr filter`` as long as the field value begins with "0x". To apply output conversion to hexadecimal on a single column, you may use ``fmtnum``, or the keystroke-saving ``hexfmt`` function. Example:
|
||||
|
|
@ -182,7 +182,7 @@ Input conversion from hexadecimal is done automatically on fields handled by ``m
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=0xffff,y=0xff' | mlr put '$z=hexfmt($x*$y)'
|
||||
echo 'x=0xffff,y=0xff' | mlr put '$z=hexfmt($x*$y)'
|
||||
x=0xffff,y=0xff,z=0xfeff01
|
||||
|
||||
Data transformations (verbs)
|
||||
|
|
@ -245,7 +245,7 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr aux-list
|
||||
mlr aux-list
|
||||
Available subcommands:
|
||||
aux-list
|
||||
lecat
|
||||
|
|
@ -258,7 +258,7 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr lecat --help
|
||||
mlr lecat --help
|
||||
Usage: mlr lecat [options] {zero or more file names}
|
||||
Simply echoes input, but flags CR characters in red and LF characters in green.
|
||||
If zero file names are supplied, standard input is read.
|
||||
|
|
@ -269,7 +269,7 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr termcvt --help
|
||||
mlr termcvt --help
|
||||
Usage: mlr termcvt [option] {zero or more file names}
|
||||
Option (exactly one is required):
|
||||
--cr2crlf
|
||||
|
|
@ -286,7 +286,7 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr hex --help
|
||||
mlr hex --help
|
||||
Usage: mlr hex [options] {zero or more file names}
|
||||
Simple hex-dump.
|
||||
If zero file names are supplied, standard input is read.
|
||||
|
|
@ -297,7 +297,7 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr unhex --help
|
||||
mlr unhex --help
|
||||
Usage: mlr unhex [option] {zero or more file names}
|
||||
Options:
|
||||
-h or --help: print this message
|
||||
|
|
@ -309,19 +309,19 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'Hello, world!' | mlr lecat --mono
|
||||
echo 'Hello, world!' | mlr lecat --mono
|
||||
Hello, world![LF]
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'Hello, world!' | mlr termcvt --lf2crlf | mlr lecat --mono
|
||||
echo 'Hello, world!' | mlr termcvt --lf2crlf | mlr lecat --mono
|
||||
Hello, world![CR][LF]
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr hex data/budget.csv
|
||||
mlr hex data/budget.csv
|
||||
00000000: 23 20 41 73 61 6e 61 20 2d 2d 20 68 65 72 65 20 |# Asana -- here |
|
||||
00000010: 61 72 65 20 74 68 65 20 62 75 64 67 65 74 20 66 |are the budget f|
|
||||
00000020: 69 67 75 72 65 73 20 79 6f 75 20 61 73 6b 65 64 |igures you asked|
|
||||
|
|
@ -333,7 +333,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr hex -r data/budget.csv
|
||||
mlr hex -r data/budget.csv
|
||||
23 20 41 73 61 6e 61 20 2d 2d 20 68 65 72 65 20
|
||||
61 72 65 20 74 68 65 20 62 75 64 67 65 74 20 66
|
||||
69 67 75 72 65 73 20 79 6f 75 20 61 73 6b 65 64
|
||||
|
|
@ -345,7 +345,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr hex -r data/budget.csv | sed 's/20/2a/g' | mlr unhex
|
||||
mlr hex -r data/budget.csv | sed 's/20/2a/g' | mlr unhex
|
||||
#*Asana*--*here*are*the*budget*figures*you*asked*for!
|
||||
type,quantity
|
||||
purple,456.78
|
||||
|
|
@ -397,7 +397,7 @@ Rules for null-handling:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/sort-null.dat
|
||||
mlr cat data/sort-null.dat
|
||||
a=3,b=2
|
||||
a=1,b=8
|
||||
a=,b=4
|
||||
|
|
@ -407,7 +407,7 @@ Rules for null-handling:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr sort -n a data/sort-null.dat
|
||||
mlr sort -n a data/sort-null.dat
|
||||
a=1,b=8
|
||||
a=3,b=2
|
||||
a=5,b=7
|
||||
|
|
@ -417,7 +417,7 @@ Rules for null-handling:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr sort -nr a data/sort-null.dat
|
||||
mlr sort -nr a data/sort-null.dat
|
||||
a=,b=4
|
||||
a=5,b=7
|
||||
a=3,b=2
|
||||
|
|
@ -429,19 +429,19 @@ Rules for null-handling:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=2,y=3' | mlr put '$a=$x+$y'
|
||||
echo 'x=2,y=3' | mlr put '$a=$x+$y'
|
||||
x=2,y=3,a=5
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=,y=3' | mlr put '$a=$x+$y'
|
||||
echo 'x=,y=3' | mlr put '$a=$x+$y'
|
||||
x=,y=3,a=
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=,y=3' | mlr put '$a=log($x);$b=log($y)'
|
||||
echo 'x=,y=3' | mlr put '$a=log($x);$b=log($y)'
|
||||
x=,y=3,a=,b=1.098612
|
||||
|
||||
with the exception that the ``min`` and ``max`` functions are special: if one argument is non-null, it wins:
|
||||
|
|
@ -449,7 +449,7 @@ with the exception that the ``min`` and ``max`` functions are special: if one ar
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=,y=3' | mlr put '$a=min($x,$y);$b=max($x,$y)'
|
||||
echo 'x=,y=3' | mlr put '$a=min($x,$y);$b=max($x,$y)'
|
||||
x=,y=3,a=3,b=3
|
||||
|
||||
* Functions of *absent* variables (e.g. ``mlr put '$y = log10($nonesuch)'``) evaluate to absent, and arithmetic/bitwise/boolean operators with both operands being absent evaluate to absent. Arithmetic operators with one absent operand return the other operand. More specifically, absent values act like zero for addition/subtraction, and one for multiplication. Furthermore, **any expression which evaluates to absent is not stored in the left-hand side of an assignment statement**:
|
||||
|
|
@ -457,13 +457,13 @@ with the exception that the ``min`` and ``max`` functions are special: if one ar
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=2,y=3' | mlr put '$a=$u+$v; $b=$u+$y; $c=$x+$y'
|
||||
echo 'x=2,y=3' | mlr put '$a=$u+$v; $b=$u+$y; $c=$x+$y'
|
||||
x=2,y=3,b=3,c=5
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ echo 'x=2,y=3' | mlr put '$a=min($x,$v);$b=max($u,$y);$c=min($u,$v)'
|
||||
echo 'x=2,y=3' | mlr put '$a=min($x,$v);$b=max($u,$y);$c=min($u,$v)'
|
||||
x=2,y=3,a=2,b=3
|
||||
|
||||
* Likewise, for assignment to maps, **absent-valued keys or values result in a skipped assignment**.
|
||||
|
|
@ -483,7 +483,7 @@ Since absent plus absent is absent (and likewise for other operators), accumulat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr cat data/het.dkvp
|
||||
mlr cat data/het.dkvp
|
||||
resource=/path/to/file,loadsec=0.45,ok=true
|
||||
record_count=100,resource=/path/to/file
|
||||
resource=/path/to/second/file,loadsec=0.32,ok=true
|
||||
|
|
@ -493,7 +493,7 @@ Since absent plus absent is absent (and likewise for other operators), accumulat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put 'is_present($loadsec) { $loadmillis = $loadsec * 1000 }' data/het.dkvp
|
||||
mlr put 'is_present($loadsec) { $loadmillis = $loadsec * 1000 }' data/het.dkvp
|
||||
resource=/path/to/file,loadsec=0.45,ok=true,loadmillis=450.000000
|
||||
record_count=100,resource=/path/to/file
|
||||
resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320.000000
|
||||
|
|
@ -503,7 +503,7 @@ Since absent plus absent is absent (and likewise for other operators), accumulat
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr put '$loadmillis = (is_present($loadsec) ? $loadsec : 0.0) * 1000' data/het.dkvp
|
||||
mlr put '$loadmillis = (is_present($loadsec) ? $loadsec : 0.0) * 1000' data/het.dkvp
|
||||
resource=/path/to/file,loadsec=0.45,ok=true,loadmillis=450.000000
|
||||
record_count=100,resource=/path/to/file,loadmillis=0.000000
|
||||
resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320.000000
|
||||
|
|
@ -515,7 +515,7 @@ If you're interested in a formal description of how empty and absent fields part
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --print-type-arithmetic-info
|
||||
mlr --print-type-arithmetic-info
|
||||
(+) | error absent empty string int float bool
|
||||
------ + ------ ------ ------ ------ ------ ------ ------
|
||||
error | error error error error error error error
|
||||
|
|
@ -583,7 +583,7 @@ Example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ cat data/regex-in-data.dat
|
||||
cat data/regex-in-data.dat
|
||||
name=jane,regex=^j.*e$
|
||||
name=bill,regex=^b[ou]ll$
|
||||
name=bull,regex=^b[ou]ll$
|
||||
|
|
@ -591,7 +591,7 @@ Example:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr filter '$name =~ $regex' data/regex-in-data.dat
|
||||
mlr filter '$name =~ $regex' data/regex-in-data.dat
|
||||
name=jane,regex=^j.*e$
|
||||
name=bull,regex=^b[ou]ll$
|
||||
|
||||
|
|
@ -682,7 +682,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr --help
|
||||
mlr --help
|
||||
Usage: mlr [I/O options] {verb} [verb-dependent options ...] {zero or more file names}
|
||||
|
||||
Command-line-syntax examples:
|
||||
|
|
@ -1108,7 +1108,7 @@ Examples:
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1,1
|
||||
|
||||
$ mlr sort --help
|
||||
mlr sort --help
|
||||
Usage: mlr sort {flags}
|
||||
Flags:
|
||||
-f {comma-separated field names} Lexical ascending
|
||||
|
|
|
|||
|
|
@ -14,67 +14,67 @@ Examples
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1-1
|
||||
|
||||
mlr --usage-data-format-examples
|
||||
CSV/CSV-lite: comma-separated values with separate header line
|
||||
TSV: same but with tabs in places of commas
|
||||
+---------------------+
|
||||
| apple,bat,cog |
|
||||
| 1,2,3 | Record 1: "apple => "1", "bat" => "2", "cog" => "3"
|
||||
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
mlr help data-formats
|
||||
CSV/CSV-lite: comma-separated values with separate header line
|
||||
TSV: same but with tabs in place of commas
|
||||
+---------------------+
|
||||
| apple,bat,cog |
|
||||
| 1,2,3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
JSON (sequence or array of objects):
|
||||
+---------------------+
|
||||
| { |
|
||||
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| "bat": 2, |
|
||||
| "cog": 3 |
|
||||
| } |
|
||||
| { |
|
||||
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
|
||||
| "egg": 7, |
|
||||
| "flint": 8 |
|
||||
| }, |
|
||||
| "garlic": "" |
|
||||
| } |
|
||||
+---------------------+
|
||||
JSON (sequence or array of objects):
|
||||
+---------------------+
|
||||
| { |
|
||||
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| "bat": 2, |
|
||||
| "cog": 3 |
|
||||
| } |
|
||||
| { |
|
||||
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
|
||||
| "egg": 7, |
|
||||
| "flint": 8 |
|
||||
| }, |
|
||||
| "garlic": "" |
|
||||
| } |
|
||||
+---------------------+
|
||||
|
||||
PPRINT: pretty-printed tabular
|
||||
+---------------------+
|
||||
| apple bat cog |
|
||||
| 1 2 3 | Record 1: "apple => "1", "bat" => "2", "cog" => "3"
|
||||
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
PPRINT: pretty-printed tabular
|
||||
+---------------------+
|
||||
| apple bat cog |
|
||||
| 1 2 3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
Markdown tabular (supported for output only):
|
||||
+-----------------------+
|
||||
| | apple | bat | cog | |
|
||||
| | --- | --- | --- | |
|
||||
| | 1 | 2 | 3 | | Record 1: "apple => "1", "bat" => "2", "cog" => "3"
|
||||
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+-----------------------+
|
||||
Markdown tabular (supported for output only):
|
||||
+-----------------------+
|
||||
| | apple | bat | cog | |
|
||||
| | --- | --- | --- | |
|
||||
| | 1 | 2 | 3 | | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+-----------------------+
|
||||
|
||||
XTAB: pretty-printed transposed tabular
|
||||
+---------------------+
|
||||
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| bat 2 |
|
||||
| cog 3 |
|
||||
| |
|
||||
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
|
||||
| egg 8 |
|
||||
+---------------------+
|
||||
XTAB: pretty-printed transposed tabular
|
||||
+---------------------+
|
||||
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| bat 2 |
|
||||
| cog 3 |
|
||||
| |
|
||||
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
|
||||
| egg 8 |
|
||||
+---------------------+
|
||||
|
||||
DKVP: delimited key-value pairs (Miller default format)
|
||||
+---------------------+
|
||||
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
|
||||
+---------------------+
|
||||
DKVP: delimited key-value pairs (Miller default format)
|
||||
+---------------------+
|
||||
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
|
||||
+---------------------+
|
||||
|
||||
NIDX: implicitly numerically indexed (Unix-toolkit style)
|
||||
+---------------------+
|
||||
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
|
||||
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
|
||||
+---------------------+
|
||||
NIDX: implicitly numerically indexed (Unix-toolkit style)
|
||||
+---------------------+
|
||||
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
|
||||
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
|
||||
+---------------------+
|
||||
|
||||
.. _file-formats-csv:
|
||||
|
||||
|
|
@ -573,15 +573,15 @@ While you can do format conversion using ``mlr --icsv --ojson cat myfile.csv``,
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1-1
|
||||
|
||||
mlr --usage-format-conversion-keystroke-saver-options
|
||||
mlr help format-conversion
|
||||
As keystroke-savers for format-conversion you may use the following:
|
||||
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
|
||||
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
|
||||
--d2c --d2t --d2n --d2j --d2x --d2p --d2m
|
||||
--n2c --n2t --n2d --n2j --n2x --n2p --n2m
|
||||
--j2c --j2t --j2d --j2n --j2x --j2p --j2m
|
||||
--x2c --x2t --x2d --x2n --x2j --x2p --x2m
|
||||
--p2c --p2t --p2d --p2n --p2j --p2x --p2m
|
||||
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
|
||||
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
|
||||
--d2c --d2t --d2n --d2j --d2x --d2p --d2m
|
||||
--n2c --n2t --n2d --n2j --n2x --n2p --n2m
|
||||
--j2c --j2t --j2d --j2n --j2x --j2p --j2m
|
||||
--x2c --x2t --x2d --x2n --x2j --x2p --x2m
|
||||
--p2c --p2t --p2d --p2n --p2j --p2x --p2m
|
||||
The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,
|
||||
PPRINT, and markdown, respectively. Note that markdown format is available for
|
||||
output only.
|
||||
|
|
@ -609,23 +609,24 @@ You can include comments within your data files, and either have them ignored, o
|
|||
.. code-block:: none
|
||||
:emphasize-lines: 1-1
|
||||
|
||||
mlr --usage-comments-in-data
|
||||
--skip-comments Ignore commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
specified prefix.
|
||||
--pass-comments Immediately print commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--pass-comments-with {string} Immediately print commented lines within input, with
|
||||
specified prefix.
|
||||
mlr help comments-in-data
|
||||
--skip-comments Ignore commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
specified prefix.
|
||||
--pass-comments Immediately print commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--pass-comments-with {string} Immediately print commented lines within input, with
|
||||
specified prefix.
|
||||
|
||||
Notes:
|
||||
* Comments are only honored at the start of a line.
|
||||
* In the absence of any of the above four options, comments are data like
|
||||
any other text.
|
||||
* When pass-comments is used, comment lines are written to standard output
|
||||
immediately upon being read; they are not part of the record stream.
|
||||
Results may be counterintuitive. A suggestion is to place comments at the
|
||||
start of data files.
|
||||
immediately upon being read; they are not part of the record stream. Results
|
||||
may be counterintuitive. A suggestion is to place comments at the start of
|
||||
data files.
|
||||
|
||||
Examples:
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ Examples
|
|||
----------------------------------------------------------------
|
||||
|
||||
GENRST_RUN_COMMAND
|
||||
mlr --usage-data-format-examples
|
||||
mlr help data-formats
|
||||
GENRST_EOF
|
||||
|
||||
.. _file-formats-csv:
|
||||
|
|
@ -290,7 +290,7 @@ Data-conversion keystroke-savers
|
|||
While you can do format conversion using ``mlr --icsv --ojson cat myfile.csv``, there are also keystroke-savers for this purpose, such as ``mlr --c2j cat myfile.csv``. For a complete list:
|
||||
|
||||
GENRST_RUN_COMMAND
|
||||
mlr --usage-format-conversion-keystroke-saver-options
|
||||
mlr help format-conversion
|
||||
GENRST_EOF
|
||||
|
||||
Autodetect of line endings
|
||||
|
|
@ -314,7 +314,7 @@ Comments in data
|
|||
You can include comments within your data files, and either have them ignored, or passed directly through to the standard output as soon as they are encountered:
|
||||
|
||||
GENRST_RUN_COMMAND
|
||||
mlr --usage-comments-in-data
|
||||
mlr help comments-in-data
|
||||
GENRST_EOF
|
||||
|
||||
Examples:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@ Miller on Windows
|
|||
Native builds as of Miller 6
|
||||
----------------------------------------------------------------
|
||||
|
||||
As of version 6.0.0, Miller builds directly on Windows. The experience is now almost the same as on Linux, NetBSD/FreeBSD, and MacOS.
|
||||
Miller was originally developed for Unix-like operating systems including Linux and MacOS. Since the initial release of Miller in 2015, support for Windows has been partial. But as of version 6.0.0, Miller builds directly on Windows.
|
||||
|
||||
The experience is now almost the same as on Linux, NetBSD/FreeBSD, and MacOS.
|
||||
|
||||
MSYS2 is no longer required, although you can use Miller from within MSYS2 if you like. There is now simply a single ``mlr.exe``, with no ``msys2.dll`` alongside anymore.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ Miller on Windows
|
|||
Native builds as of Miller 6
|
||||
----------------------------------------------------------------
|
||||
|
||||
As of version 6.0.0, Miller builds directly on Windows. The experience is now almost the same as on Linux, NetBSD/FreeBSD, and MacOS.
|
||||
Miller was originally developed for Unix-like operating systems including Linux and MacOS. Since the initial release of Miller in 2015, support for Windows has been partial. But as of version 6.0.0, Miller builds directly on Windows.
|
||||
|
||||
The experience is now almost the same as on Linux, NetBSD/FreeBSD, and MacOS.
|
||||
|
||||
MSYS2 is no longer required, although you can use Miller from within MSYS2 if you like. There is now simply a single ``mlr.exe``, with no ``msys2.dll`` alongside anymore.
|
||||
|
||||
|
|
|
|||
|
|
@ -44,13 +44,13 @@ How you can control colorization:
|
|||
|
||||
* ``export MLR_KEY_COLOR=208``
|
||||
* ``export MLR_VALUE_COLOR=33``
|
||||
* Likewise for ``MLR_PASS_COLOR``, ``MLR_FAIL_COLOR``, and ``MLR_HELP_COLOR``.
|
||||
* Command-line flags ``--key-color 208``, ``--value-color 33``, etc.
|
||||
* Likewise for ``MLR_PASS_COLOR``, ``MLR_FAIL_COLOR``, ``MLR_HELP_COLOR``, ``MLR_REPL_PS1_COLOR``, and ``MLR_REPL_PS2_COLOR``.
|
||||
* Command-line flags ``--key-color 208``, ``--value-color 33``, etc., and likewise for ``--pass-color``, ``--fail-color``, ``--repl-ps1-color``, ``--repl-ps2-color``, and ``--help-color``.
|
||||
* This is particularly useful if your terminal's background color clashes with current settings.
|
||||
|
||||
* If environment-variable settings and command-line flags are both provided, the latter take precedence.
|
||||
If environment-variable settings and command-line flags are both provided, the latter take precedence.
|
||||
|
||||
* Please do ``mlr --list-colors`` to see the available color codes.
|
||||
Please do ``mlr --list-colors`` to see the available color codes.
|
||||
|
||||
.. image:: pix/colorization2.png
|
||||
|
||||
|
|
|
|||
|
|
@ -41,13 +41,13 @@ How you can control colorization:
|
|||
|
||||
* ``export MLR_KEY_COLOR=208``
|
||||
* ``export MLR_VALUE_COLOR=33``
|
||||
* Likewise for ``MLR_PASS_COLOR``, ``MLR_FAIL_COLOR``, and ``MLR_HELP_COLOR``.
|
||||
* Command-line flags ``--key-color 208``, ``--value-color 33``, etc.
|
||||
* Likewise for ``MLR_PASS_COLOR``, ``MLR_FAIL_COLOR``, ``MLR_HELP_COLOR``, ``MLR_REPL_PS1_COLOR``, and ``MLR_REPL_PS2_COLOR``.
|
||||
* Command-line flags ``--key-color 208``, ``--value-color 33``, etc., and likewise for ``--pass-color``, ``--fail-color``, ``--repl-ps1-color``, ``--repl-ps2-color``, and ``--help-color``.
|
||||
* This is particularly useful if your terminal's background color clashes with current settings.
|
||||
|
||||
* If environment-variable settings and command-line flags are both provided, the latter take precedence.
|
||||
If environment-variable settings and command-line flags are both provided, the latter take precedence.
|
||||
|
||||
* Please do ``mlr --list-colors`` to see the available color codes.
|
||||
Please do ``mlr --list-colors`` to see the available color codes.
|
||||
|
||||
.. image:: pix/colorization2.png
|
||||
|
||||
|
|
|
|||
|
|
@ -14,10 +14,11 @@ There are a few nearly-standalone programs which have nothing to do with the res
|
|||
aux-list
|
||||
hex
|
||||
lecat
|
||||
regtest
|
||||
repl
|
||||
termcvt
|
||||
unhex
|
||||
help
|
||||
regtest
|
||||
repl
|
||||
For more information, please invoke mlr {subcommand} --help.
|
||||
|
||||
.. code-block:: none
|
||||
|
|
|
|||
|
|
@ -12,575 +12,7 @@ Examples:
|
|||
:emphasize-lines: 1-1
|
||||
|
||||
mlr --help
|
||||
Usage: mlr [I/O options] {verb} [verb-dependent options ...] {zero or more file names}
|
||||
|
||||
COMMAND-LINE-SYNTAX EXAMPLES:
|
||||
mlr --csv cut -f hostname,uptime mydata.csv
|
||||
mlr --tsv --rs lf filter '$status != "down" && $upsec >= 10000' *.tsv
|
||||
mlr --nidx put '$sum = $7 < 0.0 ? 3.5 : $7 + 2.1*$8' *.dat
|
||||
grep -v '^#' /etc/group | mlr --ifs : --nidx --opprint label group,pass,gid,member then sort -f group
|
||||
mlr join -j account_id -f accounts.dat then group-by account_name balances.dat
|
||||
mlr --json put '$attr = sub($attr, "([0-9]+)_([0-9]+)_.*", "\1:\2")' data/*.json
|
||||
mlr stats1 -a min,mean,max,p10,p50,p90 -f flag,u,v data/*
|
||||
mlr stats2 -a linreg-pca -f u,v -g shape data/*
|
||||
mlr put -q '@sum[$a][$b] += $x; end {emit @sum, "a", "b"}' data/*
|
||||
mlr --from estimates.tbl put '
|
||||
for (k,v in $*) {
|
||||
if (is_numeric(v) && k =~ "^[t-z].*$") {
|
||||
$sum += v; $count += 1
|
||||
}
|
||||
}
|
||||
$mean = $sum / $count # no assignment if count unset'
|
||||
mlr --from infile.dat put -f analyze.mlr
|
||||
mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
|
||||
mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
|
||||
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
|
||||
mlr --from infile.dat put '(NR % 1000 == 0) { print > os.Stderr, "Checkpoint ".NR}'
|
||||
|
||||
DATA-FORMAT EXAMPLES:
|
||||
CSV/CSV-lite: comma-separated values with separate header line
|
||||
TSV: same but with tabs in place of commas
|
||||
+---------------------+
|
||||
| apple,bat,cog |
|
||||
| 1,2,3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
JSON (sequence or array of objects):
|
||||
+---------------------+
|
||||
| { |
|
||||
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| "bat": 2, |
|
||||
| "cog": 3 |
|
||||
| } |
|
||||
| { |
|
||||
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
|
||||
| "egg": 7, |
|
||||
| "flint": 8 |
|
||||
| }, |
|
||||
| "garlic": "" |
|
||||
| } |
|
||||
+---------------------+
|
||||
|
||||
PPRINT: pretty-printed tabular
|
||||
+---------------------+
|
||||
| apple bat cog |
|
||||
| 1 2 3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
Markdown tabular (supported for output only):
|
||||
+-----------------------+
|
||||
| | apple | bat | cog | |
|
||||
| | --- | --- | --- | |
|
||||
| | 1 | 2 | 3 | | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+-----------------------+
|
||||
|
||||
XTAB: pretty-printed transposed tabular
|
||||
+---------------------+
|
||||
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| bat 2 |
|
||||
| cog 3 |
|
||||
| |
|
||||
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
|
||||
| egg 8 |
|
||||
+---------------------+
|
||||
|
||||
DKVP: delimited key-value pairs (Miller default format)
|
||||
+---------------------+
|
||||
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
|
||||
+---------------------+
|
||||
|
||||
NIDX: implicitly numerically indexed (Unix-toolkit style)
|
||||
+---------------------+
|
||||
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
|
||||
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
|
||||
+---------------------+
|
||||
|
||||
HELP OPTIONS:
|
||||
-h or --help Show this message.
|
||||
--version Show the software version.
|
||||
{verb name} --help Show verb-specific help.
|
||||
--help-all-verbs Show help on all verbs.
|
||||
-l or --list-all-verbs List only verb names.
|
||||
-L List only verb names, one per line.
|
||||
-f or --help-all-functions Show help on all built-in functions.
|
||||
-F Show a bare listing of built-in functions by name.
|
||||
-k or --help-all-keywords Show help on all keywords.
|
||||
-K Show a bare listing of keywords by name.
|
||||
|
||||
CUSTOMIZATION VIA .MLRRC:
|
||||
You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.
|
||||
For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file
|
||||
and that will be the default input/output format unless otherwise specified on the command line.
|
||||
|
||||
The .mlrrc file format is one "--flag" or "--option value" per line, with the leading "--" optional.
|
||||
Hash-style comments and blank lines are ignored.
|
||||
|
||||
Sample .mlrrc:
|
||||
# Input and output formats are CSV by default (unless otherwise specified
|
||||
# on the mlr command line):
|
||||
csv
|
||||
# These are no-ops for CSV, but when I do use JSON output, I want these
|
||||
# pretty-printing options to be used:
|
||||
jvstack
|
||||
jlistwrap
|
||||
|
||||
How to specify location of .mlrrc:
|
||||
* If $MLRRC is set:
|
||||
o If its value is "__none__" then no .mlrrc files are processed.
|
||||
o Otherwise, its value (as a filename) is loaded and processed. If there are syntax
|
||||
errors, they abort mlr with a usage message (as if you had mistyped something on the
|
||||
command line). If the file can't be loaded at all, though, it is silently skipped.
|
||||
o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is
|
||||
set in the environment.
|
||||
* Otherwise:
|
||||
o If $HOME/.mlrrc exists, it's then processed as above.
|
||||
o If ./.mlrrc exists, it's then also processed as above.
|
||||
(I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)
|
||||
|
||||
See also:
|
||||
https://miller.readthedocs.io/en/latest/customization.html
|
||||
|
||||
VERBS:
|
||||
altkv bar bootstrap cat check clean-whitespace count-distinct count
|
||||
count-similar cut decimate fill-down fill-empty filter flatten format-values
|
||||
fraction gap grep group-by group-like having-fields head histogram
|
||||
json-parse json-stringify join label least-frequent merge-fields
|
||||
most-frequent nest nothing put regularize remove-empty-columns rename
|
||||
reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
|
||||
skip-trivial-records sort sort-within-records stats1 stats2 step tac tail
|
||||
tee top unflatten uniq unsparsify
|
||||
|
||||
FUNCTIONS FOR THE FILTER AND PUT VERBS:
|
||||
+
|
||||
-
|
||||
*
|
||||
/
|
||||
//
|
||||
**
|
||||
pow
|
||||
.+
|
||||
.-
|
||||
.*
|
||||
./
|
||||
%
|
||||
~
|
||||
&
|
||||
|
|
||||
^
|
||||
<<
|
||||
>>
|
||||
>>>
|
||||
bitcount
|
||||
madd
|
||||
msub
|
||||
mmul
|
||||
mexp
|
||||
!
|
||||
==
|
||||
!=
|
||||
>
|
||||
>=
|
||||
<
|
||||
<=
|
||||
=~
|
||||
!=~
|
||||
&&
|
||||
||
|
||||
^^
|
||||
??
|
||||
???
|
||||
?:
|
||||
.
|
||||
capitalize
|
||||
clean_whitespace
|
||||
collapse_whitespace
|
||||
gsub
|
||||
lstrip
|
||||
regextract
|
||||
regextract_or_else
|
||||
rstrip
|
||||
strip
|
||||
strlen
|
||||
ssub
|
||||
sub
|
||||
substr0
|
||||
substr1
|
||||
substr
|
||||
tolower
|
||||
toupper
|
||||
truncate
|
||||
md5
|
||||
sha1
|
||||
sha256
|
||||
sha512
|
||||
abs
|
||||
acos
|
||||
acosh
|
||||
asin
|
||||
asinh
|
||||
atan
|
||||
atan2
|
||||
atanh
|
||||
cbrt
|
||||
ceil
|
||||
cos
|
||||
cosh
|
||||
erf
|
||||
erfc
|
||||
exp
|
||||
expm1
|
||||
floor
|
||||
invqnorm
|
||||
log
|
||||
log10
|
||||
log1p
|
||||
logifit
|
||||
max
|
||||
min
|
||||
qnorm
|
||||
round
|
||||
sgn
|
||||
sin
|
||||
sinh
|
||||
sqrt
|
||||
tan
|
||||
tanh
|
||||
roundm
|
||||
urand
|
||||
urandint
|
||||
urandrange
|
||||
urand32
|
||||
gmt2sec
|
||||
sec2gmt
|
||||
sec2gmtdate
|
||||
systime
|
||||
systimeint
|
||||
uptime
|
||||
strftime
|
||||
strptime
|
||||
dhms2fsec
|
||||
dhms2sec
|
||||
fsec2dhms
|
||||
fsec2hms
|
||||
hms2fsec
|
||||
hms2sec
|
||||
sec2dhms
|
||||
sec2hms
|
||||
is_absent
|
||||
is_array
|
||||
is_bool
|
||||
is_boolean
|
||||
is_empty
|
||||
is_empty_map
|
||||
is_error
|
||||
is_float
|
||||
is_int
|
||||
is_map
|
||||
is_nonempty_map
|
||||
is_not_empty
|
||||
is_not_map
|
||||
is_not_array
|
||||
is_not_null
|
||||
is_null
|
||||
is_numeric
|
||||
is_present
|
||||
is_string
|
||||
asserting_absent
|
||||
asserting_array
|
||||
asserting_bool
|
||||
asserting_boolean
|
||||
asserting_error
|
||||
asserting_empty
|
||||
asserting_empty_map
|
||||
asserting_float
|
||||
asserting_int
|
||||
asserting_map
|
||||
asserting_nonempty_map
|
||||
asserting_not_empty
|
||||
asserting_not_map
|
||||
asserting_not_array
|
||||
asserting_not_null
|
||||
asserting_null
|
||||
asserting_numeric
|
||||
asserting_present
|
||||
asserting_string
|
||||
typeof
|
||||
boolean
|
||||
float
|
||||
fmtnum
|
||||
hexfmt
|
||||
int
|
||||
joink
|
||||
joinv
|
||||
joinkv
|
||||
splita
|
||||
splitax
|
||||
splitkv
|
||||
splitkvx
|
||||
splitnv
|
||||
splitnvx
|
||||
string
|
||||
append
|
||||
arrayify
|
||||
depth
|
||||
flatten
|
||||
get_keys
|
||||
get_values
|
||||
haskey
|
||||
json_parse
|
||||
json_stringify
|
||||
leafcount
|
||||
length
|
||||
mapdiff
|
||||
mapexcept
|
||||
mapselect
|
||||
mapsum
|
||||
unflatten
|
||||
hostname
|
||||
os
|
||||
system
|
||||
version
|
||||
Please use "mlr --help-function {function name}" for function-specific help.
|
||||
|
||||
DATA-FORMAT OPTIONS, FOR INPUT, OUTPUT, OR BOTH:
|
||||
|
||||
--idkvp --odkvp --dkvp Delimited key-value pairs, e.g "a=1,b=2"
|
||||
(this is Miller's default format).
|
||||
|
||||
--inidx --onidx --nidx Implicitly-integer-indexed fields
|
||||
(Unix-toolkit style).
|
||||
-T Synonymous with "--nidx --fs tab".
|
||||
|
||||
--icsv --ocsv --csv Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.)
|
||||
|
||||
--itsv --otsv --tsv Keystroke-savers for "--icsv --ifs tab",
|
||||
"--ocsv --ofs tab", "--csv --fs tab".
|
||||
--iasv --oasv --asv Similar but using ASCII FS 0x1f and RS 0x1e
|
||||
--iusv --ousv --usv Similar but using Unicode FS U+241F (UTF-8 0xe2909f)
|
||||
and RS U+241E (UTF-8 0xe2909e)
|
||||
|
||||
--icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.). The 'lite' CSV does not handle
|
||||
RFC-CSV double-quoting rules; is slightly faster;
|
||||
and handles heterogeneity in the input stream via
|
||||
empty newline followed by new header line. See also
|
||||
http://johnkerl.org/miller/doc/file-formats.html#CSV/TSV/etc.
|
||||
|
||||
--itsvlite --otsvlite --tsvlite Keystroke-savers for "--icsvlite --ifs tab",
|
||||
"--ocsvlite --ofs tab", "--csvlite --fs tab".
|
||||
-t Synonymous with --tsvlite.
|
||||
--iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS 0x1f and RS 0x1e
|
||||
--iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS U+241F (UTF-8 0xe2909f)
|
||||
and RS U+241E (UTF-8 0xe2909e)
|
||||
|
||||
--ipprint --opprint --pprint Pretty-printed tabular (produces no
|
||||
output until all input is in).
|
||||
--right Right-justifies all fields for PPRINT output.
|
||||
--barred Prints a border around PPRINT output
|
||||
(only available for output).
|
||||
|
||||
--omd Markdown-tabular (only available for output).
|
||||
|
||||
--ixtab --oxtab --xtab Pretty-printed vertical-tabular.
|
||||
--xvright Right-justifies values for XTAB format.
|
||||
|
||||
--ijson --ojson --json JSON tabular: sequence or list of one-level
|
||||
maps: {...}{...} or [{...},{...}].
|
||||
--json-map-arrays-on-input JSON arrays are unmillerable. --json-map-arrays-on-input
|
||||
--json-skip-arrays-on-input is the default: arrays are converted to integer-indexed
|
||||
--json-fatal-arrays-on-input maps. The other two options cause them to be skipped, or
|
||||
to be treated as errors. Please use the jq tool for full
|
||||
JSON (pre)processing.
|
||||
--jvstack Put one key-value pair per line for JSON output.
|
||||
--no-jvstack Put objects/arrays all on one line for JSON output.
|
||||
--jsonx --ojsonx Keystroke-savers for --json --jvstack
|
||||
--jsonx --ojsonx and --ojson --jvstack, respectively.
|
||||
--jlistwrap Wrap JSON output in outermost [ ].
|
||||
--jknquoteint Do not quote non-string map keys in JSON output.
|
||||
--jvquoteall Quote map values in JSON output, even if they're
|
||||
numeric.
|
||||
--oflatsep {string} Separator for flattening multi-level JSON keys,
|
||||
e.g. '{"a":{"b":3}}' becomes a:b => 3 for
|
||||
non-JSON formats. Defaults to ..\n",
|
||||
|
||||
-p is a keystroke-saver for --nidx --fs space --repifs
|
||||
|
||||
Examples: --csv for CSV-formatted input and output; --idkvp --opprint for
|
||||
DKVP-formatted input and pretty-printed output.
|
||||
|
||||
Please use --iformat1 --oformat2 rather than --format1 --oformat2.
|
||||
The latter sets up input and output flags for format1, not all of which
|
||||
are overridden in all cases by setting output format to format2.
|
||||
|
||||
|
||||
COMMENTS IN DATA:
|
||||
--skip-comments Ignore commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
specified prefix.
|
||||
--pass-comments Immediately print commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--pass-comments-with {string} Immediately print commented lines within input, with
|
||||
specified prefix.
|
||||
Notes:
|
||||
* Comments are only honored at the start of a line.
|
||||
* In the absence of any of the above four options, comments are data like
|
||||
any other text.
|
||||
* When pass-comments is used, comment lines are written to standard output
|
||||
immediately upon being read; they are not part of the record stream.
|
||||
Results may be counterintuitive. A suggestion is to place comments at the
|
||||
start of data files.
|
||||
|
||||
FORMAT-CONVERSION KEYSTROKE-SAVER OPTIONS:
|
||||
As keystroke-savers for format-conversion you may use the following:
|
||||
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
|
||||
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
|
||||
--d2c --d2t --d2n --d2j --d2x --d2p --d2m
|
||||
--n2c --n2t --n2d --n2j --n2x --n2p --n2m
|
||||
--j2c --j2t --j2d --j2n --j2x --j2p --j2m
|
||||
--x2c --x2t --x2d --x2n --x2j --x2p --x2m
|
||||
--p2c --p2t --p2d --p2n --p2j --p2x --p2m
|
||||
The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,
|
||||
PPRINT, and markdown, respectively. Note that markdown format is available for
|
||||
output only.
|
||||
|
||||
COMPRESSED-DATA OPTIONS:
|
||||
Decompression done within the Miller process itself:
|
||||
--gzin Uncompress gzip within the Miller process. Done by default if file ends in ".gz".
|
||||
--bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in ".bz2".
|
||||
--zin Uncompress zlib within the Miller process. Done by default if file ends in ".z".
|
||||
Decompression done outside the Miller process: --prepipe {command} You can, of course, already do without this for single input files,
|
||||
e.g. "gunzip < myfile.csv.gz | mlr ...".
|
||||
However, when multiple input files are present, between-file separations are
|
||||
lost; also, the FILENAME variable doesn't iterate. Using --prepipe you can
|
||||
specify an action to be taken on each input file. This prepipe command must
|
||||
be able to read from standard input; it will be invoked with
|
||||
{command} < {filename}.
|
||||
--prepipex {command} Like --prepipe with one exception: doesn't insert '<' between
|
||||
command and filename at runtime. Useful for some commands like 'unzip -qc' which don't
|
||||
read standard input.
|
||||
Examples:
|
||||
mlr --prepipe 'gunzip'
|
||||
mlr --prepipe 'zcat -cf'
|
||||
mlr --prepipe 'xz -cd'
|
||||
mlr --prepipe cat
|
||||
Note that this feature is quite general and is not limited to decompression
|
||||
utilities. You can use it to apply per-file filters of your choice.
|
||||
For output compression (or other) utilities, simply pipe the output:
|
||||
mlr ... | {your compression command}
|
||||
Lastly, note that if --prepipe is specified, it replaces any decisions that might
|
||||
have been made based on the file suffix. Also, --gzin/--bz2in/--zin are ignored
|
||||
if --prepipe is also specified.
|
||||
|
||||
RELEVANT TO CSV/CSV-LITE INPUT ONLY:
|
||||
--implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1
|
||||
of input files. Tip: combine with "label" to recreate
|
||||
missing headers.
|
||||
--no-implicit-csv-header Do not use --implicit-csv-header. This is the default
|
||||
anyway -- the main use is for the flags to 'mlr join' if you have
|
||||
main file(s) which are headerless but you want to join in on
|
||||
a file which does have a CSV header. Then you could use
|
||||
'mlr --csv --implicit-csv-header join --no-implicit-csv-header
|
||||
-l your-join-in-with-header.csv ... your-headerless.csv'
|
||||
--allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,
|
||||
fill remaining keys with empty string. If a data line has more
|
||||
fields than the header line, use integer field labels as in
|
||||
the implicit-header case.
|
||||
--headerless-csv-output Print only CSV data lines.
|
||||
-N Keystroke-saver for --implicit-csv-header --headerless-csv-output.
|
||||
|
||||
NUMERICAL FORMATTING:
|
||||
--ofmt {format} E.g. %.18f, %.0f, %9.6e. Please use sprintf-style codes for
|
||||
floating-point numbers. If not specified, default formatting is used.
|
||||
See also the fmtnum function within mlr put (mlr --help-all-functions);
|
||||
see also the format-values function.
|
||||
|
||||
OUTPUT COLORIZATION:
|
||||
Things having colors:
|
||||
* Keys in CSV header lines, JSON keys, etc
|
||||
* Values in CSV data lines, JSON scalar values, etc
|
||||
* "PASS" and "FAIL" in regression-test output
|
||||
* Some online-help strings
|
||||
* Coloring for the REPL prompt
|
||||
|
||||
Rules for coloring:
|
||||
* By default, colorize output only if writing to stdout and stdout is a TTY.
|
||||
* Example: color: mlr --csv cat foo.csv
|
||||
* Example: no color: mlr --csv cat foo.csv > bar.csv
|
||||
* Example: no color: mlr --csv cat foo.csv | less
|
||||
* The default colors were chosen since they look OK with white or black terminal background,
|
||||
and are differentiable with common varieties of human color vision.
|
||||
|
||||
Mechanisms for coloring:
|
||||
* Miller uses ANSI escape sequences only. This does not work on Windows except on Cygwin.
|
||||
* Requires TERM environment variable to be set to non-empty string.
|
||||
* Doesn't try to check to see whether the terminal is capable of 256-color
|
||||
ANSI vs 16-color ANSI. Note that if colors are in the range 0..15
|
||||
then 16-color ANSI escapes are used, so this is in the user's control.
|
||||
|
||||
How you can control colorization:
|
||||
* Suppression/unsuppression:
|
||||
* Environment variable export MLR_NO_COLOR=true means don't color even if stdout+TTY.
|
||||
* Environment variable export MLR_ALWAYS_COLOR=true means do color even if not stdout+TTY.
|
||||
For example, you might want to use this when piping mlr output to less -r.
|
||||
* Command-line flags ``--no-color`` or ``-M``, ``--always-color`` or ``-C``.
|
||||
* Color choices can be specified by using environment variables, or command-line flags,
|
||||
with values 0..255:
|
||||
* export MLR_KEY_COLOR=208, MLR_VALUE_COLOR=33, etc.
|
||||
* Command-line flags --key-color 208, --value-color 33, etc.
|
||||
* This is particularly useful if your terminal's background color clashes with current settings.
|
||||
* If environment-variable settings and command-line flags are both provided, the latter take precedence.
|
||||
* Please do mlr --list-colors to see the available color codes.
|
||||
|
||||
OTHER OPTIONS:
|
||||
--seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter
|
||||
urand()/urandint()/urand32().
|
||||
--nr-progress-mod {m}, with m a positive integer: print filename and record
|
||||
count to os.Stderr every m input records.
|
||||
--from {filename} Use this to specify an input file before the verb(s),
|
||||
rather than after. May be used more than once. Example:
|
||||
"mlr --from a.dat --from b.dat cat" is the same as
|
||||
"mlr cat a.dat b.dat".
|
||||
--mfrom {filenames} -- Use this to specify one or more input files before the verb(s),
|
||||
rather than after. May be used more than once.
|
||||
The list of filenames must end with "--". This is useful
|
||||
for example since "--from *.csv" doesn't do what you might
|
||||
hope but "--mfrom *.csv --" does.
|
||||
--load {filename} Load DSL script file for all put/filter operations on the command line.
|
||||
If the name following --load is a directory, load all "*.mlr" files
|
||||
in that directory. This is just like "put -f" and "filter -f"
|
||||
except it's up-front on the command line, so you can do something like
|
||||
alias mlr='mlr --load ~/myscripts' if you like.
|
||||
--mload {names} -- Like --load but works with more than one filename,
|
||||
e.g. '--mload *.mlr --'.
|
||||
-n Process no input files, nor standard input either. Useful
|
||||
for mlr put with begin/end statements only. (Same as --from
|
||||
/dev/null.) Also useful in "mlr -n put -v '...'" for
|
||||
analyzing abstract syntax trees (if that's your thing).
|
||||
-I Process files in-place. For each file name on the command
|
||||
line, output is written to a temp file in the same
|
||||
directory, which is then renamed over the original. Each
|
||||
file is processed in isolation: if the output format is
|
||||
CSV, CSV headers will be present in each output file;
|
||||
statistics are only over each file's own records; and so on.
|
||||
|
||||
THEN-CHAINING:
|
||||
Output of one verb may be chained as input to another using "then", e.g.
|
||||
mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color
|
||||
|
||||
AUXILIARY COMMANDS:
|
||||
Miller has a few otherwise-standalone executables packaged within it.
|
||||
They do not participate in any other parts of Miller.
|
||||
Please use "mlr aux-list" for more information.
|
||||
|
||||
SEE ALSO:
|
||||
For more information please see http://johnkerl.org/miller/doc and/or
|
||||
http://github.com/johnkerl/miller. This is Miller version v6.0.0-dev.
|
||||
Please run "mlr --help" for detailed usage information.
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1-1
|
||||
|
|
|
|||
|
|
@ -1819,8 +1819,8 @@ most-frequent
|
|||
square red 1874
|
||||
triangle red 1560
|
||||
circle red 1207
|
||||
square yellow 589
|
||||
square blue 589
|
||||
square yellow 589
|
||||
|
||||
.. code-block:: none
|
||||
:emphasize-lines: 1-1
|
||||
|
|
|
|||
|
|
@ -173,6 +173,8 @@ No command-line-history-editing feature is built in but **rlwrap mlr repl** is a
|
|||
delight. You may need ``brew install rlwrap``, ``sudo apt-get install rlwrap``,
|
||||
etc. depending on your platform.
|
||||
|
||||
Suggestion: ``alias mrpl='rlwrap mlr repl'`` in your shell's startup file.
|
||||
|
||||
On-line help
|
||||
----------------------------------------------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -151,6 +151,8 @@ No command-line-history-editing feature is built in but **rlwrap mlr repl** is a
|
|||
delight. You may need ``brew install rlwrap``, ``sudo apt-get install rlwrap``,
|
||||
etc. depending on your platform.
|
||||
|
||||
Suggestion: ``alias mrpl='rlwrap mlr repl'`` in your shell's startup file.
|
||||
|
||||
On-line help
|
||||
----------------------------------------------------------------
|
||||
|
||||
|
|
|
|||
487
go/help.txt
487
go/help.txt
|
|
@ -1,487 +0,0 @@
|
|||
================================================================
|
||||
manpage:
|
||||
|
||||
mlr --usage-synopsis
|
||||
mlr --version
|
||||
mlr --usage-examples
|
||||
mlr --usage-data-format-examples
|
||||
mlr --usage-help-options
|
||||
mlr --usage-list-all-verbs
|
||||
mlr --usage-functions
|
||||
mlr --usage-data-format-options
|
||||
mlr --usage-comments-in-data
|
||||
mlr --usage-format-conversion-keystroke-saver-options
|
||||
mlr --usage-compressed-data-options
|
||||
mlr --usage-separator-options
|
||||
mlr --usage-csv-options
|
||||
mlr --usage-double-quoting
|
||||
mlr --usage-numerical-formatting
|
||||
mlr --usage-other-options
|
||||
mlr --usage-then-chaining
|
||||
mlr --usage-auxents
|
||||
mlr --list-all-verbs-raw
|
||||
mlr #{verb} -h
|
||||
mlr --list-all-functions-raw
|
||||
mlr --help-function '#{function}
|
||||
mlr --list-all-keywords-raw`
|
||||
mlr --help-keyword '#{keyword}'
|
||||
|
||||
----------------------------------------------------------------
|
||||
old mlr --help:
|
||||
|
||||
mainUsageSynopsis(o, argv0)
|
||||
mainUsageExamples(o, argv0, " ")
|
||||
mainUsageDataFormatExamples(o, argv0)
|
||||
mainUsageHelpOptions(o, argv0)
|
||||
mainUsageMlrrc(o, argv0)
|
||||
listAllVerbs(o, " ")
|
||||
mainUsageFunctions(o)
|
||||
mainUsageDataFormatOptions(o, argv0)
|
||||
mainUsageCommentsInData(o, argv0)
|
||||
mainUsageFormatConversionKeystrokeSaverOptions(o, argv0)
|
||||
mainUsageCompressedDataOptions(o, argv0)
|
||||
mainUsageSeparatorOptions(o, argv0);
|
||||
mainUsageCsvOptions(o, argv0)
|
||||
mainUsageDoubleQuoting(o, argv0);
|
||||
mainUsageNumericalFormatting(o, argv0)
|
||||
mainUsageOutputColorization(o, argv0)
|
||||
mainUsageOtherOptions(o, argv0)
|
||||
mainUsageThenChaining(o, argv0)
|
||||
mainUsageAuxents(o)
|
||||
mainUsageSeeAlso(o, argv0)
|
||||
|
||||
----------------------------------------------------------------
|
||||
new -- ?!?
|
||||
|
||||
mlr --help
|
||||
= mlr help
|
||||
< mlr --usage-synopsis
|
||||
|
||||
mlr help {foo} w/ polymorphic lookup -- ?
|
||||
|
||||
mlr --version
|
||||
= mlr version
|
||||
|
||||
mlr --usage-examples -> mlr help ___
|
||||
mlr --usage-data-format-examples -> mlr help format-examples
|
||||
mlr --usage-help-options -> mlr help ___
|
||||
mlr --usage-list-all-verbs -> mlr help ___
|
||||
mlr --usage-functions -> mlr help ___
|
||||
mlr --usage-data-format-options -> mlr help ___
|
||||
mlr --usage-comments-in-data -> mlr help ___
|
||||
mlr --usage-format-conversion-keystroke-sa.. -> mlr help keystroke-savers
|
||||
mlr --usage-compressed-data-options -> mlr help compressed-data
|
||||
mlr --usage-separator-options -> mlr help separators
|
||||
mlr --usage-csv-options -> mlr help csv-options
|
||||
mlr --usage-double-quoting -> mlr help double-quoting
|
||||
mlr --usage-numerical-formatting -> mlr help number-formatting
|
||||
mlr --usage-other-options -> mlr help ???
|
||||
mlr --usage-then-chaining -> mlr help then-chaining
|
||||
mlr --usage-auxents -> mlr help auxents
|
||||
mlr --list-all-verbs-raw -> mlr help list-verbs
|
||||
mlr #{verb} -h -> mlr help ????
|
||||
mlr --list-all-functions-raw -> mlr help list-functions
|
||||
mlr --help-function #{function} -> mlr help ___
|
||||
mlr --list-all-keywords-raw -> mlr help list-keywords
|
||||
mlr --help-keyword -> mlr help '#{keyword}'
|
||||
|
||||
----------------------------------------------------------------
|
||||
HELP OPTIONS:
|
||||
-h or --help Show this message.
|
||||
--version Show the software version.
|
||||
{verb name} --help Show verb-specific help.
|
||||
--help-all-verbs Show help on all verbs.
|
||||
-l or --list-all-verbs List only verb names.
|
||||
-L List only verb names, one per line.
|
||||
-f or --help-all-functions Show help on all built-in functions.
|
||||
-F Show a bare listing of built-in functions by name.
|
||||
-k or --help-all-keywords Show help on all keywords.
|
||||
-K Show a bare listing of keywords by name.
|
||||
|
||||
mlr help verbs
|
||||
mlr help data-format-options
|
||||
mlr help comments-in-data
|
||||
mlr help other-options
|
||||
mlr help functions
|
||||
mlr help mlrrc
|
||||
mlr aux-list
|
||||
|
||||
----------------------------------------------------------------
|
||||
SEE ALSO:
|
||||
For more information please see http://johnkerl.org/miller/doc and/or
|
||||
http://github.com/johnkerl/miller. This is Miller version v6.0.0-dev.
|
||||
|
||||
----------------------------------------------------------------
|
||||
xxx some brief examples:
|
||||
|
||||
xxx for more information please see mlr help io-options
|
||||
|
||||
----------------------------------------------------------------
|
||||
VERBS:
|
||||
altkv bar bootstrap cat check clean-whitespace count-distinct count
|
||||
count-similar cut decimate fill-down fill-empty filter flatten format-values
|
||||
fraction gap grep group-by group-like having-fields head histogram
|
||||
json-parse json-stringify join label least-frequent merge-fields
|
||||
most-frequent nest nothing put regularize remove-empty-columns rename
|
||||
reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
|
||||
skip-trivial-records sort sort-within-records stats1 stats2 step tac tail
|
||||
tee top unflatten uniq unsparsify
|
||||
|
||||
xxx for more information please see mlr help {verb name}
|
||||
|
||||
----------------------------------------------------------------
|
||||
THEN-CHAINING:
|
||||
Output of one verb may be chained as input to another using "then", e.g.
|
||||
mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color
|
||||
|
||||
----------------------------------------------------------------
|
||||
DATA-FORMAT OPTIONS, FOR INPUT, OUTPUT, OR BOTH:
|
||||
|
||||
--icsv --ocsv --csv Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.)
|
||||
|
||||
--itsv --otsv --tsv Keystroke-savers for "--icsv --ifs tab",
|
||||
"--ocsv --ofs tab", "--csv --fs tab".
|
||||
--iasv --oasv --asv Similar but using ASCII FS 0x1f and RS 0x1e\n",
|
||||
--iusv --ousv --usv Similar but using Unicode FS U+241F (UTF-8 0xe2909f)\n",
|
||||
and RS U+241E (UTF-8 0xe2909e)\n",
|
||||
|
||||
--icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.). The 'lite' CSV does not handle
|
||||
RFC-CSV double-quoting rules; is slightly faster;
|
||||
and handles heterogeneity in the input stream via
|
||||
empty newline followed by new header line. See also
|
||||
http://johnkerl.org/miller/doc/file-formats.html#CSV/TSV/etc.
|
||||
|
||||
--itsvlite --otsvlite --tsvlite Keystroke-savers for "--icsvlite --ifs tab",
|
||||
"--ocsvlite --ofs tab", "--csvlite --fs tab".
|
||||
-t Synonymous with --tsvlite.
|
||||
--iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS 0x1f and RS 0x1e\n",
|
||||
--iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS U+241F (UTF-8 0xe2909f)\n",
|
||||
and RS U+241E (UTF-8 0xe2909e)\n",
|
||||
|
||||
--ipprint --opprint --pprint Pretty-printed tabular (produces no
|
||||
output until all input is in).
|
||||
--right Right-justifies all fields for PPRINT output.
|
||||
--barred Prints a border around PPRINT output
|
||||
(only available for output).
|
||||
|
||||
--omd Markdown-tabular (only available for output).
|
||||
|
||||
--ixtab --oxtab --xtab Pretty-printed vertical-tabular.
|
||||
--xvright Right-justifies values for XTAB format.
|
||||
|
||||
--idkvp --odkvp --dkvp Delimited key-value pairs, e.g "a=1,b=2"
|
||||
(this is Miller's default format).
|
||||
|
||||
--inidx --onidx --nidx Implicitly-integer-indexed fields
|
||||
(Unix-toolkit style).
|
||||
|
||||
--ijson --ojson --json JSON tabular: sequence or list of one-level
|
||||
maps: {...}{...} or [{...},{...}].
|
||||
--json-map-arrays-on-input JSON arrays are unmillerable. --json-map-arrays-on-input
|
||||
--json-skip-arrays-on-input is the default: arrays are converted to integer-indexed
|
||||
--json-fatal-arrays-on-input maps. The other two options cause them to be skipped, or
|
||||
to be treated as errors. Please use the jq tool for full
|
||||
JSON (pre)processing.
|
||||
--jvstack Put one key-value pair per line for JSON output.
|
||||
--no-jvstack Put objects/arrays all on one line for JSON output.
|
||||
--jsonx --ojsonx Keystroke-savers for --json --jvstack
|
||||
and --ojson --jvstack, respectively.
|
||||
--jlistwrap Wrap JSON output in outermost [ ].
|
||||
--jknquoteint Do not quote non-string map keys in JSON output.
|
||||
--jvquoteall Quote map values in JSON output, even if they're
|
||||
numeric.
|
||||
--oflatsep {string} Separator for flattening multi-level JSON keys,
|
||||
e.g. '{"a":{"b":3}}' becomes a:b => 3 for
|
||||
non-JSON formats. Defaults to ".".
|
||||
|
||||
-T Synonymous with "--nidx --fs tab".
|
||||
-p is a keystroke-saver for --nidx --fs space --repifs
|
||||
|
||||
Examples: --csv for CSV-formatted input and output; --idkvp --opprint for
|
||||
DKVP-formatted input and pretty-printed output.
|
||||
|
||||
Please use --iformat1 --oformat2 rather than --format1 --oformat2.
|
||||
The latter sets up input and output flags for format1, not all of which
|
||||
are overridden in all cases by setting output format to format2.
|
||||
|
||||
----------------------------------------------------------------
|
||||
RELEVANT TO CSV/CSV-LITE INPUT ONLY:
|
||||
--implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1
|
||||
of input files. Tip: combine with "label" to recreate
|
||||
missing headers.
|
||||
--no-implicit-csv-header Do not use --implicit-csv-header. This is the default
|
||||
anyway -- the main use is for the flags to 'mlr join' if you have
|
||||
main file(s) which are headerless but you want to join in on
|
||||
a file which does have a CSV header. Then you could use
|
||||
'mlr --csv --implicit-csv-header join --no-implicit-csv-header
|
||||
-l your-join-in-with-header.csv ... your-headerless.csv'
|
||||
--allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,
|
||||
fill remaining keys with empty string. If a data line has more
|
||||
fields than the header line, use integer field labels as in
|
||||
the implicit-header case.
|
||||
--headerless-csv-output Print only CSV data lines.
|
||||
-N Keystroke-saver for --implicit-csv-header --headerless-csv-output.
|
||||
|
||||
----------------------------------------------------------------
|
||||
FORMAT-CONVERSION KEYSTROKE-SAVER OPTIONS:
|
||||
As keystroke-savers for format-conversion you may use the following:
|
||||
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
|
||||
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
|
||||
--d2c --d2t --d2n --d2j --d2x --d2p --d2m
|
||||
--n2c --n2t --n2d --n2j --n2x --n2p --n2m
|
||||
--j2c --j2t --j2d --j2n --j2x --j2p --j2m
|
||||
--x2c --x2t --x2d --x2n --x2j --x2p --x2m
|
||||
--p2c --p2t --p2d --p2n --p2j --p2x --p2m
|
||||
The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,
|
||||
PPRINT, and markdown, respectively. Note that markdown format is available for
|
||||
output only.
|
||||
|
||||
----------------------------------------------------------------
|
||||
COMPRESSED-DATA OPTIONS:
|
||||
Decompression done within the Miller process itself:
|
||||
--gzin Uncompress gzip within the Miller process. Done by default if file ends in ".gz".
|
||||
--bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in ".bz2".
|
||||
--zin Uncompress zlib within the Miller process. Done by default if file ends in ".z".
|
||||
Decompression done outside the Miller process:
|
||||
--prepipe {command} You can, of course, already do without this for single input files,
|
||||
e.g. "gunzip < myfile.csv.gz | mlr ...".
|
||||
However, when multiple input files are present, between-file separations are
|
||||
lost; also, the FILENAME variable doesn't iterate. Using --prepipe you can
|
||||
specify an action to be taken on each input file. This prepipe command must
|
||||
be able to read from standard input; it will be invoked with
|
||||
{command} < {filename}.
|
||||
--prepipex {command} Like --prepipe with one exception: doesn't insert '<' between
|
||||
command and filename at runtime. Useful for some commands like 'unzip -qc' which don't
|
||||
read standard input.
|
||||
Examples:
|
||||
mlr --prepipe 'gunzip'
|
||||
mlr --prepipe 'zcat -cf'
|
||||
mlr --prepipe 'xz -cd'
|
||||
mlr --prepipe cat
|
||||
Note that this feature is quite general and is not limited to decompression
|
||||
utilities. You can use it to apply per-file filters of your choice.
|
||||
For output compression (or other) utilities, simply pipe the output:
|
||||
mlr ... | {your compression command}
|
||||
Lastly, note that if --prepipe is specified, it replaces any decisions that might
|
||||
have been made based on the file suffix. Also, --gzin/--bz2in/--zin are ignored
|
||||
if --prepipe is also specified.
|
||||
|
||||
----------------------------------------------------------------
|
||||
|
||||
xxx mlr help comments-in-data
|
||||
|
||||
COMMENTS IN DATA:
|
||||
--skip-comments Ignore commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
specified prefix.
|
||||
--pass-comments Immediately print commented lines (prefixed by "#")
|
||||
within the input.
|
||||
--pass-comments-with {string} Immediately print commented lines within input, with
|
||||
specified prefix.
|
||||
Notes:
|
||||
* Comments are only honored at the start of a line.
|
||||
* In the absence of any of the above four options, comments are data like
|
||||
any other text.
|
||||
* When pass-comments is used, comment lines are written to standard output
|
||||
immediately upon being read; they are not part of the record stream.
|
||||
Results may be counterintuitive. A suggestion is to place comments at the
|
||||
start of data files.
|
||||
|
||||
----------------------------------------------------------------
|
||||
xxx mlr help other-options
|
||||
|
||||
OTHER OPTIONS:
|
||||
--ofmt {format} E.g. %.18f, %.0f, %9.6e. Please use sprintf-style codes for
|
||||
floating-point numbers. If not specified, default formatting is used.
|
||||
See also the fmtnum function within mlr put (mlr --help-all-functions);
|
||||
see also the format-values function.
|
||||
--seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter
|
||||
urand()/urandint()/urand32().
|
||||
--nr-progress-mod {m}, with m a positive integer: print filename and record
|
||||
count to stderr every m input records.
|
||||
--from {filename} Use this to specify an input file before the verb(s),
|
||||
rather than after. May be used more than once. Example:
|
||||
"mlr --from a.dat --from b.dat cat" is the same as
|
||||
"mlr cat a.dat b.dat".
|
||||
--mfrom {filenames} -- Use this to specify one or more input files before the verb(s),
|
||||
rather than after. May be used more than once.
|
||||
The list of filenames must end with "--". This is useful
|
||||
for example since "--from *.csv" doesn't do what you might
|
||||
hope but "--mfrom *.csv --" does.
|
||||
--load {filename} Load DSL script file for all put/filter operations on the command line.
|
||||
If the name following --load is a directory, load all "*.mlr" files
|
||||
in that directory. This is just like "put -f" and "filter -f"
|
||||
except it's up-front on the command line, so you can do something like
|
||||
alias mlr='mlr --load ~/myscripts' if you like.
|
||||
--mload {names} -- Like --load but works with more than one filename,
|
||||
e.g. '--mload *.mlr --'.
|
||||
-n Process no input files, nor standard input either. Useful
|
||||
for mlr put with begin/end statements only. (Same as --from
|
||||
/dev/null.) Also useful in "mlr -n put -v '...'" for
|
||||
analyzing abstract syntax trees (if that's your thing).
|
||||
-I Process files in-place. For each file name on the command
|
||||
line, output is written to a temp file in the same
|
||||
directory, which is then renamed over the original. Each
|
||||
file is processed in isolation: if the output format is
|
||||
CSV, CSV headers will be present in each output file;
|
||||
statistics are only over each file's own records; and so on.
|
||||
|
||||
----------------------------------------------------------------
|
||||
xxx mlr aux-list
|
||||
|
||||
AUXILIARY COMMANDS:
|
||||
Miller has a few otherwise-standalone executables packaged within it.
|
||||
They do not participate in any other parts of Miller.
|
||||
Please use "mlr aux-list" for more information.
|
||||
|
||||
----------------------------------------------------------------
|
||||
FUNCTIONS FOR THE FILTER AND PUT VERBS:
|
||||
|
||||
+ - * / // ** pow .+ .- .* ./ % ~ & | ^ << >> >>> bitcount madd msub mmul mexp
|
||||
! == != > >= < <= =~ !=~ && || ^^ ?? ??? ?: . capitalize clean_whitespace
|
||||
collapse_whitespace gsub lstrip regextract regextract_or_else rstrip strip
|
||||
strlen ssub sub substr0 substr1 substr tolower toupper truncate md5 sha1 sha256
|
||||
sha512 abs acos acosh asin asinh atan atan2 atanh cbrt ceil cos cosh erf erfc
|
||||
exp expm1 floor invqnorm log log10 log1p logifit max min qnorm round sgn sin
|
||||
sinh sqrt tan tanh roundm urand urandint urandrange urand32 gmt2sec sec2gmt
|
||||
sec2gmtdate systime systimeint uptime strftime strptime dhms2fsec dhms2sec
|
||||
fsec2dhms fsec2hms hms2fsec hms2sec sec2dhms sec2hms is_absent is_array is_bool
|
||||
is_boolean is_empty is_empty_map is_error is_float is_int is_map
|
||||
is_nonempty_map is_not_empty is_not_map is_not_array is_not_null is_null
|
||||
is_numeric is_present is_string asserting_absent asserting_array asserting_bool
|
||||
asserting_boolean asserting_error asserting_empty asserting_empty_map
|
||||
asserting_float asserting_int asserting_map asserting_nonempty_map
|
||||
asserting_not_empty asserting_not_map asserting_not_array asserting_not_null
|
||||
asserting_null asserting_numeric asserting_present asserting_string typeof
|
||||
boolean float fmtnum hexfmt int joink joinv joinkv splita splitax splitkv
|
||||
splitkvx splitnv splitnvx string append arrayify depth flatten get_keys
|
||||
get_values haskey json_parse json_stringify leafcount length mapdiff mapexcept
|
||||
mapselect mapsum unflatten hostname os system version
|
||||
Please use "mlr --help-function {function name}" for function-specific help.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
================================================================
|
||||
COMMAND-LINE-SYNTAX EXAMPLES:
|
||||
mlr --csv cut -f hostname,uptime mydata.csv
|
||||
mlr --tsv --rs lf filter '$status != "down" && $upsec >= 10000' *.tsv
|
||||
mlr --nidx put '$sum = $7 < 0.0 ? 3.5 : $7 + 2.1*$8' *.dat
|
||||
grep -v '^#' /etc/group | mlr --ifs : --nidx --opprint label group,pass,gid,member then sort -f group
|
||||
mlr join -j account_id -f accounts.dat then group-by account_name balances.dat
|
||||
mlr --json put '$attr = sub($attr, "([0-9]+)_([0-9]+)_.*", "\1:\2")' data/*.json
|
||||
mlr stats1 -a min,mean,max,p10,p50,p90 -f flag,u,v data/*
|
||||
mlr stats2 -a linreg-pca -f u,v -g shape data/*
|
||||
mlr put -q '@sum[$a][$b] += $x; end {emit @sum, "a", "b"}' data/*
|
||||
mlr --from estimates.tbl put '
|
||||
for (k,v in $*) {
|
||||
if (is_numeric(v) && k =~ "^[t-z].*$") {
|
||||
$sum += v; $count += 1
|
||||
}
|
||||
}
|
||||
$mean = $sum / $count # no assignment if count unset'
|
||||
mlr --from infile.dat put -f analyze.mlr
|
||||
mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
|
||||
mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
|
||||
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
|
||||
mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
|
||||
|
||||
----------------------------------------------------------------
|
||||
DATA-FORMAT EXAMPLES:
|
||||
DKVP: delimited key-value pairs (Miller default format)
|
||||
+---------------------+
|
||||
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
|
||||
+---------------------+
|
||||
|
||||
NIDX: implicitly numerically indexed (Unix-toolkit style)
|
||||
+---------------------+
|
||||
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
|
||||
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
|
||||
+---------------------+
|
||||
|
||||
CSV/CSV-lite: comma-separated values with separate header line
|
||||
+---------------------+
|
||||
| apple,bat,cog |
|
||||
| 1,2,3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
Tabular JSON: nested objects are supported, although arrays within them are not:
|
||||
+---------------------+
|
||||
| { |
|
||||
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| "bat": 2, |
|
||||
| "cog": 3 |
|
||||
| } |
|
||||
| { |
|
||||
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
|
||||
| "egg": 7, |
|
||||
| "flint": 8 |
|
||||
| }, |
|
||||
| "garlic": "" |
|
||||
| } |
|
||||
+---------------------+
|
||||
|
||||
PPRINT: pretty-printed tabular
|
||||
+---------------------+
|
||||
| apple bat cog |
|
||||
| 1 2 3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+---------------------+
|
||||
|
||||
XTAB: pretty-printed transposed tabular
|
||||
+---------------------+
|
||||
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| bat 2 |
|
||||
| cog 3 |
|
||||
| |
|
||||
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
|
||||
| egg 8 |
|
||||
+---------------------+
|
||||
|
||||
Markdown tabular (supported for output only):
|
||||
+-----------------------+
|
||||
| | apple | bat | cog | |
|
||||
| | --- | --- | --- | |
|
||||
| | 1 | 2 | 3 | | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
|
||||
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
|
||||
+-----------------------+
|
||||
|
||||
----------------------------------------------------------------
|
||||
CUSTOMIZATION VIA .MLRRC:
|
||||
You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.
|
||||
For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file
|
||||
and that will be the default input/output format unless otherwise specified on the command line.
|
||||
|
||||
The .mlrrc file format is one "--flag" or "--option value" per line, with the leading "--" optional.
|
||||
Hash-style comments and blank lines are ignored.
|
||||
|
||||
Sample .mlrrc:
|
||||
# Input and output formats are CSV by default (unless otherwise specified
|
||||
# on the mlr command line):
|
||||
csv
|
||||
# These are no-ops for CSV, but when I do use JSON output, I want these
|
||||
# pretty-printing options to be used:
|
||||
jvstack
|
||||
jlistwrap
|
||||
|
||||
How to specify location of .mlrrc:
|
||||
* If $MLRRC is set:
|
||||
o If its value is "__none__" then no .mlrrc files are processed.
|
||||
o Otherwise, its value (as a filename) is loaded and processed. If there are syntax
|
||||
errors, they abort mlr with a usage message (as if you had mistyped something on the
|
||||
command line). If the file can't be loaded at all, though, it is silently skipped.
|
||||
o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is
|
||||
set in the environment.
|
||||
* Otherwise:
|
||||
o If $HOME/.mlrrc exists, it's then processed as above.
|
||||
o If ./.mlrrc exists, it's then also processed as above.
|
||||
(I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)
|
||||
|
||||
See also:
|
||||
https://miller.readthedocs.io/en/latest/customization.html
|
||||
|
||||
|
|
@ -1 +1 @@
|
|||
mlr --help-all-verbs
|
||||
mlr help usage-verbs
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ Usage: mlr altkv [options]
|
|||
Given fields with values of the form a,b,c,d,e,f emits a=b,c=d,e=f pairs.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr bar [options]
|
||||
Replaces a numeric field with a number of asterisks, allowing for cheesy
|
||||
|
|
@ -20,6 +21,7 @@ Options:
|
|||
Nominally the fill, out-of-bounds, and blank characters will be strings of length 1.
|
||||
However you can make them all longer if you so desire.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr bootstrap [options]
|
||||
Emits an n-sample, with replacement, of the input records.
|
||||
|
|
@ -28,6 +30,7 @@ Options:
|
|||
-n Number of samples to output. Defaults to number of input records.
|
||||
Must be non-negative.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr cat [options]
|
||||
Passes input records directly to output. Most useful for format conversion.
|
||||
|
|
@ -36,12 +39,14 @@ Options:
|
|||
-N {name} Prepend field {name} to each record with record-counter starting at 1.
|
||||
-g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr check [options]
|
||||
Consumes records without printing any output.
|
||||
Useful for doing a well-formatted check on input data.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr clean-whitespace [options]
|
||||
For each record, for each field in the record, whitespace-cleans the keys and/or
|
||||
|
|
@ -56,6 +61,7 @@ Options:
|
|||
It is an error to specify -k as well as -v -- to clean keys and values,
|
||||
leave off -k as well as -v.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr count-distinct [options]
|
||||
Prints number of records having distinct values for specified field names.
|
||||
|
|
@ -71,6 +77,7 @@ Options:
|
|||
and b field values. With -f a,b and with -u, computes counts
|
||||
for distinct a field values and counts for distinct b field
|
||||
values separately.
|
||||
|
||||
================================================================
|
||||
Usage: mlr count [options]
|
||||
Prints number of records, optionally grouped by distinct values for specified field names.
|
||||
|
|
@ -79,6 +86,7 @@ Options:
|
|||
-n {n} Show only the number of distinct values. Not interesting without -g.
|
||||
-o {name} Field name for output-count. Default "count".
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr count-similar [options]
|
||||
Ingests all records, then emits each record augmented by a count of
|
||||
|
|
@ -87,6 +95,7 @@ Options:
|
|||
-g {a,b,c} Group-by-field names for counts, e.g. a,b,c
|
||||
-o {name} Field name for output-counts. Defaults to "count".
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr cut [options]
|
||||
Passes through input records with specified fields included/excluded.
|
||||
|
|
@ -106,6 +115,7 @@ Examples:
|
|||
mlr cut -r -f '^status$,sda[0-9]'
|
||||
mlr cut -r -f '^status$,"sda[0-9]"'
|
||||
mlr cut -r -f '^status$,"sda[0-9]"i' (this is case-insensitive)
|
||||
|
||||
================================================================
|
||||
Usage: mlr decimate [options]
|
||||
Passes through one of every n records, optionally by category.
|
||||
|
|
@ -115,6 +125,7 @@ Options:
|
|||
-g {a,b,c} Optional group-by-field names for decimate counts, e.g. a,b,c.
|
||||
-n {n} Decimation factor (default 10).
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr fill-down [options]
|
||||
If a given record has a missing value for a given field, fill that from
|
||||
|
|
@ -130,11 +141,13 @@ Options:
|
|||
With -a, a field is 'missing' only if it is absent.
|
||||
-f Field names for fill-down.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr fill-empty [options]
|
||||
Fills empty-string fields with specified fill-value.
|
||||
Options:
|
||||
-v {string} Fill-value: defaults to "N/A"
|
||||
|
||||
================================================================
|
||||
Usage: mlr filter [options] {DSL expression}
|
||||
Options:
|
||||
|
|
@ -188,6 +201,7 @@ Parser-info options:
|
|||
|
||||
-X Exit after parsing but before stream-processing. Useful with -v/-d/-D, if you
|
||||
only want to look at parser information.
|
||||
|
||||
================================================================
|
||||
Usage: mlr flatten [options]
|
||||
Flattens multi-level maps to single-level ones. Example: field with name 'a'
|
||||
|
|
@ -196,6 +210,7 @@ Options:
|
|||
-f Comma-separated list of field names to flatten (default all).
|
||||
-s Separator, defaulting to mlr --oflatsep value.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr format-values [options]
|
||||
Applies format strings to all field values, depending on autodetected type.
|
||||
|
|
@ -226,6 +241,7 @@ Options:
|
|||
with s in them. Undefined behavior results otherwise.
|
||||
-n Coerce field values autodetected as int to float, and then
|
||||
apply the float format.
|
||||
|
||||
================================================================
|
||||
Usage: mlr fraction [options]
|
||||
For each record's value in specified fields, computes the ratio of that
|
||||
|
|
@ -247,6 +263,7 @@ Options:
|
|||
E.g. with input records x=1 x=2 x=3 and x=4, emits output records
|
||||
x=1,x_cumulative_fraction=0.1 x=2,x_cumulative_fraction=0.3
|
||||
x=3,x_cumulative_fraction=0.6 and x=4,x_cumulative_fraction=1.0
|
||||
|
||||
================================================================
|
||||
Usage: mlr gap [options]
|
||||
Emits an empty record every n records, or when certain values change.
|
||||
|
|
@ -257,6 +274,7 @@ Emits an empty record every n records, or when certain values change.
|
|||
One of -f or -g is required.
|
||||
-n is ignored if -g is present.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr grep [options] {regular expression}
|
||||
Passes through records which match the regular expression.
|
||||
|
|
@ -275,14 +293,17 @@ be matched, not against either of these lines, but against the DKVP line
|
|||
and this command is intended to be merely a keystroke-saver. To get all the
|
||||
features of system grep, you can do
|
||||
"mlr --odkvp ... | grep ... | mlr --idkvp ..."
|
||||
|
||||
================================================================
|
||||
Usage: mlr group-by [options] {comma-separated field names}
|
||||
Outputs records in batches having identical values at specified field names.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr group-like [options]
|
||||
Outputs records in batches having identical field names.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr having-fields [options]
|
||||
Conditionally passes through records depending on each record's field names.
|
||||
|
|
@ -298,6 +319,7 @@ Examples:
|
|||
mlr having-fields --any-matching 'sda[0-9]'
|
||||
mlr having-fields --any-matching '"sda[0-9]"'
|
||||
mlr having-fields --any-matching '"sda[0-9]"i' (this is case-insensitive)
|
||||
|
||||
================================================================
|
||||
Usage: mlr head [options]
|
||||
Passes through the first n records, optionally by category.
|
||||
|
|
@ -305,6 +327,7 @@ Options:
|
|||
-g {a,b,c} Optional group-by-field names for head counts, e.g. a,b,c.
|
||||
-n {n} Head-count to print. Default 10.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Just a histogram. Input values < lo or > hi are not counted.
|
||||
Usage: mlr histogram [options]
|
||||
|
|
@ -316,12 +339,14 @@ Usage: mlr histogram [options]
|
|||
Holds all values in memory before producing any output.
|
||||
-o {prefix} Prefix for output field name. Default: no prefix.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr json-parse [options]
|
||||
Tries to convert string field values to parsed JSON, e.g. "[1,2,3]" -> [1,2,3].
|
||||
Options:
|
||||
-f {...} Comma-separated list of field names to json-parse (default all).
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr json-stringify [options]
|
||||
Produces string field values from field-value data, e.g. [1,2,3] -> "[1,2,3]".
|
||||
|
|
@ -330,6 +355,7 @@ Options:
|
|||
--jvstack Produce multi-line JSON output.
|
||||
--no-jvstack Produce single-line JSON output per record (default).
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr join [options]
|
||||
Joins records from specified left file name with records from all file names
|
||||
|
|
@ -378,6 +404,7 @@ expected to be headerless as well unless you put '--no-implicit-csv-header' afte
|
|||
Please use "mlr --usage-separator-options" for information on specifying separators.
|
||||
Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information
|
||||
including examples.
|
||||
|
||||
================================================================
|
||||
Usage: mlr label [options] {new1,new2,new3,...}
|
||||
Given n comma-separated names, renames the first n fields of each record to
|
||||
|
|
@ -387,6 +414,7 @@ useful names to otherwise integer-indexed fields.
|
|||
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr least-frequent [options]
|
||||
Shows the least frequently occurring distinct values for specified field names.
|
||||
|
|
@ -397,6 +425,7 @@ Options:
|
|||
-b Suppress counts; show only field values.
|
||||
-o {name} Field name for output count. Default "count".
|
||||
See also "mlr most-frequent".
|
||||
|
||||
================================================================
|
||||
Usage: mlr merge-fields [options]
|
||||
Computes univariate statistics for each input record, accumulated across
|
||||
|
|
@ -441,6 +470,7 @@ Example: mlr merge-fields -a sum,count -c in_,out_
|
|||
produces "a_x_sum=3,a_x_count=2,b_y_sum=4,b_y_count=1,b_x_sum=8,b_x_count=1"
|
||||
since "a_in_x" and "a_out_x" both collapse to "a_x", "b_in_y" collapses to
|
||||
"b_y", and "b_out_x" collapses to "b_x".
|
||||
|
||||
================================================================
|
||||
Usage: mlr most-frequent [options]
|
||||
Shows the most frequently occurring distinct values for specified field names.
|
||||
|
|
@ -451,6 +481,7 @@ Options:
|
|||
-b Suppress counts; show only field values.
|
||||
-o {name} Field name for output count. Default "count".
|
||||
See also "mlr least-frequent".
|
||||
|
||||
================================================================
|
||||
Usage: mlr nest [options]
|
||||
Explodes specified field values into separate fields/records, or reverses this.
|
||||
|
|
@ -499,12 +530,14 @@ Notes:
|
|||
* It's up to you to ensure that the nested-fs is distinct from your data's IFS:
|
||||
e.g. by default the former is semicolon and the latter is comma.
|
||||
See also mlr reshape.
|
||||
|
||||
================================================================
|
||||
Usage: mlr nothing [options]
|
||||
Drops all input records. Useful for testing, or after tee/print/etc. have
|
||||
produced other output.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr put [options] {DSL expression}
|
||||
Options:
|
||||
|
|
@ -558,15 +591,18 @@ Parser-info options:
|
|||
|
||||
-X Exit after parsing but before stream-processing. Useful with -v/-d/-D, if you
|
||||
only want to look at parser information.
|
||||
|
||||
================================================================
|
||||
Usage: mlr regularize [options]
|
||||
Outputs records sorted lexically ascending by keys.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr remove-empty-columns [options]
|
||||
Omits fields which are empty on every input row. Non-streaming.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr rename [options] {old1,new1,old2,new2,...}
|
||||
Renames specified fields.
|
||||
|
|
@ -589,6 +625,7 @@ mlr rename -r 'Date_[0-9]+,Date,' Rename all such fields to be "Date"
|
|||
mlr rename -r '"Date_[0-9]+",Date' Same
|
||||
mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015
|
||||
mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name"
|
||||
|
||||
================================================================
|
||||
Usage: mlr reorder [options]
|
||||
Moves specified names to start of record, or end of record.
|
||||
|
|
@ -606,6 +643,7 @@ Options:
|
|||
Examples:
|
||||
mlr reorder -f a,b sends input record "d=4,b=2,a=1,c=3" to "a=1,b=2,d=4,c=3".
|
||||
mlr reorder -e -f a,b sends input record "d=4,b=2,a=1,c=3" to "d=4,c=3,a=1,b=2".
|
||||
|
||||
================================================================
|
||||
Usage: mlr repeat [options]
|
||||
Copies input records to output records multiple times.
|
||||
|
|
@ -632,6 +670,7 @@ produces:
|
|||
a=1,b=2,c=3
|
||||
a=1,b=2,c=3
|
||||
a=1,b=2,c=3
|
||||
|
||||
================================================================
|
||||
Usage: mlr reshape [options]
|
||||
Wide-to-long options:
|
||||
|
|
@ -688,6 +727,7 @@ Examples:
|
|||
2009-01-02 -0.89248112 0.2154713
|
||||
2009-01-03 0.98012375 1.3179287
|
||||
See also mlr nest.
|
||||
|
||||
================================================================
|
||||
Usage: mlr sample [options]
|
||||
Reservoir sampling (subsampling without replacement), optionally by category.
|
||||
|
|
@ -696,6 +736,7 @@ Options:
|
|||
-g {a,b,c} Optional: group-by-field names for samples, e.g. a,b,c.
|
||||
-k {k} Required: number of records to output in total, or by group if using -g.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr sec2gmtdate {comma-separated list of field names}
|
||||
Replaces a numeric field representing seconds since the epoch with the
|
||||
|
|
@ -704,6 +745,7 @@ This is nothing more than a keystroke-saver for the sec2gmtdate function:
|
|||
../c/mlr sec2gmtdate time1,time2
|
||||
is the same as
|
||||
../c/mlr put '$time1=sec2gmtdate($time1);$time2=sec2gmtdate($time2)'
|
||||
|
||||
================================================================
|
||||
Usage: mlr sec2gmt [options] {comma-separated list of field names}
|
||||
Replaces a numeric field representing seconds since the epoch with the
|
||||
|
|
@ -718,6 +760,7 @@ Options:
|
|||
--micros Input numbers are treated as microseconds since the epoch.
|
||||
--nanos Input numbers are treated as nanoseconds since the epoch.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr seqgen [options]
|
||||
Passes input records directly to output. Most useful for format conversion.
|
||||
|
|
@ -733,18 +776,21 @@ Options:
|
|||
Start, stop, and/or step may be floating-point. Output is integer if start,
|
||||
stop, and step are all integers. Step may be negative. It may not be zero
|
||||
unless start == stop.
|
||||
|
||||
================================================================
|
||||
Usage: mlr shuffle [options]
|
||||
Outputs records randomly permuted. No output records are produced until
|
||||
all input records are read. See also mlr bootstrap and mlr sample.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr skip-trivial-records [options]
|
||||
Passes through all records except those with zero fields,
|
||||
or those for which all fields have empty value.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr sort {flags}
|
||||
Sorts records primarily by the first specified field, secondarily by the second
|
||||
|
|
@ -765,12 +811,14 @@ Example:
|
|||
mlr sort -f a,b -nr x,y,z
|
||||
which is the same as:
|
||||
mlr sort -f a -f b -nr x -nr y -nr z
|
||||
|
||||
================================================================
|
||||
Usage: mlr sort-within-records [options]
|
||||
Outputs records sorted lexically ascending by keys.
|
||||
Options:
|
||||
-r Recursively sort subobjects/submaps, e.g. for JSON input.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr stats1 [options]
|
||||
Computes univariate statistics for one or more given fields, accumulated across
|
||||
|
|
@ -824,6 +872,7 @@ Notes:
|
|||
* count and mode allow text input; the rest require numeric input.
|
||||
In particular, 1 and 1.0 are distinct text for count and mode.
|
||||
* When there are mode ties, the first-encountered datum wins.
|
||||
|
||||
================================================================
|
||||
Usage: mlr stats2 [options]
|
||||
Computes bivariate statistics for one or more given field-name pairs,
|
||||
|
|
@ -851,6 +900,7 @@ Only one of -s or --fit may be used.
|
|||
Example: mlr stats2 -a linreg-pca -f x,y
|
||||
Example: mlr stats2 -a linreg-ols,r2 -f x,y -g size,shape
|
||||
Example: mlr stats2 -a corr -f x,y
|
||||
|
||||
================================================================
|
||||
Usage: mlr step [options]
|
||||
Computes values dependent on the previous record, optionally grouped by category.
|
||||
|
|
@ -889,11 +939,13 @@ Examples:
|
|||
Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter or
|
||||
https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
|
||||
for more information on EWMA.
|
||||
|
||||
================================================================
|
||||
Usage: mlr tac [options]
|
||||
Prints records in reverse order from the order in which they were encountered.
|
||||
Options:
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr tail [options]
|
||||
Passes through the last n records, optionally by category.
|
||||
|
|
@ -901,6 +953,7 @@ Options:
|
|||
-g {a,b,c} Optional group-by-field names for head counts, e.g. a,b,c.
|
||||
-n {n} Head-count to print. Default 10.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr tee [options] {filename}
|
||||
Options:
|
||||
|
|
@ -912,6 +965,7 @@ the input is CSV, the output is pretty-print tabular, but the tee-file output
|
|||
is written in JSON format.
|
||||
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr top [options]
|
||||
-f {a,b,c} Value-field names for top counts.
|
||||
|
|
@ -925,6 +979,7 @@ Usage: mlr top [options]
|
|||
-o {name} Field name for output indices. Default "top_idx".
|
||||
Prints the n records with smallest/largest values at specified fields,
|
||||
optionally by category.
|
||||
|
||||
================================================================
|
||||
Usage: mlr unflatten [options]
|
||||
Reverses flatten. Example: field with name 'a.b.c' and value 4
|
||||
|
|
@ -933,6 +988,7 @@ Options:
|
|||
-f {a,b,c} Comma-separated list of field names to unflatten (default all).
|
||||
-s {string} Separator, defaulting to mlr --oflatsep value.
|
||||
-h|--help Show this message.
|
||||
|
||||
================================================================
|
||||
Usage: mlr uniq [options]
|
||||
Prints distinct values for specified field names. With -c, same as
|
||||
|
|
@ -947,6 +1003,7 @@ Options:
|
|||
With -c, produces unique records, with repeat counts for each.
|
||||
With -n, produces only one record which is the unique-record count.
|
||||
With neither -c nor -n, produces unique records.
|
||||
|
||||
================================================================
|
||||
Usage: mlr unsparsify [options]
|
||||
Prints records with the union of field names over all input records.
|
||||
|
|
|
|||
|
|
@ -9,18 +9,17 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
|
||||
"miller/src/auxents/help"
|
||||
"miller/src/auxents/regtest"
|
||||
"miller/src/auxents/repl"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
type tAuxMain func(args []string) int
|
||||
type tAuxUsage func(verbName string, o *os.File, exitCode int)
|
||||
|
||||
type tAuxLookupEntry struct {
|
||||
name string
|
||||
main tAuxMain
|
||||
usage tAuxUsage
|
||||
name string
|
||||
main tAuxMain
|
||||
}
|
||||
|
||||
// We get a Golang "initialization loop" if this is defined statically. So, we
|
||||
|
|
@ -29,13 +28,14 @@ var _AUX_LOOKUP_TABLE = []tAuxLookupEntry{}
|
|||
|
||||
func init() {
|
||||
_AUX_LOOKUP_TABLE = []tAuxLookupEntry{
|
||||
{"aux-list", auxListMain, auxListUsage},
|
||||
{"hex", hexMain, hexUsage},
|
||||
{"lecat", lecatMain, lecatUsage},
|
||||
{"regtest", regtest.RegTestMain, regtest.RegTestUsage},
|
||||
{"repl", repl.ReplMain, repl.ReplUsage},
|
||||
{"termcvt", termcvtMain, termcvtUsage},
|
||||
{"unhex", unhexMain, unhexUsage},
|
||||
{"aux-list", auxListMain},
|
||||
{"hex", hexMain},
|
||||
{"lecat", lecatMain},
|
||||
{"termcvt", termcvtMain},
|
||||
{"unhex", unhexMain},
|
||||
{"help", help.HelpMain},
|
||||
{"regtest", regtest.RegTestMain},
|
||||
{"repl", repl.ReplMain},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
715
go/src/auxents/help/entry.go
Normal file
715
go/src/auxents/help/entry.go
Normal file
|
|
@ -0,0 +1,715 @@
|
|||
// ================================================================
|
||||
// TODO: comment
|
||||
// ================================================================
|
||||
|
||||
package help
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/dsl/cst"
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers"
|
||||
)
|
||||
|
||||
// ================================================================
|
||||
// Handler signatures and lookup-table row types for the help subsystem.
type (
	// tZaryHandlerFunc is a help handler which takes no arguments, such as
	// the one behind 'mlr help topics'.
	tZaryHandlerFunc func()

	// tUnaryHandlerFunc is a help handler which is invoked once for each
	// argument following the topic name, such as the one behind
	// 'mlr help verb ...'.
	tUnaryHandlerFunc func(arg string)

	// shorthandInfo pairs a main-flag shorthand (e.g. "-l") with the name of
	// the help topic it stands for (e.g. "list-verbs").
	shorthandInfo struct {
		shorthand string
		longhand  string
	}

	// handlerInfo is one row of the help-topic table. The entries in this
	// file set exactly one of the two handler fields per topic.
	handlerInfo struct {
		name             string
		zaryHandlerFunc  tZaryHandlerFunc
		unaryHandlerFunc tUnaryHandlerFunc
	}
)
|
||||
|
||||
// We get a Golang "initialization loop" if this is defined statically. So, we
|
||||
// use a "package init" function.
|
||||
var shorthandLookupTable = []shorthandInfo{}
|
||||
var handlerLookupTable = []handlerInfo{}
|
||||
|
||||
func init() {
|
||||
// For things like 'mlr -F', invoked through the CLI parser which does not
|
||||
// go through our HelpMain().
|
||||
shorthandLookupTable = []shorthandInfo{
|
||||
// TODO: remove handler func & replace with just short/long
|
||||
{shorthand: "-l", longhand: "list-verbs"},
|
||||
{shorthand: "-L", longhand: "list-verbs-vertically"},
|
||||
{shorthand: "-f", longhand: "usage-functions"},
|
||||
{shorthand: "-F", longhand: "list-functions"},
|
||||
{shorthand: "-k", longhand: "usage-keywords"},
|
||||
{shorthand: "-K", longhand: "list-keywords"},
|
||||
}
|
||||
|
||||
// For things like 'mlr help foo', invoked through the auxent framework
|
||||
// which goes through our HelpMain().
|
||||
handlerLookupTable = []handlerInfo{
|
||||
{name: "topics", zaryHandlerFunc: listTopics},
|
||||
{name: "auxents", zaryHandlerFunc: helpAuxents},
|
||||
{name: "comments-in-data", zaryHandlerFunc: helpCommentsInData},
|
||||
{name: "compressed-data", zaryHandlerFunc: helpCompressedDataOptions},
|
||||
{name: "csv-options", zaryHandlerFunc: helpCSVOptions},
|
||||
{name: "data-format-options", zaryHandlerFunc: helpDataFormatOptions},
|
||||
{name: "data-formats", zaryHandlerFunc: helpDataFormats},
|
||||
{name: "double-quoting", zaryHandlerFunc: helpDoubleQuoting},
|
||||
{name: "format-conversion", zaryHandlerFunc: helpFormatConversionKeystrokeSaverOptions},
|
||||
{name: "function", unaryHandlerFunc: helpForFunction},
|
||||
{name: "keyword", unaryHandlerFunc: helpForKeyword},
|
||||
{name: "list-functions", zaryHandlerFunc: listFunctions},
|
||||
{name: "list-functions-vertically", zaryHandlerFunc: listFunctionsVertically},
|
||||
{name: "list-keywords", zaryHandlerFunc: listKeywords},
|
||||
{name: "list-verbs", zaryHandlerFunc: listVerbsAsParagraph},
|
||||
{name: "list-verbs-vertically", zaryHandlerFunc: listVerbsVertically},
|
||||
{name: "misc", zaryHandlerFunc: helpMiscOptions},
|
||||
{name: "mlrrc", zaryHandlerFunc: helpMlrrc},
|
||||
{name: "number-formatting", zaryHandlerFunc: helpNumberFormatting},
|
||||
{name: "output-colorizations", zaryHandlerFunc: helpOutputColorization},
|
||||
{name: "separator-options", zaryHandlerFunc: helpSeparatorOptions},
|
||||
{name: "usage-functions", zaryHandlerFunc: usageFunctions},
|
||||
{name: "usage-keywords", zaryHandlerFunc: usageKeywords},
|
||||
{name: "usage-verbs", zaryHandlerFunc: usageVerbs},
|
||||
{name: "verb", unaryHandlerFunc: helpForVerb},
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: keywords listed as paragraph
|
||||
// TODO: search for function/keyword/verb/etc like in the REPL
|
||||
|
||||
// TODO:
|
||||
// function-list as paragraph (for manpage)
|
||||
// type-arithmetic-info printTypeArithmeticInfo(os.Stdout, lib.MlrExeName());
|
||||
|
||||
// ================================================================
|
||||
// For things like 'mlr help foo', invoked through the auxent framework which
|
||||
// goes through our HelpMain(). Here, the args are the full Miller command
|
||||
// line: "mlr help foo bar".
|
||||
func HelpMain(args []string) int {
|
||||
args = args[2:]
|
||||
|
||||
// "mlr help" and nothing else
|
||||
if len(args) == 0 {
|
||||
handleDefault()
|
||||
return 0
|
||||
}
|
||||
|
||||
// "mlr help something" where we recognize the something
|
||||
name := args[0]
|
||||
for _, info := range handlerLookupTable {
|
||||
if info.name == name {
|
||||
if info.zaryHandlerFunc != nil {
|
||||
if len(args) != 1 {
|
||||
fmt.Printf("mlr help %s takes no additional argument.\n", name)
|
||||
return 0
|
||||
}
|
||||
info.zaryHandlerFunc()
|
||||
return 0
|
||||
}
|
||||
if info.unaryHandlerFunc != nil {
|
||||
if len(args) < 2 {
|
||||
fmt.Printf("mlr help %s takes at least one required argument.\n", name)
|
||||
return 0
|
||||
}
|
||||
for _, arg := range args[1:] {
|
||||
info.unaryHandlerFunc(arg)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: free-ranging keyword/function/verb/etc search as in mlr repl.
|
||||
|
||||
// "mlr help something" where we do not recognize the something
|
||||
listTopics()
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func MainUsage(o *os.File) {
|
||||
fmt.Fprintf(o,
|
||||
`Usage: mlr [I/O options] {verb} [verb-dependent options ...] {zero or more file names}
|
||||
Output of one verb may be chained as input to another using "then", e.g.
|
||||
mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color
|
||||
Please see 'mlr help topics' for more information.
|
||||
`)
|
||||
fmt.Fprintf(o, "Please also see %s\n", lib.DOC_URL)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// For things like 'mlr -F', invoked through the CLI parser which does not
|
||||
// go through our HelpMain().
|
||||
func ParseTerminalUsage(arg string) bool {
|
||||
if arg == "-h" || arg == "--help" {
|
||||
handleDefault()
|
||||
return true
|
||||
}
|
||||
// "mlr -l" is shorthand for "mlr help list-verbs", etc.
|
||||
for _, sinfo := range shorthandLookupTable {
|
||||
if sinfo.shorthand == arg {
|
||||
for _, info := range handlerLookupTable {
|
||||
if info.name == sinfo.longhand {
|
||||
info.zaryHandlerFunc()
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
// handleDefault prints the main usage synopsis to standard output. It is
// used for 'mlr help' with no topic, and for 'mlr -h' / 'mlr --help'.
func handleDefault() {
|
||||
MainUsage(os.Stdout)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func listTopics() {
|
||||
fmt.Println("Type 'mlr help {topic}' for any of the following:")
|
||||
for _, info := range handlerLookupTable {
|
||||
fmt.Printf(" mlr help %s\n", info.name)
|
||||
}
|
||||
fmt.Println("Shorthands:")
|
||||
for _, info := range shorthandLookupTable {
|
||||
fmt.Printf(" mlr %s = mlr help %s\n", info.shorthand, info.longhand)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpAuxents prints a short pointer to 'mlr aux-list'.
//
// The list itself is not printed from here: calling auxents.ShowAuxEntries
// would need an import of miller/src/auxents, and that is an import cycle.
func helpAuxents() {
	const blurb = `Miller has a few otherwise-standalone executables packaged within it.
They do not participate in any other parts of Miller.
Please "mlr aux-list" for more information.
`
	fmt.Print(blurb)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpCommentsInData() {
|
||||
fmt.Printf(
|
||||
`--skip-comments Ignore commented lines (prefixed by "%s")
|
||||
within the input.
|
||||
--skip-comments-with {string} Ignore commented lines within input, with
|
||||
specified prefix.
|
||||
--pass-comments Immediately print commented lines (prefixed by "%s")
|
||||
within the input.
|
||||
--pass-comments-with {string} Immediately print commented lines within input, with
|
||||
specified prefix.
|
||||
|
||||
Notes:
|
||||
* Comments are only honored at the start of a line.
|
||||
* In the absence of any of the above four options, comments are data like
|
||||
any other text.
|
||||
* When pass-comments is used, comment lines are written to standard output
|
||||
immediately upon being read; they are not part of the record stream. Results
|
||||
may be counterintuitive. A suggestion is to place comments at the start of
|
||||
data files.
|
||||
`,
|
||||
cliutil.DEFAULT_COMMENT_STRING,
|
||||
cliutil.DEFAULT_COMMENT_STRING)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpCompressedDataOptions prints help for reading compressed input, both
// via in-process decompression (--gzin, --bz2in, --zin) and via external
// commands (--prepipe, --prepipex).
func helpCompressedDataOptions() {
	fmt.Print(`Decompression done within the Miller process itself:
--gzin Uncompress gzip within the Miller process. Done by default if file ends in ".gz".
--bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in ".bz2".
--zin Uncompress zlib within the Miller process. Done by default if file ends in ".z".

Decompression done outside the Miller process:
--prepipe {command} You can, of course, already do without this for single input files,
e.g. "gunzip < myfile.csv.gz | mlr ..."
--prepipex {command} Like --prepipe with one exception: doesn't insert '<' between
command and filename at runtime. Useful for some commands like 'unzip -qc'
which don't read standard input.

Using --prepipe and --prepipex you can specify an action to be taken on each
input file. This prepipe command must be able to read from standard input; it
will be invoked with {command} < {filename}.

Examples:
mlr --prepipe gunzip
mlr --prepipe zcat -cf
mlr --prepipe xz -cd
mlr --prepipe cat

Note that this feature is quite general and is not limited to decompression
utilities. You can use it to apply per-file filters of your choice. For output
compression (or other) utilities, simply pipe the output:
mlr ... | {your compression command} > outputfilenamegoeshere

Lastly, note that if --prepipe or --prepipex is specified, it replaces any
decisions that might have been made based on the file suffix. Also,
--gzin/--bz2in/--zin are ignored if --prepipe is also specified.
`)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpCSVOptions prints help for the CSV-specific flags: implicit and
// headerless headers, ragged input, and the -N keystroke-saver.
func helpCSVOptions() {
	const text = ` --implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1
of input files. Tip: combine with "label" to recreate
missing headers.
--no-implicit-csv-header Do not use --implicit-csv-header. This is the default
anyway -- the main use is for the flags to 'mlr join' if you have
main file(s) which are headerless but you want to join in on
a file which does have a CSV header. Then you could use
'mlr --csv --implicit-csv-header join --no-implicit-csv-header
-l your-join-in-with-header.csv ... your-headerless.csv'
--allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,
fill remaining keys with empty string. If a data line has more
fields than the header line, use integer field labels as in
the implicit-header case.
--headerless-csv-output Print only CSV data lines.
-N Keystroke-saver for --implicit-csv-header --headerless-csv-output.
`
	fmt.Print(text)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpDataFormats prints a worked example of each file format Miller
// understands, showing how the text on the left maps to records on the right.
func helpDataFormats() {
	// The text is printed verbatim: fmt.Print is used rather than fmt.Printf
	// since there are no formatting arguments.
	fmt.Print(
		`CSV/CSV-lite: comma-separated values with separate header line
TSV: same but with tabs in places of commas
+---------------------+
| apple,bat,cog |
| 1,2,3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+---------------------+

JSON (sequence or array of objects):
+---------------------+
| { |
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| "bat": 2, |
| "cog": 3 |
| } |
| { |
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
| "egg": 7, |
| "flint": 8 |
| }, |
| "garlic": "" |
| } |
+---------------------+

PPRINT: pretty-printed tabular
+---------------------+
| apple bat cog |
| 1 2 3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+---------------------+

Markdown tabular (supported for output only):
+-----------------------+
| | apple | bat | cog | |
| | --- | --- | --- | |
| | 1 | 2 | 3 | | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+-----------------------+

XTAB: pretty-printed transposed tabular
+---------------------+
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| bat 2 |
| cog 3 |
| |
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
| egg 8 |
+---------------------+

DKVP: delimited key-value pairs (Miller default format)
+---------------------+
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
+---------------------+

NIDX: implicitly numerically indexed (Unix-toolkit style)
+---------------------+
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
+---------------------+
`)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpDataFormatOptions() {
|
||||
fmt.Printf(
|
||||
`--idkvp --odkvp --dkvp Delimited key-value pairs, e.g "a=1,b=2"
|
||||
(Miller's default format).
|
||||
|
||||
--inidx --onidx --nidx Implicitly-integer-indexed fields (Unix-toolkit style).
|
||||
-T Synonymous with "--nidx --fs tab".
|
||||
|
||||
--icsv --ocsv --csv Comma-separated value (or tab-separated with --fs tab, etc.)
|
||||
|
||||
--itsv --otsv --tsv Keystroke-savers for "--icsv --ifs tab",
|
||||
"--ocsv --ofs tab", "--csv --fs tab".
|
||||
--iasv --oasv --asv Similar but using ASCII FS %s and RS %s\n",
|
||||
--iusv --ousv --usv Similar but using Unicode FS %s\n",
|
||||
and RS %s\n",
|
||||
|
||||
--icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated with --fs tab, etc.).
|
||||
The 'lite' CSV does not handle RFC-CSV double-quoting rules; is
|
||||
slightly faster and handles heterogeneity in the input stream via
|
||||
empty newline followed by new header line. See also
|
||||
%s/file-formats.html#csv-tsv-asv-usv-etc
|
||||
|
||||
--itsvlite --otsvlite --tsvlite Keystroke-savers for "--icsvlite --ifs tab",
|
||||
"--ocsvlite --ofs tab", "--csvlite --fs tab".
|
||||
-t Synonymous with --tsvlite.
|
||||
--iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS %s and RS %s\n",
|
||||
--iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS %s\n",
|
||||
and RS %s\n",
|
||||
|
||||
--ipprint --opprint --pprint Pretty-printed tabular (produces no
|
||||
output until all input is in).
|
||||
--right Right-justifies all fields for PPRINT output.
|
||||
--barred Prints a border around PPRINT output
|
||||
(only available for output).
|
||||
|
||||
--omd Markdown-tabular (only available for output).
|
||||
|
||||
--ixtab --oxtab --xtab Pretty-printed vertical-tabular.
|
||||
--xvright Right-justifies values for XTAB format.
|
||||
|
||||
--ijson --ojson --json JSON tabular: sequence or list of one-level
|
||||
maps: {...}{...} or [{...},{...}].
|
||||
--jvstack Put one key-value pair per line for JSON output.
|
||||
--no-jvstack Put objects/arrays all on one line for JSON output.
|
||||
--jsonx --ojsonx Keystroke-savers for --json --jvstack
|
||||
--jsonx --ojsonx and --ojson --jvstack, respectively.
|
||||
--jlistwrap Wrap JSON output in outermost [ ].
|
||||
--oflatsep {string} Separator for flattening multi-level JSON keys,
|
||||
e.g. '{"a":{"b":3}}' becomes a:b => 3 for
|
||||
non-JSON formats. Defaults to %s.\n",
|
||||
|
||||
-p is a keystroke-saver for --nidx --fs space --repifs
|
||||
|
||||
Examples: --csv for CSV-formatted input and output; --icsv --opprint for
|
||||
CSV-formatted input and pretty-printed output.
|
||||
|
||||
Please use --iformat1 --oformat2 rather than --format1 --oformat2.
|
||||
The latter sets up input and output flags for format1, not all of which
|
||||
are overridden in all cases by setting output format to format2.`,
|
||||
|
||||
cliutil.ASV_FS_FOR_HELP,
|
||||
cliutil.ASV_RS_FOR_HELP,
|
||||
cliutil.USV_FS_FOR_HELP,
|
||||
cliutil.USV_RS_FOR_HELP,
|
||||
lib.DOC_URL,
|
||||
cliutil.ASV_FS_FOR_HELP,
|
||||
cliutil.ASV_RS_FOR_HELP,
|
||||
cliutil.USV_FS_FOR_HELP,
|
||||
cliutil.USV_RS_FOR_HELP,
|
||||
cliutil.DEFAULT_JSON_FLATTEN_SEPARATOR,
|
||||
)
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// TBD FOR MILLER 6:
|
||||
|
||||
// helpDoubleQuoting prints help for the --quote-* output flags. The text is
// flagged in its own first line as work in progress for Miller 6.
func helpDoubleQuoting() {
	// None of these lines contains a formatting verb, so they are printed
	// as-is, one after another.
	lines := []string{
		"THIS IS STILL WIP FOR MILLER 6\n",
		" --quote-all Wrap all fields in double quotes\n",
		" --quote-none Do not wrap any fields in double quotes, even if they have\n",
		" OFS or ORS in them\n",
		" --quote-minimal Wrap fields in double quotes only if they have OFS or ORS\n",
		" in them (default)\n",
		" --quote-numeric Wrap fields in double quotes only if they have numbers\n",
		" in them\n",
		" --quote-original Wrap fields in double quotes if and only if they were\n",
		" quoted on input. This isn't sticky for computed fields:\n",
		" e.g. if fields a and b were quoted on input and you do\n",
		" \"put '$c = $a . $b'\" then field c won't inherit a or b's\n",
		" was-quoted-on-input flag.\n",
	}
	for _, line := range lines {
		fmt.Print(line)
	}
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpFormatConversionKeystrokeSaverOptions prints the table of two-letter
// format-conversion flags such as --c2p, along with the key to the letters.
func helpFormatConversionKeystrokeSaverOptions() {
	const text = `As keystroke-savers for format-conversion you may use the following:
--c2t --c2d --c2n --c2j --c2x --c2p --c2m
--t2c --t2d --t2n --t2j --t2x --t2p --t2m
--d2c --d2t --d2n --d2j --d2x --d2p --d2m
--n2c --n2t --n2d --n2j --n2x --n2p --n2m
--j2c --j2t --j2d --j2n --j2x --j2p --j2m
--x2c --x2t --x2d --x2n --x2j --x2p --x2m
--p2c --p2t --p2d --p2n --p2j --p2x --p2m
The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,
PPRINT, and markdown, respectively. Note that markdown format is available for
output only.
`
	fmt.Print(text)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpMiscOptions prints help for main flags which do not belong to any
// other topic: --seed, --nr-progress-mod, --from/--mfrom, --load/--mload,
// -n, and -I.
func helpMiscOptions() {
	// The text is printed verbatim: fmt.Print is used rather than fmt.Printf
	// since there are no formatting arguments.
	fmt.Print(` --seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter
urand()/urandint()/urand32().
--nr-progress-mod {m}, with m a positive integer: print filename and record
count to os.Stderr every m input records.
--from {filename} Use this to specify an input file before the verb(s),
rather than after. May be used more than once. Example:
"mlr --from a.dat --from b.dat cat" is the same as
"mlr cat a.dat b.dat".
--mfrom {filenames} -- Use this to specify one or more input files before the verb(s),
rather than after. May be used more than once.
The list of filenames must end with "--". This is useful
for example since "--from *.csv" doesn't do what you might
hope but "--mfrom *.csv --" does.
--load {filename} Load DSL script file for all put/filter operations on the command line.
If the name following --load is a directory, load all "*.mlr" files
in that directory. This is just like "put -f" and "filter -f"
except it's up-front on the command line, so you can do something like
alias mlr='mlr --load ~/myscripts' if you like.
--mload {names} -- Like --load but works with more than one filename,
e.g. '--mload *.mlr --'.
-n Process no input files, nor standard input either. Useful
for mlr put with begin/end statements only. (Same as --from
/dev/null.) Also useful in "mlr -n put -v '...'" for
analyzing abstract syntax trees (if that's your thing).
-I Process files in-place. For each file name on the command
line, output is written to a temp file in the same
directory, which is then renamed over the original. Each
file is processed in isolation: if the output format is
CSV, CSV headers will be present in each output file;
statistics are only over each file's own records; and so on.
`)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpMlrrc prints help for the .mlrrc defaults file: its format, a sample,
// and the rules by which $MLRRC, $HOME/.mlrrc, and ./.mlrrc are consulted.
func helpMlrrc() {
	const text = `You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.
For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file
and that will be the default input/output format unless otherwise specified on the command line.

The .mlrrc file format is one "--flag" or "--option value" per line, with the leading "--" optional.
Hash-style comments and blank lines are ignored.

Sample .mlrrc:
# Input and output formats are CSV by default (unless otherwise specified
# on the mlr command line):
csv
# These are no-ops for CSV, but when I do use JSON output, I want these
# pretty-printing options to be used:
jvstack
jlistwrap

How to specify location of .mlrrc:
* If $MLRRC is set:
o If its value is "__none__" then no .mlrrc files are processed.
o Otherwise, its value (as a filename) is loaded and processed. If there are syntax
errors, they abort mlr with a usage message (as if you had mistyped something on the
command line). If the file can't be loaded at all, though, it is silently skipped.
o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is
set in the environment.
* Otherwise:
o If $HOME/.mlrrc exists, it's then processed as above.
o If ./.mlrrc exists, it's then also processed as above.
(I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)

See also:
https://miller.readthedocs.io/en/latest/customization.html
`
	fmt.Print(text)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// helpOutputColorization prints help for output coloring: what gets colored,
// when coloring is applied by default, and the environment variables and
// command-line flags which control it.
func helpOutputColorization() {
	fmt.Print(`Things having colors:
* Keys in CSV header lines, JSON keys, etc
* Values in CSV data lines, JSON scalar values, etc
in regression-test output
* Some online-help strings

Rules for coloring:
* By default, colorize output only if writing to stdout and stdout is a TTY.
* Example: color: mlr --csv cat foo.csv
* Example: no color: mlr --csv cat foo.csv > bar.csv
* Example: no color: mlr --csv cat foo.csv | less
* The default colors were chosen since they look OK with white or black terminal background,
and are differentiable with common varieties of human color vision.

Mechanisms for coloring:
* Miller uses ANSI escape sequences only. This does not work on Windows except on Cygwin.
* Requires TERM environment variable to be set to non-empty string.
* Doesn't try to check to see whether the terminal is capable of 256-color
ANSI vs 16-color ANSI. Note that if colors are in the range 0..15
then 16-color ANSI escapes are used, so this is in the user's control.

How you can control colorization:
* Suppression/unsuppression:
* Environment variable export MLR_NO_COLOR=true means don't color even if stdout+TTY.
* Environment variable export MLR_ALWAYS_COLOR=true means do color even if not stdout+TTY.
For example, you might want to use this when piping mlr output to less -r.
* Command-line flags --no-color or -M, --always-color or -C.

* Color choices can be specified by using environment variables, or command-line flags,
with values 0..255:
* export MLR_KEY_COLOR=208, MLR_VALUE_COLOR=33, etc.:
MLR_KEY_COLOR MLR_VALUE_COLOR MLR_PASS_COLOR MLR_FAIL_COLOR
MLR_REPL_PS1_COLOR MLR_REPL_PS2_COLOR MLR_HELP_COLOR
* Command-line flags --key-color 208, --value-color 33, etc.:
--key-color --value-color --pass-color --fail-color
--repl-ps1-color --repl-ps2-color --help-color
* This is particularly useful if your terminal's background color clashes with current settings.

If environment-variable settings and command-line flags are both provided, the latter take precedence.

Please do mlr --list-colors to see the available color codes.
`)
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// TBD FOR MILLER 6:
|
||||
|
||||
// helpNumberFormatting prints the help text for the --ofmt flag to standard
// output. The text is still work in progress for Miller 6, which the first
// output line states explicitly.
func helpNumberFormatting() {
	fmt.Printf("THIS IS STILL WIP FOR MILLER 6\n")
	// The doubled percent signs are Printf escapes; the user sees e.g. "%.18f".
	fmt.Printf(" --ofmt {format} E.g. %%.18f, %%.0f, %%9.6e. Please use sprintf-style codes for\n")
	fmt.Printf(" floating-point numbers. If not specified, default formatting is used.\n")
	fmt.Printf(" See also the fmtnum function within mlr put (mlr --help-all-functions);\n")
	fmt.Printf(" see also the format-values function.\n")
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// TBD FOR MILLER 6:
|
||||
|
||||
// helpSeparatorOptions is a placeholder for the separator-options help, which
// has not yet been ported for Miller 6. It prints only a notice to standard
// output; the text and table-printing logic still to be ported are kept below
// as comments.
func helpSeparatorOptions() {
	fmt.Print("THIS IS STILL TBD FOR MILLER 6\n")

	// Not yet ported:
	//
	// fmt.Print(`Separator options:
	// --rs --irs --ors Record separators, e.g. 'lf' or '\\r\\n'
	// --fs --ifs --ofs --repifs Field separators, e.g. comma
	// --ps --ips --ops Pair separators, e.g. equals sign
	//
	// Notes about line endings:
	// * Default line endings (--irs and --ors) are "auto" which means autodetect from
	// the input file format, as long as the input file(s) have lines ending in either
	// LF (also known as linefeed, '\\n', 0x0a, Unix-style) or CRLF (also known as
	// carriage-return/linefeed pairs, '\\r\\n', 0x0d 0x0a, Windows style).
	// * If both irs and ors are auto (which is the default) then LF input will lead to LF
	// output and CRLF input will lead to CRLF output, regardless of the platform you're
	// running on.
	// * The line-ending autodetector triggers on the first line ending detected in the input
	// stream. E.g. if you specify a CRLF-terminated file on the command line followed by an
	// LF-terminated file then autodetected line endings will be CRLF.
	// * If you use --ors {something else} with (default or explicitly specified) --irs auto
	// then line endings are autodetected on input and set to what you specify on output.
	// * If you use --irs {something else} with (default or explicitly specified) --ors auto
	// then the output line endings used are LF on Unix/Linux/BSD/MacOSX, and CRLF on Windows.
	//
	// Notes about all other separators:
	// * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats
	// do key-value pairs appear juxtaposed.
	// * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;
	// XTAB records are separated by two or more consecutive IFS/OFS -- i.e.
	// a blank line. Everything above about --irs/--ors/--rs auto becomes --ifs/--ofs/--fs
	// auto for XTAB format. (XTAB's default IFS/OFS are "auto".)
	// * OFS must be single-character for PPRINT format. This is because it is used
	// with repetition for alignment; multi-character separators would make
	// alignment impossible.
	// * OPS may be multi-character for XTAB format, in which case alignment is
	// disabled.
	// * TSV is simply CSV using tab as field separator ("--fs tab").
	// * FS/PS are ignored for markdown format; RS is used.
	// * All FS and PS options are ignored for JSON format, since they are not relevant
	// to the JSON format.
	// * You can specify separators in any of the following ways, shown by example:
	// - Type them out, quoting as necessary for shell escapes, e.g.
	// "--fs '|' --ips :"
	// - C-style escape sequences, e.g. "--rs '\\r\\n' --fs '\\t'".
	// - To avoid backslashing, you can use any of the following names:
	// ")
	//// lhmss_t* pmap = get_desc_to_chars_map()
	//// for (lhmsse_t* pe = pmap.phead; pe != nil; pe = pe.pnext) {
	// %s", pe.key)
	//// }
	//
	// * Default separators by format:
	// %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS")
	//// lhmss_t* default_rses = get_default_rses()
	//// lhmss_t* default_fses = get_default_fses()
	//// lhmss_t* default_pses = get_default_pses()
	//// for (lhmsse_t* pe = default_rses.phead; pe != nil; pe = pe.pnext) {
	//// char* filefmt = pe.key
	//// char* rs = pe.value
	//// char* fs = lhmss_get(default_fses, filefmt)
	//// char* ps = lhmss_get(default_pses, filefmt)
	// %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps))
	//// }
}
|
||||
|
||||
// ================================================================
|
||||
// TODO: port the paragraphifier
|
||||
func listFunctions() {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionNames(os.Stdout)
|
||||
fmt.Printf("Please use \"%s --help-function {function name}\" for function-specific help.\n", lib.MlrExeName())
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func listFunctionsVertically() {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionNames(os.Stdout)
|
||||
fmt.Printf("Please use \"%s --help-function {function name}\" for function-specific help.\n", lib.MlrExeName())
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func usageFunctions() {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionUsages(os.Stdout)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpForFunction(arg string) {
|
||||
cst.BuiltinFunctionManagerInstance.TryListBuiltinFunctionUsage(arg, os.Stdout)
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
func listKeywords() {
|
||||
cst.ListKeywords()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func usageKeywords() {
|
||||
cst.UsageKeywords()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpForKeyword(arg string) {
|
||||
cst.UsageForKeyword(arg)
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
func listVerbsAsParagraph() {
|
||||
transformers.ListVerbNamesAsParagraph()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func listVerbsVertically() {
|
||||
transformers.ListVerbNamesVertically()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func listVerbNamesAsParagraph() {
|
||||
transformers.ListVerbNamesAsParagraph()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpForVerb(arg string) {
|
||||
transformerSetup := transformers.LookUp(arg)
|
||||
if transformerSetup != nil {
|
||||
transformerSetup.UsageFunc(os.Stdout, true, 0)
|
||||
} else {
|
||||
fmt.Printf(
|
||||
"mlr: verb \"%s\" not found. Please use \"mlr help list-verbs\" for a list.\n",
|
||||
arg)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func usageVerbs() {
|
||||
transformers.UsageVerbs()
|
||||
}
|
||||
|
|
@ -26,7 +26,7 @@ func lecatMain(args []string) int {
|
|||
args = args[2:]
|
||||
if len(args) >= 1 {
|
||||
if args[0] == "-h" || args[0] == "--help" {
|
||||
hexUsage(verb, os.Stdout, 0)
|
||||
lecatUsage(verb, os.Stdout, 0)
|
||||
}
|
||||
|
||||
if args[0][0] == '-' {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import (
|
|||
)
|
||||
|
||||
// ================================================================
|
||||
func RegTestUsage(verbName string, o *os.File, exitCode int) {
|
||||
func regTestUsage(verbName string, o *os.File, exitCode int) {
|
||||
exeName := path.Base(os.Args[0])
|
||||
fmt.Fprintf(o, "Usage: %s %s [options] [one or more directories/files]\n", exeName, verbName)
|
||||
fmt.Fprintf(o, "If no directories/files are specified, the directory %s is used by default.\n", DefaultPath)
|
||||
|
|
@ -54,11 +54,11 @@ func RegTestMain(args []string) int {
|
|||
argi++
|
||||
|
||||
if arg == "-h" || arg == "--help" {
|
||||
RegTestUsage(verbName, os.Stdout, 0)
|
||||
regTestUsage(verbName, os.Stdout, 0)
|
||||
|
||||
} else if arg == "-m" {
|
||||
if argi >= argc {
|
||||
RegTestUsage(verbName, os.Stderr, 1)
|
||||
regTestUsage(verbName, os.Stderr, 1)
|
||||
}
|
||||
exeName = args[argi]
|
||||
argi++
|
||||
|
|
@ -68,11 +68,11 @@ func RegTestMain(args []string) int {
|
|||
|
||||
} else if arg == "-s" {
|
||||
if argi >= argc {
|
||||
RegTestUsage(verbName, os.Stderr, 1)
|
||||
regTestUsage(verbName, os.Stderr, 1)
|
||||
}
|
||||
temp, err := strconv.Atoi(args[argi])
|
||||
if err != nil {
|
||||
RegTestUsage(verbName, os.Stderr, 1)
|
||||
regTestUsage(verbName, os.Stderr, 1)
|
||||
}
|
||||
firstNFailsToShow = temp
|
||||
argi++
|
||||
|
|
@ -84,7 +84,7 @@ func RegTestMain(args []string) int {
|
|||
verbosityLevel++
|
||||
|
||||
} else {
|
||||
RegTestUsage(verbName, os.Stderr, 1)
|
||||
regTestUsage(verbName, os.Stderr, 1)
|
||||
}
|
||||
}
|
||||
paths := args[argi:]
|
||||
|
|
|
|||
|
|
@ -27,12 +27,11 @@ import (
|
|||
"path"
|
||||
"strings"
|
||||
|
||||
"miller/src/cli"
|
||||
"miller/src/cliutil"
|
||||
)
|
||||
|
||||
// ================================================================
|
||||
func ReplUsage(verbName string, o *os.File, exitCode int) {
|
||||
func replUsage(verbName string, o *os.File, exitCode int) {
|
||||
exeName := path.Base(os.Args[0])
|
||||
fmt.Fprintf(o, "Usage: %s %s [options] {zero or more data-file names}\n", exeName, verbName)
|
||||
|
||||
|
|
@ -97,7 +96,7 @@ func ReplMain(args []string) int {
|
|||
}
|
||||
|
||||
if args[argi] == "-h" || args[argi] == "--help" {
|
||||
ReplUsage(replName, os.Stdout, 0)
|
||||
replUsage(replName, os.Stdout, 0)
|
||||
|
||||
} else if args[argi] == "-q" {
|
||||
showStartupBanner = false
|
||||
|
|
@ -120,14 +119,14 @@ func ReplMain(args []string) int {
|
|||
|
||||
} else if args[argi] == "--load" {
|
||||
if argc-argi < 2 {
|
||||
ReplUsage(replName, os.Stderr, 1)
|
||||
replUsage(replName, os.Stderr, 1)
|
||||
}
|
||||
options.DSLPreloadFileNames = append(options.DSLPreloadFileNames, args[argi+1])
|
||||
argi += 2
|
||||
|
||||
} else if args[argi] == "--mload" {
|
||||
if argc-argi < 2 {
|
||||
ReplUsage(replName, os.Stderr, 1)
|
||||
replUsage(replName, os.Stderr, 1)
|
||||
}
|
||||
argi += 1
|
||||
for argi < argc && args[argi] != "--" {
|
||||
|
|
@ -147,14 +146,14 @@ func ReplMain(args []string) int {
|
|||
} else if cliutil.ParseWriterOptions(args, argc, &argi, &options.WriterOptions) {
|
||||
|
||||
} else {
|
||||
ReplUsage(replName, os.Stderr, 1)
|
||||
replUsage(replName, os.Stderr, 1)
|
||||
}
|
||||
}
|
||||
|
||||
// --auto-flatten is on by default. But if input and output formats are both JSON,
|
||||
// then we don't need to actually do anything. See also mlrcli_parse.go.
|
||||
options.WriterOptions.AutoFlatten = cli.DecideFinalFlatten(&options)
|
||||
options.WriterOptions.AutoUnflatten = cli.DecideFinalUnflatten(&options)
|
||||
options.WriterOptions.AutoFlatten = cliutil.DecideFinalFlatten(&options)
|
||||
options.WriterOptions.AutoUnflatten = cliutil.DecideFinalUnflatten(&options)
|
||||
|
||||
repl, err := NewRepl(
|
||||
exeName,
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import (
|
|||
"golang.org/x/term"
|
||||
|
||||
"miller/src/colorizer"
|
||||
"miller/src/lib"
|
||||
"miller/src/version"
|
||||
)
|
||||
|
||||
|
|
@ -19,7 +20,6 @@ const ENV_PRIMARY_PROMPT = "MLR_REPL_PS1"
|
|||
const ENV_SECONDARY_PROMPT = "MLR_REPL_PS2"
|
||||
const DEFAULT_PRIMARY_PROMPT = "[mlr] "
|
||||
const DEFAULT_SECONDARY_PROMPT = "... "
|
||||
const MILLER_6_DOCS_URL = "http://johnkerl.org/miller-releases/miller-head/docs6/_build/html/index.html"
|
||||
|
||||
func getInputIsTerminal() bool {
|
||||
if runtime.GOOS == "windows" && os.Getenv("MSYSTEM") != "" {
|
||||
|
|
@ -51,7 +51,7 @@ func getPrompt2() string {
|
|||
func (repl *Repl) printStartupBanner() {
|
||||
if repl.inputIsTerminal {
|
||||
fmt.Printf("Miller %s REPL for %s:%s:%s\n", version.STRING, runtime.GOOS, runtime.GOARCH, runtime.Version())
|
||||
fmt.Printf("Pre-release docs for Miller 6: %s\n", MILLER_6_DOCS_URL)
|
||||
fmt.Printf("Pre-release docs for Miller 6: %s\n", lib.DOC_URL)
|
||||
fmt.Printf("Type ':help' for on-line help; ':quit' to quit.\n")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -856,7 +856,8 @@ func handleHelpSingle(repl *Repl, arg string) {
|
|||
}
|
||||
|
||||
if arg == "function-names" {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionsRaw(os.Stdout)
|
||||
// TODO: move to src/auxents/help
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionNames(os.Stdout)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ func unhexMain(args []string) int {
|
|||
|
||||
if len(args) >= 1 {
|
||||
if args[0] == "-h" || args[0] == "--help" {
|
||||
hexUsage(verb, os.Stdout, 0)
|
||||
unhexUsage(verb, os.Stdout, 0)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@ Logic for parsing the Miller command line.
|
|||
|
||||
* `src/cli` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer.
|
||||
* `src/cliutil` contains datatypes for the CLI-parser, which was split out to avoid a Go package-import cycle.
|
||||
* I don't use the Go [`flag`](https://golang.org/pkg/flag/) package here, although I do use it within the transformers' subcommand flag-handling. The `flag` package is quite fine; Miller's command-line processing is multi-purpose between serving CLI needs per se as well as for manpage/docfile generation, and I found it simplest to roll my own command-line handling here.
|
||||
* I don't use the Go [`flag`](https://golang.org/pkg/flag/) package. The `flag` package is quite fine; Miller's command-line processing is multi-purpose between serving CLI needs per se as well as for manpage/docfile generation, and I found it simplest to roll my own command-line handling here. More importantly, some Miller verbs such as ``sort`` take flags more than once -- ``mlr sort -f field1 -n field2 -f field3`` -- which is not supported by the `flag` package.
|
||||
|
|
|
|||
135
go/src/cli/mlrcli_mlrrc.go
Normal file
135
go/src/cli/mlrcli_mlrrc.go
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
package cli
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// * If $MLRRC is set, use it and only it.
|
||||
// * Otherwise try first $HOME/.mlrrc and then ./.mlrrc but let them
|
||||
// stack: e.g. $HOME/.mlrrc is lots of settings and maybe in one
|
||||
// subdir you want to override just a setting or two.
|
||||
|
||||
// TODO: move to separate file?
|
||||
func loadMlrrcOrDie(
|
||||
options *cliutil.TOptions,
|
||||
) {
|
||||
env_mlrrc := os.Getenv("MLRRC")
|
||||
|
||||
if env_mlrrc != "" {
|
||||
if env_mlrrc == "__none__" {
|
||||
return
|
||||
}
|
||||
if tryLoadMlrrc(options, env_mlrrc) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
env_home := os.Getenv("HOME")
|
||||
if env_home != "" {
|
||||
path := env_home + "/.mlrrc"
|
||||
tryLoadMlrrc(options, path)
|
||||
}
|
||||
|
||||
tryLoadMlrrc(options, "./.mlrrc")
|
||||
}
|
||||
|
||||
func tryLoadMlrrc(
|
||||
options *cliutil.TOptions,
|
||||
path string,
|
||||
) bool {
|
||||
handle, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer handle.Close()
|
||||
|
||||
lineReader := bufio.NewReader(handle)
|
||||
|
||||
eof := false
|
||||
lineno := 0
|
||||
for !eof {
|
||||
line, err := lineReader.ReadString('\n')
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
eof = true
|
||||
break
|
||||
}
|
||||
lineno++
|
||||
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, lib.MlrExeName(), err)
|
||||
os.Exit(1)
|
||||
return false
|
||||
}
|
||||
|
||||
// This is how to do a chomp:
|
||||
// TODO: handle \r\n with libified solution.
|
||||
line = strings.TrimRight(line, "\n")
|
||||
|
||||
if !handleMlrrcLine(options, line) {
|
||||
fmt.Fprintf(os.Stderr, "%s: parse error at file \"%s\" line %d: %s\n",
|
||||
lib.MlrExeName(), path, lineno, line,
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func handleMlrrcLine(
|
||||
options *cliutil.TOptions,
|
||||
line string,
|
||||
) bool {
|
||||
|
||||
// Comment-strip
|
||||
re := regexp.MustCompile("#.*")
|
||||
line = re.ReplaceAllString(line, "")
|
||||
|
||||
// Left-trim / right-trim
|
||||
line = strings.TrimSpace(line)
|
||||
|
||||
if line == "" { // line was whitespace-only
|
||||
return true
|
||||
}
|
||||
|
||||
// Prepend initial "--" if it's not already there
|
||||
if !strings.HasPrefix(line, "-") {
|
||||
line = "--" + line
|
||||
}
|
||||
|
||||
// Split line into args array
|
||||
args := strings.Fields(line)
|
||||
argi := 0
|
||||
argc := len(args)
|
||||
|
||||
if args[0] == "--prepipe" || args[0] == "--prepipex" {
|
||||
// Don't allow code execution via .mlrrc
|
||||
return false
|
||||
} else if args[0] == "--load" || args[0] == "--mload" {
|
||||
// Don't allow code execution via .mlrrc
|
||||
return false
|
||||
} else if cliutil.ParseReaderOptions(args, argc, &argi, &options.ReaderOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseWriterOptions(args, argc, &argi, &options.WriterOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseReaderWriterOptions(args, argc, &argi,
|
||||
&options.ReaderOptions, &options.WriterOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseMiscOptions(args, argc, &argi, options) {
|
||||
// handled
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
@ -1,18 +1,13 @@
|
|||
package cli
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miller/src/auxents/help"
|
||||
"miller/src/cliutil"
|
||||
"miller/src/dsl/cst"
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
"miller/src/version"
|
||||
)
|
||||
|
|
@ -20,7 +15,7 @@ import (
|
|||
// ----------------------------------------------------------------
|
||||
func ParseCommandLine(args []string) (
|
||||
options cliutil.TOptions,
|
||||
recordTransformers []transforming.IRecordTransformer,
|
||||
recordTransformers []transformers.IRecordTransformer,
|
||||
err error,
|
||||
) {
|
||||
options = cliutil.DefaultOptions()
|
||||
|
|
@ -39,12 +34,20 @@ func ParseCommandLine(args []string) (
|
|||
for argi < argc /* variable increment: 1 or 2 depending on flag */ {
|
||||
if args[argi][0] != '-' {
|
||||
break // No more flag options to process
|
||||
|
||||
} else if args[argi] == "--cpuprofile" {
|
||||
// Already handled in main(); ignore here.
|
||||
cliutil.CheckArgCount(args, argi, argc, 1)
|
||||
argi += 2
|
||||
} else if parseTerminalUsage(args, argc, argi) {
|
||||
} else if args[argi] == "--version" {
|
||||
fmt.Printf("Miller %s\n", version.STRING)
|
||||
os.Exit(0)
|
||||
|
||||
} else if help.ParseTerminalUsage(args[argi]) {
|
||||
// Most help is in the 'mlr help' auxent but there are a few shorthands
|
||||
// like 'mlr -h' and 'mlr -F'.
|
||||
os.Exit(0)
|
||||
|
||||
} else if cliutil.ParseReaderOptions(args, argc, &argi, &options.ReaderOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseWriterOptions(args, argc, &argi, &options.WriterOptions) {
|
||||
|
|
@ -56,7 +59,9 @@ func ParseCommandLine(args []string) (
|
|||
// handled
|
||||
} else {
|
||||
// unhandled
|
||||
usageUnrecognizedVerb(lib.MlrExeName(), args[argi])
|
||||
fmt.Fprintf(os.Stderr, "%s: option \"%s\" not recognized.\n", lib.MlrExeName(), args[argi])
|
||||
fmt.Fprintf(os.Stderr, "Please run \"%s --help\" for usage information.\n", lib.MlrExeName())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +84,7 @@ func ParseCommandLine(args []string) (
|
|||
options.NoInput = true // e.g. then-chain begins with seqgen
|
||||
}
|
||||
|
||||
if DecideFinalFlatten(&options) {
|
||||
if cliutil.DecideFinalFlatten(&options) {
|
||||
// E.g. '{"req": {"method": "GET", "path": "/api/check"}}' becomes
|
||||
// req.method=GET,req.path=/api/check.
|
||||
transformer, err := transformers.NewTransformerFlatten(options.WriterOptions.OFLATSEP, nil)
|
||||
|
|
@ -88,7 +93,7 @@ func ParseCommandLine(args []string) (
|
|||
recordTransformers = append(recordTransformers, transformer)
|
||||
}
|
||||
|
||||
if DecideFinalUnflatten(&options) {
|
||||
if cliutil.DecideFinalUnflatten(&options) {
|
||||
// E.g. req.method=GET,req.path=/api/check becomes
|
||||
// '{"req": {"method": "GET", "path": "/api/check"}}'
|
||||
transformer, err := transformers.NewTransformerUnflatten(options.WriterOptions.OFLATSEP, nil)
|
||||
|
|
@ -120,84 +125,6 @@ func ParseCommandLine(args []string) (
|
|||
return options, recordTransformers, nil
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
// Decide whether to insert a flatten or unflatten verb at the end of the
|
||||
// chain. See also repl/verbs.go which handles the same issue in the REPL.
|
||||
//
|
||||
// ----------------------------------------------------------------
|
||||
// PROBLEM TO BE SOLVED:
|
||||
//
|
||||
// JSON has nested structures and CSV et al. do not. For example:
|
||||
// {
|
||||
// "req" : {
|
||||
// "method": "GET",
|
||||
// "path": "api/check",
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For CSV we flatten this down to
|
||||
//
|
||||
// {
|
||||
// "req.method": "GET",
|
||||
// "req.path": "api/check"
|
||||
// }
|
||||
//
|
||||
// ----------------------------------------------------------------
|
||||
// APPROACH:
|
||||
//
|
||||
// Use the Principle of Least Surprise (POLS).
|
||||
//
|
||||
// * If input is JSON and output is JSON:
|
||||
// o Records can be nested from record-read
|
||||
// o They remain that way through the Miller record-processing stream
|
||||
// o They are nested on record-write
|
||||
// o No action needs to be taken
|
||||
//
|
||||
// * If input is JSON and output is non-JSON:
|
||||
// o Records can be nested from record-read
|
||||
// o They remain that way through the Miller record-processing stream
|
||||
// o On record-write, nested structures will be converted to string (carriage
|
||||
// returns and all) using json_stringify. People *might* want this but
|
||||
// (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
|
||||
// --no-auto-flatten CLI flag for those who want it.
|
||||
//
|
||||
// * If input is non-JSON and output is non-JSON:
|
||||
// o If there is a "req.method" field, people should be able to do
|
||||
// 'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
|
||||
// no auto-unflatten on input. People can insert an unflatten verb
|
||||
// into their verb chain if they really want unflatten for non-JSON
|
||||
// files.
|
||||
// o The DSL can make nested data, so AUTO-FLATTEN at output.
|
||||
//
|
||||
// * If input is non-JSON and output is JSON:
|
||||
// o Default is to auto-unflatten at output.
|
||||
// o There is a --no-auto-unflatten for those who want it.
|
||||
// ================================================================
|
||||
|
||||
func DecideFinalFlatten(options *cliutil.TOptions) bool {
|
||||
ofmt := options.WriterOptions.OutputFileFormat
|
||||
if options.WriterOptions.AutoFlatten {
|
||||
if ofmt != "json" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func DecideFinalUnflatten(options *cliutil.TOptions) bool {
|
||||
ifmt := options.ReaderOptions.InputFileFormat
|
||||
ofmt := options.WriterOptions.OutputFileFormat
|
||||
|
||||
if options.WriterOptions.AutoUnflatten {
|
||||
if ifmt != "json" {
|
||||
if ofmt == "json" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Returns a list of transformers, from the starting point in args given by *pargi.
|
||||
// Bumps *pargi to point to remaining post-transformer-setup args, i.e. filenames.
|
||||
|
|
@ -208,12 +135,12 @@ func parseTransformers(
|
|||
argc int,
|
||||
options *cliutil.TOptions,
|
||||
) (
|
||||
transformerList []transforming.IRecordTransformer,
|
||||
transformerList []transformers.IRecordTransformer,
|
||||
ignoresInput bool,
|
||||
err error,
|
||||
) {
|
||||
|
||||
transformerList = make([]transforming.IRecordTransformer, 0)
|
||||
transformerList = make([]transformers.IRecordTransformer, 0)
|
||||
ignoresInput = false
|
||||
|
||||
argi := *pargi
|
||||
|
|
@ -226,7 +153,7 @@ func parseTransformers(
|
|||
|
||||
if (argc - argi) < 1 {
|
||||
fmt.Fprintf(os.Stderr, "%s: no verb supplied.\n", lib.MlrExeName())
|
||||
mainUsageShort()
|
||||
help.MainUsage(os.Stderr)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
|
|
@ -236,7 +163,7 @@ func parseTransformers(
|
|||
cliutil.CheckArgCount(args, argi, argc, 1)
|
||||
verb := args[argi]
|
||||
|
||||
transformerSetup := lookUpTransformerSetup(verb)
|
||||
transformerSetup := transformers.LookUp(verb)
|
||||
if transformerSetup == nil {
|
||||
fmt.Fprintf(os.Stderr,
|
||||
"%s: verb \"%s\" not found. Please use \"%s --help\" for a list.\n",
|
||||
|
|
@ -284,247 +211,3 @@ func parseTransformers(
|
|||
*pargi = argi
|
||||
return transformerList, ignoresInput, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func parseTerminalUsage(args []string, argc int, argi int) bool {
|
||||
if args[argi] == "--version" {
|
||||
fmt.Printf("Miller %s\n", version.STRING)
|
||||
return true
|
||||
} else if args[argi] == "-h" {
|
||||
mainUsageLong(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--help" {
|
||||
mainUsageLong(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--print-type-arithmetic-info" {
|
||||
fmt.Println("TODO: port printTypeArithmeticInfo")
|
||||
// printTypeArithmeticInfo(os.Stdout, lib.MlrExeName());
|
||||
return true
|
||||
|
||||
} else if args[argi] == "--help-all-verbs" || args[argi] == "--usage-all-verbs" {
|
||||
usageAllVerbs(lib.MlrExeName())
|
||||
} else if args[argi] == "--list-all-verbs" || args[argi] == "-l" {
|
||||
listAllVerbs(os.Stdout, " ")
|
||||
return true
|
||||
} else if args[argi] == "--list-all-verbs-raw" || args[argi] == "-L" {
|
||||
listAllVerbsRaw(os.Stdout)
|
||||
return true
|
||||
|
||||
} else if args[argi] == "--list-all-functions-raw" || args[argi] == "-F" {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionsRaw(os.Stdout)
|
||||
return true
|
||||
// } else if args[argi] == "--list-all-functions-as-table" {
|
||||
// fmgr_t* pfmgr = fmgr_alloc();
|
||||
// fmgr_list_all_functions_as_table(pfmgr, os.Stdout);
|
||||
// fmgr_free(pfmgr, nil);
|
||||
// return true;
|
||||
} else if args[argi] == "--help-all-functions" || args[argi] == "-f" {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionUsages(os.Stdout)
|
||||
return true
|
||||
} else if args[argi] == "--help-function" || args[argi] == "--hf" {
|
||||
cliutil.CheckArgCount(args, argi, argc, 2)
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionUsage(args[argi+1], os.Stdout)
|
||||
argi++
|
||||
return true
|
||||
|
||||
} else if args[argi] == "--list-all-keywords-raw" || args[argi] == "-K" {
|
||||
fmt.Println("TODO: port mlr_dsl_list_all_keywords_raw")
|
||||
// mlr_dsl_list_all_keywords_raw(os.Stdout);
|
||||
return true
|
||||
} else if args[argi] == "--help-all-keywords" || args[argi] == "-k" {
|
||||
fmt.Println("TODO: port mlr_dsl_list_all_keywords")
|
||||
// mlr_dsl_keyword_usage(os.Stdout, nil);
|
||||
return true
|
||||
} else if args[argi] == "--help-keyword" || args[argi] == "--hk" {
|
||||
cliutil.CheckArgCount(args, argi, argc, 2)
|
||||
fmt.Println("TODO: port mlr_dsl_keyword_usage")
|
||||
// mlr_dsl_keyword_usage(os.Stdout, args[argi+1]);
|
||||
return true
|
||||
|
||||
// // main-usage subsections, individually accessible for the benefit of
|
||||
// // the manpage-autogenerator
|
||||
} else if args[argi] == "--usage-synopsis" {
|
||||
mainUsageSynopsis(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-examples" {
|
||||
mainUsageExamples(os.Stdout, lib.MlrExeName(), "")
|
||||
return true
|
||||
} else if args[argi] == "--usage-list-all-verbs" {
|
||||
listAllVerbs(os.Stdout, "")
|
||||
return true
|
||||
} else if args[argi] == "--usage-help-options" {
|
||||
mainUsageHelpOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-mlrrc" {
|
||||
mainUsageMlrrc(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-functions" {
|
||||
mainUsageFunctions(os.Stdout)
|
||||
return true
|
||||
} else if args[argi] == "--usage-data-format-examples" {
|
||||
mainUsageDataFormatExamples(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-data-format-options" {
|
||||
mainUsageDataFormatOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-comments-in-data" {
|
||||
mainUsageCommentsInData(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-format-conversion-keystroke-saver-options" {
|
||||
mainUsageFormatConversionKeystrokeSaverOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-compressed-data-options" {
|
||||
mainUsageCompressedDataOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
// } else if args[argi] == "--usage-separator-options" {
|
||||
// mainUsageSeparatorOptions(os.Stdout, lib.MlrExeName());
|
||||
// return true;
|
||||
} else if args[argi] == "--usage-csv-options" {
|
||||
mainUsageCsvOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
// } else if args[argi] == "--usage-double-quoting" {
|
||||
// mainUsageDoubleQuoting(os.Stdout, lib.MlrExeName());
|
||||
// return true;
|
||||
// } else if args[argi] == "--usage-numerical-formatting" {
|
||||
// mainUsageNumericalFormatting(os.Stdout, lib.MlrExeName());
|
||||
// return true;
|
||||
} else if args[argi] == "--usage-output-colorization" {
|
||||
mainUsageOutputColorization(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-other-options" {
|
||||
mainUsageOtherOptions(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-then-chaining" {
|
||||
mainUsageThenChaining(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
} else if args[argi] == "--usage-auxents" {
|
||||
mainUsageAuxents(os.Stdout)
|
||||
return true
|
||||
} else if args[argi] == "--usage-see-also" {
|
||||
mainUsageSeeAlso(os.Stdout, lib.MlrExeName())
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// * If $MLRRC is set, use it and only it.
|
||||
// * Otherwise try first $HOME/.mlrrc and then ./.mlrrc but let them
|
||||
// stack: e.g. $HOME/.mlrrc is lots of settings and maybe in one
|
||||
// subdir you want to override just a setting or two.
|
||||
|
||||
// TODO: move to separate file?
|
||||
func loadMlrrcOrDie(
|
||||
options *cliutil.TOptions,
|
||||
) {
|
||||
env_mlrrc := os.Getenv("MLRRC")
|
||||
|
||||
if env_mlrrc != "" {
|
||||
if env_mlrrc == "__none__" {
|
||||
return
|
||||
}
|
||||
if tryLoadMlrrc(options, env_mlrrc) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
env_home := os.Getenv("HOME")
|
||||
if env_home != "" {
|
||||
path := env_home + "/.mlrrc"
|
||||
tryLoadMlrrc(options, path)
|
||||
}
|
||||
|
||||
tryLoadMlrrc(options, "./.mlrrc")
|
||||
}
|
||||
|
||||
func tryLoadMlrrc(
|
||||
options *cliutil.TOptions,
|
||||
path string,
|
||||
) bool {
|
||||
handle, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer handle.Close()
|
||||
|
||||
lineReader := bufio.NewReader(handle)
|
||||
|
||||
eof := false
|
||||
lineno := 0
|
||||
for !eof {
|
||||
line, err := lineReader.ReadString('\n')
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
eof = true
|
||||
break
|
||||
}
|
||||
lineno++
|
||||
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, lib.MlrExeName(), err)
|
||||
os.Exit(1)
|
||||
return false
|
||||
}
|
||||
|
||||
// This is how to do a chomp:
|
||||
// TODO: handle \r\n with libified solution.
|
||||
line = strings.TrimRight(line, "\n")
|
||||
|
||||
if !handleMlrrcLine(options, line) {
|
||||
fmt.Fprintf(os.Stderr, "%s: parse error at file \"%s\" line %d: %s\n",
|
||||
lib.MlrExeName(), path, lineno, line,
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func handleMlrrcLine(
|
||||
options *cliutil.TOptions,
|
||||
line string,
|
||||
) bool {
|
||||
|
||||
// Comment-strip
|
||||
re := regexp.MustCompile("#.*")
|
||||
line = re.ReplaceAllString(line, "")
|
||||
|
||||
// Left-trim / right-trim
|
||||
line = strings.TrimSpace(line)
|
||||
|
||||
if line == "" { // line was whitespace-only
|
||||
return true
|
||||
}
|
||||
|
||||
// Prepend initial "--" if it's not already there
|
||||
if !strings.HasPrefix(line, "-") {
|
||||
line = "--" + line
|
||||
}
|
||||
|
||||
// Split line into args array
|
||||
args := strings.Fields(line)
|
||||
argi := 0
|
||||
argc := len(args)
|
||||
|
||||
if args[0] == "--prepipe" || args[0] == "--prepipex" {
|
||||
// Don't allow code execution via .mlrrc
|
||||
return false
|
||||
} else if args[0] == "--load" || args[0] == "--mload" {
|
||||
// Don't allow code execution via .mlrrc
|
||||
return false
|
||||
} else if cliutil.ParseReaderOptions(args, argc, &argi, &options.ReaderOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseWriterOptions(args, argc, &argi, &options.WriterOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseReaderWriterOptions(args, argc, &argi,
|
||||
&options.ReaderOptions, &options.WriterOptions) {
|
||||
// handled
|
||||
} else if cliutil.ParseMiscOptions(args, argc, &argi, options) {
|
||||
// handled
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,127 +0,0 @@
|
|||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers"
|
||||
"miller/src/transforming"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
var MAPPER_LOOKUP_TABLE = []transforming.TransformerSetup{
|
||||
transformers.AltkvSetup,
|
||||
transformers.BarSetup,
|
||||
transformers.BootstrapSetup,
|
||||
transformers.CatSetup,
|
||||
transformers.CheckSetup,
|
||||
transformers.CleanWhitespaceSetup,
|
||||
transformers.CountDistinctSetup,
|
||||
transformers.CountSetup,
|
||||
transformers.CountSimilarSetup,
|
||||
transformers.CutSetup,
|
||||
transformers.DecimateSetup,
|
||||
transformers.FillDownSetup,
|
||||
transformers.FillEmptySetup,
|
||||
transformers.FilterSetup,
|
||||
transformers.FlattenSetup,
|
||||
transformers.FormatValuesSetup,
|
||||
transformers.FractionSetup,
|
||||
transformers.GapSetup,
|
||||
transformers.GrepSetup,
|
||||
transformers.GroupBySetup,
|
||||
transformers.GroupLikeSetup,
|
||||
transformers.HavingFieldsSetup,
|
||||
transformers.HeadSetup,
|
||||
transformers.HistogramSetup,
|
||||
transformers.JSONParseSetup,
|
||||
transformers.JSONStringifySetup,
|
||||
transformers.JoinSetup,
|
||||
transformers.LabelSetup,
|
||||
transformers.LeastFrequentSetup,
|
||||
transformers.MergeFieldsSetup,
|
||||
transformers.MostFrequentSetup,
|
||||
transformers.NestSetup,
|
||||
transformers.NothingSetup,
|
||||
transformers.PutSetup,
|
||||
transformers.RegularizeSetup,
|
||||
transformers.RemoveEmptyColumnsSetup,
|
||||
transformers.RenameSetup,
|
||||
transformers.ReorderSetup,
|
||||
transformers.RepeatSetup,
|
||||
transformers.ReshapeSetup,
|
||||
transformers.SampleSetup,
|
||||
transformers.Sec2GMTDateSetup,
|
||||
transformers.Sec2GMTSetup,
|
||||
transformers.SeqgenSetup,
|
||||
transformers.ShuffleSetup,
|
||||
transformers.SkipTrivialRecordsSetup,
|
||||
transformers.SortSetup,
|
||||
transformers.SortWithinRecordsSetup,
|
||||
transformers.Stats1Setup,
|
||||
transformers.Stats2Setup,
|
||||
transformers.StepSetup,
|
||||
transformers.TacSetup,
|
||||
transformers.TailSetup,
|
||||
transformers.TeeSetup,
|
||||
transformers.TopSetup,
|
||||
transformers.UnflattenSetup,
|
||||
transformers.UniqSetup,
|
||||
transformers.UnsparsifySetup,
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func lookUpTransformerSetup(verb string) *transforming.TransformerSetup {
|
||||
for _, transformerSetup := range MAPPER_LOOKUP_TABLE {
|
||||
if transformerSetup.Verb == verb {
|
||||
return &transformerSetup
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func listAllVerbsRaw(o *os.File) {
|
||||
for _, transformerSetup := range MAPPER_LOOKUP_TABLE {
|
||||
fmt.Fprintf(o, "%s\n", transformerSetup.Verb)
|
||||
}
|
||||
}
|
||||
|
||||
func listAllVerbs(o *os.File, leader string) {
|
||||
separator := " "
|
||||
|
||||
leaderlen := len(leader)
|
||||
separatorlen := len(separator)
|
||||
linelen := leaderlen
|
||||
j := 0
|
||||
|
||||
for _, transformerSetup := range MAPPER_LOOKUP_TABLE {
|
||||
verb := transformerSetup.Verb
|
||||
verblen := len(verb)
|
||||
linelen += separatorlen + verblen
|
||||
if linelen >= 80 {
|
||||
fmt.Fprintf(o, "\n")
|
||||
linelen = leaderlen + separatorlen + verblen
|
||||
j = 0
|
||||
}
|
||||
if j == 0 {
|
||||
fmt.Fprintf(o, "%s", leader)
|
||||
}
|
||||
fmt.Fprintf(o, "%s%s", separator, verb)
|
||||
j++
|
||||
}
|
||||
|
||||
fmt.Fprintf(o, "\n")
|
||||
}
|
||||
|
||||
func usageAllVerbs(argv0 string) {
|
||||
separator := "================================================================"
|
||||
|
||||
for _, transformerSetup := range MAPPER_LOOKUP_TABLE {
|
||||
fmt.Printf("%s\n", separator)
|
||||
lib.InternalCodingErrorIf(transformerSetup.UsageFunc == nil)
|
||||
transformerSetup.UsageFunc(os.Stdout, false, 0)
|
||||
}
|
||||
fmt.Printf("%s\n", separator)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
|
@ -1,608 +0,0 @@
|
|||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
//"miller/src/auxents"
|
||||
"miller/src/cliutil"
|
||||
"miller/src/dsl/cst"
|
||||
"miller/src/lib"
|
||||
"miller/src/version"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func mainUsageShort() {
|
||||
fmt.Fprintf(os.Stderr, "Please run \"%s --help\" for detailed usage information.\n", lib.MlrExeName())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// The mainUsageLong() function is split out into subroutines in support of the
|
||||
// manpage autogenerator.
|
||||
|
||||
func mainUsageLong(o *os.File, argv0 string) {
|
||||
mainUsageSynopsis(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "COMMAND-LINE-SYNTAX EXAMPLES:\n")
|
||||
mainUsageExamples(o, argv0, " ")
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "DATA-FORMAT EXAMPLES:\n")
|
||||
mainUsageDataFormatExamples(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "HELP OPTIONS:\n")
|
||||
mainUsageHelpOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "CUSTOMIZATION VIA .MLRRC:\n")
|
||||
mainUsageMlrrc(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "VERBS:\n")
|
||||
listAllVerbs(o, " ")
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "FUNCTIONS FOR THE FILTER AND PUT VERBS:\n")
|
||||
mainUsageFunctions(o)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "DATA-FORMAT OPTIONS, FOR INPUT, OUTPUT, OR BOTH:\n")
|
||||
mainUsageDataFormatOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "COMMENTS IN DATA:\n")
|
||||
mainUsageCommentsInData(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "FORMAT-CONVERSION KEYSTROKE-SAVER OPTIONS:\n")
|
||||
mainUsageFormatConversionKeystrokeSaverOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "COMPRESSED-DATA OPTIONS:\n")
|
||||
mainUsageCompressedDataOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
// fmt.Fprintf(o, "SEPARATOR OPTIONS:\n");
|
||||
// mainUsageSeparatorOptions(o, argv0);
|
||||
// fmt.Fprintf(o, "\n");
|
||||
|
||||
fmt.Fprintf(o, "RELEVANT TO CSV/CSV-LITE INPUT ONLY:\n")
|
||||
mainUsageCsvOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
// fmt.Fprintf(o, "DOUBLE-QUOTING FOR CSV OUTPUT:\n");
|
||||
// mainUsageDoubleQuoting(o, argv0);
|
||||
// fmt.Fprintf(o, "\n");
|
||||
|
||||
fmt.Fprintf(o, "NUMERICAL FORMATTING:\n")
|
||||
mainUsageNumericalFormatting(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "OUTPUT COLORIZATION:\n")
|
||||
mainUsageOutputColorization(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "OTHER OPTIONS:\n")
|
||||
mainUsageOtherOptions(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "THEN-CHAINING:\n")
|
||||
mainUsageThenChaining(o, argv0)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "AUXILIARY COMMANDS:\n")
|
||||
mainUsageAuxents(o)
|
||||
fmt.Fprintf(o, "\n")
|
||||
|
||||
fmt.Fprintf(o, "SEE ALSO:\n")
|
||||
mainUsageSeeAlso(o, argv0)
|
||||
}
|
||||
|
||||
// mainUsageSynopsis writes the one-line command synopsis to o, using argv0 as
// the program name.
func mainUsageSynopsis(o *os.File, argv0 string) {
	const synopsis = "Usage: %s [I/O options] {verb} [verb-dependent options ...] {zero or more file names}\n"
	fmt.Fprintf(o, synopsis, argv0)
}
|
||||
|
||||
// mainUsageExamples writes a set of example command lines to o. Each command
// is prefixed with leader and uses argv0 as the program name; the continuation
// lines of the multi-line put example are written as-is, without the leader.
//
// The final example redirects print output to "stderr", which is the Miller
// DSL keyword; the Go identifier os.Stderr has no meaning in that language.
func mainUsageExamples(o *os.File, argv0 string, leader string) {
	fmt.Fprintf(o, "%s%s --csv cut -f hostname,uptime mydata.csv\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --tsv --rs lf filter '$status != \"down\" && $upsec >= 10000' *.tsv\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --nidx put '$sum = $7 < 0.0 ? 3.5 : $7 + 2.1*$8' *.dat\n", leader, argv0)
	fmt.Fprintf(o, "%sgrep -v '^#' /etc/group | %s --ifs : --nidx --opprint label group,pass,gid,member then sort -f group\n", leader, argv0)
	fmt.Fprintf(o, "%s%s join -j account_id -f accounts.dat then group-by account_name balances.dat\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --json put '$attr = sub($attr, \"([0-9]+)_([0-9]+)_.*\", \"\\1:\\2\")' data/*.json\n", leader, argv0)
	fmt.Fprintf(o, "%s%s stats1 -a min,mean,max,p10,p50,p90 -f flag,u,v data/*\n", leader, argv0)
	fmt.Fprintf(o, "%s%s stats2 -a linreg-pca -f u,v -g shape data/*\n", leader, argv0)
	fmt.Fprintf(o, "%s%s put -q '@sum[$a][$b] += $x; end {emit @sum, \"a\", \"b\"}' data/*\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --from estimates.tbl put '\n", leader, argv0)
	fmt.Fprintf(o, " for (k,v in $*) {\n")
	fmt.Fprintf(o, " if (is_numeric(v) && k =~ \"^[t-z].*$\") {\n")
	fmt.Fprintf(o, " $sum += v; $count += 1\n")
	fmt.Fprintf(o, " }\n")
	fmt.Fprintf(o, " }\n")
	fmt.Fprintf(o, " $mean = $sum / $count # no assignment if count unset'\n")
	fmt.Fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n", leader, argv0)
	fmt.Fprintf(o, "%s%s --from infile.dat put '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
		leader, argv0)
}
|
||||
|
||||
// mainUsageHelpOptions writes the list of help-related command-line flags to
// o, one per line. The argv0 parameter is not used.
func mainUsageHelpOptions(o *os.File, argv0 string) {
	lines := []string{
		" -h or --help Show this message.\n",
		" --version Show the software version.\n",
		" {verb name} --help Show verb-specific help.\n",
		" --help-all-verbs Show help on all verbs.\n",
		" -l or --list-all-verbs List only verb names.\n",
		" -L List only verb names, one per line.\n",
		" -f or --help-all-functions Show help on all built-in functions.\n",
		" -F Show a bare listing of built-in functions by name.\n",
		" -k or --help-all-keywords Show help on all keywords.\n",
		" -K Show a bare listing of keywords by name.\n",
	}
	for _, line := range lines {
		fmt.Fprint(o, line)
	}
}
|
||||
|
||||
// mainUsageMlrrc writes the help text describing .mlrrc customization to o:
// what the file is for, its line format, a sample file, and how the file
// location is chosen. The argv0 parameter is not used.
func mainUsageMlrrc(o *os.File, argv0 string) {
	lines := []string{
		"You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc.\n",
		"For example, if you usually process CSV, then you can put \"--csv\" in your .mlrrc file\n",
		"and that will be the default input/output format unless otherwise specified on the command line.\n",
		"\n",
		"The .mlrrc file format is one \"--flag\" or \"--option value\" per line, with the leading \"--\" optional.\n",
		"Hash-style comments and blank lines are ignored.\n",
		"\n",
		"Sample .mlrrc:\n",
		"# Input and output formats are CSV by default (unless otherwise specified\n",
		"# on the mlr command line):\n",
		"csv\n",
		"# These are no-ops for CSV, but when I do use JSON output, I want these\n",
		"# pretty-printing options to be used:\n",
		"jvstack\n",
		"jlistwrap\n",
		"\n",
		"How to specify location of .mlrrc:\n",
		"* If $MLRRC is set:\n",
		" o If its value is \"__none__\" then no .mlrrc files are processed.\n",
		" o Otherwise, its value (as a filename) is loaded and processed. If there are syntax\n",
		" errors, they abort mlr with a usage message (as if you had mistyped something on the\n",
		" command line). If the file can't be loaded at all, though, it is silently skipped.\n",
		" o Any .mlrrc in your home directory or current directory is ignored whenever $MLRRC is\n",
		" set in the environment.\n",
		"* Otherwise:\n",
		" o If $HOME/.mlrrc exists, it's then processed as above.\n",
		" o If ./.mlrrc exists, it's then also processed as above.\n",
		" (I.e. current-directory .mlrrc defaults are stacked over home-directory .mlrrc defaults.)\n",
		"\n",
		"See also:\n",
		"https://miller.readthedocs.io/en/latest/customization.html\n",
	}
	for _, line := range lines {
		fmt.Fprint(o, line)
	}
}
|
||||
|
||||
func mainUsageFunctions(o *os.File) {
|
||||
cst.BuiltinFunctionManagerInstance.ListBuiltinFunctionsRaw(os.Stdout)
|
||||
fmt.Fprintf(o, "Please use \"%s --help-function {function name}\" for function-specific help.\n", lib.MlrExeName())
|
||||
}
|
||||
|
||||
// TODO: get rid of argv0 throughout, replacing it with lib.MlrExeName()
|
||||
// mainUsageDataFormatExamples writes to o a small worked example of each
// supported file format, showing sample file contents alongside the records
// they represent. The argv0 parameter is not used.
//
// The text is emitted verbatim with fmt.Fprint, so it is never interpreted as
// a format string.
func mainUsageDataFormatExamples(o *os.File, argv0 string) {
	fmt.Fprint(o,
		` CSV/CSV-lite: comma-separated values with separate header line
TSV: same but with tabs in place of commas
+---------------------+
| apple,bat,cog |
| 1,2,3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| 4,5,6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+---------------------+

JSON (sequence or array of objects):
+---------------------+
| { |
| "apple": 1, | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| "bat": 2, |
| "cog": 3 |
| } |
| { |
| "dish": { | Record 2: "dish:egg" => "7", "dish:flint" => "8", "garlic" => ""
| "egg": 7, |
| "flint": 8 |
| }, |
| "garlic": "" |
| } |
+---------------------+

PPRINT: pretty-printed tabular
+---------------------+
| apple bat cog |
| 1 2 3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| 4 5 6 | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+---------------------+

Markdown tabular (supported for output only):
+-----------------------+
| | apple | bat | cog | |
| | --- | --- | --- | |
| | 1 | 2 | 3 | | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| | 4 | 5 | 6 | | Record 2: "apple" => "4", "bat" => "5", "cog" => "6"
+-----------------------+

XTAB: pretty-printed transposed tabular
+---------------------+
| apple 1 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| bat 2 |
| cog 3 |
| |
| dish 7 | Record 2: "dish" => "7", "egg" => "8"
| egg 8 |
+---------------------+

DKVP: delimited key-value pairs (Miller default format)
+---------------------+
| apple=1,bat=2,cog=3 | Record 1: "apple" => "1", "bat" => "2", "cog" => "3"
| dish=7,egg=8,flint | Record 2: "dish" => "7", "egg" => "8", "3" => "flint"
+---------------------+

NIDX: implicitly numerically indexed (Unix-toolkit style)
+---------------------+
| the quick brown | Record 1: "1" => "the", "2" => "quick", "3" => "brown"
| fox jumped | Record 2: "1" => "fox", "2" => "jumped"
+---------------------+
`)
}
|
||||
|
||||
func mainUsageDataFormatOptions(o *os.File, argv0 string) {
|
||||
fmt.Fprintf(o,
|
||||
`
|
||||
--idkvp --odkvp --dkvp Delimited key-value pairs, e.g "a=1,b=2"
|
||||
(this is Miller's default format).
|
||||
|
||||
--inidx --onidx --nidx Implicitly-integer-indexed fields
|
||||
(Unix-toolkit style).
|
||||
-T Synonymous with "--nidx --fs tab".
|
||||
|
||||
--icsv --ocsv --csv Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.)
|
||||
|
||||
--itsv --otsv --tsv Keystroke-savers for "--icsv --ifs tab",
|
||||
"--ocsv --ofs tab", "--csv --fs tab".
|
||||
--iasv --oasv --asv Similar but using ASCII FS %s and RS %s\n",
|
||||
--iusv --ousv --usv Similar but using Unicode FS %s\n",
|
||||
and RS %s\n",
|
||||
|
||||
--icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated
|
||||
with --fs tab, etc.). The 'lite' CSV does not handle
|
||||
RFC-CSV double-quoting rules; is slightly faster;
|
||||
and handles heterogeneity in the input stream via
|
||||
empty newline followed by new header line. See also
|
||||
http://johnkerl.org/miller/doc/file-formats.html#CSV/TSV/etc.
|
||||
|
||||
--itsvlite --otsvlite --tsvlite Keystroke-savers for "--icsvlite --ifs tab",
|
||||
"--ocsvlite --ofs tab", "--csvlite --fs tab".
|
||||
-t Synonymous with --tsvlite.
|
||||
--iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS %s and RS %s\n",
|
||||
--iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS %s\n",
|
||||
and RS %s\n",
|
||||
|
||||
--ipprint --opprint --pprint Pretty-printed tabular (produces no
|
||||
output until all input is in).
|
||||
--right Right-justifies all fields for PPRINT output.
|
||||
--barred Prints a border around PPRINT output
|
||||
(only available for output).
|
||||
|
||||
--omd Markdown-tabular (only available for output).
|
||||
|
||||
--ixtab --oxtab --xtab Pretty-printed vertical-tabular.
|
||||
--xvright Right-justifies values for XTAB format.
|
||||
|
||||
--ijson --ojson --json JSON tabular: sequence or list of one-level
|
||||
maps: {...}{...} or [{...},{...}].
|
||||
--json-map-arrays-on-input JSON arrays are unmillerable. --json-map-arrays-on-input
|
||||
--json-skip-arrays-on-input is the default: arrays are converted to integer-indexed
|
||||
--json-fatal-arrays-on-input maps. The other two options cause them to be skipped, or
|
||||
to be treated as errors. Please use the jq tool for full
|
||||
JSON (pre)processing.
|
||||
--jvstack Put one key-value pair per line for JSON output.
|
||||
--no-jvstack Put objects/arrays all on one line for JSON output.
|
||||
--jsonx --ojsonx Keystroke-savers for --json --jvstack
|
||||
--jsonx --ojsonx and --ojson --jvstack, respectively.
|
||||
--jlistwrap Wrap JSON output in outermost [ ].
|
||||
--jknquoteint Do not quote non-string map keys in JSON output.
|
||||
--jvquoteall Quote map values in JSON output, even if they're
|
||||
numeric.
|
||||
--oflatsep {string} Separator for flattening multi-level JSON keys,
|
||||
e.g. '{"a":{"b":3}}' becomes a:b => 3 for
|
||||
non-JSON formats. Defaults to %s.\n",
|
||||
|
||||
-p is a keystroke-saver for --nidx --fs space --repifs
|
||||
|
||||
Examples: --csv for CSV-formatted input and output; --idkvp --opprint for
|
||||
DKVP-formatted input and pretty-printed output.
|
||||
|
||||
Please use --iformat1 --oformat2 rather than --format1 --oformat2.
|
||||
The latter sets up input and output flags for format1, not all of which
|
||||
are overridden in all cases by setting output format to format2.`,
|
||||
|
||||
cliutil.ASV_FS_FOR_HELP,
|
||||
cliutil.ASV_RS_FOR_HELP,
|
||||
cliutil.USV_FS_FOR_HELP,
|
||||
cliutil.USV_RS_FOR_HELP,
|
||||
cliutil.ASV_FS_FOR_HELP,
|
||||
cliutil.ASV_RS_FOR_HELP,
|
||||
cliutil.USV_FS_FOR_HELP,
|
||||
cliutil.USV_RS_FOR_HELP,
|
||||
cliutil.DEFAULT_JSON_FLATTEN_SEPARATOR,
|
||||
)
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
func mainUsageCommentsInData(o *os.File, argv0 string) {
|
||||
fmt.Fprintf(o, " --skip-comments Ignore commented lines (prefixed by \"%s\")\n",
|
||||
cliutil.DEFAULT_COMMENT_STRING)
|
||||
fmt.Fprintf(o, " within the input.\n")
|
||||
fmt.Fprintf(o, " --skip-comments-with {string} Ignore commented lines within input, with\n")
|
||||
fmt.Fprintf(o, " specified prefix.\n")
|
||||
fmt.Fprintf(o, " --pass-comments Immediately print commented lines (prefixed by \"%s\")\n",
|
||||
cliutil.DEFAULT_COMMENT_STRING)
|
||||
fmt.Fprintf(o, " within the input.\n")
|
||||
fmt.Fprintf(o, " --pass-comments-with {string} Immediately print commented lines within input, with\n")
|
||||
fmt.Fprintf(o, " specified prefix.\n")
|
||||
fmt.Fprintf(o, "Notes:\n")
|
||||
fmt.Fprintf(o, "* Comments are only honored at the start of a line.\n")
|
||||
fmt.Fprintf(o, "* In the absence of any of the above four options, comments are data like\n")
|
||||
fmt.Fprintf(o, " any other text.\n")
|
||||
fmt.Fprintf(o, "* When pass-comments is used, comment lines are written to standard output\n")
|
||||
fmt.Fprintf(o, " immediately upon being read; they are not part of the record stream.\n")
|
||||
fmt.Fprintf(o, " Results may be counterintuitive. A suggestion is to place comments at the\n")
|
||||
fmt.Fprintf(o, " start of data files.\n")
|
||||
}
|
||||
|
||||
// mainUsageFormatConversionKeystrokeSaverOptions writes to o the table of
// two-letter format-conversion shorthand flags and a legend for the letters
// used in them. The argv0 parameter is not used.
func mainUsageFormatConversionKeystrokeSaverOptions(o *os.File, argv0 string) {
	lines := []string{
		"As keystroke-savers for format-conversion you may use the following:\n",
		" --c2t --c2d --c2n --c2j --c2x --c2p --c2m\n",
		" --t2c --t2d --t2n --t2j --t2x --t2p --t2m\n",
		" --d2c --d2t --d2n --d2j --d2x --d2p --d2m\n",
		" --n2c --n2t --n2d --n2j --n2x --n2p --n2m\n",
		" --j2c --j2t --j2d --j2n --j2x --j2p --j2m\n",
		" --x2c --x2t --x2d --x2n --x2j --x2p --x2m\n",
		" --p2c --p2t --p2d --p2n --p2j --p2x --p2m\n",
		"The letters c t d n j x p m refer to formats CSV, TSV, DKVP, NIDX, JSON, XTAB,\n",
		"PPRINT, and markdown, respectively. Note that markdown format is available for\n",
		"output only.\n",
	}
	for _, line := range lines {
		fmt.Fprint(o, line)
	}
}
|
||||
|
||||
// TODO: ---gzip --bz2in --zin as well
|
||||
func mainUsageCompressedDataOptions(o *os.File, argv0 string) {
|
||||
fmt.Fprintf(o, " Decompression done within the Miller process itself:\n")
|
||||
fmt.Fprintf(o, " --gzin Uncompress gzip within the Miller process. Done by default if file ends in \".gz\".\n")
|
||||
fmt.Fprintf(o, " --bz2in Uncompress bz2ip within the Miller process. Done by default if file ends in \".bz2\".\n")
|
||||
fmt.Fprintf(o, " --zin Uncompress zlib within the Miller process. Done by default if file ends in \".z\".\n")
|
||||
fmt.Fprintf(o, " Decompression done outside the Miller processn")
|
||||
fmt.Fprintf(o, " --prepipe {command} You can, of course, already do without this for single input files,\n")
|
||||
fmt.Fprintf(o, " e.g. \"gunzip < myfile.csv.gz | %s ...\".\n", argv0)
|
||||
fmt.Fprintf(o, " However, when multiple input files are present, between-file separations are\n")
|
||||
fmt.Fprintf(o, " lost; also, the FILENAME variable doesn't iterate. Using --prepipe you can\n")
|
||||
fmt.Fprintf(o, " specify an action to be taken on each input file. This prepipe command must\n")
|
||||
fmt.Fprintf(o, " be able to read from standard input; it will be invoked with\n")
|
||||
fmt.Fprintf(o, " {command} < {filename}.\n")
|
||||
fmt.Fprintf(o, " --prepipex {command} Like --prepipe with one exception: doesn't insert '<' between\n")
|
||||
fmt.Fprintf(o, " command and filename at runtime. Useful for some commands like 'unzip -qc' which don't\n")
|
||||
fmt.Fprintf(o, " read standard input.\n")
|
||||
fmt.Fprintf(o, " Examples:\n")
|
||||
fmt.Fprintf(o, " %s --prepipe 'gunzip'\n", argv0)
|
||||
fmt.Fprintf(o, " %s --prepipe 'zcat -cf'\n", argv0)
|
||||
fmt.Fprintf(o, " %s --prepipe 'xz -cd'\n", argv0)
|
||||
fmt.Fprintf(o, " %s --prepipe cat\n", argv0)
|
||||
fmt.Fprintf(o, " Note that this feature is quite general and is not limited to decompression\n")
|
||||
fmt.Fprintf(o, " utilities. You can use it to apply per-file filters of your choice.\n")
|
||||
fmt.Fprintf(o, " For output compression (or other) utilities, simply pipe the output:\n")
|
||||
fmt.Fprintf(o, " %s ... | {your compression command}\n", argv0)
|
||||
fmt.Fprintf(o, " Lastly, note that if --prepipe is specified, it replaces any decisions that might\n")
|
||||
fmt.Fprintf(o, " have been made based on the file suffix. Also, --gzin/--bz2in/--zin are ignored\n")
|
||||
fmt.Fprintf(o, " if --prepipe is also specified.\n")
|
||||
}
|
||||
|
||||
//func mainUsageSeparatorOptions(o *os.File, argv0 string) {
|
||||
// fmt.Fprintf(o, " --rs --irs --ors Record separators, e.g. 'lf' or '\\r\\n'\n");
|
||||
// fmt.Fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n");
|
||||
// fmt.Fprintf(o, " --ps --ips --ops Pair separators, e.g. equals sign\n");
|
||||
// fmt.Fprintf(o, "\n");
|
||||
// fmt.Fprintf(o, " Notes about line endings:\n");
|
||||
// fmt.Fprintf(o, " * Default line endings (--irs and --ors) are \"auto\" which means autodetect from\n");
|
||||
// fmt.Fprintf(o, " the input file format, as long as the input file(s) have lines ending in either\n");
|
||||
// fmt.Fprintf(o, " LF (also known as linefeed, '\\n', 0x0a, Unix-style) or CRLF (also known as\n");
|
||||
// fmt.Fprintf(o, " carriage-return/linefeed pairs, '\\r\\n', 0x0d 0x0a, Windows style).\n");
|
||||
// fmt.Fprintf(o, " * If both irs and ors are auto (which is the default) then LF input will lead to LF\n");
|
||||
// fmt.Fprintf(o, " output and CRLF input will lead to CRLF output, regardless of the platform you're\n");
|
||||
// fmt.Fprintf(o, " running on.\n");
|
||||
// fmt.Fprintf(o, " * The line-ending autodetector triggers on the first line ending detected in the input\n");
|
||||
// fmt.Fprintf(o, " stream. E.g. if you specify a CRLF-terminated file on the command line followed by an\n");
|
||||
// fmt.Fprintf(o, " LF-terminated file then autodetected line endings will be CRLF.\n");
|
||||
// fmt.Fprintf(o, " * If you use --ors {something else} with (default or explicitly specified) --irs auto\n");
|
||||
// fmt.Fprintf(o, " then line endings are autodetected on input and set to what you specify on output.\n");
|
||||
// fmt.Fprintf(o, " * If you use --irs {something else} with (default or explicitly specified) --ors auto\n");
|
||||
// fmt.Fprintf(o, " then the output line endings used are LF on Unix/Linux/BSD/MacOSX, and CRLF on Windows.\n");
|
||||
// fmt.Fprintf(o, "\n");
|
||||
// fmt.Fprintf(o, " Notes about all other separators:\n");
|
||||
// fmt.Fprintf(o, " * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats\n");
|
||||
// fmt.Fprintf(o, " do key-value pairs appear juxtaposed.\n");
|
||||
// fmt.Fprintf(o, " * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;\n");
|
||||
// fmt.Fprintf(o, " XTAB records are separated by two or more consecutive IFS/OFS -- i.e.\n");
|
||||
// fmt.Fprintf(o, " a blank line. Everything above about --irs/--ors/--rs auto becomes --ifs/--ofs/--fs\n");
|
||||
// fmt.Fprintf(o, " auto for XTAB format. (XTAB's default IFS/OFS are \"auto\".)\n");
|
||||
// fmt.Fprintf(o, " * OFS must be single-character for PPRINT format. This is because it is used\n");
|
||||
// fmt.Fprintf(o, " with repetition for alignment; multi-character separators would make\n");
|
||||
// fmt.Fprintf(o, " alignment impossible.\n");
|
||||
// fmt.Fprintf(o, " * OPS may be multi-character for XTAB format, in which case alignment is\n");
|
||||
// fmt.Fprintf(o, " disabled.\n");
|
||||
// fmt.Fprintf(o, " * TSV is simply CSV using tab as field separator (\"--fs tab\").\n");
|
||||
// fmt.Fprintf(o, " * FS/PS are ignored for markdown format; RS is used.\n");
|
||||
// fmt.Fprintf(o, " * All FS and PS options are ignored for JSON format, since they are not relevant\n");
|
||||
// fmt.Fprintf(o, " to the JSON format.\n");
|
||||
// fmt.Fprintf(o, " * You can specify separators in any of the following ways, shown by example:\n");
|
||||
// fmt.Fprintf(o, " - Type them out, quoting as necessary for shell escapes, e.g.\n");
|
||||
// fmt.Fprintf(o, " \"--fs '|' --ips :\"\n");
|
||||
// fmt.Fprintf(o, " - C-style escape sequences, e.g. \"--rs '\\r\\n' --fs '\\t'\".\n");
|
||||
// fmt.Fprintf(o, " - To avoid backslashing, you can use any of the following names:\n");
|
||||
// fmt.Fprintf(o, " ");
|
||||
// lhmss_t* pmap = get_desc_to_chars_map();
|
||||
// for (lhmsse_t* pe = pmap.phead; pe != nil; pe = pe.pnext) {
|
||||
// fmt.Fprintf(o, " %s", pe.key);
|
||||
// }
|
||||
// fmt.Fprintf(o, "\n");
|
||||
// fmt.Fprintf(o, " * Default separators by format:\n");
|
||||
// fmt.Fprintf(o, " %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS");
|
||||
// lhmss_t* default_rses = get_default_rses();
|
||||
// lhmss_t* default_fses = get_default_fses();
|
||||
// lhmss_t* default_pses = get_default_pses();
|
||||
// for (lhmsse_t* pe = default_rses.phead; pe != nil; pe = pe.pnext) {
|
||||
// char* filefmt = pe.key;
|
||||
// char* rs = pe.value;
|
||||
// char* fs = lhmss_get(default_fses, filefmt);
|
||||
// char* ps = lhmss_get(default_pses, filefmt);
|
||||
// fmt.Fprintf(o, " %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps));
|
||||
// }
|
||||
//}
|
||||
|
||||
// mainUsageCsvOptions writes the help text for the CSV-only command-line
// flags to o, one write per help line. argv0 is not used in the body.
func mainUsageCsvOptions(o *os.File, argv0 string) {
	helpLines := []string{
		" --implicit-csv-header Use 1,2,3,... as field labels, rather than from line 1\n",
		" of input files. Tip: combine with \"label\" to recreate\n",
		" missing headers.\n",
		" --no-implicit-csv-header Do not use --implicit-csv-header. This is the default\n",
		" anyway -- the main use is for the flags to 'mlr join' if you have\n",
		" main file(s) which are headerless but you want to join in on\n",
		" a file which does have a CSV header. Then you could use\n",
		" 'mlr --csv --implicit-csv-header join --no-implicit-csv-header\n",
		" -l your-join-in-with-header.csv ... your-headerless.csv'\n",
		" --allow-ragged-csv-input|--ragged If a data line has fewer fields than the header line,\n",
		" fill remaining keys with empty string. If a data line has more\n",
		" fields than the header line, use integer field labels as in\n",
		" the implicit-header case.\n",
		" --headerless-csv-output Print only CSV data lines.\n",
		" -N Keystroke-saver for --implicit-csv-header --headerless-csv-output.\n",
	}
	// None of the lines contains a '%', so plain Fprint emits the same bytes
	// a verb-free Fprintf would.
	for _, helpLine := range helpLines {
		fmt.Fprint(o, helpLine)
	}
}
|
||||
|
||||
//func mainUsageDoubleQuoting(o *os.File, argv0 string) {
|
||||
// fmt.Fprintf(o, " --quote-all Wrap all fields in double quotes\n");
|
||||
// fmt.Fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have\n");
|
||||
// fmt.Fprintf(o, " OFS or ORS in them\n");
|
||||
// fmt.Fprintf(o, " --quote-minimal Wrap fields in double quotes only if they have OFS or ORS\n");
|
||||
// fmt.Fprintf(o, " in them (default)\n");
|
||||
// fmt.Fprintf(o, " --quote-numeric Wrap fields in double quotes only if they have numbers\n");
|
||||
// fmt.Fprintf(o, " in them\n");
|
||||
// fmt.Fprintf(o, " --quote-original Wrap fields in double quotes if and only if they were\n");
|
||||
// fmt.Fprintf(o, " quoted on input. This isn't sticky for computed fields:\n");
|
||||
// fmt.Fprintf(o, " e.g. if fields a and b were quoted on input and you do\n");
|
||||
// fmt.Fprintf(o, " \"put '$c = $a . $b'\" then field c won't inherit a or b's\n");
|
||||
// fmt.Fprintf(o, " was-quoted-on-input flag.\n");
|
||||
//}
|
||||
|
||||
// mainUsageNumericalFormatting writes the help text for the --ofmt flag to o.
// The "%%" sequences are Fprintf escapes, so the user sees single "%" signs
// in the sample format codes. argv0 is not used in the body.
func mainUsageNumericalFormatting(o *os.File, argv0 string) {
	// TODO: update comment
	fmt.Fprintf(o, " --ofmt {format} E.g. %%.18f, %%.0f, %%9.6e. Please use sprintf-style codes for\n")
	fmt.Fprintf(o, " floating-point numbers. If not specified, default formatting is used.\n")
	fmt.Fprintf(o, " See also the fmtnum function within mlr put (mlr --help-all-functions);\n")
	fmt.Fprintf(o, " see also the format-values function.\n")
}
|
||||
|
||||
func mainUsageOutputColorization(o *os.File, argv0 string) {
|
||||
fmt.Fprintf(o, "Things having colors:\n")
|
||||
fmt.Fprintf(o, "* Keys in CSV header lines, JSON keys, etc\n")
|
||||
fmt.Fprintf(o, "* Values in CSV data lines, JSON scalar values, etc\n")
|
||||
fmt.Fprintf(o, "* \"PASS\" and \"FAIL\" in regression-test output\n")
|
||||
fmt.Fprintf(o, "* Some online-help strings\n")
|
||||
fmt.Fprintf(o, "* Coloring for the REPL prompt\n")
|
||||
fmt.Fprintf(o, "\n")
|
||||
fmt.Fprintf(o, "Rules for coloring:\n")
|
||||
fmt.Fprintf(o, "* By default, colorize output only if writing to stdout and stdout is a TTY.\n")
|
||||
fmt.Fprintf(o, " * Example: color: mlr --csv cat foo.csv\n")
|
||||
fmt.Fprintf(o, " * Example: no color: mlr --csv cat foo.csv > bar.csv\n")
|
||||
fmt.Fprintf(o, " * Example: no color: mlr --csv cat foo.csv | less\n")
|
||||
fmt.Fprintf(o, "* The default colors were chosen since they look OK with white or black terminal background,\n")
|
||||
fmt.Fprintf(o, " and are differentiable with common varieties of human color vision.\n")
|
||||
fmt.Fprintf(o, "\n")
|
||||
fmt.Fprintf(o, "Mechanisms for coloring:\n")
|
||||
fmt.Fprintf(o, "* Miller uses ANSI escape sequences only. This does not work on Windows except on Cygwin.\n")
|
||||
fmt.Fprintf(o, "* Requires TERM environment variable to be set to non-empty string.\n")
|
||||
fmt.Fprintf(o, "* Doesn't try to check to see whether the terminal is capable of 256-color\n")
|
||||
fmt.Fprintf(o, " ANSI vs 16-color ANSI. Note that if colors are in the range 0..15\n")
|
||||
fmt.Fprintf(o, " then 16-color ANSI escapes are used, so this is in the user's control.\n")
|
||||
fmt.Fprintf(o, "\n")
|
||||
fmt.Fprintf(o, "How you can control colorization:\n")
|
||||
fmt.Fprintf(o, "* Suppression/unsuppression:\n")
|
||||
fmt.Fprintf(o, " * Environment variable export MLR_NO_COLOR=true means don't color even if stdout+TTY.\n")
|
||||
fmt.Fprintf(o, " * Environment variable export MLR_ALWAYS_COLOR=true means do color even if not stdout+TTY.\n")
|
||||
fmt.Fprintf(o, " For example, you might want to use this when piping mlr output to less -r.\n")
|
||||
fmt.Fprintf(o, " * Command-line flags ``--no-color`` or ``-M``, ``--always-color`` or ``-C``.\n")
|
||||
|
||||
fmt.Fprintf(o, "* Color choices can be specified by using environment variables, or command-line flags,\n")
|
||||
fmt.Fprintf(o, " with values 0..255:\n")
|
||||
fmt.Fprintf(o, " * export MLR_KEY_COLOR=208, MLR_VALUE_COLOR-33, etc.\n")
|
||||
fmt.Fprintf(o, " * Command-line flags --key-color 208, --value-color 33, etc.\n")
|
||||
fmt.Fprintf(o, " * This is particularly useful if your terminal's background color clashes with current settings.\n")
|
||||
fmt.Fprintf(o, "* If environment-variable settings and command-line flags are both provided,the latter take precedence.\n")
|
||||
fmt.Fprintf(o, "* Please do %s --list-colors to see the available color codes.\n", lib.MlrExeName())
|
||||
}
|
||||
|
||||
// TODO
|
||||
// mainUsageOtherOptions writes the help text for the miscellaneous main flags
// (--seed, --nr-progress-mod, --from, --mfrom, --load, --mload, -n, -I) to o.
// argv0 is substituted into the example command lines.
func mainUsageOtherOptions(o *os.File, argv0 string) {
	fmt.Fprintf(o, " --seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter\n")
	fmt.Fprintf(o, " urand()/urandint()/urand32().\n")
	fmt.Fprintf(o, " --nr-progress-mod {m}, with m a positive integer: print filename and record\n")
	// User-facing text names the stream, not the Go identifier os.Stderr.
	fmt.Fprintf(o, " count to stderr every m input records.\n")
	fmt.Fprintf(o, " --from {filename} Use this to specify an input file before the verb(s),\n")
	fmt.Fprintf(o, " rather than after. May be used more than once. Example:\n")
	fmt.Fprintf(o, " \"%s --from a.dat --from b.dat cat\" is the same as\n", argv0)
	fmt.Fprintf(o, " \"%s cat a.dat b.dat\".\n", argv0)
	fmt.Fprintf(o, " --mfrom {filenames} -- Use this to specify one or more input files before the verb(s),\n")
	fmt.Fprintf(o, " rather than after. May be used more than once.\n")
	fmt.Fprintf(o, " The list of filenames must end with \"--\". This is useful\n")
	fmt.Fprintf(o, " for example since \"--from *.csv\" doesn't do what you might\n")
	fmt.Fprintf(o, " hope but \"--mfrom *.csv --\" does.\n")
	fmt.Fprintf(o, " --load {filename} Load DSL script file for all put/filter operations on the command line.\n")
	fmt.Fprintf(o, " If the name following --load is a directory, load all \"*.mlr\" files\n")
	fmt.Fprintf(o, " in that directory. This is just like \"put -f\" and \"filter -f\"\n")
	fmt.Fprintf(o, " except it's up-front on the command line, so you can do something like\n")
	fmt.Fprintf(o, " alias mlr='mlr --load ~/myscripts' if you like.\n")
	fmt.Fprintf(o, " --mload {names} -- Like --load but works with more than one filename,\n")
	fmt.Fprintf(o, " e.g. '--mload *.mlr --'.\n")
	fmt.Fprintf(o, " -n Process no input files, nor standard input either. Useful\n")
	fmt.Fprintf(o, " for %s put with begin/end statements only. (Same as --from\n", argv0)
	fmt.Fprintf(o, " /dev/null.) Also useful in \"%s -n put -v '...'\" for\n", argv0)
	fmt.Fprintf(o, " analyzing abstract syntax trees (if that's your thing).\n")
	fmt.Fprintf(o, " -I Process files in-place. For each file name on the command\n")
	fmt.Fprintf(o, " line, output is written to a temp file in the same\n")
	fmt.Fprintf(o, " directory, which is then renamed over the original. Each\n")
	fmt.Fprintf(o, " file is processed in isolation: if the output format is\n")
	fmt.Fprintf(o, " CSV, CSV headers will be present in each output file;\n")
	fmt.Fprintf(o, " statistics are only over each file's own records; and so on.\n")
}
|
||||
|
||||
// mainUsageThenChaining writes a two-line explanation of "then" chaining to
// o; argv0 is substituted as the program name in the example command.
func mainUsageThenChaining(o *os.File, argv0 string) {
	const intro = "Output of one verb may be chained as input to another using \"then\", e.g.\n"
	const exampleFormat = " %s stats1 -a min,mean,max -f flag,u,v -g color then sort -f color\n"

	fmt.Fprint(o, intro)
	fmt.Fprintf(o, exampleFormat, argv0)
}
|
||||
|
||||
func mainUsageAuxents(o *os.File) {
|
||||
fmt.Fprintf(o, "Miller has a few otherwise-standalone executables packaged within it.\n")
|
||||
fmt.Fprintf(o, "They do not participate in any other parts of Miller.\n")
|
||||
fmt.Fprintf(o, "Please use \"%s aux-list\" for more information.\n", lib.MlrExeName())
|
||||
// TODO:
|
||||
// package miller/src/cli
|
||||
// imports miller/src/entrypoint
|
||||
// imports miller/src/auxents
|
||||
// imports miller/src/auxents/repl
|
||||
// imports miller/src/cli
|
||||
// imports miller/src/auxents: import cycle not allowed
|
||||
//auxents.ShowAuxEntries(o)
|
||||
}
|
||||
|
||||
func mainUsageSeeAlso(o *os.File, argv0 string) {
|
||||
fmt.Fprintf(o, "For more information please see http://johnkerl.org/miller/doc and/or\n")
|
||||
fmt.Fprintf(o, "http://github.com/johnkerl/miller.")
|
||||
fmt.Fprintf(o, " This is Miller version %s.\n", version.STRING)
|
||||
}
|
||||
|
||||
// usageUnrecognizedVerb reports arg as unrecognized on stderr, suggests
// running "argv0 --help", and then terminates the process with exit status 1.
// It does not return.
func usageUnrecognizedVerb(argv0 string, arg string) {
	errStream := os.Stderr
	fmt.Fprintf(errStream, "%s: option \"%s\" not recognized.\n", argv0, arg)
	fmt.Fprintf(errStream, "Please run \"%s --help\" for usage information.\n", argv0)
	os.Exit(1)
}
|
||||
79
go/src/cliutil/flatten_unflatten.go
Normal file
79
go/src/cliutil/flatten_unflatten.go
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
package cliutil
|
||||
|
||||
// ================================================================
|
||||
// Decide whether to insert a flatten or unflatten verb at the end of the
|
||||
// chain. See also repl/verbs.go which handles the same issue in the REPL.
|
||||
//
|
||||
// ----------------------------------------------------------------
|
||||
// PROBLEM TO BE SOLVED:
|
||||
//
|
||||
// JSON has nested structures and CSV et al. do not. For example:
|
||||
// {
|
||||
// "req" : {
|
||||
// "method": "GET",
|
||||
// "path": "api/check",
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For CSV we flatten this down to
|
||||
//
|
||||
// {
|
||||
// "req.method": "GET",
|
||||
// "req.path": "api/check"
|
||||
// }
|
||||
//
|
||||
// ----------------------------------------------------------------
|
||||
// APPROACH:
|
||||
//
|
||||
// Use the Principle of Least Surprise (POLS).
|
||||
//
|
||||
// * If input is JSON and output is JSON:
|
||||
// o Records can be nested from record-read
|
||||
// o They remain that way through the Miller record-processing stream
|
||||
// o They are nested on record-write
|
||||
// o No action needs to be taken
|
||||
//
|
||||
// * If input is JSON and output is non-JSON:
|
||||
// o Records can be nested from record-read
|
||||
// o They remain that way through the Miller record-processing stream
|
||||
// o On record-write, nested structures will be converted to string (carriage
|
||||
// returns and all) using json_stringify. People *might* want this but
|
||||
// (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
|
||||
// --no-auto-flatten CLI flag for those who want it.
|
||||
//
|
||||
// * If input is non-JSON and output is non-JSON:
|
||||
// o If there is a "req.method" field, people should be able to do
|
||||
// 'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
|
||||
// no auto-unflatten on input. People can insert an unflatten verb
|
||||
// into their verb chain if they really want unflatten for non-JSON
|
||||
// files.
|
||||
// o The DSL can make nested data, so AUTO-FLATTEN at output.
|
||||
//
|
||||
// * If input is non-JSON and output is JSON:
|
||||
// o Default is to auto-unflatten at output.
|
||||
// o There is a --no-auto-unflatten for those who want it.
|
||||
// ================================================================
|
||||
|
||||
func DecideFinalFlatten(options *TOptions) bool {
|
||||
ofmt := options.WriterOptions.OutputFileFormat
|
||||
if options.WriterOptions.AutoFlatten {
|
||||
if ofmt != "json" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func DecideFinalUnflatten(options *TOptions) bool {
|
||||
ifmt := options.ReaderOptions.InputFileFormat
|
||||
ofmt := options.WriterOptions.OutputFileFormat
|
||||
|
||||
if options.WriterOptions.AutoUnflatten {
|
||||
if ifmt != "json" {
|
||||
if ofmt == "json" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
@ -9,6 +9,7 @@ package cst
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"miller/src/lib"
|
||||
|
|
@ -46,6 +47,30 @@ type BuiltinFunctionInfo struct {
|
|||
}
|
||||
|
||||
// ================================================================
|
||||
// Sort the function table by class, then by function name. Useful for online help.
|
||||
// Or: just by function name ...
|
||||
//
|
||||
// TODO: pipes and tildes are coming after text, & other symbols before, due to ASCII ordering.
|
||||
// Code around that.
|
||||
func init() {
|
||||
// Go sort API: for ascending sort, return true if element i < element j.
|
||||
sort.Slice(_BUILTIN_FUNCTION_LOOKUP_TABLE, func(i, j int) bool {
|
||||
//if _BUILTIN_FUNCTION_LOOKUP_TABLE[i].class < _BUILTIN_FUNCTION_LOOKUP_TABLE[j].class {
|
||||
//return true
|
||||
//}
|
||||
//if _BUILTIN_FUNCTION_LOOKUP_TABLE[i].class > _BUILTIN_FUNCTION_LOOKUP_TABLE[j].class {
|
||||
//return false
|
||||
//}
|
||||
if _BUILTIN_FUNCTION_LOOKUP_TABLE[i].name < _BUILTIN_FUNCTION_LOOKUP_TABLE[j].name {
|
||||
return true
|
||||
}
|
||||
if _BUILTIN_FUNCTION_LOOKUP_TABLE[i].name > _BUILTIN_FUNCTION_LOOKUP_TABLE[j].name {
|
||||
return false
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
var _BUILTIN_FUNCTION_LOOKUP_TABLE = []BuiltinFunctionInfo{
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
@ -791,8 +816,7 @@ for the seconds part`,
|
|||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats seconds since epoch (integer part)
|
||||
as GMT timestamp with year-month-date, e.g. sec2gmtdate(1440768801.7) = "2015-08-28".
|
||||
Leaves non-numbers as-is.
|
||||
`,
|
||||
Leaves non-numbers as-is.`,
|
||||
unaryFunc: types.MlrvalSec2GMTDate,
|
||||
},
|
||||
|
||||
|
|
@ -826,8 +850,7 @@ Leaves non-numbers as-is.
|
|||
Format strings are as in the C library (please see "man strftime" on your system),
|
||||
with the Miller-specific addition of "%1S" through "%9S" which format the seconds
|
||||
with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.)
|
||||
See also strftime_local.
|
||||
`,
|
||||
See also strftime_local.`,
|
||||
binaryFunc: types.MlrvalStrftime,
|
||||
},
|
||||
|
||||
|
|
@ -837,8 +860,7 @@ Leaves non-numbers as-is.
|
|||
help: `strptime: Parses timestamp as floating-point seconds since the epoch,
|
||||
e.g. strptime("2015-08-28T13:33:21Z","%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000,
|
||||
and strptime("2015-08-28T13:33:21.345Z","%Y-%m-%dT%H:%M:%SZ") = 1440768801.345000.
|
||||
See also strptime_local.
|
||||
`,
|
||||
See also strptime_local.`,
|
||||
binaryFunc: types.MlrvalStrptime,
|
||||
},
|
||||
|
||||
|
|
@ -849,66 +871,58 @@ Leaves non-numbers as-is.
|
|||
// strptime_local (class=time #args=2): Like strptime, but consults $TZ environment variable to find and use local timezone.
|
||||
|
||||
{
|
||||
name: "dhms2fsec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers floating-point seconds as in dhms2fsec("5d18h53m20.250000s") = 500000.250000
|
||||
`,
|
||||
name: "dhms2fsec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers floating-point seconds as in dhms2fsec("5d18h53m20.250000s") = 500000.250000`,
|
||||
unaryFunc: types.MlrvalDHMS2FSec,
|
||||
},
|
||||
|
||||
{
|
||||
name: "dhms2sec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000
|
||||
`,
|
||||
name: "dhms2sec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000`,
|
||||
unaryFunc: types.MlrvalDHMS2Sec,
|
||||
},
|
||||
|
||||
{
|
||||
name: "fsec2dhms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s"
|
||||
`,
|
||||
name: "fsec2dhms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s"`,
|
||||
unaryFunc: types.MlrvalFSec2DHMS,
|
||||
},
|
||||
|
||||
{
|
||||
name: "fsec2hms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000"
|
||||
`,
|
||||
name: "fsec2hms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000"`,
|
||||
unaryFunc: types.MlrvalFSec2HMS,
|
||||
},
|
||||
|
||||
{
|
||||
name: "hms2fsec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers floating-point seconds as in hms2fsec("01:23:20.250000") = 5000.250000
|
||||
`,
|
||||
name: "hms2fsec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers floating-point seconds as in hms2fsec("01:23:20.250000") = 5000.250000`,
|
||||
unaryFunc: types.MlrvalHMS2FSec,
|
||||
},
|
||||
|
||||
{
|
||||
name: "hms2sec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers integer seconds as in hms2sec("01:23:20") = 5000
|
||||
`,
|
||||
name: "hms2sec",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Recovers integer seconds as in hms2sec("01:23:20") = 5000`,
|
||||
unaryFunc: types.MlrvalHMS2Sec,
|
||||
},
|
||||
|
||||
{
|
||||
name: "sec2dhms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s"
|
||||
`,
|
||||
name: "sec2dhms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s"`,
|
||||
unaryFunc: types.MlrvalSec2DHMS,
|
||||
},
|
||||
|
||||
{
|
||||
name: "sec2hms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats integer seconds as in sec2hms(5000) = "01:23:20"
|
||||
`,
|
||||
name: "sec2hms",
|
||||
class: FUNC_CLASS_TIME,
|
||||
help: `Formats integer seconds as in sec2hms(5000) = "01:23:20"`,
|
||||
unaryFunc: types.MlrvalSec2HMS,
|
||||
},
|
||||
|
||||
|
|
@ -1481,7 +1495,7 @@ key-value pairs from all arguments. Rightmost collisions win, e.g.
|
|||
name: "unflatten",
|
||||
class: FUNC_CLASS_COLLECTIONS,
|
||||
help: `Reverses flatten. Example:
|
||||
unflatten({"a.b.c" : 4}, ".") is {"a": "b": { "c": 4 }}}.
|
||||
unflatten({"a.b.c" : 4}, ".") is {"a": "b": { "c": 4 }}.
|
||||
Useful for nested JSON-like structures for non-JSON file formats like CSV.
|
||||
See also arrayify.`,
|
||||
binaryFunc: types.MlrvalUnflatten,
|
||||
|
|
@ -1562,7 +1576,7 @@ func hashifyLookupTable(lookupTable *[]BuiltinFunctionInfo) map[string]*BuiltinF
|
|||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func (manager *BuiltinFunctionManager) ListBuiltinFunctionsRaw(o *os.File) {
|
||||
func (manager *BuiltinFunctionManager) ListBuiltinFunctionNames(o *os.File) {
|
||||
for _, builtinFunctionInfo := range *manager.lookupTable {
|
||||
fmt.Fprintln(o, builtinFunctionInfo.name)
|
||||
}
|
||||
|
|
@ -1631,11 +1645,16 @@ func (manager *BuiltinFunctionManager) listBuiltinFunctionUsageApproximate(
|
|||
o *os.File,
|
||||
) {
|
||||
fmt.Fprintf(o, "No exact match for \"%s\". Inexact matches:\n", text)
|
||||
found := false
|
||||
for _, builtinFunctionInfo := range *manager.lookupTable {
|
||||
if strings.Contains(builtinFunctionInfo.name, text) {
|
||||
fmt.Fprintf(o, " %s\n", builtinFunctionInfo.name)
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
fmt.Fprintln(o, "None found.")
|
||||
}
|
||||
}
|
||||
|
||||
func describeNargs(info *BuiltinFunctionInfo) string {
|
||||
|
|
|
|||
563
go/src/dsl/cst/keyword_usage.go
Normal file
563
go/src/dsl/cst/keyword_usage.go
Normal file
|
|
@ -0,0 +1,563 @@
|
|||
package cst
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"miller/src/lib"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// tKeywordUsageFunc is the signature of a per-keyword help printer: it takes
// no arguments and returns nothing.
type tKeywordUsageFunc func()

// tKeywordUsageEntry pairs a keyword's name with the function that prints
// that keyword's usage text.
type tKeywordUsageEntry struct {
	name      string
	usageFunc tKeywordUsageFunc
}
|
||||
|
||||
// KEYWORD_USAGE_TABLE lists every keyword that has help text, each paired
// with the function that prints it. The help functions in this file walk the
// slice in the order written here, so this order is also the listing order:
// lower-case keywords first, then the upper-case built-in variables.
var KEYWORD_USAGE_TABLE = []tKeywordUsageEntry{
	{"all", allKeywordUsage},
	{"begin", beginKeywordUsage},
	{"bool", boolKeywordUsage},
	{"break", breakKeywordUsage},
	{"call", callKeywordUsage},
	{"continue", continueKeywordUsage},
	{"do", doKeywordUsage},
	{"dump", dumpKeywordUsage},
	{"edump", edumpKeywordUsage},
	{"elif", elifKeywordUsage},
	{"else", elseKeywordUsage},
	{"emit", emitKeywordUsage},
	{"emitf", emitfKeywordUsage},
	{"emitp", emitpKeywordUsage},
	{"end", endKeywordUsage},
	{"eprint", eprintKeywordUsage},
	{"eprintn", eprintnKeywordUsage},
	{"false", falseKeywordUsage},
	{"filter", filterKeywordUsage},
	{"float", floatKeywordUsage},
	{"for", forKeywordUsage},
	{"func", funcKeywordUsage},
	{"if", ifKeywordUsage},
	{"in", inKeywordUsage},
	{"int", intKeywordUsage},
	{"map", mapKeywordUsage},
	{"num", numKeywordUsage},
	{"print", printKeywordUsage},
	{"printn", printnKeywordUsage},
	{"return", returnKeywordUsage},
	{"stderr", stderrKeywordUsage},
	{"stdout", stdoutKeywordUsage},
	{"str", strKeywordUsage},
	{"subr", subrKeywordUsage},
	{"tee", teeKeywordUsage},
	{"true", trueKeywordUsage},
	{"unset", unsetKeywordUsage},
	{"var", varKeywordUsage},
	{"while", whileKeywordUsage},
	{"ENV", ENVKeywordUsage},
	{"FILENAME", FILENAMEKeywordUsage},
	{"FILENUM", FILENUMKeywordUsage},
	{"FNR", FNRKeywordUsage},
	{"IFS", IFSKeywordUsage},
	{"IPS", IPSKeywordUsage},
	{"IRS", IRSKeywordUsage},
	{"M_E", M_EKeywordUsage},
	{"M_PI", M_PIKeywordUsage},
	{"NF", NFKeywordUsage},
	{"NR", NRKeywordUsage},
	{"OFS", OFSKeywordUsage},
	{"OPS", OPSKeywordUsage},
	{"ORS", ORSKeywordUsage},
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Pass function_name == NULL to get usage for all keywords.
|
||||
func UsageKeywords() {
|
||||
for i, entry := range KEYWORD_USAGE_TABLE {
|
||||
if i > 0 {
|
||||
fmt.Println()
|
||||
}
|
||||
entry.usageFunc()
|
||||
}
|
||||
}
|
||||
|
||||
func UsageForKeyword(name string) {
|
||||
found := false
|
||||
for _, entry := range KEYWORD_USAGE_TABLE {
|
||||
if entry.name == name {
|
||||
entry.usageFunc()
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
fmt.Printf("mlr: unrecognized keyword \"%s\".\n", name)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func ListKeywords() {
|
||||
for _, entry := range KEYWORD_USAGE_TABLE {
|
||||
fmt.Println(entry.name)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// allKeywordUsage prints the help text for the "all" keyword to stdout.
func allKeywordUsage() {
	const usage = `all: used in "emit", "emitp", and "unset" as a synonym for @*`
	fmt.Println(usage)
}
|
||||
|
||||
// beginKeywordUsage prints the help text for the "begin" keyword to stdout.
func beginKeywordUsage() {
	const usage = `begin: defines a block of statements to be executed before input records
are ingested. The body statements must be wrapped in curly braces.

Example: 'begin { @count = 0 }'`
	fmt.Println(usage)
}
|
||||
|
||||
// boolKeywordUsage prints the help text for the "bool" keyword to stdout.
func boolKeywordUsage() {
	const usage = `bool: declares a boolean local variable in the current curly-braced scope.
Type-checking happens at assignment: 'bool b = 1' is an error.`
	fmt.Println(usage)
}
|
||||
|
||||
// breakKeywordUsage prints the help text for the "break" keyword to stdout.
func breakKeywordUsage() {
	const usage = `break: causes execution to continue after the body of the current for/while/do-while loop.`
	fmt.Println(usage)
}
|
||||
|
||||
// callKeywordUsage prints the help text for the "call" keyword to stdout.
func callKeywordUsage() {
	const usage = `call: used for invoking a user-defined subroutine.

Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)'`
	fmt.Println(usage)
}
|
||||
|
||||
// continueKeywordUsage prints the help text for the "continue" keyword to
// stdout.
func continueKeywordUsage() {
	const usage = `continue: causes execution to skip the remaining statements in the body of
the current for/while/do-while loop. For-loop increments are still applied.`
	fmt.Println(usage)
}
|
||||
|
||||
// doKeywordUsage prints the help text for the "do" keyword to stdout.
func doKeywordUsage() {
	const usage = `do: with "while", introduces a do-while loop. The body statements must be wrapped
in curly braces.`
	fmt.Println(usage)
}
|
||||
|
||||
// dumpKeywordUsage prints the help text for the "dump" keyword, including its
// redirect forms and examples, to stdout.
func dumpKeywordUsage() {
	const usage = `dump: prints all currently defined out-of-stream variables immediately
to stdout as JSON.

With >, >>, or |, the data do not become part of the output record stream but
are instead redirected.

The > and >> are for write and append, as in the shell, but (as with awk) the
file-overwrite for > is on first write, not per record. The | is for piping to
a process which will process the data. There will be one open file for each
distinct file name (for > and >>) or one subordinate process for each distinct
value of the piped-to command (for |). Output-formatting flags are taken from
the main command line.

Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump >> "mytap.dat"}'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump | "jq .[]"}'`
	fmt.Println(usage)
}
|
||||
|
||||
// edumpKeywordUsage prints the help text for the "edump" keyword to stdout.
func edumpKeywordUsage() {
	const usage = `edump: prints all currently defined out-of-stream variables immediately
to stderr as JSON.

Example: mlr --from f.dat put -q '@v[NR]=$*; end { edump }'`
	fmt.Println(usage)
}
|
||||
|
||||
// elifKeywordUsage prints the help text for the "elif" keyword to stdout.
func elifKeywordUsage() {
	const usage = `elif: the way Miller spells "else if". The body statements must be wrapped
in curly braces.`
	fmt.Println(usage)
}
|
||||
|
||||
// elseKeywordUsage prints the help text for the "else" keyword to stdout.
func elseKeywordUsage() {
	const usage = `else: terminates an if/elif/elif chain. The body statements must be wrapped
in curly braces.`
	fmt.Println(usage)
}
|
||||
|
||||
func emitKeywordUsage() {
|
||||
fmt.Printf(
|
||||
`emit: inserts an out-of-stream variable into the output record stream. Hashmap
|
||||
indices present in the data but not slotted by emit arguments are not output.
|
||||
|
||||
With >, >>, or |, the data do not become part of the output record stream but
|
||||
are instead redirected.
|
||||
|
||||
The > and >> are for write and append, as in the shell, but (as with awk) the
|
||||
file-overwrite for > is on first write, not per record. The | is for piping to
|
||||
a process which will process the data. There will be one open file for each
|
||||
distinct file name (for > and >>) or one subordinate process for each distinct
|
||||
value of the piped-to command (for |). Output-formatting flags are taken from
|
||||
the main command line.
|
||||
|
||||
You can use any of the output-format command-line flags, e.g. --ocsv, --ofs,
|
||||
etc., to control the format of the output if the output is redirected. See also mlr -h.
|
||||
|
||||
Example: mlr --from f.dat put 'emit > "/tmp/data-".$a, $*'
|
||||
Example: mlr --from f.dat put 'emit > "/tmp/data-".$a, mapexcept($*, "a")'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
|
||||
Example: mlr --from f.dat put --ojson '@sums[$a][$b]+=$x; emit > "tap-".$a.$b.".dat", @sums'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
|
||||
|
||||
Please see %s://johnkerl.org/miller/doc for more information.
|
||||
`, lib.DOC_URL)
|
||||
}
|
||||
|
||||
func emitfKeywordUsage() {
|
||||
fmt.Printf(
|
||||
`emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the
|
||||
output record stream.
|
||||
|
||||
With >, >>, or |, the data do not become part of the output record stream but
|
||||
are instead redirected.
|
||||
|
||||
The > and >> are for write and append, as in the shell, but (as with awk) the
|
||||
file-overwrite for > is on first write, not per record. The | is for piping to
|
||||
a process which will process the data. There will be one open file for each
|
||||
distinct file name (for > and >>) or one subordinate process for each distinct
|
||||
value of the piped-to command (for |). Output-formatting flags are taken from
|
||||
the main command line.
|
||||
|
||||
You can use any of the output-format command-line flags, e.g. --ocsv, --ofs,
|
||||
etc., to control the format of the output if the output is redirected. See also mlr -h.
|
||||
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
|
||||
Example: mlr --from f.dat put --oxtab '@a=$i;@b+=$x;@c+=$y; emitf > "tap-".$i.".dat", @a'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf > "mytap.dat", @a, @b, @c'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf >> "mytap.dat", @a, @b, @c'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf > stderr, @a, @b, @c'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf | "grep somepattern", @a, @b, @c'
|
||||
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf | "grep somepattern > mytap.dat", @a, @b, @c'
|
||||
|
||||
Please see %s://johnkerl.org/miller/doc for more information.
|
||||
`, lib.DOC_URL)
|
||||
}
|
||||
|
||||
func emitpKeywordUsage() {
|
||||
fmt.Printf(
|
||||
`emitp: inserts an out-of-stream variable into the output record stream.
|
||||
Hashmap indices present in the data but not slotted by emitp arguments are
|
||||
output concatenated with ":".
|
||||
|
||||
With >, >>, or |, the data do not become part of the output record stream but
|
||||
are instead redirected.
|
||||
|
||||
The > and >> are for write and append, as in the shell, but (as with awk) the
|
||||
file-overwrite for > is on first write, not per record. The | is for piping to
|
||||
a process which will process the data. There will be one open file for each
|
||||
distinct file name (for > and >>) or one subordinate process for each distinct
|
||||
value of the piped-to command (for |). Output-formatting flags are taken from
|
||||
the main command line.
|
||||
|
||||
You can use any of the output-format command-line flags, e.g. --ocsv, --ofs,
|
||||
etc., to control the format of the output if the output is redirected. See also mlr -h.
|
||||
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
|
||||
Example: mlr --from f.dat put --opprint '@sums[$a][$b]+=$x; emitp > "tap-".$a.$b.".dat", @sums'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
|
||||
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
|
||||
|
||||
Please see %s://johnkerl.org/miller/doc for more information.
|
||||
`, lib.DOC_URL)
|
||||
}
|
||||
|
||||
func endKeywordUsage() {
|
||||
fmt.Println(
|
||||
`end: defines a block of statements to be executed after input records
|
||||
are ingested. The body statements must be wrapped in curly braces.
|
||||
|
||||
Example: 'end { emit @count }'
|
||||
Example: 'end { eprint "Final count is " . @count }'`)
|
||||
}
|
||||
|
||||
// eprintKeywordUsage prints the online help for the DSL keyword "eprint" to
// standard output.
//
// The text is emitted with fmt.Println, which performs no format-verb
// processing, so the modulus operator in the last example is written as a
// single '%' (a doubled '%%' would be printed verbatim).
func eprintKeywordUsage() {
	fmt.Println(
		`eprint: prints expression immediately to stderr.

Example: mlr --from f.dat put -q 'eprint "The sum of x and y is ".($x+$y)'
Example: mlr --from f.dat put -q 'for (k, v in $*) { eprint k . " => " . v }'
Example: mlr --from f.dat put '(NR % 1000 == 0) { eprint "Checkpoint ".NR}'`)
}
|
||||
|
||||
func eprintnKeywordUsage() {
|
||||
fmt.Println(
|
||||
`eprintn: prints expression immediately to stderr, without trailing newline.
|
||||
|
||||
Example: mlr --from f.dat put -q 'eprintn "The sum of x and y is ".($x+$y); eprint ""'`)
|
||||
}
|
||||
|
||||
func falseKeywordUsage() {
|
||||
fmt.Println(`false: the boolean literal value.`)
|
||||
}
|
||||
|
||||
func filterKeywordUsage() {
|
||||
fmt.Println(
|
||||
`filter: includes/excludes the record in the output record stream.
|
||||
|
||||
Example: mlr --from f.dat put 'filter (NR == 2 || $x > 5.4)'
|
||||
|
||||
Instead of put with 'filter false' you can simply use put -q. The following
|
||||
uses the input record to accumulate data but only prints the running sum
|
||||
without printing the input record:
|
||||
|
||||
Example: mlr --from f.dat put -q '@running_sum += $x * $y; emit @running_sum'`)
|
||||
}
|
||||
|
||||
func floatKeywordUsage() {
|
||||
fmt.Println(
|
||||
`float: declares a floating-point local variable in the current curly-braced scope.
|
||||
Type-checking happens at assignment: 'float x = 0' is an error.`)
|
||||
}
|
||||
|
||||
func forKeywordUsage() {
|
||||
fmt.Println(
|
||||
`for: defines a for-loop using one of three styles. The body statements must
|
||||
be wrapped in curly braces.
|
||||
For-loop over stream record:
|
||||
|
||||
Example: 'for (k, v in $*) { ... }'
|
||||
|
||||
For-loop over out-of-stream variables:
|
||||
|
||||
Example: 'for (k, v in @counts) { ... }'
|
||||
Example: 'for ((k1, k2), v in @counts) { ... }'
|
||||
Example: 'for ((k1, k2, k3), v in @*) { ... }'
|
||||
|
||||
C-style for-loop:
|
||||
|
||||
Example: 'for (var i = 0, var b = 1; i < 10; i += 1, b *= 2) { ... }'`)
|
||||
}
|
||||
|
||||
func funcKeywordUsage() {
|
||||
fmt.Println(
|
||||
`func: used for defining a user-defined function.
|
||||
|
||||
Example: 'func f(a,b) { return sqrt(a**2+b**2)} $d = f($x, $y)'`)
|
||||
}
|
||||
|
||||
func ifKeywordUsage() {
|
||||
fmt.Println(
|
||||
`if: starts an if/elif/elif chain. The body statements must be wrapped
|
||||
in curly braces.`)
|
||||
}
|
||||
|
||||
func inKeywordUsage() {
|
||||
fmt.Println(`in: used in for-loops over stream records or out-of-stream variables.`)
|
||||
}
|
||||
|
||||
func intKeywordUsage() {
|
||||
fmt.Println(
|
||||
`int: declares an integer local variable in the current curly-braced scope.
|
||||
Type-checking happens at assignment: 'int x = 0.0' is an error.`)
|
||||
}
|
||||
|
||||
// mapKeywordUsage prints the online help for the DSL keyword "map" to
// standard output.
func mapKeywordUsage() {
	fmt.Println(
		`map: declares a map-valued local variable in the current curly-braced scope.
Type-checking happens at assignment: 'map b = 0' is an error. map b = {} is
always OK. map b = a is OK or not depending on whether a is a map.`)
}
|
||||
|
||||
func numKeywordUsage() {
|
||||
fmt.Println(
|
||||
`num: declares an int/float local variable in the current curly-braced scope.
|
||||
Type-checking happens at assignment: 'num b = true' is an error.`)
|
||||
}
|
||||
|
||||
// printKeywordUsage prints the online help for the DSL keyword "print" to
// standard output.
//
// The text is emitted with fmt.Println, which performs no format-verb
// processing, so the modulus operator in the last example is written as a
// single '%' (a doubled '%%' would be printed verbatim).
func printKeywordUsage() {
	fmt.Println(
		`print: prints expression immediately to stdout.

Example: mlr --from f.dat put -q 'print "The sum of x and y is ".($x+$y)'
Example: mlr --from f.dat put -q 'for (k, v in $*) { print k . " => " . v }'
Example: mlr --from f.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'`)
}
|
||||
|
||||
func printnKeywordUsage() {
|
||||
fmt.Println(
|
||||
`printn: prints expression immediately to stdout, without trailing newline.
|
||||
|
||||
Example: mlr --from f.dat put -q 'printn "."; end { print "" }'`)
|
||||
}
|
||||
|
||||
// returnKeywordUsage prints the online help for the DSL keyword "return" to
// standard output.
func returnKeywordUsage() {
	fmt.Println(
		`return: specifies the return value from a user-defined function.
Omitted return statements (including via if-branches) result in an absent-null
return value, which in turn results in a skipped assignment to an LHS.`)
}
|
||||
|
||||
func stderrKeywordUsage() {
|
||||
fmt.Println(
|
||||
`stderr: Used for tee, emit, emitf, emitp, print, and dump in place of filename
|
||||
to print to standard error.`)
|
||||
}
|
||||
|
||||
func stdoutKeywordUsage() {
|
||||
fmt.Println(
|
||||
`stdout: Used for tee, emit, emitf, emitp, print, and dump in place of filename
|
||||
to print to standard output.`)
|
||||
}
|
||||
|
||||
func strKeywordUsage() {
|
||||
fmt.Println(
|
||||
`str: declares a string local variable in the current curly-braced scope.
|
||||
Type-checking happens at assignment.`)
|
||||
}
|
||||
|
||||
func subrKeywordUsage() {
|
||||
fmt.Println(
|
||||
`subr: used for defining a subroutine.
|
||||
|
||||
Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)'`)
|
||||
}
|
||||
|
||||
func teeKeywordUsage() {
|
||||
fmt.Println(
|
||||
`tee: prints the current record to specified file.
|
||||
This is an immediate print to the specified file (except for pprint format
|
||||
which of course waits until the end of the input stream to format all output).
|
||||
|
||||
The > and >> are for write and append, as in the shell, but (as with awk) the
|
||||
file-overwrite for > is on first write, not per record. The | is for piping to
|
||||
a process which will process the data. There will be one open file for each
|
||||
distinct file name (for > and >>) or one subordinate process for each distinct
|
||||
value of the piped-to command (for |). Output-formatting flags are taken from
|
||||
the main command line.
|
||||
|
||||
You can use any of the output-format command-line flags, e.g. --ocsv, --ofs,
|
||||
etc., to control the format of the output. See also mlr -h.
|
||||
|
||||
emit with redirect and tee with redirect are identical, except tee can only
|
||||
output $*.
|
||||
|
||||
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
|
||||
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
|
||||
Example: mlr --from f.dat put 'tee > stderr, $*'
|
||||
Example: mlr --from f.dat put -q 'tee | "tr \[a-z\\] \[A-Z\\]", $*'
|
||||
Example: mlr --from f.dat put -q 'tee | "tr \[a-z\\] \[A-Z\\] > /tmp/data-".$a, $*'
|
||||
Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
|
||||
Example: mlr --from f.dat put -q --ojson 'tee | "gzip > /tmp/data-".$a.".gz", $*'`)
|
||||
}
|
||||
|
||||
func trueKeywordUsage() {
|
||||
fmt.Println(`true: the boolean literal value.`)
|
||||
}
|
||||
|
||||
func unsetKeywordUsage() {
|
||||
fmt.Println(
|
||||
`unset: clears field(s) from the current record, or an out-of-stream or local variable.
|
||||
|
||||
Example: mlr --from f.dat put 'unset $x'
|
||||
Example: mlr --from f.dat put 'unset $*'
|
||||
Example: mlr --from f.dat put 'for (k, v in $*) { if (k =~ "a.*") { unset $[k] } }'
|
||||
Example: mlr --from f.dat put '...; unset @sums'
|
||||
Example: mlr --from f.dat put '...; unset @sums["green"]'
|
||||
Example: mlr --from f.dat put '...; unset @*'`)
|
||||
}
|
||||
|
||||
func varKeywordUsage() {
|
||||
fmt.Println(
|
||||
`var: declares an untyped local variable in the current curly-braced scope.
|
||||
|
||||
Examples: 'var a=1', 'var xyz=""'`)
|
||||
}
|
||||
|
||||
func whileKeywordUsage() {
|
||||
fmt.Println(
|
||||
`while: introduces a while loop, or with "do", introduces a do-while loop.
|
||||
The body statements must be wrapped in curly braces.`)
|
||||
}
|
||||
|
||||
func ENVKeywordUsage() {
|
||||
fmt.Println(`ENV: access to environment variables by name, e.g. '$home = ENV["HOME"]'`)
|
||||
}
|
||||
|
||||
func FILENAMEKeywordUsage() {
|
||||
fmt.Println(`FILENAME: evaluates to the name of the current file being processed.`)
|
||||
}
|
||||
|
||||
func FILENUMKeywordUsage() {
|
||||
fmt.Println(
|
||||
`FILENUM: evaluates to the number of the current file being processed,
|
||||
starting with 1.`)
|
||||
}
|
||||
|
||||
func FNRKeywordUsage() {
|
||||
fmt.Println(
|
||||
`FNR: evaluates to the number of the current record within the current file
|
||||
being processed, starting with 1. Resets at the start of each file.`)
|
||||
}
|
||||
|
||||
func IFSKeywordUsage() {
|
||||
fmt.Println(`IFS: evaluates to the input field separator from the command line.`)
|
||||
}
|
||||
|
||||
func IPSKeywordUsage() {
|
||||
fmt.Println(`IPS: evaluates to the input pair separator from the command line.`)
|
||||
}
|
||||
|
||||
func IRSKeywordUsage() {
|
||||
fmt.Println(
|
||||
`IRS: evaluates to the input record separator from the command line,
|
||||
or to LF or CRLF from the input data if in autodetect mode (which is
|
||||
the default).`)
|
||||
}
|
||||
|
||||
func M_EKeywordUsage() {
|
||||
fmt.Println(`M_E: the mathematical constant e.`)
|
||||
}
|
||||
|
||||
func M_PIKeywordUsage() {
|
||||
fmt.Println(`M_PI: the mathematical constant pi.`)
|
||||
}
|
||||
|
||||
func NFKeywordUsage() {
|
||||
fmt.Println(`NF: evaluates to the number of fields in the current record.`)
|
||||
}
|
||||
|
||||
func NRKeywordUsage() {
|
||||
fmt.Println(
|
||||
`NR: evaluates to the number of the current record over all files
|
||||
being processed, starting with 1. Does not reset at the start of each file.`)
|
||||
}
|
||||
|
||||
func OFSKeywordUsage() {
|
||||
fmt.Println(`OFS: evaluates to the output field separator from the command line.`)
|
||||
}
|
||||
|
||||
func OPSKeywordUsage() {
|
||||
fmt.Println(`OPS: evaluates to the output pair separator from the command line.`)
|
||||
}
|
||||
|
||||
func ORSKeywordUsage() {
|
||||
fmt.Println(
|
||||
`ORS: evaluates to the output record separator from the command line,
|
||||
or to LF or CRLF from the input data if in autodetect mode (which is
|
||||
the default).`)
|
||||
}
|
||||
|
|
@ -17,7 +17,7 @@ import (
|
|||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/stream"
|
||||
"miller/src/transforming"
|
||||
"miller/src/transformers"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
@ -62,7 +62,7 @@ func Main() {
|
|||
|
||||
func processToStdout(
|
||||
options cliutil.TOptions,
|
||||
recordTransformers []transforming.IRecordTransformer,
|
||||
recordTransformers []transformers.IRecordTransformer,
|
||||
) {
|
||||
err := stream.Stream(options.FileNames, options, recordTransformers, os.Stdout, true)
|
||||
if err != nil {
|
||||
|
|
|
|||
6
go/src/lib/docurl.go
Normal file
6
go/src/lib/docurl.go
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
package lib
|
||||
|
||||
// Currently these are Miller 5 docs:
|
||||
//const DOC_URL = "https://miller.readthedocs.io/en/latest/"
|
||||
// Provisional docs until Miller 6 is released:
|
||||
const DOC_URL = "https://johnkerl.org/miller6"
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
This directory contains a single source file, `mlr.bnf`, which is the lexical/semantic grammar file for the Miller `put`/`filter` DSL using the GOCC framework. (In a classical Lex/Yacc framework, there would be separate `mlr.l` and `mlr.y` files; using GOCC, there is a single `mlr.bnf` file.)
|
||||
|
||||
All subdirectories of `src/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. They are nonetheless committed to source control, since running GOCC takes quite a bit longer than the `go build mlr.go` does, and the BNF file doesn't often change. See the top-level `miller/go` build scripts for how to rerun GOCC. As of this writing, it's `bin/gocc -o src/parsing src/parsing/mlr.bnf` as invoked from the `miller/go` base directory.
|
||||
|
||||
Making changes to `mlr.bnf` requires several minutes to re-run GOCC. For experimental changes, please see the [experiments](../../../experiments/dsl-parser) directory.
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import (
|
|||
"miller/src/input"
|
||||
"miller/src/lib"
|
||||
"miller/src/output"
|
||||
"miller/src/transforming"
|
||||
"miller/src/transformers"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ func Stream(
|
|||
// which sends along only one file name per call to Stream():
|
||||
fileNames []string,
|
||||
options cliutil.TOptions,
|
||||
recordTransformers []transforming.IRecordTransformer,
|
||||
recordTransformers []transformers.IRecordTransformer,
|
||||
outputStream *os.File,
|
||||
outputIsStdout bool,
|
||||
) error {
|
||||
|
|
@ -68,7 +68,7 @@ func Stream(
|
|||
// error or end-of-processing happens.
|
||||
|
||||
go recordReader.Read(fileNames, *initialContext, inputChannel, errorChannel)
|
||||
go transforming.ChainTransformer(inputChannel, recordTransformers, outputChannel)
|
||||
go transformers.ChainTransformer(inputChannel, recordTransformers, outputChannel)
|
||||
go output.ChannelWriter(outputChannel, recordWriter, doneChannel, outputStream, outputIsStdout)
|
||||
|
||||
done := false
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
Logic for transforming input records into output records as requested by the user (sort, filter, etc.).
|
||||
Logic for transforming input records into output records as requested by the user (sort, filter, etc.):
|
||||
|
||||
* `src/transforming` contains the abstract record-transformer interface datatype, as well as the Go-channel chaining mechanism for piping one transformer into the next.
|
||||
* `src/transformers` is all the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on. I put it here, not in `transforming`, so all files in `transformers` would be of the same type.
|
||||
* The `IRecordTransformer` abstract record-transformer interface datatype, as well as the `ChainTransformer` Go-channel chaining mechanism for piping one transformer into the next.
|
||||
* The transformer lookup table, used for Miller command-line parsing, verb construction, and online help.
|
||||
* All the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package transforming
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"miller/src/types"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package transforming
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"os"
|
||||
130
go/src/transformers/aaa_transformer_table.go
Normal file
130
go/src/transformers/aaa_transformer_table.go
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
package transformers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"miller/src/lib"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
|
||||
AltkvSetup,
|
||||
BarSetup,
|
||||
BootstrapSetup,
|
||||
CatSetup,
|
||||
CheckSetup,
|
||||
CleanWhitespaceSetup,
|
||||
CountDistinctSetup,
|
||||
CountSetup,
|
||||
CountSimilarSetup,
|
||||
CutSetup,
|
||||
DecimateSetup,
|
||||
FillDownSetup,
|
||||
FillEmptySetup,
|
||||
FilterSetup,
|
||||
FlattenSetup,
|
||||
FormatValuesSetup,
|
||||
FractionSetup,
|
||||
GapSetup,
|
||||
GrepSetup,
|
||||
GroupBySetup,
|
||||
GroupLikeSetup,
|
||||
HavingFieldsSetup,
|
||||
HeadSetup,
|
||||
HistogramSetup,
|
||||
JSONParseSetup,
|
||||
JSONStringifySetup,
|
||||
JoinSetup,
|
||||
LabelSetup,
|
||||
LeastFrequentSetup,
|
||||
MergeFieldsSetup,
|
||||
MostFrequentSetup,
|
||||
NestSetup,
|
||||
NothingSetup,
|
||||
PutSetup,
|
||||
RegularizeSetup,
|
||||
RemoveEmptyColumnsSetup,
|
||||
RenameSetup,
|
||||
ReorderSetup,
|
||||
RepeatSetup,
|
||||
ReshapeSetup,
|
||||
SampleSetup,
|
||||
Sec2GMTDateSetup,
|
||||
Sec2GMTSetup,
|
||||
SeqgenSetup,
|
||||
ShuffleSetup,
|
||||
SkipTrivialRecordsSetup,
|
||||
SortSetup,
|
||||
SortWithinRecordsSetup,
|
||||
Stats1Setup,
|
||||
Stats2Setup,
|
||||
StepSetup,
|
||||
TacSetup,
|
||||
TailSetup,
|
||||
TeeSetup,
|
||||
TopSetup,
|
||||
UnflattenSetup,
|
||||
UniqSetup,
|
||||
UnsparsifySetup,
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func LookUp(verb string) *TransformerSetup {
|
||||
for _, transformerSetup := range TRANSFORMER_LOOKUP_TABLE {
|
||||
if transformerSetup.Verb == verb {
|
||||
return &transformerSetup
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func ListVerbNamesVertically() {
|
||||
for _, transformerSetup := range TRANSFORMER_LOOKUP_TABLE {
|
||||
fmt.Printf("%s\n", transformerSetup.Verb)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func ListVerbNamesAsParagraph() {
|
||||
separator := " "
|
||||
|
||||
separatorlen := len(separator)
|
||||
linelen := 0
|
||||
j := 0
|
||||
|
||||
for _, transformerSetup := range TRANSFORMER_LOOKUP_TABLE {
|
||||
verb := transformerSetup.Verb
|
||||
verblen := len(verb)
|
||||
linelen += separatorlen + verblen
|
||||
if linelen >= 80 {
|
||||
fmt.Printf("\n")
|
||||
linelen = separatorlen + verblen
|
||||
j = 0
|
||||
}
|
||||
if j > 0 {
|
||||
fmt.Print(separator)
|
||||
}
|
||||
fmt.Print(verb)
|
||||
j++
|
||||
}
|
||||
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func UsageVerbs() {
|
||||
separator := "================================================================"
|
||||
|
||||
for i, transformerSetup := range TRANSFORMER_LOOKUP_TABLE {
|
||||
if i > 0 {
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Printf("%s\n", separator)
|
||||
lib.InternalCodingErrorIf(transformerSetup.UsageFunc == nil)
|
||||
transformerSetup.UsageFunc(os.Stdout, false, 0)
|
||||
}
|
||||
fmt.Printf("%s\n", separator)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameAltkv = "altkv"
|
||||
|
||||
var AltkvSetup = transforming.TransformerSetup{
|
||||
var AltkvSetup = TransformerSetup{
|
||||
Verb: verbNameAltkv,
|
||||
UsageFunc: transformerAltkvUsage,
|
||||
ParseCLIFunc: transformerAltkvParseCLI,
|
||||
|
|
@ -41,7 +40,7 @@ func transformerAltkvParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -23,7 +22,7 @@ const barDefaultWidth = 40
|
|||
// ----------------------------------------------------------------
|
||||
const verbNameBar = "bar"
|
||||
|
||||
var BarSetup = transforming.TransformerSetup{
|
||||
var BarSetup = TransformerSetup{
|
||||
Verb: verbNameBar,
|
||||
UsageFunc: transformerBarUsage,
|
||||
ParseCLIFunc: transformerBarParseCLI,
|
||||
|
|
@ -62,7 +61,7 @@ func transformerBarParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -149,7 +148,7 @@ type TransformerBar struct {
|
|||
bars []string
|
||||
recordsForAutoMode *list.List
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameBootstrap = "bootstrap"
|
||||
|
||||
var BootstrapSetup = transforming.TransformerSetup{
|
||||
var BootstrapSetup = TransformerSetup{
|
||||
Verb: verbNameBootstrap,
|
||||
UsageFunc: transformerBootstrapUsage,
|
||||
ParseCLIFunc: transformerBootstrapParseCLI,
|
||||
|
|
@ -49,7 +48,7 @@ func transformerBootstrapParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCat = "cat"
|
||||
|
||||
var CatSetup = transforming.TransformerSetup{
|
||||
var CatSetup = TransformerSetup{
|
||||
Verb: verbNameCat,
|
||||
UsageFunc: transformerCatUsage,
|
||||
ParseCLIFunc: transformerCatParseCLI,
|
||||
|
|
@ -44,7 +43,7 @@ func transformerCatParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -103,7 +102,7 @@ type TransformerCat struct {
|
|||
countsByGroup map[string]int
|
||||
counterFieldName string
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCheck = "check"
|
||||
|
||||
var CheckSetup = transforming.TransformerSetup{
|
||||
var CheckSetup = TransformerSetup{
|
||||
Verb: verbNameCheck,
|
||||
UsageFunc: transformerCheckUsage,
|
||||
ParseCLIFunc: transformerCheckParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerCheckParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCleanWhitespace = "clean-whitespace"
|
||||
|
||||
var CleanWhitespaceSetup = transforming.TransformerSetup{
|
||||
var CleanWhitespaceSetup = TransformerSetup{
|
||||
Verb: verbNameCleanWhitespace,
|
||||
UsageFunc: transformerCleanWhitespaceUsage,
|
||||
ParseCLIFunc: transformerCleanWhitespaceParseCLI,
|
||||
|
|
@ -50,7 +49,7 @@ func transformerCleanWhitespaceParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
doKeys := true
|
||||
doValues := true
|
||||
|
|
@ -100,7 +99,7 @@ func transformerCleanWhitespaceParseCLI(
|
|||
|
||||
// ----------------------------------------------------------------
|
||||
type TransformerCleanWhitespace struct {
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCountSimilar = "count-similar"
|
||||
|
||||
var CountSimilarSetup = transforming.TransformerSetup{
|
||||
var CountSimilarSetup = TransformerSetup{
|
||||
Verb: verbNameCountSimilar,
|
||||
UsageFunc: transformerCountSimilarUsage,
|
||||
ParseCLIFunc: transformerCountSimilarParseCLI,
|
||||
|
|
@ -45,7 +44,7 @@ func transformerCountSimilarParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCount = "count"
|
||||
|
||||
var CountSetup = transforming.TransformerSetup{
|
||||
var CountSetup = TransformerSetup{
|
||||
Verb: verbNameCount,
|
||||
UsageFunc: transformerCountUsage,
|
||||
ParseCLIFunc: transformerCountParseCLI,
|
||||
|
|
@ -46,7 +45,7 @@ func transformerCountParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -103,7 +102,7 @@ type TransformerCount struct {
|
|||
outputFieldName string
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
ungroupedCount int
|
||||
// Example:
|
||||
// * Suppose group-by fields are a,b.
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameCut = "cut"
|
||||
|
||||
var CutSetup = transforming.TransformerSetup{
|
||||
var CutSetup = TransformerSetup{
|
||||
Verb: verbNameCut,
|
||||
UsageFunc: transformerCutUsage,
|
||||
ParseCLIFunc: transformerCutParseCLI,
|
||||
|
|
@ -56,7 +55,7 @@ func transformerCutParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -125,7 +124,7 @@ type TransformerCut struct {
|
|||
doComplement bool
|
||||
regexes []*regexp.Regexp
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerCut(
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameDecimate = "decimate"
|
||||
|
||||
var DecimateSetup = transforming.TransformerSetup{
|
||||
var DecimateSetup = TransformerSetup{
|
||||
Verb: verbNameDecimate,
|
||||
UsageFunc: transformerDecimateUsage,
|
||||
ParseCLIFunc: transformerDecimateParseCLI,
|
||||
|
|
@ -45,7 +44,7 @@ func transformerDecimateParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameFillDown = "fill-down"
|
||||
|
||||
var FillDownSetup = transforming.TransformerSetup{
|
||||
var FillDownSetup = TransformerSetup{
|
||||
Verb: verbNameFillDown,
|
||||
UsageFunc: transformerFillDownUsage,
|
||||
ParseCLIFunc: transformerFillDownParseCLI,
|
||||
|
|
@ -51,7 +50,7 @@ func transformerFillDownParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -117,7 +116,7 @@ type TransformerFillDown struct {
|
|||
// state
|
||||
lastNonNullValues map[string]*types.Mlrval
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerFillDown(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -15,7 +14,7 @@ import (
|
|||
const verbNameFillEmpty = "fill-empty"
|
||||
const defaultFillEmptyString = "N/A"
|
||||
|
||||
var FillEmptySetup = transforming.TransformerSetup{
|
||||
var FillEmptySetup = TransformerSetup{
|
||||
Verb: verbNameFillEmpty,
|
||||
UsageFunc: transformerFillEmptyUsage,
|
||||
ParseCLIFunc: transformerFillEmptyParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerFillEmptyParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameFlatten = "flatten"
|
||||
|
||||
var FlattenSetup = transforming.TransformerSetup{
|
||||
var FlattenSetup = TransformerSetup{
|
||||
Verb: verbNameFlatten,
|
||||
UsageFunc: transformerFlattenUsage,
|
||||
ParseCLIFunc: transformerFlattenParseCLI,
|
||||
|
|
@ -46,7 +45,7 @@ func transformerFlattenParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -97,7 +96,7 @@ type TransformerFlatten struct {
|
|||
fieldNameSet map[string]bool
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerFlatten(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -18,7 +17,7 @@ const defaultFormatValuesStringFormat = "%s"
|
|||
const defaultFormatValuesIntFormat = "%d"
|
||||
const defaultFormatValuesFloatFormat = "%f"
|
||||
|
||||
var FormatValuesSetup = transforming.TransformerSetup{
|
||||
var FormatValuesSetup = TransformerSetup{
|
||||
Verb: verbNameFormatValues,
|
||||
UsageFunc: transformerFormatValuesUsage,
|
||||
ParseCLIFunc: transformerFormatValuesParseCLI,
|
||||
|
|
@ -71,7 +70,7 @@ func transformerFormatValuesParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameFraction = "fraction"
|
||||
|
||||
var FractionSetup = transforming.TransformerSetup{
|
||||
var FractionSetup = TransformerSetup{
|
||||
Verb: verbNameFraction,
|
||||
UsageFunc: transformerFractionUsage,
|
||||
ParseCLIFunc: transformerFractionParseCLI,
|
||||
|
|
@ -60,7 +59,7 @@ func transformerFractionParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameGap = "gap"
|
||||
|
||||
var GapSetup = transforming.TransformerSetup{
|
||||
var GapSetup = TransformerSetup{
|
||||
Verb: verbNameGap,
|
||||
UsageFunc: transformerGapUsage,
|
||||
ParseCLIFunc: transformerGapParseCLI,
|
||||
|
|
@ -47,7 +46,7 @@ func transformerGapParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -102,7 +101,7 @@ type TransformerGap struct {
|
|||
groupByFieldNames []string
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
recordCount int
|
||||
previousGroupingKey string
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameGrep = "grep"
|
||||
|
||||
var GrepSetup = transforming.TransformerSetup{
|
||||
var GrepSetup = TransformerSetup{
|
||||
Verb: verbNameGrep,
|
||||
UsageFunc: transformerGrepUsage,
|
||||
ParseCLIFunc: transformerGrepParseCLI,
|
||||
|
|
@ -58,7 +57,7 @@ func transformerGrepParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameGroupBy = "group-by"
|
||||
|
||||
var GroupBySetup = transforming.TransformerSetup{
|
||||
var GroupBySetup = TransformerSetup{
|
||||
Verb: verbNameGroupBy,
|
||||
UsageFunc: transformerGroupByUsage,
|
||||
ParseCLIFunc: transformerGroupByParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerGroupByParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameGroupLike = "group-like"
|
||||
|
||||
var GroupLikeSetup = transforming.TransformerSetup{
|
||||
var GroupLikeSetup = TransformerSetup{
|
||||
Verb: verbNameGroupLike,
|
||||
UsageFunc: transformerGroupLikeUsage,
|
||||
ParseCLIFunc: transformerGroupLikeParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerGroupLikeParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -27,7 +26,7 @@ const (
|
|||
// ----------------------------------------------------------------
|
||||
const verbNameHavingFields = "having-fields"
|
||||
|
||||
var HavingFieldsSetup = transforming.TransformerSetup{
|
||||
var HavingFieldsSetup = TransformerSetup{
|
||||
Verb: verbNameHavingFields,
|
||||
UsageFunc: transformerHavingFieldsUsage,
|
||||
ParseCLIFunc: transformerHavingFieldsParseCLI,
|
||||
|
|
@ -68,7 +67,7 @@ func transformerHavingFieldsParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
havingFieldsCriterion := havingFieldsCriterionUnspecified
|
||||
var fieldNames []string = nil
|
||||
|
|
@ -153,7 +152,7 @@ type TransformerHavingFields struct {
|
|||
|
||||
regex *regexp.Regexp
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameHead = "head"
|
||||
|
||||
var HeadSetup = transforming.TransformerSetup{
|
||||
var HeadSetup = TransformerSetup{
|
||||
Verb: verbNameHead,
|
||||
UsageFunc: transformerHeadUsage,
|
||||
ParseCLIFunc: transformerHeadParseCLI,
|
||||
|
|
@ -50,7 +49,7 @@ func transformerHeadParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -101,7 +100,7 @@ type TransformerHead struct {
|
|||
groupByFieldNames []string
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
unkeyedRecordCount int
|
||||
keyedRecordCounts map[string]int
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameHistogram = "histogram"
|
||||
|
||||
var HistogramSetup = transforming.TransformerSetup{
|
||||
var HistogramSetup = TransformerSetup{
|
||||
Verb: verbNameHistogram,
|
||||
UsageFunc: transformerHistogramUsage,
|
||||
ParseCLIFunc: transformerHistogramParseCLI,
|
||||
|
|
@ -49,7 +48,7 @@ func transformerHistogramParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -140,7 +139,7 @@ type TransformerHistogram struct {
|
|||
vectorsByFieldName map[string][]float64 // For auto-mode
|
||||
outputPrefix string
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -11,14 +11,13 @@ import (
|
|||
"miller/src/input"
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers/utils"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameJoin = "join"
|
||||
|
||||
var JoinSetup = transforming.TransformerSetup{
|
||||
var JoinSetup = TransformerSetup{
|
||||
Verb: verbNameJoin,
|
||||
UsageFunc: transformerJoinUsage,
|
||||
ParseCLIFunc: transformerJoinParseCLI,
|
||||
|
|
@ -139,7 +138,7 @@ func transformerJoinParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
mainOptions *cliutil.TOptions, // Options for the right-files
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -281,7 +280,7 @@ type TransformerJoin struct {
|
|||
// For sorted/doubly-streaming input
|
||||
joinBucketKeeper *utils.JoinBucketKeeper
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameJSONParse = "json-parse"
|
||||
|
||||
var JSONParseSetup = transforming.TransformerSetup{
|
||||
var JSONParseSetup = TransformerSetup{
|
||||
Verb: verbNameJSONParse,
|
||||
UsageFunc: transformerJSONParseUsage,
|
||||
ParseCLIFunc: transformerJSONParseParseCLI,
|
||||
|
|
@ -45,7 +44,7 @@ func transformerJSONParseParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -90,7 +89,7 @@ type TransformerJSONParse struct {
|
|||
fieldNameSet map[string]bool
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerJSONParse(
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameJSONStringify = "json-stringify"
|
||||
|
||||
var JSONStringifySetup = transforming.TransformerSetup{
|
||||
var JSONStringifySetup = TransformerSetup{
|
||||
Verb: verbNameJSONStringify,
|
||||
UsageFunc: transformerJSONStringifyUsage,
|
||||
ParseCLIFunc: transformerJSONStringifyParseCLI,
|
||||
|
|
@ -46,7 +45,7 @@ func transformerJSONStringifyParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -107,7 +106,7 @@ type TransformerJSONStringify struct {
|
|||
fieldNameSet map[string]bool
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerJSONStringify(
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameLabel = "label"
|
||||
|
||||
var LabelSetup = transforming.TransformerSetup{
|
||||
var LabelSetup = TransformerSetup{
|
||||
Verb: verbNameLabel,
|
||||
UsageFunc: transformerLabelUsage,
|
||||
ParseCLIFunc: transformerLabelParseCLI,
|
||||
|
|
@ -46,7 +45,7 @@ func transformerLabelParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -10,14 +10,13 @@ import (
|
|||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers/utils"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameMergeFields = "merge-fields"
|
||||
|
||||
var MergeFieldsSetup = transforming.TransformerSetup{
|
||||
var MergeFieldsSetup = TransformerSetup{
|
||||
Verb: verbNameMergeFields,
|
||||
UsageFunc: transformerMergeFieldsUsage,
|
||||
ParseCLIFunc: transformerMergeFieldsParseCLI,
|
||||
|
|
@ -83,7 +82,7 @@ func transformerMergeFieldsParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -235,7 +234,7 @@ type TransformerMergeFields struct {
|
|||
// Ordered map from accumulator name to accumulator
|
||||
namedAccumulators *lib.OrderedMap
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerMergeFields(
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -19,14 +18,14 @@ const verbNameLeastFrequent = "least-frequent"
|
|||
const mostLeastFrequentDefaultMaxOutputLength = 10
|
||||
const mostLeastFrequentDefaultOutputFieldName = "count"
|
||||
|
||||
var MostFrequentSetup = transforming.TransformerSetup{
|
||||
var MostFrequentSetup = TransformerSetup{
|
||||
Verb: verbNameMostFrequent,
|
||||
UsageFunc: transformerMostFrequentUsage,
|
||||
ParseCLIFunc: transformerMostFrequentParseCLI,
|
||||
IgnoresInput: false,
|
||||
}
|
||||
|
||||
var LeastFrequentSetup = transforming.TransformerSetup{
|
||||
var LeastFrequentSetup = TransformerSetup{
|
||||
Verb: verbNameLeastFrequent,
|
||||
UsageFunc: transformerLeastFrequentUsage,
|
||||
ParseCLIFunc: transformerLeastFrequentParseCLI,
|
||||
|
|
@ -80,7 +79,7 @@ func transformerMostFrequentParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
return transformerMostOrLeastFrequentParseCLI(pargi, argc, args, true, transformerMostFrequentUsage)
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +88,7 @@ func transformerLeastFrequentParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
return transformerMostOrLeastFrequentParseCLI(pargi, argc, args, false, transformerLeastFrequentUsage)
|
||||
}
|
||||
|
||||
|
|
@ -98,8 +97,8 @@ func transformerMostOrLeastFrequentParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
descending bool,
|
||||
usageFunc transforming.TransformerUsageFunc,
|
||||
) transforming.IRecordTransformer {
|
||||
usageFunc TransformerUsageFunc,
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -11,14 +11,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameNest = "nest"
|
||||
|
||||
var NestSetup = transforming.TransformerSetup{
|
||||
var NestSetup = TransformerSetup{
|
||||
Verb: verbNameNest,
|
||||
UsageFunc: transformerNestUsage,
|
||||
ParseCLIFunc: transformerNestParseCLI,
|
||||
|
|
@ -98,7 +97,7 @@ func transformerNestParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -226,7 +225,7 @@ type TransformerNest struct {
|
|||
// For implode across records
|
||||
otherKeysToOtherValuesToBuckets *lib.OrderedMap
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameNothing = "nothing"
|
||||
|
||||
var NothingSetup = transforming.TransformerSetup{
|
||||
var NothingSetup = TransformerSetup{
|
||||
Verb: verbNameNothing,
|
||||
ParseCLIFunc: transformerNothingParseCLI,
|
||||
UsageFunc: transformerNothingUsage,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerNothingParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -13,14 +13,13 @@ import (
|
|||
"miller/src/parsing/lexer"
|
||||
"miller/src/parsing/parser"
|
||||
"miller/src/runtime"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNamePut = "put"
|
||||
|
||||
var PutSetup = transforming.TransformerSetup{
|
||||
var PutSetup = TransformerSetup{
|
||||
Verb: verbNamePut,
|
||||
UsageFunc: transformerPutUsage,
|
||||
ParseCLIFunc: transformerPutOrFilterParseCLI,
|
||||
|
|
@ -29,7 +28,7 @@ var PutSetup = transforming.TransformerSetup{
|
|||
|
||||
const verbNameFilter = "filter"
|
||||
|
||||
var FilterSetup = transforming.TransformerSetup{
|
||||
var FilterSetup = TransformerSetup{
|
||||
Verb: verbNameFilter,
|
||||
UsageFunc: transformerFilterUsage,
|
||||
ParseCLIFunc: transformerPutOrFilterParseCLI,
|
||||
|
|
@ -125,7 +124,7 @@ func transformerPutOrFilterParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
mainOptions *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameRegularize = "regularize"
|
||||
|
||||
var RegularizeSetup = transforming.TransformerSetup{
|
||||
var RegularizeSetup = TransformerSetup{
|
||||
Verb: verbNameRegularize,
|
||||
UsageFunc: transformerRegularizeUsage,
|
||||
ParseCLIFunc: transformerRegularizeParseCLI,
|
||||
|
|
@ -41,7 +40,7 @@ func transformerRegularizeParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameRemoveEmptyColumns = "remove-empty-columns"
|
||||
|
||||
var RemoveEmptyColumnsSetup = transforming.TransformerSetup{
|
||||
var RemoveEmptyColumnsSetup = TransformerSetup{
|
||||
Verb: verbNameRemoveEmptyColumns,
|
||||
UsageFunc: transformerRemoveEmptyColumnsUsage,
|
||||
ParseCLIFunc: transformerRemoveEmptyColumnsParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerRemoveEmptyColumnsParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -10,14 +10,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameRename = "rename"
|
||||
|
||||
var RenameSetup = transforming.TransformerSetup{
|
||||
var RenameSetup = TransformerSetup{
|
||||
Verb: verbNameRename,
|
||||
UsageFunc: transformerRenameUsage,
|
||||
ParseCLIFunc: transformerRenameParseCLI,
|
||||
|
|
@ -64,7 +63,7 @@ func transformerRenameParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -132,7 +131,7 @@ type TransformerRename struct {
|
|||
oldToNewNames *lib.OrderedMap
|
||||
regexesAndReplacements *list.List
|
||||
doGsub bool
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerRename(
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameReorder = "reorder"
|
||||
|
||||
var ReorderSetup = transforming.TransformerSetup{
|
||||
var ReorderSetup = TransformerSetup{
|
||||
Verb: verbNameReorder,
|
||||
UsageFunc: transformerReorderUsage,
|
||||
ParseCLIFunc: transformerReorderParseCLI,
|
||||
|
|
@ -57,7 +56,7 @@ func transformerReorderParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -130,7 +129,7 @@ type TransformerReorder struct {
|
|||
afterFieldName string
|
||||
|
||||
// state
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerReorder(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
|
|
@ -22,7 +21,7 @@ const (
|
|||
// ----------------------------------------------------------------
|
||||
const verbNameRepeat = "repeat"
|
||||
|
||||
var RepeatSetup = transforming.TransformerSetup{
|
||||
var RepeatSetup = TransformerSetup{
|
||||
Verb: verbNameRepeat,
|
||||
UsageFunc: transformerRepeatUsage,
|
||||
ParseCLIFunc: transformerRepeatParseCLI,
|
||||
|
|
@ -71,7 +70,7 @@ func transformerRepeatParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
repeatCountSource := repeatCountSourceUnspecified
|
||||
repeatCount := 0
|
||||
|
|
@ -127,7 +126,7 @@ func transformerRepeatParseCLI(
|
|||
type TransformerRepeat struct {
|
||||
repeatCount int
|
||||
repeatCountFieldName string
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -35,14 +35,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameReshape = "reshape"
|
||||
|
||||
var ReshapeSetup = transforming.TransformerSetup{
|
||||
var ReshapeSetup = TransformerSetup{
|
||||
Verb: verbNameReshape,
|
||||
UsageFunc: transformerReshapeUsage,
|
||||
ParseCLIFunc: transformerReshapeParseCLI,
|
||||
|
|
@ -124,7 +123,7 @@ func transformerReshapeParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -219,7 +218,7 @@ type TransformerReshape struct {
|
|||
splitOutValueFieldName string
|
||||
otherKeysToOtherValuesToBuckets *lib.OrderedMap
|
||||
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSample = "sample"
|
||||
|
||||
var SampleSetup = transforming.TransformerSetup{
|
||||
var SampleSetup = TransformerSetup{
|
||||
Verb: verbNameSample,
|
||||
UsageFunc: transformerSampleUsage,
|
||||
ParseCLIFunc: transformerSampleParseCLI,
|
||||
|
|
@ -46,7 +45,7 @@ func transformerSampleParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -6,14 +6,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSec2GMT = "sec2gmt"
|
||||
|
||||
var Sec2GMTSetup = transforming.TransformerSetup{
|
||||
var Sec2GMTSetup = TransformerSetup{
|
||||
Verb: verbNameSec2GMT,
|
||||
UsageFunc: transformerSec2GMTUsage,
|
||||
ParseCLIFunc: transformerSec2GMTParseCLI,
|
||||
|
|
@ -49,7 +48,7 @@ func transformerSec2GMTParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -6,14 +6,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSec2GMTDate = "sec2gmtdate"
|
||||
|
||||
var Sec2GMTDateSetup = transforming.TransformerSetup{
|
||||
var Sec2GMTDateSetup = TransformerSetup{
|
||||
Verb: verbNameSec2GMTDate,
|
||||
UsageFunc: transformerSec2GMTDateUsage,
|
||||
ParseCLIFunc: transformerSec2GMTDateParseCLI,
|
||||
|
|
@ -43,7 +42,7 @@ func transformerSec2GMTDateParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSeqgen = "seqgen"
|
||||
|
||||
var SeqgenSetup = transforming.TransformerSetup{
|
||||
var SeqgenSetup = TransformerSetup{
|
||||
Verb: verbNameSeqgen,
|
||||
UsageFunc: transformerSeqgenUsage,
|
||||
ParseCLIFunc: transformerSeqgenParseCLI,
|
||||
|
|
@ -53,7 +52,7 @@ func transformerSeqgenParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -8,14 +8,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameShuffle = "shuffle"
|
||||
|
||||
var ShuffleSetup = transforming.TransformerSetup{
|
||||
var ShuffleSetup = TransformerSetup{
|
||||
Verb: verbNameShuffle,
|
||||
UsageFunc: transformerShuffleUsage,
|
||||
ParseCLIFunc: transformerShuffleParseCLI,
|
||||
|
|
@ -45,7 +44,7 @@ func transformerShuffleParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSkipTrivialRecords = "skip-trivial-records"
|
||||
|
||||
var SkipTrivialRecordsSetup = transforming.TransformerSetup{
|
||||
var SkipTrivialRecordsSetup = TransformerSetup{
|
||||
Verb: verbNameSkipTrivialRecords,
|
||||
UsageFunc: transformerSkipTrivialRecordsUsage,
|
||||
ParseCLIFunc: transformerSkipTrivialRecordsParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerSkipTrivialRecordsParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSortWithinRecords = "sort-within-records"
|
||||
|
||||
var SortWithinRecordsSetup = transforming.TransformerSetup{
|
||||
var SortWithinRecordsSetup = TransformerSetup{
|
||||
Verb: verbNameSortWithinRecords,
|
||||
UsageFunc: transformerSortWithinRecordsUsage,
|
||||
ParseCLIFunc: transformerSortWithinRecordsParseCLI,
|
||||
|
|
@ -42,7 +41,7 @@ func transformerSortWithinRecordsParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
@ -82,7 +81,7 @@ func transformerSortWithinRecordsParseCLI(
|
|||
|
||||
// ----------------------------------------------------------------
|
||||
type TransformerSortWithinRecords struct {
|
||||
recordTransformerFunc transforming.RecordTransformerFunc
|
||||
recordTransformerFunc RecordTransformerFunc
|
||||
}
|
||||
|
||||
func NewTransformerSortWithinRecords(
|
||||
|
|
|
|||
|
|
@ -50,14 +50,13 @@ import (
|
|||
|
||||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameSort = "sort"
|
||||
|
||||
var SortSetup = transforming.TransformerSetup{
|
||||
var SortSetup = TransformerSetup{
|
||||
Verb: verbNameSort,
|
||||
UsageFunc: transformerSortUsage,
|
||||
ParseCLIFunc: transformerSortParseCLI,
|
||||
|
|
@ -99,7 +98,7 @@ func transformerSortParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
|
|
@ -9,14 +9,13 @@ import (
|
|||
"miller/src/cliutil"
|
||||
"miller/src/lib"
|
||||
"miller/src/transformers/utils"
|
||||
"miller/src/transforming"
|
||||
"miller/src/types"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
const verbNameStats1 = "stats1"
|
||||
|
||||
var Stats1Setup = transforming.TransformerSetup{
|
||||
var Stats1Setup = TransformerSetup{
|
||||
Verb: verbNameStats1,
|
||||
UsageFunc: transformerStats1Usage,
|
||||
ParseCLIFunc: transformerStats1ParseCLI,
|
||||
|
|
@ -87,7 +86,7 @@ func transformerStats1ParseCLI(
|
|||
argc int,
|
||||
args []string,
|
||||
_ *cliutil.TOptions,
|
||||
) transforming.IRecordTransformer {
|
||||
) IRecordTransformer {
|
||||
|
||||
// Skip the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue