mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Support comments in mlr -s files (#1359)
* Support comments in `mlr -s` files * doc mods * artifacts from `make dev` * neaten
This commit is contained in:
parent
c1572f4787
commit
8b22708c27
13 changed files with 99 additions and 55 deletions
|
|
@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1
|
|||
<b> stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
tiv_2011_tiv_2012_corr 0.9730497632351692
|
||||
tiv_2011_tiv_2012_ols_m 0.9835583980337723
|
||||
tiv_2011_tiv_2012_ols_b 433854.6428968317
|
||||
tiv_2011_tiv_2012_corr 0.9730497632351701
|
||||
tiv_2011_tiv_2012_ols_m 0.9835583980337732
|
||||
tiv_2011_tiv_2012_ols_b 433854.6428968301
|
||||
tiv_2011_tiv_2012_ols_n 36634
|
||||
tiv_2011_tiv_2012_r2 0.9468258417320189
|
||||
tiv_2011_tiv_2012_r2 0.9468258417320204
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
|
|
@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
|
|||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
u_v_corr w_x_corr
|
||||
0.1334180491027861 -0.011319841199866178
|
||||
0.1334180491027861 -0.011319841199852926
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
|
|
@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
|
|||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
color shape u_v_corr w_x_corr
|
||||
red circle 0.9807984401887236 -0.01856553658708754
|
||||
orange square 0.17685855992752927 -0.07104431573806054
|
||||
green circle 0.05764419437577255 0.01179572988801509
|
||||
red square 0.05574477124893523 -0.0006801456507510942
|
||||
yellow triangle 0.04457273771962798 0.024604310103081825
|
||||
yellow square 0.04379172927296089 -0.04462197201631237
|
||||
purple circle 0.03587354936895086 0.1341133954140899
|
||||
blue square 0.03241153095761164 -0.053507648119643196
|
||||
blue triangle 0.015356427073158766 -0.0006089997461435399
|
||||
orange circle 0.010518953877704048 -0.16279397329279383
|
||||
red triangle 0.00809782571528034 0.012486621357942596
|
||||
purple triangle 0.005155190909099334 -0.045057909256220656
|
||||
purple square -0.025680276963377404 0.05769429647930396
|
||||
green square -0.0257760734502851 -0.003265173252087127
|
||||
orange triangle -0.030456661186085785 -0.1318699981926352
|
||||
yellow circle -0.06477331572781474 0.07369449819706045
|
||||
blue circle -0.10234761901929677 -0.030528539069837757
|
||||
green triangle -0.10901825107358765 -0.04848782060162929
|
||||
red circle 0.9807984401887242 -0.018565536587084836
|
||||
orange square 0.17685855992752933 -0.07104431573805543
|
||||
green circle 0.05764419437577257 0.011795729888018455
|
||||
red square 0.0557447712489348 -0.0006801456507506415
|
||||
yellow triangle 0.0445727377196281 0.024604310103079844
|
||||
yellow square 0.0437917292729612 -0.044621972016306265
|
||||
purple circle 0.03587354936895115 0.13411339541407613
|
||||
blue square 0.03241153095761152 -0.05350764811965621
|
||||
blue triangle 0.015356427073158612 -0.0006089997461408209
|
||||
orange circle 0.010518953877704181 -0.1627939732927932
|
||||
red triangle 0.00809782571528054 0.01248662135795501
|
||||
purple triangle 0.005155190909099739 -0.04505790925621933
|
||||
purple square -0.02568027696337717 0.057694296479293694
|
||||
green square -0.025776073450284875 -0.0032651732520739014
|
||||
orange triangle -0.030456661186085584 -0.13186999819263814
|
||||
yellow circle -0.06477331572781515 0.0736944981970553
|
||||
blue circle -0.1023476190192966 -0.030528539069839333
|
||||
green triangle -0.10901825107358747 -0.04848782060162855
|
||||
</pre>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env mlr -s
|
||||
--c2p
|
||||
filter '$quantity != 20'
|
||||
filter '$quantity != 20' # Here is a comment
|
||||
then count-distinct -f shape
|
||||
then fraction -f count
|
||||
|
|
|
|||
|
|
@ -572,6 +572,11 @@ MILLER(1) MILLER(1)
|
|||
to be modified, except when input is from `tail -f`.
|
||||
See also
|
||||
https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
|
||||
--s-no-comment-strip {file name}
|
||||
Take command-line flags from file name, like -s, but
|
||||
with no comment-stripping. For more information
|
||||
please see
|
||||
https://miller.readthedocs.io/en/latest/scripting/.
|
||||
--seed {n} with `n` of the form `12345678` or `0xcafefeed`. For
|
||||
`put`/`filter` `urand`, `urandint`, and `urand32`.
|
||||
--tz {timezone} Specify timezone, overriding `$TZ` environment
|
||||
|
|
@ -3434,5 +3439,5 @@ MILLER(1) MILLER(1)
|
|||
|
||||
|
||||
|
||||
2023-08-09 MILLER(1)
|
||||
2023-08-19 MILLER(1)
|
||||
</pre>
|
||||
|
|
|
|||
|
|
@ -551,6 +551,11 @@ MILLER(1) MILLER(1)
|
|||
to be modified, except when input is from `tail -f`.
|
||||
See also
|
||||
https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
|
||||
--s-no-comment-strip {file name}
|
||||
Take command-line flags from file name, like -s, but
|
||||
with no comment-stripping. For more information
|
||||
please see
|
||||
https://miller.readthedocs.io/en/latest/scripting/.
|
||||
--seed {n} with `n` of the form `12345678` or `0xcafefeed`. For
|
||||
`put`/`filter` `urand`, `urandint`, and `urand32`.
|
||||
--tz {timezone} Specify timezone, overriding `$TZ` environment
|
||||
|
|
@ -3413,4 +3418,4 @@ MILLER(1) MILLER(1)
|
|||
|
||||
|
||||
|
||||
2023-08-09 MILLER(1)
|
||||
2023-08-19 MILLER(1)
|
||||
|
|
|
|||
|
|
@ -281,6 +281,7 @@ These are flags which don't fit into any other category.
|
|||
* `--ofmtf {n}`: Use --ofmtf 6 as shorthand for --ofmt %.6f, etc.
|
||||
* `--ofmtg {n}`: Use --ofmtg 6 as shorthand for --ofmt %.6g, etc.
|
||||
* `--records-per-batch {n}`: This is an internal parameter for the maximum number of records in a batch. Normally this does not need to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
|
||||
* `--s-no-comment-strip {file name}`: Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see https://miller.readthedocs.io/en/latest/scripting/.
|
||||
* `--seed {n}`: with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`.
|
||||
* `--tz {timezone}`: Specify timezone, overriding `$TZ` environment variable (if any).
|
||||
* `-I`: Process files in-place. For each file name on the command line, output is written to a temp file in the same directory, which is then renamed over the original. Each file is processed in isolation: if the output format is CSV, CSV headers will be present in each output file, statistics are only over each file's own records; and so on.
|
||||
|
|
|
|||
|
|
@ -3307,14 +3307,14 @@ fields, optionally categorized by one or more fields.
|
|||
<b> data/medium</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
x_y_cov 0.000042574820827444476
|
||||
x_y_corr 0.0005042001844467462
|
||||
y_y_cov 0.08461122467974003
|
||||
x_y_cov 0.00004257482082749404
|
||||
x_y_corr 0.0005042001844473328
|
||||
y_y_cov 0.08461122467974005
|
||||
y_y_corr 1
|
||||
x2_xy_cov 0.04188382281779374
|
||||
x2_xy_corr 0.630174342037994
|
||||
x2_y2_cov -0.00030953725962542085
|
||||
x2_y2_corr -0.0034249088761121966
|
||||
x2_xy_cov 0.041883822817793716
|
||||
x2_xy_corr 0.6301743420379936
|
||||
x2_y2_cov -0.0003095372596253918
|
||||
x2_y2_corr -0.003424908876111875
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
|
|
@ -3323,12 +3323,12 @@ x2_y2_corr -0.0034249088761121966
|
|||
<b> data/medium</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
a x_y_ols_m x_y_ols_b x_y_ols_n x_y_r2 y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m xy_y2_ols_b xy_y2_ols_n xy_y2_r2
|
||||
pan 0.01702551273681908 0.5004028922897639 2081 0.00028691820445814767 1 0 2081 1 0.8781320866715662 0.11908230147563566 2081 0.41749827377311266
|
||||
eks 0.0407804923685586 0.48140207967651016 1965 0.0016461239223448587 1 0 1965 1 0.8978728611690183 0.10734054433612333 1965 0.45563223864254526
|
||||
wye -0.03915349075204814 0.5255096523974456 1966 0.0015051268704373607 1 0 1966 1 0.8538317334220835 0.1267454301662969 1966 0.38991721818599295
|
||||
zee 0.0027812364960399147 0.5043070448033061 2047 0.000007751652858786137 1 0 2047 1 0.8524439912011013 0.12401684308018937 2047 0.39356598090006495
|
||||
hat -0.018620577041095078 0.5179005397264935 1941 0.0003520036646055585 1 0 1941 1 0.8412305086345014 0.13557328318623216 1941 0.3687944261732265
|
||||
a x_y_ols_m x_y_ols_b x_y_ols_n x_y_r2 y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m xy_y2_ols_b xy_y2_ols_n xy_y2_r2
|
||||
pan 0.017025512736819345 0.500402892289764 2081 0.00028691820445815624 1 -0.00000000000000002890430283104539 2081 1 0.8781320866715664 0.11908230147563569 2081 0.4174982737731127
|
||||
eks 0.04078049236855813 0.4814020796765104 1965 0.0016461239223448218 1 0.00000000000000017862676354313703 1965 1 0.897872861169018 0.1073405443361234 1965 0.4556322386425451
|
||||
wye -0.03915349075204785 0.5255096523974457 1966 0.0015051268704373377 1 0.00000000000000004464425401127647 1966 1 0.8538317334220837 0.1267454301662969 1966 0.3899172181859931
|
||||
zee 0.0027812364960401333 0.5043070448033061 2047 0.000007751652858787357 1 0.00000000000000004819404567023685 2047 1 0.8524439912011011 0.12401684308018947 2047 0.39356598090006495
|
||||
hat -0.018620577041095272 0.5179005397264937 1941 0.00035200366460556604 1 -0.00000000000000003400445761787692 1941 1 0.8412305086345017 0.13557328318623207 1941 0.3687944261732266
|
||||
</pre>
|
||||
|
||||
Here's an example simple line-fit. The `x` and `y`
|
||||
|
|
@ -3414,11 +3414,11 @@ upsec_count_pca_quality 0.9999590846136102
|
|||
donesec 92.33051350964094
|
||||
|
||||
color purple
|
||||
upsec_count_pca_m -39.03009744795354
|
||||
upsec_count_pca_b 979.9883413064914
|
||||
upsec_count_pca_m -39.030097447953594
|
||||
upsec_count_pca_b 979.9883413064917
|
||||
upsec_count_pca_n 21
|
||||
upsec_count_pca_quality 0.9999908956206317
|
||||
donesec 25.10852919630297
|
||||
donesec 25.108529196302943
|
||||
</pre>
|
||||
|
||||
## step
|
||||
|
|
@ -3646,9 +3646,9 @@ distinct_count 5 5 10000 10000 10000
|
|||
mode pan wye 1 0.3467901443380824 0.7268028627434533
|
||||
sum 0 0 50005000 4986.019681679581 5062.057444929905
|
||||
mean - - 5000.5 0.49860196816795804 0.5062057444929905
|
||||
stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933
|
||||
var - - 8334166.666666667 0.08426974433144456 0.08461122467974003
|
||||
skewness - - 0 -0.0006899591185521965 -0.017849760120133784
|
||||
stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331
|
||||
var - - 8334166.666666667 0.08426974433144457 0.08461122467974005
|
||||
skewness - - 0 -0.0006899591185517494 -0.01784976012013298
|
||||
minlen 3 3 1 15 13
|
||||
maxlen 3 3 5 22 22
|
||||
min eks eks 1 0.00004509679127584487 0.00008818962627266114
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ Here instead of putting `#!/bin/bash` on the first line, we can put `mlr` direct
|
|||
<pre class="pre-non-highlight-in-pair">
|
||||
#!/usr/bin/env mlr -s
|
||||
--c2p
|
||||
filter '$quantity != 20'
|
||||
filter '$quantity != 20' # Here is a comment
|
||||
then count-distinct -f shape
|
||||
then fraction -f count
|
||||
</pre>
|
||||
|
|
@ -149,6 +149,7 @@ Points:
|
|||
* You leave off the initial `mlr` since that's present on line 1.
|
||||
* You don't need all the backslashing for line-continuations.
|
||||
* You don't need the explicit `--` or `"$@"`.
|
||||
* All text from `#` to end of line is stripped out, even when the `#` appears inside quotes. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`.
|
||||
|
||||
Then you can do
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ Points:
|
|||
* You leave off the initial `mlr` since that's present on line 1.
|
||||
* You don't need all the backslashing for line-continuations.
|
||||
* You don't need the explicit `--` or `"$@"`.
|
||||
* All text from `#` to end of line is stripped out, even when the `#` appears inside quotes. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`.
|
||||
|
||||
Then you can do
|
||||
|
||||
|
|
|
|||
|
|
@ -598,8 +598,8 @@ hat pan 0.4643355557376876
|
|||
x_count 10000
|
||||
x_sum 4986.019681679581
|
||||
x_mean 0.49860196816795804
|
||||
x_var 0.08426974433144456
|
||||
x_stddev 0.2902925151144007
|
||||
x_var 0.08426974433144457
|
||||
x_stddev 0.29029251511440074
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
|
|
|
|||
|
|
@ -2988,5 +2988,16 @@ has its own overhead.`,
|
|||
*pargi += 2
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
name: "--s-no-comment-strip",
|
||||
arg: "{file name}",
|
||||
help: `Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see ` +
|
||||
lib.DOC_URL + `/en/latest/scripting/.`,
|
||||
parser: func(args []string, argc int, pargi *int, options *TOptions) {
|
||||
// Already handled in main(). Nothing to do here except to accept this as valid syntax.
|
||||
*pargi += 2
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package climain
|
|||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/johnkerl/miller/internal/pkg/lib"
|
||||
|
|
@ -25,10 +26,16 @@ import (
|
|||
// * This is how shebang lines work
|
||||
// * There are Miller verbs with -s flags and we don't want to disrupt their behavior.
|
||||
func maybeInterpolateDashS(args []string) ([]string, error) {
|
||||
stripComments := true
|
||||
|
||||
if len(args) < 2 {
|
||||
return args, nil
|
||||
}
|
||||
if args[1] != "-s" { // Normal case
|
||||
if args[1] == "-s" {
|
||||
stripComments = true
|
||||
} else if args[1] == "--s-no-comment-strip" {
|
||||
stripComments = false
|
||||
} else { // Normal case
|
||||
return args, nil
|
||||
}
|
||||
if len(args) < 3 {
|
||||
|
|
@ -59,9 +66,12 @@ func maybeInterpolateDashS(args []string) ([]string, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: maybe support comment lines deeper within the script-file.
|
||||
// Make sure they're /^[\s]+#/ since we don't want to disrupt a "#" within
|
||||
// strings which are not actually comment characters.
|
||||
if stripComments {
|
||||
re := regexp.MustCompile(`#.*`)
|
||||
for i, _ := range lines {
|
||||
lines[i] = re.ReplaceAllString(lines[i], "")
|
||||
}
|
||||
}
|
||||
|
||||
// Re-join lines to strings, and pass off to a shell-parser to split into
|
||||
// an args[]-style array.
|
||||
|
|
|
|||
|
|
@ -551,6 +551,11 @@ MILLER(1) MILLER(1)
|
|||
to be modified, except when input is from `tail -f`.
|
||||
See also
|
||||
https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
|
||||
--s-no-comment-strip {file name}
|
||||
Take command-line flags from file name, like -s, but
|
||||
with no comment-stripping. For more information
|
||||
please see
|
||||
https://miller.readthedocs.io/en/latest/scripting/.
|
||||
--seed {n} with `n` of the form `12345678` or `0xcafefeed`. For
|
||||
`put`/`filter` `urand`, `urandint`, and `urand32`.
|
||||
--tz {timezone} Specify timezone, overriding `$TZ` environment
|
||||
|
|
@ -3413,4 +3418,4 @@ MILLER(1) MILLER(1)
|
|||
|
||||
|
||||
|
||||
2023-08-09 MILLER(1)
|
||||
2023-08-19 MILLER(1)
|
||||
|
|
|
|||
|
|
@ -2,12 +2,12 @@
|
|||
.\" Title: mlr
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: ./mkman.rb
|
||||
.\" Date: 2023-08-09
|
||||
.\" Date: 2023-08-19
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "MILLER" "1" "2023-08-09" "\ \&" "\ \&"
|
||||
.TH "MILLER" "1" "2023-08-19" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Portability definitions
|
||||
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
@ -670,6 +670,11 @@ These are flags which don't fit into any other category.
|
|||
to be modified, except when input is from `tail -f`.
|
||||
See also
|
||||
https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
|
||||
--s-no-comment-strip {file name}
|
||||
Take command-line flags from file name, like -s, but
|
||||
with no comment-stripping. For more information
|
||||
please see
|
||||
https://miller.readthedocs.io/en/latest/scripting/.
|
||||
--seed {n} with `n` of the form `12345678` or `0xcafefeed`. For
|
||||
`put`/`filter` `urand`, `urandint`, and `urand32`.
|
||||
--tz {timezone} Specify timezone, overriding `$TZ` environment
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue