diff --git a/docs6/src/file-formats.md b/docs6/src/file-formats.md index b9d5c9f0b..1bbbccd72 100644 --- a/docs6/src/file-formats.md +++ b/docs6/src/file-formats.md @@ -553,6 +553,7 @@ Essentials: mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -628,6 +629,7 @@ Essentials: mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags diff --git a/docs6/src/manpage.md b/docs6/src/manpage.md index e5b4e9de1..9f7740b47 100644 --- a/docs6/src/manpage.md +++ b/docs6/src/manpage.md @@ -131,6 +131,7 @@ HELP OPTIONS mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -626,6 +627,17 @@ SEPARATOR FLAGS - C-style escape sequences, e.g. `--rs '\r\n' --fs '\t'`. - To avoid backslashing, you can use any of the following names: + ascii_esc = "\x1b" + ascii_etx = "\x04" + ascii_fs = "\x1c" + ascii_gs = "\x1d" + ascii_null = "\x01" + ascii_rs = "\x1e" + ascii_soh = "\x02" + ascii_stx = "\x03" + ascii_us = "\x1f" + asv_fs = "\x1f" + asv_rs = "\x1e" colon = ":" comma = "," cr = "\r" @@ -640,7 +652,12 @@ SEPARATOR FLAGS semicolon = ";" slash = "/" space = " " + spaces = "( )+" tab = "\t" + tabs = "(\t)+" + usv_fs = "\xe2\x90\x9f" + usv_rs = "\xe2\x90\x9e" + whitespace = "([ \t])+" * Default separators by format: @@ -2741,5 +2758,5 @@ SEE ALSO - 2021-09-20 MILLER(1) + 2021-09-21 MILLER(1) diff --git a/docs6/src/manpage.txt b/docs6/src/manpage.txt index 0f36b976b..e448fe191 100644 --- a/docs6/src/manpage.txt +++ b/docs6/src/manpage.txt @@ -110,6 +110,7 @@ HELP OPTIONS mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -605,6 +606,17 @@ 
SEPARATOR FLAGS - C-style escape sequences, e.g. `--rs '\r\n' --fs '\t'`. - To avoid backslashing, you can use any of the following names: + ascii_esc = "\x1b" + ascii_etx = "\x04" + ascii_fs = "\x1c" + ascii_gs = "\x1d" + ascii_null = "\x01" + ascii_rs = "\x1e" + ascii_soh = "\x02" + ascii_stx = "\x03" + ascii_us = "\x1f" + asv_fs = "\x1f" + asv_rs = "\x1e" colon = ":" comma = "," cr = "\r" @@ -619,7 +631,12 @@ SEPARATOR FLAGS semicolon = ";" slash = "/" space = " " + spaces = "( )+" tab = "\t" + tabs = "(\t)+" + usv_fs = "\xe2\x90\x9f" + usv_rs = "\xe2\x90\x9e" + whitespace = "([ \t])+" * Default separators by format: @@ -2720,4 +2737,4 @@ SEE ALSO - 2021-09-20 MILLER(1) + 2021-09-21 MILLER(1) diff --git a/docs6/src/new-in-miller-6.md b/docs6/src/new-in-miller-6.md index 22fada1f8..59ac53188 100644 --- a/docs6/src/new-in-miller-6.md +++ b/docs6/src/new-in-miller-6.md @@ -137,6 +137,15 @@ For example (see [https://github.com/johnkerl/miller/issues/178](https://github. Miller now has a read-evaluate-print-loop ([REPL](repl.md)) where you can single-step through your data-file record, express arbitrary statements to converse with the data, etc. +## Regex support for IFS and IPS + +You can now split fields on whitespace when whitespace is a mix of tabs and +spaces. As well, you can use regular expressions for the input field-separator +and the input pair-separator. Please see the section on +[multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). + +In particular, for NIDX format, the default IFS now allows splitting on one or more of space or tab. + ## Case-folded sorting options The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respetively. 
diff --git a/docs6/src/new-in-miller-6.md.in b/docs6/src/new-in-miller-6.md.in index 0922fe815..0e479ffb7 100644 --- a/docs6/src/new-in-miller-6.md.in +++ b/docs6/src/new-in-miller-6.md.in @@ -97,6 +97,15 @@ GENMD_EOF Miller now has a read-evaluate-print-loop ([REPL](repl.md)) where you can single-step through your data-file record, express arbitrary statements to converse with the data, etc. +## Regex support for IFS and IPS + +You can now split fields on whitespace when whitespace is a mix of tabs and +spaces. As well, you can use regular expressions for the input field-separator +and the input pair-separator. Please see the section on +[multi-character and regular-expression separators](reference-main-separators.md#multi-character-and-regular-expression-separators). + +In particular, for NIDX format, the default IFS now allows splitting on one or more of space or tab. + ## Case-folded sorting options The [sort](reference-verbs.md#sort) verb now accepts `-c` and `-cr` options for case-folded ascending/descending sort, respetively. diff --git a/docs6/src/online-help.md b/docs6/src/online-help.md index ad660aecc..d6137bb19 100644 --- a/docs6/src/online-help.md +++ b/docs6/src/online-help.md @@ -49,6 +49,7 @@ Essentials: mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -102,6 +103,7 @@ Essentials: mlr help file-formats Flags: mlr help flags + mlr help list-separator-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags diff --git a/docs6/src/proofreads.txt b/docs6/src/proofreads.txt index 0fd6da871..438e50d0d 100644 --- a/docs6/src/proofreads.txt +++ b/docs6/src/proofreads.txt @@ -4,8 +4,13 @@ ? twi-dm re all-contribs: all-contributors.org * nikos materials -> fold in -C! repifs !! https://pkg.go.dev/regexp#Regexp.Split 2-for-1 -- get regexp as well ? 
- - make table at seps.md.in, ffmt down vs XS across +c force CSV IFS single-char at CLIP? or CSV setup? IFS1? +C need multi-IRS reader for ASV & explicit CRLF (if supported); also need no-traiing-lf handling + +? look at: + regtest/cases-pending-go-port/dsl-output-redirects/0071/cmd + regtest/cases-pending-go-port/dsl-redirects/0004/cmd wtf no diff? + regtest/cases-pending-go-port/dsl-redirects/0010/cmd jvstack ? * r-strings branch! C stats1 --fr @@ -20,6 +25,8 @@ e fzf-ish w/ head -n 4, --from, up-arrow & append verb, then cat -- find & updat https://github.com/johnkerl/miller/issues/77#issuecomment-538553828 c! seps \001 etc ! + mlrc --iasv --oxtab cat regtest/input/example.asv + mlr --iasv --oxtab cat regtest/input/example.asv mlrc --iusv --oxtab cat regtest/input/example.usv mlr --iusv --oxtab cat regtest/input/example.usv @@ -27,6 +34,9 @@ o check for determinism regtest/cases/verb-join-prepipe/0003/cmd * UT for https://github.com/johnkerl/miller/issues/653 ---------------------------------------------------------------- +headerless CSV: separate page +e nidx, --implicit-csv-header, --headerless-csv-output; surver miller/issues ... +e make a --headerless-csv-input alias record-heterogeneity: l link-to's: diff --git a/docs6/src/record-heterogeneity.md b/docs6/src/record-heterogeneity.md index 5243782c8..752a70e46 100644 --- a/docs6/src/record-heterogeneity.md +++ b/docs6/src/record-heterogeneity.md @@ -138,8 +138,9 @@ keys from the header line together with the values from each data line, the second record has a missing value for key `c` (which ought to be fillable), while the third record has a value `10` with no key for it. 
-Using the `--allow-ragged-csv-input` option we can fill values in too-short -rows, and provide a key (column number starting with 1) for too-long rows: +Using the [`--allow-ragged-csv-input` flag](reference-main-flag-list.md#csv-only-flags) +we can fill values in too-short rows, and provide a key (column number starting +with 1) for too-long rows:
mlr --icsv --ojson --allow-ragged-csv-input cat data/het/ragged.csv
diff --git a/docs6/src/reference-main-flag-list.md b/docs6/src/reference-main-flag-list.md
index cbe109965..f0b76b650 100644
--- a/docs6/src/reference-main-flag-list.md
+++ b/docs6/src/reference-main-flag-list.md
@@ -489,6 +489,17 @@ Notes about all other separators:
- C-style escape sequences, e.g. `--rs '\r\n' --fs '\t'`.
- To avoid backslashing, you can use any of the following names:
+ ascii_esc = "\x1b"
+ ascii_etx = "\x04"
+ ascii_fs = "\x1c"
+ ascii_gs = "\x1d"
+ ascii_null = "\x01"
+ ascii_rs = "\x1e"
+ ascii_soh = "\x02"
+ ascii_stx = "\x03"
+ ascii_us = "\x1f"
+ asv_fs = "\x1f"
+ asv_rs = "\x1e"
colon = ":"
comma = ","
cr = "\r"
@@ -503,7 +514,12 @@ Notes about all other separators:
semicolon = ";"
slash = "/"
space = " "
+ spaces = "( )+"
tab = "\t"
+ tabs = "(\t)+"
+ usv_fs = "\xe2\x90\x9f"
+ usv_rs = "\xe2\x90\x9e"
+ whitespace = "([ \t])+"
* Default separators by format:
diff --git a/docs6/src/reference-main-separators.md b/docs6/src/reference-main-separators.md
index b3c0a6e30..d46a8ba8f 100644
--- a/docs6/src/reference-main-separators.md
+++ b/docs6/src/reference-main-separators.md
@@ -50,8 +50,9 @@ part of the JSON specification.
## Input and output separators
-Miller lets you use the same separators for input and output, or, to change
-them between input and output, if you wish to transform your data in that way.
+Miller lets you use the same separators for input and output (e.g. CSV input,
+CSV output), or change them between input and output (e.g. CSV input, JSON
+output) if you wish to transform your data in that way.
Miller uses the names `IRS` and `ORS` for the input and output record
separators, `IFS` and `OFS` for the input and output field separators, and
@@ -59,6 +60,14 @@ separators, `IFS` and `OFS` for the input and output field separators, and
For example:
+
+cat data/a.dkvp
+
+
+a=1,b=2,c=3
+a=4,b=5,c=6
+
+
mlr --ifs , --ofs ';' --ips = --ops : cut -o -f c,a,b data/a.dkvp
@@ -71,6 +80,14 @@ If your data has non-default separators and you don't want to change those
between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs
:` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes.
+
+cat data/modsep.dkvp
+
+
+a:1;b:2;c:3
+a:4;b:5;c:6
+
+
mlr --fs ';' --ps : cut -o -f c,a,b data/modsep.dkvp
@@ -79,7 +96,7 @@ c:3;a:1;b:2
c:6;a:4;b:5
-## Multi-character separators
+## Multi-character and regular-expression separators
The separators default to single characters, but can be multiple characters if you like:
@@ -91,16 +108,17 @@ c:=3;;;a:=1;;;b:=2
c:=6;;;a:=4;;;b:=5
-While the separators can be multiple characters, [regular
-expressions](reference-main-regular-expressions.md) (which Miller supports in
-many ways) are not (as of mid-2021) supported by Miller. So, in the above
-example, you can say the field-separator is one semicolon, or three, but two or
-four won't be recognized using `--ifs ';;;'`.
+As of September 2021:
-To fill this need, in the absence of full regular-expression support, Miller
-has a `--repifs` option for input. This means, for example, using `--ifs
-' ' --repifs` you can have the field separator be one _or more_ spaces. (Mixes
-of spaces and tabs, however, won't be recognized as a separator.)
+* `IFS` and `IPS` can be regular expressions.
+* `IRS` must be a single character (nominally `\n`).
+* `OFS`, `OPS`, and `ORS` can be multi-character.
+
+Since `IFS` and `IPS` can be regular expressions, if your data has field
+separators which are one or more consecutive spaces, you can use `--ifs '(
+)+'`. But that gets a little tedious, so Miller has the `--repifs` and
+`--repips` flags you can use if you like. These wrap the `IFS` or `IPS`, say
+`X`, as `(X)+`.
The `--repifs` flag means that multiple successive occurrences of the field
separator count as one. For example, in CSV data we often signify nulls by
@@ -120,31 +138,19 @@ see by the dawn's
early light what so
-(TODO: FIXME)
-
mlr --ifs ' ' --repifs --inidx --oxtab cat data/extra-spaces.txt
1 oh -2 -3 -4 -5 say -6 -7 -8 can -9 you +2 say +3 can +4 you 1 see -2 -3 -4 by -5 -6 -7 -8 the -9 dawn's +2 by +3 the +4 dawn's 1 early 2 light @@ -152,6 +158,51 @@ early light what so 4 so+## Aliases + +Many things we'd like to write as separators need to be escaped from the shell +-- e.g. `--ifs ';'` or `--ofs '|'`, and so on. You can use the following if you like: + +
+mlr help list-separator-aliases ++
+ascii_esc = "\x1b" +ascii_etx = "\x04" +ascii_fs = "\x1c" +ascii_gs = "\x1d" +ascii_null = "\x01" +ascii_rs = "\x1e" +ascii_soh = "\x02" +ascii_stx = "\x03" +ascii_us = "\x1f" +asv_fs = "\x1f" +asv_rs = "\x1e" +colon = ":" +comma = "," +cr = "\r" +crcr = "\r\r" +crlf = "\r\n" +crlfcrlf = "\r\n\r\n" +equals = "=" +lf = "\n" +lflf = "\n\n" +newline = "\n" +pipe = "|" +semicolon = ";" +slash = "/" +space = " " +spaces = "( )+" +tab = "\t" +tabs = "(\t)+" +usv_fs = "\xe2\x90\x9f" +usv_rs = "\xe2\x90\x9e" +whitespace = "([ \t])+" ++ +Note that `spaces`, `tabs`, and `whitespace` already are regexes so you +shouldn't use `--repifs` with them. + ## Command-line flags Given the above, we now have seen the following flags: @@ -162,15 +213,13 @@ Given the above, we now have seen the following flags: --ps --ips --ops -Also note that you can use names for certain characters: e.g. `--fs space` is -the same as `--fs ' '`. A full list is: `colon`, `comma`, `equals`, `newline`, -`pipe`, `semicolon`, `slash`, `space`, `tab`. +See also the [separator-flags section](reference-main-flag-list.md#separator-flags). ## DSL built-in variables Miller exposes for you read-only [built-in variables](reference-dsl-variables.md#built-in-variables) with names `IRS`, `ORS`, `IFS`, `OFS`, `IPS`, and `OPS`. Unlike in AWK, you can't set these in begin-blocks -- -their values indicate what you set at the command line -- so their use is limited. +their values indicate what you specified at the command line -- so their use is limited.
mlr --ifs , --ofs ';' --ips = --ops : --from data/a.dkvp put '$d = ">>>" . IFS . "|||" . OFS . "<<<"'
@@ -182,21 +231,22 @@ a:4;b:5;c:6;d:>>>,|||;<<<
## Which separators apply to which file formats
-TODO:
+Notes:
* If CSV field separator is tab, we have TSV; see more examples (ASV, USV, etc.) at in the [CSV section](file-formats.md#csvtsvasvusvetc).
* JSON: ignores all separator flags from the command line.
+* Headerless CSV overlaps quite a bit with NIDX format using comma for IFS. See also the page on [CSV with and without headers](csv-with-and-without-headers.md).
| | **RS** | **FS** | **PS** |
|------------|---------|---------|----------|
-| **CSV** | Default `\n` * | Default `,` | None |
-| **TSV** | Default `\n` * | Default `\t` | None |
-| **JSON** | N/A; records are between `{` and `}` | `,` but not alterable | `:` but not alterable |
-| **DKVP** | Default `\n` | Default `,` | Default `=` |
-| **NIDX** | Default `\n` | Default space | None |
-| **XTAB** | `\n\n` ** | `\n` * | Space with repeats |
-| **PPRINT** | Default `\n` * | Space with repeats | None |
-| **Markdown** | `\n` * but not alterable | One or more spaces then `|` then one or more spaces | None |
+| [**CSV and CSV-lite**](file-formats.md#csvtsvasvusvetc) | Default `\n` * | Default `,` | None |
+| [**TSV**](file-formats.md#csvtsvasvusvetc) | Default `\n` * | Default `\t` | None |
+| [**JSON**](file-formats.md#json) | N/A; records are between `{` and `}` | `,` but not alterable | `:` but not alterable |
+| [**DKVP**](file-formats.md#dkvp-key-value-pairs) | Default `\n` | Default `,` | Default `=` |
+| [**NIDX**](file-formats.md#nidx-index-numbered-toolkit-style) | Default `\n` | Default space | None |
+| [**XTAB**](file-formats.md#xtab-vertical-tabular) | `\n\n` ** | `\n` * | Space with repeats |
+| [**PPRINT**](file-formats.md#pprint-pretty-printed-tabular) | Default `\n` * | Space with repeats | None |
+| [**Markdown**](file-formats.md#markdown-tabular) | `\n` * but not alterable | One or more spaces then `|` then one or more spaces; not alterable | None |
\* or `\r\n` on Windows
diff --git a/docs6/src/reference-main-separators.md.in b/docs6/src/reference-main-separators.md.in
index e1e39a031..91cb6d398 100644
--- a/docs6/src/reference-main-separators.md.in
+++ b/docs6/src/reference-main-separators.md.in
@@ -30,8 +30,9 @@ part of the JSON specification.
## Input and output separators
-Miller lets you use the same separators for input and output, or, to change
-them between input and output, if you wish to transform your data in that way.
+Miller lets you use the same separators for input and output (e.g. CSV input,
+CSV output), or change them between input and output (e.g. CSV input, JSON
+output) if you wish to transform your data in that way.
Miller uses the names `IRS` and `ORS` for the input and output record
separators, `IFS` and `OFS` for the input and output field separators, and
@@ -39,6 +40,10 @@ separators, `IFS` and `OFS` for the input and output field separators, and
For example:
+GENMD_RUN_COMMAND
+cat data/a.dkvp
+GENMD_EOF
+
GENMD_RUN_COMMAND
mlr --ifs , --ofs ';' --ips = --ops : cut -o -f c,a,b data/a.dkvp
GENMD_EOF
@@ -47,11 +52,15 @@ If your data has non-default separators and you don't want to change those
between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs
:` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes.
+GENMD_RUN_COMMAND
+cat data/modsep.dkvp
+GENMD_EOF
+
GENMD_RUN_COMMAND
mlr --fs ';' --ps : cut -o -f c,a,b data/modsep.dkvp
GENMD_EOF
-## Multi-character separators
+## Multi-character and regular-expression separators
The separators default to single characters, but can be multiple characters if you like:
@@ -59,16 +68,17 @@ GENMD_RUN_COMMAND
mlr --ifs ';' --ips : --ofs ';;;' --ops := cut -o -f c,a,b data/modsep.dkvp
GENMD_EOF
-While the separators can be multiple characters, [regular
-expressions](reference-main-regular-expressions.md) (which Miller supports in
-many ways) are not (as of mid-2021) supported by Miller. So, in the above
-example, you can say the field-separator is one semicolon, or three, but two or
-four won't be recognized using `--ifs ';;;'`.
+As of September 2021:
-To fill this need, in the absence of full regular-expression support, Miller
-has a `--repifs` option for input. This means, for example, using `--ifs
-' ' --repifs` you can have the field separator be one _or more_ spaces. (Mixes
-of spaces and tabs, however, won't be recognized as a separator.)
+* `IFS` and `IPS` can be regular expressions.
+* `IRS` must be a single character (nominally `\n`).
+* `OFS`, `OPS`, and `ORS` can be multi-character.
+
+Since `IFS` and `IPS` can be regular expressions, if your data has field
+separators which are one or more consecutive spaces, you can use `--ifs '(
+)+'`. But that gets a little tedious, so Miller has the `--repifs` and
+`--repips` flags you can use if you like. These wrap the `IFS` or `IPS`, say
+`X`, as `(X)+`.
The `--repifs` flag means that multiple successive occurrences of the field
separator count as one. For example, in CSV data we often signify nulls by
@@ -83,12 +93,22 @@ GENMD_RUN_COMMAND
cat data/extra-spaces.txt
GENMD_EOF
-(TODO: FIXME)
-
GENMD_RUN_COMMAND
mlr --ifs ' ' --repifs --inidx --oxtab cat data/extra-spaces.txt
GENMD_EOF
+## Aliases
+
+Many things we'd like to write as separators need to be escaped from the shell
+-- e.g. `--ifs ';'` or `--ofs '|'`, and so on. You can use the following if you like:
+
+GENMD_RUN_COMMAND
+mlr help list-separator-aliases
+GENMD_EOF
+
+Note that `spaces`, `tabs`, and `whitespace` already are regexes so you
+shouldn't use `--repifs` with them.
+
## Command-line flags
Given the above, we now have seen the following flags:
@@ -99,15 +119,13 @@ GENMD_CARDIFY
--ps --ips --ops
GENMD_EOF
-Also note that you can use names for certain characters: e.g. `--fs space` is
-the same as `--fs ' '`. A full list is: `colon`, `comma`, `equals`, `newline`,
-`pipe`, `semicolon`, `slash`, `space`, `tab`.
+See also the [separator-flags section](reference-main-flag-list.md#separator-flags).
## DSL built-in variables
Miller exposes for you read-only [built-in variables](reference-dsl-variables.md#built-in-variables) with
names `IRS`, `ORS`, `IFS`, `OFS`, `IPS`, and `OPS`. Unlike in AWK, you can't set these in begin-blocks --
-their values indicate what you set at the command line -- so their use is limited.
+their values indicate what you specified at the command line -- so their use is limited.
GENMD_RUN_COMMAND
mlr --ifs , --ofs ';' --ips = --ops : --from data/a.dkvp put '$d = ">>>" . IFS . "|||" . OFS . "<<<"'
@@ -115,21 +133,22 @@ GENMD_EOF
## Which separators apply to which file formats
-TODO:
+Notes:
* If CSV field separator is tab, we have TSV; see more examples (ASV, USV, etc.) at in the [CSV section](file-formats.md#csvtsvasvusvetc).
* JSON: ignores all separator flags from the command line.
+* Headerless CSV overlaps quite a bit with NIDX format using comma for IFS. See also the page on [CSV with and without headers](csv-with-and-without-headers.md).
| | **RS** | **FS** | **PS** |
|------------|---------|---------|----------|
-| **CSV** | Default `\n` * | Default `,` | None |
-| **TSV** | Default `\n` * | Default `\t` | None |
-| **JSON** | N/A; records are between `{` and `}` | `,` but not alterable | `:` but not alterable |
-| **DKVP** | Default `\n` | Default `,` | Default `=` |
-| **NIDX** | Default `\n` | Default space | None |
-| **XTAB** | `\n\n` ** | `\n` * | Space with repeats |
-| **PPRINT** | Default `\n` * | Space with repeats | None |
-| **Markdown** | `\n` * but not alterable | One or more spaces then `|` then one or more spaces | None |
+| [**CSV and CSV-lite**](file-formats.md#csvtsvasvusvetc) | Default `\n` * | Default `,` | None |
+| [**TSV**](file-formats.md#csvtsvasvusvetc) | Default `\n` * | Default `\t` | None |
+| [**JSON**](file-formats.md#json) | N/A; records are between `{` and `}` | `,` but not alterable | `:` but not alterable |
+| [**DKVP**](file-formats.md#dkvp-key-value-pairs) | Default `\n` | Default `,` | Default `=` |
+| [**NIDX**](file-formats.md#nidx-index-numbered-toolkit-style) | Default `\n` | Default space | None |
+| [**XTAB**](file-formats.md#xtab-vertical-tabular) | `\n\n` ** | `\n` * | Space with repeats |
+| [**PPRINT**](file-formats.md#pprint-pretty-printed-tabular) | Default `\n` * | Space with repeats | None |
+| [**Markdown**](file-formats.md#markdown-tabular) | `\n` * but not alterable | One or more spaces then `|` then one or more spaces; not alterable | None |
\* or `\r\n` on Windows
diff --git a/go/regtest/cases-pending-go-port/io-rfc-csv/0017/cmd b/go/regtest/cases-pending-go-port/io-rfc-csv/0017/cmd
new file mode 100644
index 000000000..23b7e071d
--- /dev/null
+++ b/go/regtest/cases-pending-go-port/io-rfc-csv/0017/cmd
@@ -0,0 +1 @@
+mlr --iasv --oxtab cat regtest/input/example.asv
diff --git a/go/regtest/cases-pending-go-port/mix-null-field-input/0018/experr b/go/regtest/cases-pending-go-port/io-rfc-csv/0017/experr
similarity index 100%
rename from go/regtest/cases-pending-go-port/mix-null-field-input/0018/experr
rename to go/regtest/cases-pending-go-port/io-rfc-csv/0017/experr
diff --git a/go/regtest/cases-pending-go-port/io-rfc-csv/0017/expout b/go/regtest/cases-pending-go-port/io-rfc-csv/0017/expout
new file mode 100644
index 000000000..eab299c2e
--- /dev/null
+++ b/go/regtest/cases-pending-go-port/io-rfc-csv/0017/expout
@@ -0,0 +1,7 @@
+a d
+b e
+c f
+
+a g
+b h
+c i
diff --git a/go/regtest/cases-pending-go-port/mix-null-field-input/0018/expout b/go/regtest/cases-pending-go-port/mix-null-field-input/0018/expout
deleted file mode 100644
index 923fbfaf4..000000000
--- a/go/regtest/cases-pending-go-port/mix-null-field-input/0018/expout
+++ /dev/null
@@ -1,2 +0,0 @@
-a b
-x_y_cov 2.000000
diff --git a/go/regtest/cases/io-separator-aliases/0001/cmd b/go/regtest/cases/io-separator-aliases/0001/cmd
new file mode 100644
index 000000000..155640c60
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0001/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j cat
diff --git a/go/regtest/cases/io-separator-aliases/0001/experr b/go/regtest/cases/io-separator-aliases/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0001/expout b/go/regtest/cases/io-separator-aliases/0001/expout
new file mode 100644
index 000000000..1b55d791d
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0001/expout
@@ -0,0 +1,20 @@
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0001/input b/go/regtest/cases/io-separator-aliases/0001/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0001/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases/io-separator-aliases/0002/cmd b/go/regtest/cases/io-separator-aliases/0002/cmd
new file mode 100644
index 000000000..2cac50b1b
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0002/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j --ifs space cat
diff --git a/go/regtest/cases/io-separator-aliases/0002/experr b/go/regtest/cases/io-separator-aliases/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0002/expout b/go/regtest/cases/io-separator-aliases/0002/expout
new file mode 100644
index 000000000..58c249a18
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0002/expout
@@ -0,0 +1,23 @@
+{
+ "1": "a\tb",
+ "2": "c"
+}
+{
+ "1": "a",
+ "2": "b\tc"
+}
+{
+ "1": "a\t\t\tb",
+ "2": "",
+ "3": "",
+ "4": "",
+ "5": "c"
+}
+{
+ "1": "a\t\t",
+ "2": "\tb",
+ "3": "",
+ "4": "\t",
+ "5": "",
+ "6": "c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0002/input b/go/regtest/cases/io-separator-aliases/0002/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0002/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases/io-separator-aliases/0003/cmd b/go/regtest/cases/io-separator-aliases/0003/cmd
new file mode 100644
index 000000000..32f9f822e
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0003/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j --ifs spaces cat
diff --git a/go/regtest/cases/io-separator-aliases/0003/experr b/go/regtest/cases/io-separator-aliases/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0003/expout b/go/regtest/cases/io-separator-aliases/0003/expout
new file mode 100644
index 000000000..f8b2c1be5
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0003/expout
@@ -0,0 +1,18 @@
+{
+ "1": "a\tb",
+ "2": "c"
+}
+{
+ "1": "a",
+ "2": "b\tc"
+}
+{
+ "1": "a\t\t\tb",
+ "2": "c"
+}
+{
+ "1": "a\t\t",
+ "2": "\tb",
+ "3": "\t",
+ "4": "c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0003/input b/go/regtest/cases/io-separator-aliases/0003/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0003/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases/io-separator-aliases/0004/cmd b/go/regtest/cases/io-separator-aliases/0004/cmd
new file mode 100644
index 000000000..1181c5456
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0004/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j --ifs tab cat
diff --git a/go/regtest/cases/io-separator-aliases/0004/experr b/go/regtest/cases/io-separator-aliases/0004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0004/expout b/go/regtest/cases/io-separator-aliases/0004/expout
new file mode 100644
index 000000000..25703f986
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0004/expout
@@ -0,0 +1,21 @@
+{
+ "1": "a",
+ "2": "b c"
+}
+{
+ "1": "a b",
+ "2": "c"
+}
+{
+ "1": "a",
+ "2": "",
+ "3": "",
+ "4": "b c"
+}
+{
+ "1": "a",
+ "2": "",
+ "3": " ",
+ "4": "b ",
+ "5": " c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0004/input b/go/regtest/cases/io-separator-aliases/0004/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0004/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases/io-separator-aliases/0005/cmd b/go/regtest/cases/io-separator-aliases/0005/cmd
new file mode 100644
index 000000000..e8a1ca17d
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0005/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j --ifs tabs cat
diff --git a/go/regtest/cases/io-separator-aliases/0005/experr b/go/regtest/cases/io-separator-aliases/0005/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0005/expout b/go/regtest/cases/io-separator-aliases/0005/expout
new file mode 100644
index 000000000..dca9b8a13
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0005/expout
@@ -0,0 +1,18 @@
+{
+ "1": "a",
+ "2": "b c"
+}
+{
+ "1": "a b",
+ "2": "c"
+}
+{
+ "1": "a",
+ "2": "b c"
+}
+{
+ "1": "a",
+ "2": " ",
+ "3": "b ",
+ "4": " c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0005/input b/go/regtest/cases/io-separator-aliases/0005/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0005/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases/io-separator-aliases/0006/cmd b/go/regtest/cases/io-separator-aliases/0006/cmd
new file mode 100644
index 000000000..74823216a
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0006/cmd
@@ -0,0 +1 @@
+mlr --from ${CASEDIR}/input --n2j --ifs whitespace cat
diff --git a/go/regtest/cases/io-separator-aliases/0006/experr b/go/regtest/cases/io-separator-aliases/0006/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/io-separator-aliases/0006/expout b/go/regtest/cases/io-separator-aliases/0006/expout
new file mode 100644
index 000000000..1b55d791d
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0006/expout
@@ -0,0 +1,20 @@
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
+{
+ "1": "a",
+ "2": "b",
+ "3": "c"
+}
diff --git a/go/regtest/cases/io-separator-aliases/0006/input b/go/regtest/cases/io-separator-aliases/0006/input
new file mode 100644
index 000000000..aa98b4674
--- /dev/null
+++ b/go/regtest/cases/io-separator-aliases/0006/input
@@ -0,0 +1,4 @@
+a b c
+a b c
+a b c
+a b c
diff --git a/go/regtest/cases-pending-go-port/mix-null-field-input/0018/cmd b/go/regtest/cases/mix-null-field-input/0018/cmd
similarity index 100%
rename from go/regtest/cases-pending-go-port/mix-null-field-input/0018/cmd
rename to go/regtest/cases/mix-null-field-input/0018/cmd
diff --git a/go/regtest/cases/mix-null-field-input/0018/experr b/go/regtest/cases/mix-null-field-input/0018/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/go/regtest/cases/mix-null-field-input/0018/expout b/go/regtest/cases/mix-null-field-input/0018/expout
new file mode 100644
index 000000000..1b392ed31
--- /dev/null
+++ b/go/regtest/cases/mix-null-field-input/0018/expout
@@ -0,0 +1,2 @@
+a b
+x_y_cov 2
diff --git a/go/regtest/input/example.asv b/go/regtest/input/example.asv
new file mode 100644
index 000000000..c0cc707fd
--- /dev/null
+++ b/go/regtest/input/example.asv
@@ -0,0 +1 @@
+abcdefghi
\ No newline at end of file
diff --git a/go/src/auxents/help/entry.go b/go/src/auxents/help/entry.go
index 19ac4ef30..300ee7add 100644
--- a/go/src/auxents/help/entry.go
+++ b/go/src/auxents/help/entry.go
@@ -71,6 +71,7 @@ func init() {
name: "Flags",
handlerInfos: []tHandlerInfo{
{name: "flags", zaryHandlerFunc: showFlagHelp},
+ {name: "list-separator-aliases", zaryHandlerFunc: listSeparatorAliases},
// Per-section entries will be computed and installed below
},
},
@@ -292,6 +293,10 @@ func showFlagHelp() {
cli.FLAG_TABLE.ShowHelp()
}
+func listSeparatorAliases() {
+ cli.ListSeparatorAliasesForOnlineHelp()
+}
+
// ----------------------------------------------------------------
func helpAuxents() {
fmt.Print(`Miller has a few otherwise-standalone executables packaged within it.
diff --git a/go/src/auxents/repl/entry.go b/go/src/auxents/repl/entry.go
index 50d9a6e43..6ab0ad29c 100644
--- a/go/src/auxents/repl/entry.go
+++ b/go/src/auxents/repl/entry.go
@@ -144,8 +144,8 @@ func ReplMain(args []string) int {
}
}
- cli.ApplyReaderOptionDefaults(&options.ReaderOptions)
- cli.ApplyWriterOptionDefaults(&options.WriterOptions)
+ cli.FinalizeReaderOptions(&options.ReaderOptions)
+ cli.FinalizeWriterOptions(&options.WriterOptions)
// --auto-flatten is on by default. But if input and output formats are both JSON,
// then we don't need to actually do anything. See also mlrcli_parse.go.
diff --git a/go/src/cli/flag_types.go b/go/src/cli/flag_types.go
index d02a7b025..a4a9cdba2 100644
--- a/go/src/cli/flag_types.go
+++ b/go/src/cli/flag_types.go
@@ -50,14 +50,11 @@ import (
// Data types used within the flags table.
// FlagParser is a function which takes a flag such as `--foo`.
-//
// * It should assume that a flag.Owns method has already been invoked to be
// sure that this function is indeed the right one to call for `--foo`.
-//
// * The FlagParser function is responsible for advancing *pargi by 1 (if
// `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
// the latter case, and mutating the options struct.
-//
// * Successful handling of the flag is indicated by this function making a
// non-zero increment of *pargi.
type FlagParser func(
@@ -68,6 +65,7 @@ type FlagParser func(
)
// ----------------------------------------------------------------
+
// FlagTable holds all the flags for Miller, organized into sections.
type FlagTable struct {
sections []*FlagSection
diff --git a/go/src/cli/mlrcli_util.go b/go/src/cli/mlrcli_util.go
index 7c38e796c..f0bc1063f 100644
--- a/go/src/cli/mlrcli_util.go
+++ b/go/src/cli/mlrcli_util.go
@@ -3,12 +3,10 @@ package cli
import (
"fmt"
"os"
-
- "mlr/src/lib"
)
-// For flags with values, e.g. ["-n" "10"], while we're looking at the "-n"
-// this let us see if the "10" slot exists.
+// CheckArgCount is for flags with values, e.g. ["-n" "10"], while we're
+// looking at the "-n": this lets us see if the "10" slot exists.
func CheckArgCount(args []string, argi int, argc int, n int) {
if (argc - argi) < n {
fmt.Fprintf(os.Stderr, "%s: option \"%s\" missing argument(s).\n", "mlr", args[argi])
@@ -17,43 +15,13 @@ func CheckArgCount(args []string, argi int, argc int, n int) {
}
}
-// ----------------------------------------------------------------
-// TODO: give symbolic name to all the RHSes
-
-var SEPARATOR_NAMES_TO_VALUES = map[string]string{
- "colon": ":",
- "comma": ",",
- "cr": "\\r",
- "crcr": "\\r\\r",
- "crlf": "\\r\\n",
- "crlfcrlf": "\\r\\n\\r\\n",
- "equals": "=",
- "lf": "\\n",
- "lflf": "\\n\\n",
- "newline": "\\n",
- "pipe": "|",
- "semicolon": ";",
- "slash": "/",
- "space": " ",
- "tab": "\\t",
-
- "ascii_null": "\\x01",
- "ascii_soh": "\\x02",
- "ascii_stx": "\\x03",
- "ascii_etx": "\\x04",
-
- "ascii_esc": "\\x1b",
- "ascii_fs": "\\x1c",
- "ascii_gs": "\\x1d",
- "ascii_rs": "\\x1e",
- "ascii_us": "\\x1f",
-}
-
+// SeparatorFromArg is for letting people do things like `--ifs pipe`
+// rather than `--ifs '|'`.
func SeparatorFromArg(name string) string {
sep, ok := SEPARATOR_NAMES_TO_VALUES[name]
- if !ok {
- // "\001" -> control-A, etc.
- return lib.UnbackslashStringLiteral(name)
+ if ok {
+ return sep
+ } else {
+ return name
}
- return sep
}
diff --git a/go/src/cli/option_parse.go b/go/src/cli/option_parse.go
index 71ede3aa1..f206d3251 100644
--- a/go/src/cli/option_parse.go
+++ b/go/src/cli/option_parse.go
@@ -15,79 +15,13 @@ import (
"mlr/src/lib"
)
-const ASV_FS = "\x1f"
-const ASV_RS = "\x1e"
-const USV_FS = "\xe2\x90\x9f"
-const USV_RS = "\xe2\x90\x9e"
-
-const ASV_FS_FOR_HELP = "\\x1f"
-const ASV_RS_FOR_HELP = "\\x1e"
-const USV_FS_FOR_HELP = "U+241F (UTF-8 \\xe2\\x90\\x9f)"
-const USV_RS_FOR_HELP = "U+241E (UTF-8 \\xe2\\x90\\x9e)"
-const DEFAULT_JSON_FLATTEN_SEPARATOR = "."
-
-// ----------------------------------------------------------------
-// TODO: move these to their own file
-
-// E.g. if IFS isn't specified, it's space for NIDX and comma for DKVP, etc.
-
-var defaultFSes = map[string]string{
- // "gen" : // TODO
- "csv": ",",
- "csvlite": ",",
- "dkvp": ",",
- "json": "N/A", // not honored; not parameterizable in JSON format
- "nidx": " ",
- "markdown": " ",
- "pprint": " ",
- "xtab": "\n", // todo: windows-dependent ...
-}
-
-var defaultPSes = map[string]string{
- "csv": "N/A",
- "csvlite": "N/A",
- "dkvp": "=",
- "json": "N/A", // not honored; not parameterizable in JSON format
- "markdown": "N/A",
- "nidx": "N/A",
- "pprint": "N/A",
- "xtab": " ", // todo: windows-dependent ...
-}
-
-var defaultRSes = map[string]string{
- "csv": "\n",
- "csvlite": "\n",
- "dkvp": "\n",
- "json": "N/A", // not honored; not parameterizable in JSON format
- "markdown": "\n",
- "nidx": "\n",
- "pprint": "\n",
- "xtab": "\n\n", // todo: maybe jettison the idea of this being alterable
-}
-
-var defaultAllowRepeatIFSes = map[string]bool{
- "csv": false,
- "csvlite": false,
- "dkvp": false,
- "json": false,
- "markdown": false,
- "nidx": false,
- "pprint": true,
- "xtab": false,
-}
-
-var defaultAllowRepeatIPSes = map[string]bool{
- "csv": false,
- "csvlite": false,
- "dkvp": false,
- "json": false,
- "markdown": false,
- "nidx": false,
- "pprint": false,
- "xtab": true,
-}
-
-func ApplyReaderOptionDefaults(readerOptions *TReaderOptions) {
+// FinalizeReaderOptions does a few things. One is if a file format was
+// specified but one or more separators were not, a default specific to that
+// file format is applied. The second is computing regexes for IPS and IFS, and
+// unbackslashing IRS. This is because the '\n' at the command line which is
+// Go "\\n" (a backslash and an n) needs to become the single newline
+// character, and likewise for "\t", etc.
+func FinalizeReaderOptions(readerOptions *TReaderOptions) {
if !readerOptions.IFSWasSpecified {
readerOptions.IFS = defaultFSes[readerOptions.InputFileFormat]
}
@@ -98,14 +32,37 @@ func ApplyReaderOptionDefaults(readerOptions *TReaderOptions) {
readerOptions.IRS = defaultRSes[readerOptions.InputFileFormat]
}
if !readerOptions.AllowRepeatIFSWasSpecified {
- readerOptions.AllowRepeatIFS = defaultAllowRepeatIFSes[readerOptions.InputFileFormat]
+ // Special case for Miller 6 upgrade -- now that we have regexing for mixes of tabs
+ // and spaces, that should now be the default for NIDX. But *only* for NIDX format,
+ // and if IFS wasn't specified.
+ if readerOptions.InputFileFormat == "nidx" && !readerOptions.IFSWasSpecified {
+ readerOptions.IFS = WHITESPACE
+ } else {
+ readerOptions.AllowRepeatIFS = defaultAllowRepeatIFSes[readerOptions.InputFileFormat]
+ }
}
if !readerOptions.AllowRepeatIPSWasSpecified {
readerOptions.AllowRepeatIPS = defaultAllowRepeatIPSes[readerOptions.InputFileFormat]
}
+
+ if readerOptions.AllowRepeatIFS {
+ readerOptions.IFSRegex = lib.CompileMillerRegexOrDie("(" + readerOptions.IFS + ")+")
+ } else {
+ readerOptions.IFSRegex = lib.CompileMillerRegexOrDie(readerOptions.IFS)
+ }
+ if readerOptions.AllowRepeatIPS {
+ readerOptions.IPSRegex = lib.CompileMillerRegexOrDie("(" + readerOptions.IPS + ")+")
+ } else {
+ readerOptions.IPSRegex = lib.CompileMillerRegexOrDie(readerOptions.IPS)
+ }
+
+ readerOptions.IRS = lib.UnbackslashStringLiteral(readerOptions.IRS)
}
-func ApplyWriterOptionDefaults(writerOptions *TWriterOptions) {
+// FinalizeWriterOptions unbackslashes OPS, OFS, and ORS. This is because
+// the '\n' at the command line which is Go "\\n" (a backslash and an n)
+// needs to become the single newline character, and likewise for "\t", etc.
+func FinalizeWriterOptions(writerOptions *TWriterOptions) {
if !writerOptions.OFSWasSpecified {
writerOptions.OFS = defaultFSes[writerOptions.OutputFileFormat]
}
@@ -115,6 +72,10 @@ func ApplyWriterOptionDefaults(writerOptions *TWriterOptions) {
if !writerOptions.ORSWasSpecified {
writerOptions.ORS = defaultRSes[writerOptions.OutputFileFormat]
}
+
+ writerOptions.OFS = lib.UnbackslashStringLiteral(writerOptions.OFS)
+ writerOptions.OPS = lib.UnbackslashStringLiteral(writerOptions.OPS)
+ writerOptions.ORS = lib.UnbackslashStringLiteral(writerOptions.ORS)
}
// ================================================================
@@ -211,11 +172,11 @@ Notes about all other separators:
fmt.Println()
// Go doesn't preserve insertion order in its arrays so here we are inlining a sort.
- aliases := lib.GetArrayKeysSorted(SEPARATOR_NAMES_TO_VALUES_FOR_ONLINE_HELP)
+ aliases := lib.GetArrayKeysSorted(SEPARATOR_NAMES_TO_VALUES)
for _, alias := range aliases {
// Really absurd level of indent needed to get fixed-with font in mkdocs here,
// I don't know why. Usually it only takes 4, not 10.
- fmt.Printf(" %-10s = \"%s\"\n", alias, SEPARATOR_NAMES_TO_VALUES_FOR_ONLINE_HELP[alias])
+ fmt.Printf(" %-10s = \"%s\"\n", alias, SEPARATOR_NAMES_TO_VALUES[alias])
}
fmt.Println()
@@ -243,6 +204,16 @@ Notes about all other separators:
}
}
+func ListSeparatorAliasesForOnlineHelp() {
+ // Go doesn't preserve insertion order in its maps so here we are inlining a sort.
+ aliases := lib.GetArrayKeysSorted(SEPARATOR_NAMES_TO_VALUES)
+ for _, alias := range aliases {
+ // No leading indent here, unlike the listing in the separator-flags help:
+ // just one left-justified `alias = "value"` pair per line.
+ fmt.Printf("%-10s = \"%s\"\n", alias, SEPARATOR_NAMES_TO_VALUES[alias])
+ }
+}
+
func init() { SeparatorFlagSection.Sort() }
var SeparatorFlagSection = FlagSection{
diff --git a/go/src/cli/option_types.go b/go/src/cli/option_types.go
index ab39e19d9..5611c882e 100644
--- a/go/src/cli/option_types.go
+++ b/go/src/cli/option_types.go
@@ -7,6 +7,8 @@
package cli
import (
+ "regexp"
+
"mlr/src/lib"
)
@@ -36,6 +38,8 @@ type TReaderOptions struct {
IRS string
AllowRepeatIFS bool
AllowRepeatIPS bool
+ IFSRegex *regexp.Regexp
+ IPSRegex *regexp.Regexp
// If unspecified on the command line, these take input-format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
diff --git a/go/src/cli/separators.go b/go/src/cli/separators.go
new file mode 100644
index 000000000..3d01eb218
--- /dev/null
+++ b/go/src/cli/separators.go
@@ -0,0 +1,134 @@
+package cli
+
+const COLON = ":"
+const COMMA = ","
+const CR = "\\r"
+const CRCR = "\\r\\r"
+const CRLF = "\\r\\n"
+const CRLFCRLF = "\\r\\n\\r\\n"
+const EQUALS = "="
+const LF = "\\n"
+const LFLF = "\\n\\n"
+const NEWLINE = "\\n"
+const PIPE = "|"
+const SEMICOLON = ";"
+const SLASH = "/"
+const SPACE = " "
+const SPACES = "( )+"
+const TAB = "\\t"
+const TABS = "(\\t)+"
+const WHITESPACE = "([ \\t])+"
+
+const ASCII_ESC = "\\x1b"
+const ASCII_ETX = "\\x04"
+const ASCII_FS = "\\x1c"
+const ASCII_GS = "\\x1d"
+const ASCII_NULL = "\\x01"
+const ASCII_RS = "\\x1e"
+const ASCII_SOH = "\\x02"
+const ASCII_STX = "\\x03"
+const ASCII_US = "\\x1f"
+
+const ASV_FS = "\\x1f"
+const ASV_RS = "\\x1e"
+const USV_FS = "\\xe2\\x90\\x9f"
+const USV_RS = "\\xe2\\x90\\x9e"
+
+const ASV_FS_FOR_HELP = "\\x1f"
+const ASV_RS_FOR_HELP = "\\x1e"
+const USV_FS_FOR_HELP = "U+241F (UTF-8 \\xe2\\x90\\x9f)"
+const USV_RS_FOR_HELP = "U+241E (UTF-8 \\xe2\\x90\\x9e)"
+
+const DEFAULT_JSON_FLATTEN_SEPARATOR = "."
+
+var SEPARATOR_NAMES_TO_VALUES = map[string]string{
+ "ascii_esc": ASCII_ESC,
+ "ascii_etx": ASCII_ETX,
+ "ascii_fs": ASCII_FS,
+ "ascii_gs": ASCII_GS,
+ "ascii_null": ASCII_NULL,
+ "ascii_rs": ASCII_RS,
+ "ascii_soh": ASCII_SOH,
+ "ascii_stx": ASCII_STX,
+ "ascii_us": ASCII_US,
+ "asv_fs": ASV_FS,
+ "asv_rs": ASV_RS,
+ "colon": COLON,
+ "comma": COMMA,
+ "cr": CR,
+ "crcr": CRCR,
+ "crlf": CRLF,
+ "crlfcrlf": CRLFCRLF,
+ "equals": EQUALS,
+ "lf": LF,
+ "lflf": LFLF,
+ "newline": NEWLINE,
+ "pipe": PIPE,
+ "semicolon": SEMICOLON,
+ "slash": SLASH,
+ "space": SPACE,
+ "spaces": SPACES,
+ "tab": TAB,
+ "tabs": TABS,
+ "usv_fs": USV_FS,
+ "usv_rs": USV_RS,
+ "whitespace": WHITESPACE,
+}
+
+// E.g. if IFS isn't specified, it's space for NIDX and comma for DKVP, etc.
+
+var defaultFSes = map[string]string{
+ // "gen" : // TODO
+ "csv": ",",
+ "csvlite": ",",
+ "dkvp": ",",
+ "json": "N/A", // not alterable; not parameterizable in JSON format
+ "nidx": " ",
+ "markdown": " ",
+ "pprint": " ",
+ "xtab": "\n", // todo: windows-dependent ...
+}
+
+var defaultPSes = map[string]string{
+ "csv": "N/A",
+ "csvlite": "N/A",
+ "dkvp": "=",
+ "json": "N/A", // not alterable; not parameterizable in JSON format
+ "markdown": "N/A",
+ "nidx": "N/A",
+ "pprint": "N/A",
+ "xtab": " ", // todo: windows-dependent ...
+}
+
+var defaultRSes = map[string]string{
+ "csv": "\n",
+ "csvlite": "\n",
+ "dkvp": "\n",
+ "json": "N/A", // not alterable; not parameterizable in JSON format
+ "markdown": "\n",
+ "nidx": "\n",
+ "pprint": "\n",
+ "xtab": "\n\n", // todo: maybe jettison the idea of this being alterable
+}
+
+var defaultAllowRepeatIFSes = map[string]bool{
+ "csv": false,
+ "csvlite": false,
+ "dkvp": false,
+ "json": false,
+ "markdown": false,
+ "nidx": false,
+ "pprint": true,
+ "xtab": false,
+}
+
+var defaultAllowRepeatIPSes = map[string]bool{
+ "csv": false,
+ "csvlite": false,
+ "dkvp": false,
+ "json": false,
+ "markdown": false,
+ "nidx": false,
+ "pprint": false,
+ "xtab": true,
+}
diff --git a/go/src/climain/mlrcli_mlrrc.go b/go/src/climain/mlrcli_mlrrc.go
index bb1be4ea6..01624e416 100644
--- a/go/src/climain/mlrcli_mlrrc.go
+++ b/go/src/climain/mlrcli_mlrrc.go
@@ -11,13 +11,10 @@ import (
"mlr/src/cli"
)
-// ----------------------------------------------------------------
-// * If $MLRRC is set, use it and only it.
-// * Otherwise try first $HOME/.mlrrc and then ./.mlrrc but let them
-// stack: e.g. $HOME/.mlrrc is lots of settings and maybe in one
-// subdir you want to override just a setting or two.
-
-// TODO: move to separate file?
+// loadMlrrcOrDie rule: If $MLRRC is set, use it and only it. Otherwise try
+// first $HOME/.mlrrc and then ./.mlrrc but let them stack: e.g. $HOME/.mlrrc
+// is lots of settings and maybe in one subdir you want to override just a
+// setting or two.
func loadMlrrcOrDie(
options *cli.TOptions,
) {
@@ -41,6 +38,7 @@ func loadMlrrcOrDie(
tryLoadMlrrc(options, "./.mlrrc")
}
+// tryLoadMlrrc is a helper function for loadMlrrcOrDie.
func tryLoadMlrrc(
options *cli.TOptions,
path string,
@@ -85,6 +83,7 @@ func tryLoadMlrrc(
return true
}
+// handleMlrrcLine is a helper function for loadMlrrcOrDie.
func handleMlrrcLine(
options *cli.TOptions,
line string,
diff --git a/go/src/climain/mlrcli_parse.go b/go/src/climain/mlrcli_parse.go
index 621224d98..6e08c57a7 100644
--- a/go/src/climain/mlrcli_parse.go
+++ b/go/src/climain/mlrcli_parse.go
@@ -12,7 +12,8 @@ import (
"mlr/src/version"
)
-// ----------------------------------------------------------------
+// ParseCommandLine is the entrypoint for handling the Miller command line:
+// flags, verbs and their flags, and input file name(s).
func ParseCommandLine(args []string) (
options cli.TOptions,
recordTransformers []transformers.IRecordTransformer,
@@ -60,8 +61,8 @@ func ParseCommandLine(args []string) (
}
}
- cli.ApplyReaderOptionDefaults(&options.ReaderOptions)
- cli.ApplyWriterOptionDefaults(&options.WriterOptions)
+ cli.FinalizeReaderOptions(&options.ReaderOptions)
+ cli.FinalizeWriterOptions(&options.WriterOptions)
// Set an optional global formatter for floating-point values
if options.WriterOptions.FPOFMT != "" {
@@ -120,10 +121,9 @@ func ParseCommandLine(args []string) (
return options, recordTransformers, nil
}
-// ----------------------------------------------------------------
-// Returns a list of transformers, from the starting point in args given by *pargi.
-// Bumps *pargi to point to remaining post-transformer-setup args, i.e. filenames.
-
+// parseTransformers returns a list of transformers, from the starting point in
+// args given by *pargi. Bumps *pargi to point to remaining
+// post-transformer-setup args, i.e. filenames.
func parseTransformers(
args []string,
pargi *int,
diff --git a/go/src/input/record_reader_csvlite.go b/go/src/input/record_reader_csvlite.go
index 9590d7674..c1a0d3096 100644
--- a/go/src/input/record_reader_csvlite.go
+++ b/go/src/input/record_reader_csvlite.go
@@ -33,23 +33,20 @@ import (
// ----------------------------------------------------------------
type RecordReaderCSVLite struct {
- readerOptions *cli.TReaderOptions
- emptyStringMlrval types.Mlrval
+ readerOptions *cli.TReaderOptions
}
// ----------------------------------------------------------------
func NewRecordReaderCSVLite(readerOptions *cli.TReaderOptions) *RecordReaderCSVLite {
return &RecordReaderCSVLite{
- readerOptions: readerOptions,
- emptyStringMlrval: types.MlrvalFromString(""),
+ readerOptions: readerOptions,
}
}
// ----------------------------------------------------------------
func NewRecordReaderPPRINT(readerOptions *cli.TReaderOptions) *RecordReaderCSVLite {
return &RecordReaderCSVLite{
- readerOptions: readerOptions,
- emptyStringMlrval: types.MlrvalFromString(""),
+ readerOptions: readerOptions,
}
}
@@ -170,7 +167,7 @@ func (reader *RecordReaderCSVLite) processHandleExplicitCSVHeader(
continue
}
- fields := lib.SplitString(line, reader.readerOptions.IFS)
+ fields := lib.RegexSplitString(reader.readerOptions.IFSRegex, line, -1)
if reader.readerOptions.AllowRepeatIFS {
fields = reader.stripEmpties(fields)
}
@@ -216,7 +213,7 @@ func (reader *RecordReaderCSVLite) processHandleExplicitCSVHeader(
if nh > nd {
// if header longer than data: use "" values
for i = nd; i < nh; i++ {
- record.PutCopy(headerStrings[i], &reader.emptyStringMlrval)
+ record.PutCopy(headerStrings[i], types.MLRVAL_VOID)
}
}
}
@@ -279,7 +276,7 @@ func (reader *RecordReaderCSVLite) processHandleImplicitCSVHeader(
continue
}
- fields := lib.SplitString(line, reader.readerOptions.IFS)
+ fields := lib.RegexSplitString(reader.readerOptions.IFSRegex, line, -1)
if reader.readerOptions.AllowRepeatIFS {
fields = reader.stripEmpties(fields)
}
@@ -327,7 +324,7 @@ func (reader *RecordReaderCSVLite) processHandleImplicitCSVHeader(
if nh > nd {
// if header longer than data: use "" values
for i = nd; i < nh; i++ {
- record.PutCopy(headerStrings[i], &reader.emptyStringMlrval)
+ record.PutCopy(headerStrings[i], types.MLRVAL_VOID)
}
}
}
diff --git a/go/src/input/record_reader_dkvp.go b/go/src/input/record_reader_dkvp.go
index 4404d561d..74ee4d8c5 100644
--- a/go/src/input/record_reader_dkvp.go
+++ b/go/src/input/record_reader_dkvp.go
@@ -13,7 +13,6 @@ import (
type RecordReaderDKVP struct {
readerOptions *cli.TReaderOptions
- // TODO: parameterize IRS
}
func NewRecordReaderDKVP(readerOptions *cli.TReaderOptions) *RecordReaderDKVP {
@@ -99,7 +98,7 @@ func (reader *RecordReaderDKVP) processHandle(
// xxx temp pending autodetect, and pending more windows-port work
line = strings.TrimRight(line, "\r")
- record := reader.recordFromDKVPLine(&line)
+ record := reader.recordFromDKVPLine(line)
context.UpdateForInputRecord()
inputChannel <- types.NewRecordAndContext(
record,
@@ -110,12 +109,13 @@ func (reader *RecordReaderDKVP) processHandle(
// ----------------------------------------------------------------
func (reader *RecordReaderDKVP) recordFromDKVPLine(
- line *string,
+ line string,
) *types.Mlrmap {
record := types.NewMlrmap()
- pairs := lib.SplitString(*line, reader.readerOptions.IFS)
+ pairs := lib.RegexSplitString(reader.readerOptions.IFSRegex, line, -1)
+
for i, pair := range pairs {
- kv := strings.SplitN(pair, reader.readerOptions.IPS, 2)
+ kv := lib.RegexSplitString(reader.readerOptions.IPSRegex, pair, 2)
// TODO check length 0. also, check input is empty since "".split() -> [""] not []
if len(kv) == 1 {
// E.g the pair has no equals sign: "a" rather than "a=1" or
diff --git a/go/src/input/record_reader_nidx.go b/go/src/input/record_reader_nidx.go
index 6f09018d2..f4a201764 100644
--- a/go/src/input/record_reader_nidx.go
+++ b/go/src/input/record_reader_nidx.go
@@ -12,7 +12,6 @@ import (
)
type RecordReaderNIDX struct {
- // TODO: use the parameterization for readerOptions.IFS/readerOptions.IPS
readerOptions *cli.TReaderOptions
}
@@ -100,7 +99,7 @@ func (reader *RecordReaderNIDX) processHandle(
line = strings.TrimRight(line, "\n")
line = strings.TrimRight(line, "\r")
- record := recordFromNIDXLine(line, reader.readerOptions.IFS)
+ record := reader.recordFromNIDXLine(line)
context.UpdateForInputRecord()
inputChannel <- types.NewRecordAndContext(
@@ -111,12 +110,11 @@ func (reader *RecordReaderNIDX) processHandle(
}
// ----------------------------------------------------------------
-func recordFromNIDXLine(
+func (reader *RecordReaderNIDX) recordFromNIDXLine(
line string,
- ifs string,
) *types.Mlrmap {
record := types.NewMlrmap()
- values := lib.SplitString(line, ifs) // TODO: repifs ...
+ values := lib.RegexSplitString(reader.readerOptions.IFSRegex, line, -1)
var i int = 0
for _, value := range values {
i++
diff --git a/go/src/input/record_reader_xtab.go b/go/src/input/record_reader_xtab.go
index b8268893d..10b4c0834 100644
--- a/go/src/input/record_reader_xtab.go
+++ b/go/src/input/record_reader_xtab.go
@@ -5,7 +5,6 @@ import (
"container/list"
"errors"
"io"
- "regexp"
"strings"
"mlr/src/cli"
@@ -15,21 +14,13 @@ import (
type RecordReaderXTAB struct {
readerOptions *cli.TReaderOptions
- ifsRegex *regexp.Regexp
// TODO: parameterize IRS
-
- // TODO: port from C
- // int allow_repeat_ips;
- // int do_auto_line_term;
- // int at_eof;
}
// ----------------------------------------------------------------
func NewRecordReaderXTAB(readerOptions *cli.TReaderOptions) *RecordReaderXTAB {
return &RecordReaderXTAB{
readerOptions: readerOptions,
- // TODO: incorporate IFS
- ifsRegex: regexp.MustCompile("\\s+"),
}
}
@@ -154,8 +145,7 @@ func (reader *RecordReaderXTAB) recordFromXTABLines(
for entry := lines.Front(); entry != nil; entry = entry.Next() {
line := entry.Value.(string)
- // TODO -- incorporate IFS
- kv := reader.ifsRegex.Split(line, 2)
+ kv := lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)
if len(kv) < 1 {
return nil, errors.New("mlr: internal coding error in XTAB reader")
}
diff --git a/go/src/lib/regex.go b/go/src/lib/regex.go
index acf1c3076..916213df8 100644
--- a/go/src/lib/regex.go
+++ b/go/src/lib/regex.go
@@ -94,6 +94,17 @@ func CompileMillerRegexOrDie(regexString string) *regexp.Regexp {
return regex
}
+// In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
+// on "," to ["a", "b", "c"] and "a" splits to ["a"], both of which are fine --
+// but "" splits to [""] when I wish it were []. This function does the latter.
+func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+ if input == "" {
+ return make([]string, 0)
+ } else {
+ return regex.Split(input, n)
+ }
+}
+
// MakeEmptyRegexCaptures is for initial CST state at the start of executing
// the DSL expression for the current record. Even if '$x =~ "(..)_(...)" set
// "\1" and "\2" on the previous record, at start of processing for the current
diff --git a/go/src/output/record_writer_markdown.go b/go/src/output/record_writer_markdown.go
index d17a63224..795687883 100644
--- a/go/src/output/record_writer_markdown.go
+++ b/go/src/output/record_writer_markdown.go
@@ -12,6 +12,7 @@ import (
type RecordWriterMarkdown struct {
writerOptions *cli.TWriterOptions
+ ors string
numHeaderLinesOutput int
lastJoinedHeader string
diff --git a/go/src/output/record_writer_nidx.go b/go/src/output/record_writer_nidx.go
index 8d3edcc57..56c65924c 100644
--- a/go/src/output/record_writer_nidx.go
+++ b/go/src/output/record_writer_nidx.go
@@ -10,6 +10,8 @@ import (
type RecordWriterNIDX struct {
writerOptions *cli.TWriterOptions
+ ofs string
+ ors string
}
func NewRecordWriterNIDX(writerOptions *cli.TWriterOptions) *RecordWriterNIDX {
diff --git a/go/src/output/record_writer_pprint.go b/go/src/output/record_writer_pprint.go
index 421297332..dc35cda5d 100644
--- a/go/src/output/record_writer_pprint.go
+++ b/go/src/output/record_writer_pprint.go
@@ -67,7 +67,7 @@ func (writer *RecordWriterPPRINT) Write(
)
if nonEmpty {
// Print a newline
- ostream.Write([]byte("\n"))
+ ostream.Write([]byte(writer.writerOptions.ORS))
}
// Start a new batch
writer.batch = list.New()
@@ -166,13 +166,13 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListNonBarred(
buffer.WriteString(colorizer.MaybeColorizeKey(formatted, outputIsStdout))
} else {
buffer.WriteString(colorizer.MaybeColorizeKey(pe.Key, outputIsStdout))
- buffer.WriteString("\n") // TODO: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
} else {
formatted := fmt.Sprintf("%*s ", maxWidths[pe.Key], pe.Key)
buffer.WriteString(colorizer.MaybeColorizeKey(formatted, outputIsStdout))
if pe.Next == nil {
- buffer.WriteString("\n") // TODO: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
@@ -194,13 +194,13 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListNonBarred(
buffer.WriteString(colorizer.MaybeColorizeValue(formatted, outputIsStdout))
} else {
buffer.WriteString(colorizer.MaybeColorizeValue(s, outputIsStdout))
- buffer.WriteString("\n") // TODO: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
} else {
formatted := fmt.Sprintf("%*s ", maxWidths[pe.Key], s)
buffer.WriteString(colorizer.MaybeColorizeValue(formatted, outputIsStdout))
if pe.Next == nil {
- buffer.WriteString("\n") // TODO: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
}
@@ -257,7 +257,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
buffer.WriteString(horizontalMiddle)
} else {
buffer.WriteString(horizontalEnd)
- buffer.WriteString("\n") // TOOD: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
@@ -274,7 +274,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
buffer.WriteString(verticalMiddle)
} else {
buffer.WriteString(verticalEnd)
- buffer.WriteString("\n") // TOOD: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
@@ -285,7 +285,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
buffer.WriteString(horizontalMiddle)
} else {
buffer.WriteString(horizontalEnd)
- buffer.WriteString("\n") // TOOD: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
@@ -309,7 +309,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
buffer.WriteString(fmt.Sprint(verticalMiddle))
} else {
buffer.WriteString(verticalEnd)
- buffer.WriteString("\n") // TOOD: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
@@ -321,7 +321,7 @@ func (writer *RecordWriterPPRINT) writeHeterogenousListBarred(
buffer.WriteString(horizontalMiddle)
} else {
buffer.WriteString(horizontalEnd)
- buffer.WriteString("\n") // TOOD: ORS
+ buffer.WriteString(writer.writerOptions.ORS)
}
}
}
diff --git a/go/src/output/record_writer_xtab.go b/go/src/output/record_writer_xtab.go
index b67b91934..7deb57257 100644
--- a/go/src/output/record_writer_xtab.go
+++ b/go/src/output/record_writer_xtab.go
@@ -46,7 +46,7 @@ func (writer *RecordWriterXTAB) Write(
if writer.onFirst {
writer.onFirst = false
} else {
- buffer.WriteString("\n")
+ buffer.WriteString("\n") // TODO: ORS
}
for pe := outrec.Head; pe != nil; pe = pe.Next {
@@ -56,10 +56,10 @@ func (writer *RecordWriterXTAB) Write(
buffer.WriteString(colorizer.MaybeColorizeKey(pe.Key, outputIsStdout))
buffer.WriteString(" ")
for i := 0; i < padLength; i++ {
- buffer.WriteString(" ")
+ buffer.WriteString(writer.writerOptions.OPS)
}
buffer.WriteString(colorizer.MaybeColorizeValue(pe.Value.String(), outputIsStdout))
- buffer.WriteString("\n")
+ buffer.WriteString("\n") // TODO: ORS
}
ostream.Write(buffer.Bytes())
}
diff --git a/go/src/transformers/join.go b/go/src/transformers/join.go
index 2f25038fa..4f3a6fd40 100644
--- a/go/src/transformers/join.go
+++ b/go/src/transformers/join.go
@@ -219,7 +219,7 @@ func transformerJoinParseCLI(
}
}
- cli.ApplyReaderOptionDefaults(&opts.joinFlagOptions.ReaderOptions)
+ cli.FinalizeReaderOptions(&opts.joinFlagOptions.ReaderOptions)
if opts.leftFileName == "" {
fmt.Fprintf(os.Stderr, "%s %s: need left file name\n", "mlr", verb)
diff --git a/go/src/transformers/put-or-filter.go b/go/src/transformers/put-or-filter.go
index 1494a10f3..014164abc 100644
--- a/go/src/transformers/put-or-filter.go
+++ b/go/src/transformers/put-or-filter.go
@@ -249,7 +249,7 @@ func transformerPutOrFilterParseCLI(
}
}
- cli.ApplyWriterOptionDefaults(&options.WriterOptions)
+ cli.FinalizeWriterOptions(&options.WriterOptions)
// If they've used either of 'mlr put -f {filename}' or 'mlr put -e
// {expression}' then that specifies their DSL expression. But if they've
diff --git a/go/src/transformers/tee.go b/go/src/transformers/tee.go
index 41359c67f..08283e4e2 100644
--- a/go/src/transformers/tee.go
+++ b/go/src/transformers/tee.go
@@ -98,7 +98,7 @@ func transformerTeeParseCLI(
}
}
- cli.ApplyWriterOptionDefaults(&localOptions.WriterOptions)
+ cli.FinalizeWriterOptions(&localOptions.WriterOptions)
// Get the filename/command from the command line, after the flags
if argi >= argc {
diff --git a/man6/manpage.txt b/man6/manpage.txt
index 0f36b976b..e448fe191 100644
--- a/man6/manpage.txt
+++ b/man6/manpage.txt
@@ -110,6 +110,7 @@ HELP OPTIONS
mlr help file-formats
Flags:
mlr help flags
+ mlr help list-separator-aliases
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv-only-flags
@@ -605,6 +606,17 @@ SEPARATOR FLAGS
- C-style escape sequences, e.g. `--rs '\r\n' --fs '\t'`.
- To avoid backslashing, you can use any of the following names:
+ ascii_esc = "\x1b"
+ ascii_etx = "\x04"
+ ascii_fs = "\x1c"
+ ascii_gs = "\x1d"
+ ascii_null = "\x01"
+ ascii_rs = "\x1e"
+ ascii_soh = "\x02"
+ ascii_stx = "\x03"
+ ascii_us = "\x1f"
+ asv_fs = "\x1f"
+ asv_rs = "\x1e"
colon = ":"
comma = ","
cr = "\r"
@@ -619,7 +631,12 @@ SEPARATOR FLAGS
semicolon = ";"
slash = "/"
space = " "
+ spaces = "( )+"
tab = "\t"
+ tabs = "(\t)+"
+ usv_fs = "\xe2\x90\x9f"
+ usv_rs = "\xe2\x90\x9e"
+ whitespace = "([ \t])+"
* Default separators by format:
@@ -2720,4 +2737,4 @@ SEE ALSO
- 2021-09-20 MILLER(1)
+ 2021-09-21 MILLER(1)
diff --git a/man6/mlr6.1 b/man6/mlr6.1
index bea053408..298d069e8 100644
--- a/man6/mlr6.1
+++ b/man6/mlr6.1
@@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
-.\" Date: 2021-09-20
+.\" Date: 2021-09-21
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
-.TH "MILLER" "1" "2021-09-20" "\ \&" "\ \&"
+.TH "MILLER" "1" "2021-09-21" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -145,6 +145,7 @@ Essentials:
mlr help file-formats
Flags:
mlr help flags
+ mlr help list-separator-aliases
mlr help comments-in-data-flags
mlr help compressed-data-flags
mlr help csv-only-flags
@@ -748,6 +749,17 @@ Notes about all other separators:
- C-style escape sequences, e.g. `--rs '\er\en' --fs '\et'`.
- To avoid backslashing, you can use any of the following names:
+ ascii_esc = "\ex1b"
+ ascii_etx = "\ex04"
+ ascii_fs = "\ex1c"
+ ascii_gs = "\ex1d"
+ ascii_null = "\ex01"
+ ascii_rs = "\ex1e"
+ ascii_soh = "\ex02"
+ ascii_stx = "\ex03"
+ ascii_us = "\ex1f"
+ asv_fs = "\ex1f"
+ asv_rs = "\ex1e"
colon = ":"
comma = ","
cr = "\er"
@@ -762,7 +774,12 @@ Notes about all other separators:
semicolon = ";"
slash = "/"
space = " "
+ spaces = "( )+"
tab = "\et"
+ tabs = "(\et)+"
+ usv_fs = "\exe2\ex90\ex9f"
+ usv_rs = "\exe2\ex90\ex9e"
+ whitespace = "([ \et])+"
* Default separators by format: