From 67f2cc4737adebe9fda320088d36d3fe07bfdfd0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 24 Dec 2021 23:46:03 -0500 Subject: [PATCH] doc updates --- docs/src/manpage.md | 17 ++--- docs/src/manpage.txt | 17 ++--- docs/src/online-help.md | 1 + docs/src/reference-main-flag-list.md | 15 +++-- docs/src/reference-main-separators.md | 70 ++++++++++++++------- docs/src/reference-main-separators.md.in | 54 ++++++++++------ docs/src/reference-verbs.md | 1 - internal/pkg/auxents/help/entry.go | 5 ++ internal/pkg/cli/option_parse.go | 18 ++++++ internal/pkg/cli/option_types.go | 16 ++--- internal/pkg/input/record_reader.go | 2 +- internal/pkg/input/record_reader_csvlite.go | 6 +- internal/pkg/input/record_reader_xtab.go | 2 +- internal/pkg/transformers/join.go | 1 - man/manpage.txt | 17 ++--- man/mlr.1 | 19 +++--- test/cases/cli-help/0001/expout | 1 - todo.txt | 13 +--- 18 files changed, 169 insertions(+), 106 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 27cf9bb97..e6ed7dd25 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -132,6 +132,7 @@ HELP OPTIONS Flags: mlr help flags mlr help list-separator-aliases + mlr help list-separator-regex-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -709,11 +710,14 @@ SEPARATOR FLAGS semicolon = ";" slash = "/" space = " " - spaces = "( )+" tab = "\t" - tabs = "(\t)+" usv_fs = "\xe2\x90\x9f" usv_rs = "\xe2\x90\x9e" + + - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`: + + spaces = "( )+" + tabs = "(\t)+" whitespace = "([ \t])+" * Default separators by format: @@ -730,12 +734,10 @@ SEPARATOR FLAGS --fs {string} Specify FS for input and output. --ifs {string} Specify FS for input. + --ifs-regex {string} Specify FS for input as a regular expression. --ips {string} Specify PS for input. + --ips-regex {string} Specify PS for input as a regular expression. --irs {string} Specify RS for input. 
- --no-ifs-regex Don't treat IFS value as a regular expression. Useful - if your IFS is ".". - --no-ips-regex Don't treat IPS value as a regular expression. Useful - if your IPS is ".". --ofs {string} Specify FS for output. --ops {string} Specify PS for output. --ors {string} Specify RS for output. @@ -1250,7 +1252,6 @@ VERBS --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will @@ -3017,5 +3018,5 @@ SEE ALSO - 2021-12-23 MILLER(1) + 2021-12-25 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index bc31f7d73..77f07daf2 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -111,6 +111,7 @@ HELP OPTIONS Flags: mlr help flags mlr help list-separator-aliases + mlr help list-separator-regex-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -688,11 +689,14 @@ SEPARATOR FLAGS semicolon = ";" slash = "/" space = " " - spaces = "( )+" tab = "\t" - tabs = "(\t)+" usv_fs = "\xe2\x90\x9f" usv_rs = "\xe2\x90\x9e" + + - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`: + + spaces = "( )+" + tabs = "(\t)+" whitespace = "([ \t])+" * Default separators by format: @@ -709,12 +713,10 @@ SEPARATOR FLAGS --fs {string} Specify FS for input and output. --ifs {string} Specify FS for input. + --ifs-regex {string} Specify FS for input as a regular expression. --ips {string} Specify PS for input. + --ips-regex {string} Specify PS for input as a regular expression. --irs {string} Specify RS for input. - --no-ifs-regex Don't treat IFS value as a regular expression. Useful - if your IFS is ".". - --no-ips-regex Don't treat IPS value as a regular expression. Useful - if your IPS is ".". --ofs {string} Specify FS for output. --ops {string} Specify PS for output. --ors {string} Specify RS for output. 
@@ -1229,7 +1231,6 @@ VERBS --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will @@ -2996,4 +2997,4 @@ SEE ALSO - 2021-12-23 MILLER(1) + 2021-12-25 MILLER(1) diff --git a/docs/src/online-help.md b/docs/src/online-help.md index 4bea51ab4..d74921e90 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -50,6 +50,7 @@ Essentials: Flags: mlr help flags mlr help list-separator-aliases + mlr help list-separator-regex-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index b677c2363..124b1f027 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -541,11 +541,14 @@ Notes about all other separators: semicolon = ";" slash = "/" space = " " - spaces = "( )+" tab = "\t" - tabs = "(\t)+" usv_fs = "\xe2\x90\x9f" usv_rs = "\xe2\x90\x9e" + + - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`: + + spaces = "( )+" + tabs = "(\t)+" whitespace = "([ \t])+" * Default separators by format: @@ -567,14 +570,14 @@ Notes about all other separators: `: Specify FS for input and output. * `--ifs {string} `: Specify FS for input. +* `--ifs-regex {string} +`: Specify FS for input as a regular expression. * `--ips {string} `: Specify PS for input. +* `--ips-regex {string} +`: Specify PS for input as a regular expression. * `--irs {string} `: Specify RS for input. -* `--no-ifs-regex -`: Don't treat IFS value as a regular expression. Useful if your IFS is ".". -* `--no-ips-regex -`: Don't treat IPS value as a regular expression. Useful if your IPS is ".". * `--ofs {string} `: Specify FS for output. 
* `--ops {string} diff --git a/docs/src/reference-main-separators.md b/docs/src/reference-main-separators.md index e096dfbd5..8b939dbca 100644 --- a/docs/src/reference-main-separators.md +++ b/docs/src/reference-main-separators.md @@ -76,6 +76,24 @@ c:3;a:1;b:2 c:6;a:4;b:5 +
+mlr --csv head -n 2 example.csv
+
+
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,square,true,2,15,79.2778,0.0130
+
+ +
+mlr --csv --ofs pipe head -n 2 example.csv
+
+
+color|shape|flag|k|index|quantity|rate
+yellow|triangle|true|1|11|43.6498|9.8870
+red|square|true|2|15|79.2778|0.0130
+
+ If your data has non-default separators and you don't want to change those between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs :` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes. @@ -96,9 +114,11 @@ c:3;a:1;b:2 c:6;a:4;b:5 -## Multi-character and regular-expression separators +## Multi-character separators -The separators default to single characters, but can be multiple characters if you like: +All separators can be multi-character, except for file formats which don't +allow parameterization (see below). And for CSV (CSV-lite doesn't have these +restrictions), IRS must be `\n` and IFS must be a single character.
 mlr --ifs ';' --ips : --ofs ';;;' --ops := cut -o -f c,a,b data/modsep.dkvp
@@ -108,23 +128,13 @@ c:=3;;;a:=1;;;b:=2
 c:=6;;;a:=4;;;b:=5
 
-As of September 2021: - -* `IFS` and `IPS` can be regular expressions. -* `IRS` can be multi-character (except for file formats which don't allow parameterization -- see below) -* `OFS`, `OPS`, and `ORS` can be multi-character. - -Since `IFS` and `IPS` can be regular expressions, if your data has field -separators which are one or more consecutive spaces, you can use `--ifs '( -)+'`. But that gets a little tedious, so Miller has the `--repifs` and -`--repips` flags you can use if you like. This wraps the `IFS` or `IPS`, say -`X`, as `(X)+`. - -The `--repifs` flag means that multiple successive occurrences of the field +If your data has field separators which are one or more consecutive spaces, you +can use `--ifs space --repifs`. +More generally, the `--repifs` flag means that multiple successive occurrences of the field separator count as one. For example, in CSV data we often signify nulls by empty strings, e.g. `2,9,,,,,6,5,4`. On the other hand, if the field separator is a space, it might be more natural to parse `2 4  5` the same as `2 4 5`: -`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option above +`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option is internally implemented in terms of `--repifs`. For example: @@ -158,6 +168,15 @@ early light what so 4 so +## Regular-expression separators + +`IFS` and `IPS` can be regular expressions: use `--ifs-regex` or `--ips-regex` in place of +`--ifs` or `--ips`, respectively. + +You can also use either `--ifs space --repifs` or `--ifs-regex '( )+'`. (But that gets a little tedious, +so there are aliases listed below.) Note however that `--ifs space --repifs` is about 3x faster than +`--ifs-regex '( )+'` -- regular expressions are powerful, but slower.
+ ## Aliases Many things we'd like to write as separators need to be escaped from the shell @@ -192,16 +211,25 @@ pipe = "|" semicolon = ";" slash = "/" space = " " -spaces = "( )+" tab = "\t" -tabs = "(\t)+" usv_fs = "\xe2\x90\x9f" usv_rs = "\xe2\x90\x9e" + + +And for `--ifs-regex` and `--ips-regex`: + +
+mlr help list-separator-regex-aliases
+
+
+spaces     = "( )+"
+tabs       = "(\t)+"
 whitespace = "([ \t])+"
 
Note that `spaces`, `tabs`, and `whitespace` already are regexes so you -shouldn't use `--repifs` with them. +shouldn't use `--repifs` with them. (In fact, the `--repifs` flag is ignored +when `--ifs-regex` is provided.) ## Command-line flags @@ -209,8 +237,8 @@ Given the above, we now have seen the following flags:
 --rs --irs --ors
---fs --ifs --ofs --repifs
---ps --ips --ops
+--fs --ifs --ofs --repifs --ifs-regex
+--ps --ips --ops --ips-regex
 
See also the [separator-flags section](reference-main-flag-list.md#separator-flags). diff --git a/docs/src/reference-main-separators.md.in b/docs/src/reference-main-separators.md.in index ce6ec50ff..921b3098c 100644 --- a/docs/src/reference-main-separators.md.in +++ b/docs/src/reference-main-separators.md.in @@ -48,6 +48,14 @@ GENMD-RUN-COMMAND mlr --ifs , --ofs ';' --ips = --ops : cut -o -f c,a,b data/a.dkvp GENMD-EOF +GENMD-RUN-COMMAND +mlr --csv head -n 2 example.csv +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --csv --ofs pipe head -n 2 example.csv +GENMD-EOF + If your data has non-default separators and you don't want to change those between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs :` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes. @@ -60,31 +68,23 @@ GENMD-RUN-COMMAND mlr --fs ';' --ps : cut -o -f c,a,b data/modsep.dkvp GENMD-EOF -## Multi-character and regular-expression separators +## Multi-character separators -The separators default to single characters, but can be multiple characters if you like: +All separators can be multi-character, except for file formats which don't +allow parameterization (see below). And for CSV (CSV-lite doesn't have these +restrictions), IRS must be `\n` and IFS must be a single character. GENMD-RUN-COMMAND mlr --ifs ';' --ips : --ofs ';;;' --ops := cut -o -f c,a,b data/modsep.dkvp GENMD-EOF -As of September 2021: - -* `IFS` and `IPS` can be regular expressions. -* `IRS` can be multi-character (except for file formats which don't allow parameterization -- see below) -* `OFS`, `OPS`, and `ORS` can be multi-character. - -Since `IFS` and `IPS` can be regular expressions, if your data has field -separators which are one or more consecutive spaces, you can use `--ifs '( -)+'`. But that gets a little tedious, so Miller has the `--repifs` and -`--repips` flags you can use if you like. This wraps the `IFS` or `IPS`, say -`X`, as `(X)+`. 
- -The `--repifs` flag means that multiple successive occurrences of the field +If your data has field separators which are one or more consecutive spaces, you +can use `--ifs space --repifs`. +More generally, the `--repifs` flag means that multiple successive occurrences of the field separator count as one. For example, in CSV data we often signify nulls by empty strings, e.g. `2,9,,,,,6,5,4`. On the other hand, if the field separator is a space, it might be more natural to parse `2 4  5` the same as `2 4 5`: -`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option above +`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option is internally implemented in terms of `--repifs`. For example: @@ -97,6 +97,15 @@ GENMD-RUN-COMMAND mlr --ifs ' ' --repifs --inidx --oxtab cat data/extra-spaces.txt GENMD-EOF +## Regular-expression separators + +`IFS` and `IPS` can be regular expressions: use `--ifs-regex` or `--ips-regex` in place of +`--ifs` or `--ips`, respectively. + +You can also use either `--ifs space --repifs` or `--ifs-regex '( )+'`. (But that gets a little tedious, +so there are aliases listed below.) Note however that `--ifs space --repifs` is about 3x faster than +`--ifs-regex '( )+'` -- regular expressions are powerful, but slower. + ## Aliases Many things we'd like to write as separators need to be escaped from the shell @@ -106,8 +115,15 @@ GENMD-RUN-COMMAND mlr help list-separator-aliases GENMD-EOF +And for `--ifs-regex` and `--ips-regex`: + +GENMD-RUN-COMMAND +mlr help list-separator-regex-aliases +GENMD-EOF + Note that `spaces`, `tabs`, and `whitespace` already are regexes so you -shouldn't use `--repifs` with them. +shouldn't use `--repifs` with them. (In fact, the `--repifs` flag is ignored +when `--ifs-regex` is provided.)
## Command-line flags @@ -115,8 +131,8 @@ Given the above, we now have seen the following flags: GENMD-CARDIFY --rs --irs --ors ---fs --ifs --ofs --repifs ---ps --ips --ops +--fs --ifs --ofs --repifs --ifs-regex +--ps --ips --ops --ips-regex GENMD-EOF See also the [separator-flags section](reference-main-flag-list.md#separator-flags). diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 1dcb00762..eecdc27e8 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1589,7 +1589,6 @@ the main "mlr --help" for more information on syntax for these arguments: --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will diff --git a/internal/pkg/auxents/help/entry.go b/internal/pkg/auxents/help/entry.go index c223279e8..24bdd3f5d 100644 --- a/internal/pkg/auxents/help/entry.go +++ b/internal/pkg/auxents/help/entry.go @@ -73,6 +73,7 @@ func init() { handlerInfos: []tHandlerInfo{ {name: "flags", zaryHandlerFunc: showFlagHelp}, {name: "list-separator-aliases", zaryHandlerFunc: listSeparatorAliases}, + {name: "list-separator-regex-aliases", zaryHandlerFunc: listSeparatorRegexAliases}, // Per-section entries will be computed and installed below }, }, @@ -315,6 +316,10 @@ func listSeparatorAliases() { cli.ListSeparatorAliasesForOnlineHelp() } +func listSeparatorRegexAliases() { + cli.ListSeparatorRegexAliasesForOnlineHelp() +} + // ---------------------------------------------------------------- func helpAuxents() { fmt.Print(`Miller has a few otherwise-standalone executables packaged within it. 
diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go index 4ce6a9e56..b8e637981 100644 --- a/internal/pkg/cli/option_parse.go +++ b/internal/pkg/cli/option_parse.go @@ -171,6 +171,14 @@ Notes about all other separators: } fmt.Println() + fmt.Println(" - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:") + fmt.Println() + aliases = lib.GetArrayKeysSorted(SEPARATOR_REGEX_NAMES_TO_VALUES) + for _, alias := range aliases { + fmt.Printf(" %-10s = \"%s\"\n", alias, SEPARATOR_REGEX_NAMES_TO_VALUES[alias]) + } + fmt.Println() + fmt.Println("* Default separators by format:") fmt.Println() @@ -205,6 +213,16 @@ func ListSeparatorAliasesForOnlineHelp() { } } +func ListSeparatorRegexAliasesForOnlineHelp() { + // Go doesn't preserve insertion order in its maps so here we are inlining a sort. + aliases := lib.GetArrayKeysSorted(SEPARATOR_REGEX_NAMES_TO_VALUES) + for _, alias := range aliases { + // Really absurd level of indent needed to get fixed-width font in mkdocs here, + // I don't know why. Usually it only takes 4, not 10. + fmt.Printf("%-10s = \"%s\"\n", alias, SEPARATOR_REGEX_NAMES_TO_VALUES[alias]) + } +} + func init() { SeparatorFlagSection.Sort() } var SeparatorFlagSection = FlagSection{ diff --git a/internal/pkg/cli/option_types.go b/internal/pkg/cli/option_types.go index 17de2dcbb..68c08fc8a 100644 --- a/internal/pkg/cli/option_types.go +++ b/internal/pkg/cli/option_types.go @@ -36,14 +36,14 @@ type TGeneratorOptions struct { } type TReaderOptions struct { - InputFileFormat string - IFS string - IPS string - IRS string - AllowRepeatIFS bool - IFSRegex *regexp.Regexp - IPSRegex *regexp.Regexp - DedupeFieldNames bool + InputFileFormat string + IFS string + IPS string + IRS string + AllowRepeatIFS bool + IFSRegex *regexp.Regexp + IPSRegex *regexp.Regexp + DedupeFieldNames bool // If unspecified on the command line, these take input-format-dependent // defaults. E.g.
default FS is comma for DKVP but space for NIDX; diff --git a/internal/pkg/input/record_reader.go b/internal/pkg/input/record_reader.go index f4a823fde..2228614e0 100644 --- a/internal/pkg/input/record_reader.go +++ b/internal/pkg/input/record_reader.go @@ -176,7 +176,7 @@ func newFieldSplitter(options *cli.TReaderOptions) iFieldSplitter { } type tIFSSplitter struct { - ifs string + ifs string allowRepeatIFS bool } diff --git a/internal/pkg/input/record_reader_csvlite.go b/internal/pkg/input/record_reader_csvlite.go index 414d945c0..aaca53239 100644 --- a/internal/pkg/input/record_reader_csvlite.go +++ b/internal/pkg/input/record_reader_csvlite.go @@ -49,7 +49,7 @@ type RecordReaderCSVLite struct { readerOptions *cli.TReaderOptions recordsPerBatch int // distinct from readerOptions.RecordsPerBatch for join/repl - fieldSplitter iFieldSplitter + fieldSplitter iFieldSplitter recordBatchGetter recordBatchGetterCSV inputLineNumber int @@ -63,7 +63,7 @@ func NewRecordReaderCSVLite( reader := &RecordReaderCSVLite{ readerOptions: readerOptions, recordsPerBatch: recordsPerBatch, - fieldSplitter: newFieldSplitter(readerOptions), + fieldSplitter: newFieldSplitter(readerOptions), } if reader.readerOptions.UseImplicitCSVHeader { reader.recordBatchGetter = getRecordBatchImplicitCSVHeader @@ -80,7 +80,7 @@ func NewRecordReaderPPRINT( reader := &RecordReaderCSVLite{ readerOptions: readerOptions, recordsPerBatch: recordsPerBatch, - fieldSplitter: newFieldSplitter(readerOptions), + fieldSplitter: newFieldSplitter(readerOptions), } if reader.readerOptions.UseImplicitCSVHeader { reader.recordBatchGetter = getRecordBatchImplicitCSVHeader diff --git a/internal/pkg/input/record_reader_xtab.go b/internal/pkg/input/record_reader_xtab.go index 922022e00..c2ddc7e68 100644 --- a/internal/pkg/input/record_reader_xtab.go +++ b/internal/pkg/input/record_reader_xtab.go @@ -314,7 +314,7 @@ func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) { return "", "", 
errors.New("mlr: internal coding error in XTAB reader") } - // ' a 1' splits as key '', value 'a 1' for compatibility with Miller 5. + // ' abc 123' splits as key '', value 'abc 123'. if strings.HasPrefix(input, s.ips) { keyStart := 0 for keyStart < n && strings.HasPrefix(input[keyStart:], s.ips) { diff --git a/internal/pkg/transformers/join.go b/internal/pkg/transformers/join.go index 3b6147fa5..d407054a5 100644 --- a/internal/pkg/transformers/join.go +++ b/internal/pkg/transformers/join.go @@ -115,7 +115,6 @@ func transformerJoinUsage( fmt.Fprintf(o, " --ifs {field-separator character}\n") fmt.Fprintf(o, " --ips {pair-separator character}\n") fmt.Fprintf(o, " --repifs\n") - fmt.Fprintf(o, " --repips\n") fmt.Fprintf(o, " --implicit-csv-header\n") fmt.Fprintf(o, " --no-implicit-csv-header\n") fmt.Fprintf(o, "For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will\n") diff --git a/man/manpage.txt b/man/manpage.txt index bc31f7d73..77f07daf2 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -111,6 +111,7 @@ HELP OPTIONS Flags: mlr help flags mlr help list-separator-aliases + mlr help list-separator-regex-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -688,11 +689,14 @@ SEPARATOR FLAGS semicolon = ";" slash = "/" space = " " - spaces = "( )+" tab = "\t" - tabs = "(\t)+" usv_fs = "\xe2\x90\x9f" usv_rs = "\xe2\x90\x9e" + + - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`: + + spaces = "( )+" + tabs = "(\t)+" whitespace = "([ \t])+" * Default separators by format: @@ -709,12 +713,10 @@ SEPARATOR FLAGS --fs {string} Specify FS for input and output. --ifs {string} Specify FS for input. + --ifs-regex {string} Specify FS for input as a regular expression. --ips {string} Specify PS for input. + --ips-regex {string} Specify PS for input as a regular expression. --irs {string} Specify RS for input. - --no-ifs-regex Don't treat IFS value as a regular expression. 
Useful - if your IFS is ".". - --no-ips-regex Don't treat IPS value as a regular expression. Useful - if your IPS is ".". --ofs {string} Specify FS for output. --ops {string} Specify PS for output. --ors {string} Specify RS for output. @@ -1229,7 +1231,6 @@ VERBS --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will @@ -2996,4 +2997,4 @@ SEE ALSO - 2021-12-23 MILLER(1) + 2021-12-25 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 685f3e824..df93c3401 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2021-12-23 +.\" Date: 2021-12-25 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2021-12-23" "\ \&" "\ \&" +.TH "MILLER" "1" "2021-12-25" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -146,6 +146,7 @@ Essentials: Flags: mlr help flags mlr help list-separator-aliases + mlr help list-separator-regex-aliases mlr help comments-in-data-flags mlr help compressed-data-flags mlr help csv-only-flags @@ -831,11 +832,14 @@ Notes about all other separators: semicolon = ";" slash = "/" space = " " - spaces = "( )+" tab = "\et" - tabs = "(\et)+" usv_fs = "\exe2\ex90\ex9f" usv_rs = "\exe2\ex90\ex9e" + + - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`: + + spaces = "( )+" + tabs = "(\et)+" whitespace = "([ \et])+" * Default separators by format: @@ -852,12 +856,10 @@ Notes about all other separators: --fs {string} Specify FS for input and output. --ifs {string} Specify FS for input. +--ifs-regex {string} Specify FS for input as a regular expression. --ips {string} Specify PS for input. 
+--ips-regex {string} Specify PS for input as a regular expression. --irs {string} Specify RS for input. ---no-ifs-regex Don't treat IFS value as a regular expression. Useful - if your IFS is ".". ---no-ips-regex Don't treat IPS value as a regular expression. Useful - if your IPS is ".". --ofs {string} Specify FS for output. --ops {string} Specify PS for output. --ors {string} Specify RS for output. @@ -1554,7 +1556,6 @@ the main "mlr --help" for more information on syntax for these arguments: --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index b2c0331ea..bf89a1ec3 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -455,7 +455,6 @@ the main "mlr --help" for more information on syntax for these arguments: --ifs {field-separator character} --ips {pair-separator character} --repifs - --repips --implicit-csv-header --no-implicit-csv-header For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will diff --git a/todo.txt b/todo.txt index cabd5bc85..f4c41456e 100644 --- a/todo.txt +++ b/todo.txt @@ -2,17 +2,6 @@ PUNCHDOWN LIST * blockers: - ! --ifs-regex & --ips-regex -- guessing is not safe as evidence by '.' and '|' - > xtab ips space by default; 3x faster - * incl webdoc - > xtab splitter UT; nidx too - > regex aliases too - k make a helper class to encapsulate - k --ixs vs --ixs-regex at cli-parser - k remove SuppressIFSRegexing - k remove lib.IsRegexString - d olh/webdoc updates - - allow-repeat-ixs nidx perf mod w/o regex split string ... 
- fractional-strptime @@ -117,6 +106,8 @@ PUNCHDOWN LIST ================================================================ NON-BLOCKERS +* xtab splitter UT; nidx too + * integrate: o https://www.libhunt.com/r/miller o https://repology.org/project/miller/information