mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 10:15:36 +00:00
doc updates
This commit is contained in:
parent
41d82f7d13
commit
67f2cc4737
18 changed files with 169 additions and 106 deletions
|
|
@ -132,6 +132,7 @@ HELP OPTIONS
|
|||
Flags:
|
||||
mlr help flags
|
||||
mlr help list-separator-aliases
|
||||
mlr help list-separator-regex-aliases
|
||||
mlr help comments-in-data-flags
|
||||
mlr help compressed-data-flags
|
||||
mlr help csv-only-flags
|
||||
|
|
@ -709,11 +710,14 @@ SEPARATOR FLAGS
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\t"
|
||||
tabs = "(\t)+"
|
||||
usv_fs = "\xe2\x90\x9f"
|
||||
usv_rs = "\xe2\x90\x9e"
|
||||
|
||||
- Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
spaces = "( )+"
|
||||
tabs = "(\t)+"
|
||||
whitespace = "([ \t])+"
|
||||
|
||||
* Default separators by format:
|
||||
|
|
@ -730,12 +734,10 @@ SEPARATOR FLAGS
|
|||
|
||||
--fs {string} Specify FS for input and output.
|
||||
--ifs {string} Specify FS for input.
|
||||
--ifs-regex {string} Specify FS for input as a regular expression.
|
||||
--ips {string} Specify PS for input.
|
||||
--ips-regex {string} Specify PS for input as a regular expression.
|
||||
--irs {string} Specify RS for input.
|
||||
--no-ifs-regex Don't treat IFS value as a regular expression. Useful
|
||||
if your IFS is ".".
|
||||
--no-ips-regex Don't treat IPS value as a regular expression. Useful
|
||||
if your IPS is ".".
|
||||
--ofs {string} Specify FS for output.
|
||||
--ops {string} Specify PS for output.
|
||||
--ors {string} Specify RS for output.
|
||||
|
|
@ -1250,7 +1252,6 @@ VERBS
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
@ -3017,5 +3018,5 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-23 MILLER(1)
|
||||
2021-12-25 MILLER(1)
|
||||
</pre>
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ HELP OPTIONS
|
|||
Flags:
|
||||
mlr help flags
|
||||
mlr help list-separator-aliases
|
||||
mlr help list-separator-regex-aliases
|
||||
mlr help comments-in-data-flags
|
||||
mlr help compressed-data-flags
|
||||
mlr help csv-only-flags
|
||||
|
|
@ -688,11 +689,14 @@ SEPARATOR FLAGS
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\t"
|
||||
tabs = "(\t)+"
|
||||
usv_fs = "\xe2\x90\x9f"
|
||||
usv_rs = "\xe2\x90\x9e"
|
||||
|
||||
- Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
spaces = "( )+"
|
||||
tabs = "(\t)+"
|
||||
whitespace = "([ \t])+"
|
||||
|
||||
* Default separators by format:
|
||||
|
|
@ -709,12 +713,10 @@ SEPARATOR FLAGS
|
|||
|
||||
--fs {string} Specify FS for input and output.
|
||||
--ifs {string} Specify FS for input.
|
||||
--ifs-regex {string} Specify FS for input as a regular expression.
|
||||
--ips {string} Specify PS for input.
|
||||
--ips-regex {string} Specify PS for input as a regular expression.
|
||||
--irs {string} Specify RS for input.
|
||||
--no-ifs-regex Don't treat IFS value as a regular expression. Useful
|
||||
if your IFS is ".".
|
||||
--no-ips-regex Don't treat IPS value as a regular expression. Useful
|
||||
if your IPS is ".".
|
||||
--ofs {string} Specify FS for output.
|
||||
--ops {string} Specify PS for output.
|
||||
--ors {string} Specify RS for output.
|
||||
|
|
@ -1229,7 +1231,6 @@ VERBS
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
@ -2996,4 +2997,4 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-23 MILLER(1)
|
||||
2021-12-25 MILLER(1)
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ Essentials:
|
|||
Flags:
|
||||
mlr help flags
|
||||
mlr help list-separator-aliases
|
||||
mlr help list-separator-regex-aliases
|
||||
mlr help comments-in-data-flags
|
||||
mlr help compressed-data-flags
|
||||
mlr help csv-only-flags
|
||||
|
|
|
|||
|
|
@ -541,11 +541,14 @@ Notes about all other separators:
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\t"
|
||||
tabs = "(\t)+"
|
||||
usv_fs = "\xe2\x90\x9f"
|
||||
usv_rs = "\xe2\x90\x9e"
|
||||
|
||||
- Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
spaces = "( )+"
|
||||
tabs = "(\t)+"
|
||||
whitespace = "([ \t])+"
|
||||
|
||||
* Default separators by format:
|
||||
|
|
@ -567,14 +570,14 @@ Notes about all other separators:
|
|||
`: Specify FS for input and output.
|
||||
* `--ifs {string}
|
||||
`: Specify FS for input.
|
||||
* `--ifs-regex {string}
|
||||
`: Specify FS for input as a regular expression.
|
||||
* `--ips {string}
|
||||
`: Specify PS for input.
|
||||
* `--ips-regex {string}
|
||||
`: Specify PS for input as a regular expression.
|
||||
* `--irs {string}
|
||||
`: Specify RS for input.
|
||||
* `--no-ifs-regex
|
||||
`: Don't treat IFS value as a regular expression. Useful if your IFS is ".".
|
||||
* `--no-ips-regex
|
||||
`: Don't treat IPS value as a regular expression. Useful if your IPS is ".".
|
||||
* `--ofs {string}
|
||||
`: Specify FS for output.
|
||||
* `--ops {string}
|
||||
|
|
|
|||
|
|
@ -76,6 +76,24 @@ c:3;a:1;b:2
|
|||
c:6;a:4;b:5
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --csv head -n 2 example.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
color,shape,flag,k,index,quantity,rate
|
||||
yellow,triangle,true,1,11,43.6498,9.8870
|
||||
red,square,true,2,15,79.2778,0.0130
|
||||
</pre>
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --csv --ofs pipe head -n 2 example.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
color|shape|flag|k|index|quantity|rate
|
||||
yellow|triangle|true|1|11|43.6498|9.8870
|
||||
red|square|true|2|15|79.2778|0.0130
|
||||
</pre>
|
||||
|
||||
If your data has non-default separators and you don't want to change those
|
||||
between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs
|
||||
:` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes.
|
||||
|
|
@ -96,9 +114,11 @@ c:3;a:1;b:2
|
|||
c:6;a:4;b:5
|
||||
</pre>
|
||||
|
||||
## Multi-character and regular-expression separators
|
||||
## Multi-character separators
|
||||
|
||||
The separators default to single characters, but can be multiple characters if you like:
|
||||
All separators can be multi-character, except for file formats which don't
|
||||
allow parameterization (see below). And for CSV (CSV-lite doesn't have these
|
||||
restrictions), IRS must be `\n` and IFS must be a single character.
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --ifs ';' --ips : --ofs ';;;' --ops := cut -o -f c,a,b data/modsep.dkvp</b>
|
||||
|
|
@ -108,23 +128,13 @@ c:=3;;;a:=1;;;b:=2
|
|||
c:=6;;;a:=4;;;b:=5
|
||||
</pre>
|
||||
|
||||
As of September 2021:
|
||||
|
||||
* `IFS` and `IPS` can be regular expressions.
|
||||
* `IRS` can be multi-character (except for file formats which don't allow parameterization -- see below)
|
||||
* `OFS`, `OPS`, and `ORS` can be multi-character.
|
||||
|
||||
Since `IFS` and `IPS` can be regular expressions, if your data has field
|
||||
separators which are one or more consecutive spaces, you can use `--ifs '(
|
||||
)+'`. But that gets a little tedious, so Miller has the `--repifs` and
|
||||
`--repips` flags you can use if you like. This wraps the `IFS` or `IPS`, say
|
||||
`X`, as `(X)+`.
|
||||
|
||||
The `--repifs` flag means that multiple successive occurrences of the field
|
||||
If your data has field separators which are one or more consecutive spaces, you
|
||||
can use `--ifs space --repifs`.
|
||||
More generally, the `--repifs` flag means that multiple successive occurrences of the field
|
||||
separator count as one. For example, in CSV data we often signify nulls by
|
||||
empty strings, e.g. `2,9,,,,,6,5,4`. On the other hand, if the field separator
|
||||
is a space, it might be more natural to parse `2 4    5` the same as `2 4 5`:
|
||||
`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option above
|
||||
`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option
|
||||
is internally implemented in terms of `--repifs`.
|
||||
|
||||
For example:
|
||||
|
|
@ -158,6 +168,15 @@ early light what so
|
|||
4 so
|
||||
</pre>
|
||||
|
||||
## Regular-expression separators
|
||||
|
||||
`IFS` and `IPS` can be regular expressions: use `--ifs-regex` or `--ips-regex` in place of
|
||||
`--ifs` or `--ips`, respectively.
|
||||
|
||||
You can also use either `--ifs space --repifs` or `--ifs-regex '( )+'`. (But that gets a little tedious,
|
||||
so there are aliases listed below.) Note however that `--ifs space --repifs` is about 3x faster than
|
||||
`--ifs-regex '( )+'` -- regular expressions are powerful, but slower.
|
||||
|
||||
## Aliases
|
||||
|
||||
Many things we'd like to write as separators need to be escaped from the shell
|
||||
|
|
@ -192,16 +211,25 @@ pipe = "|"
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\t"
|
||||
tabs = "(\t)+"
|
||||
usv_fs = "\xe2\x90\x9f"
|
||||
usv_rs = "\xe2\x90\x9e"
|
||||
</pre>
|
||||
|
||||
And for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr help list-separator-regex-aliases</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
spaces = "( )+"
|
||||
tabs = "(\t)+"
|
||||
whitespace = "([ \t])+"
|
||||
</pre>
|
||||
|
||||
Note that `spaces`, `tabs`, and `whitespace` already are regexes so you
|
||||
shouldn't use `--repifs` with them.
|
||||
shouldn't use `--repifs` with them. (In fact, the `--repifs` flag is ignored
|
||||
when `--ifs-regex` is provided.)
|
||||
|
||||
## Command-line flags
|
||||
|
||||
|
|
@ -209,8 +237,8 @@ Given the above, we now have seen the following flags:
|
|||
|
||||
<pre class="pre-non-highlight-non-pair">
|
||||
--rs --irs --ors
|
||||
--fs --ifs --ofs --repifs
|
||||
--ps --ips --ops
|
||||
--fs --ifs --ofs --repifs --ifs-regex
|
||||
--ps --ips --ops --ips-regex
|
||||
</pre>
|
||||
|
||||
See also the [separator-flags section](reference-main-flag-list.md#separator-flags).
|
||||
|
|
|
|||
|
|
@ -48,6 +48,14 @@ GENMD-RUN-COMMAND
|
|||
mlr --ifs , --ofs ';' --ips = --ops : cut -o -f c,a,b data/a.dkvp
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --csv head -n 2 example.csv
|
||||
GENMD-EOF
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --csv --ofs pipe head -n 2 example.csv
|
||||
GENMD-EOF
|
||||
|
||||
If your data has non-default separators and you don't want to change those
|
||||
between input and output, you can use `--rs`, `--fs`, and `--ps`. Setting `--fs
|
||||
:` is the same as setting `--ifs : --ofs :`, but with fewer keystrokes.
|
||||
|
|
@ -60,31 +68,23 @@ GENMD-RUN-COMMAND
|
|||
mlr --fs ';' --ps : cut -o -f c,a,b data/modsep.dkvp
|
||||
GENMD-EOF
|
||||
|
||||
## Multi-character and regular-expression separators
|
||||
## Multi-character separators
|
||||
|
||||
The separators default to single characters, but can be multiple characters if you like:
|
||||
All separators can be multi-character, except for file formats which don't
|
||||
allow parameterization (see below). And for CSV (CSV-lite doesn't have these
|
||||
restrictions), IRS must be `\n` and IFS must be a single character.
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --ifs ';' --ips : --ofs ';;;' --ops := cut -o -f c,a,b data/modsep.dkvp
|
||||
GENMD-EOF
|
||||
|
||||
As of September 2021:
|
||||
|
||||
* `IFS` and `IPS` can be regular expressions.
|
||||
* `IRS` can be multi-character (except for file formats which don't allow parameterization -- see below)
|
||||
* `OFS`, `OPS`, and `ORS` can be multi-character.
|
||||
|
||||
Since `IFS` and `IPS` can be regular expressions, if your data has field
|
||||
separators which are one or more consecutive spaces, you can use `--ifs '(
|
||||
)+'`. But that gets a little tedious, so Miller has the `--repifs` and
|
||||
`--repips` flags you can use if you like. This wraps the `IFS` or `IPS`, say
|
||||
`X`, as `(X)+`.
|
||||
|
||||
The `--repifs` flag means that multiple successive occurrences of the field
|
||||
If your data has field separators which are one or more consecutive spaces, you
|
||||
can use `--ifs space --repifs`.
|
||||
More generally, the `--repifs` flag means that multiple successive occurrences of the field
|
||||
separator count as one. For example, in CSV data we often signify nulls by
|
||||
empty strings, e.g. `2,9,,,,,6,5,4`. On the other hand, if the field separator
|
||||
is a space, it might be more natural to parse `2 4    5` the same as `2 4 5`:
|
||||
`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option above
|
||||
`--repifs --ifs ' '` lets this happen. In fact, the `--ipprint` option
|
||||
is internally implemented in terms of `--repifs`.
|
||||
|
||||
For example:
|
||||
|
|
@ -97,6 +97,15 @@ GENMD-RUN-COMMAND
|
|||
mlr --ifs ' ' --repifs --inidx --oxtab cat data/extra-spaces.txt
|
||||
GENMD-EOF
|
||||
|
||||
## Regular-expression separators
|
||||
|
||||
`IFS` and `IPS` can be regular expressions: use `--ifs-regex` or `--ips-regex` in place of
|
||||
`--ifs` or `--ips`, respectively.
|
||||
|
||||
You can also use either `--ifs space --repifs` or `--ifs-regex '( )+'`. (But that gets a little tedious,
|
||||
so there are aliases listed below.) Note however that `--ifs space --repifs` is about 3x faster than
|
||||
`--ifs-regex '( )+'` -- regular expressions are powerful, but slower.
|
||||
|
||||
## Aliases
|
||||
|
||||
Many things we'd like to write as separators need to be escaped from the shell
|
||||
|
|
@ -106,8 +115,15 @@ GENMD-RUN-COMMAND
|
|||
mlr help list-separator-aliases
|
||||
GENMD-EOF
|
||||
|
||||
And for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr help list-separator-regex-aliases
|
||||
GENMD-EOF
|
||||
|
||||
Note that `spaces`, `tabs`, and `whitespace` already are regexes so you
|
||||
shouldn't use `--repifs` with them.
|
||||
shouldn't use `--repifs` with them. (In fact, the `--repifs` flag is ignored
|
||||
when `--ifs-regex` is provided.)
|
||||
|
||||
## Command-line flags
|
||||
|
||||
|
|
@ -115,8 +131,8 @@ Given the above, we now have seen the following flags:
|
|||
|
||||
GENMD-CARDIFY
|
||||
--rs --irs --ors
|
||||
--fs --ifs --ofs --repifs
|
||||
--ps --ips --ops
|
||||
--fs --ifs --ofs --repifs --ifs-regex
|
||||
--ps --ips --ops --ips-regex
|
||||
GENMD-EOF
|
||||
|
||||
See also the [separator-flags section](reference-main-flag-list.md#separator-flags).
|
||||
|
|
|
|||
|
|
@ -1589,7 +1589,6 @@ the main "mlr --help" for more information on syntax for these arguments:
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ func init() {
|
|||
handlerInfos: []tHandlerInfo{
|
||||
{name: "flags", zaryHandlerFunc: showFlagHelp},
|
||||
{name: "list-separator-aliases", zaryHandlerFunc: listSeparatorAliases},
|
||||
{name: "list-separator-regex-aliases", zaryHandlerFunc: listSeparatorRegexAliases},
|
||||
// Per-section entries will be computed and installed below
|
||||
},
|
||||
},
|
||||
|
|
@ -315,6 +316,10 @@ func listSeparatorAliases() {
|
|||
cli.ListSeparatorAliasesForOnlineHelp()
|
||||
}
|
||||
|
||||
func listSeparatorRegexAliases() {
|
||||
cli.ListSeparatorRegexAliasesForOnlineHelp()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func helpAuxents() {
|
||||
fmt.Print(`Miller has a few otherwise-standalone executables packaged within it.
|
||||
|
|
|
|||
|
|
@ -171,6 +171,14 @@ Notes about all other separators:
|
|||
}
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println(" - Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:")
|
||||
fmt.Println()
|
||||
aliases = lib.GetArrayKeysSorted(SEPARATOR_REGEX_NAMES_TO_VALUES)
|
||||
for _, alias := range aliases {
|
||||
fmt.Printf(" %-10s = \"%s\"\n", alias, SEPARATOR_REGEX_NAMES_TO_VALUES[alias])
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("* Default separators by format:")
|
||||
fmt.Println()
|
||||
|
||||
|
|
@ -205,6 +213,16 @@ func ListSeparatorAliasesForOnlineHelp() {
|
|||
}
|
||||
}
|
||||
|
||||
func ListSeparatorRegexAliasesForOnlineHelp() {
|
||||
// Go doesn't preserve insertion order in its maps so here we are inlining a sort.
|
||||
aliases := lib.GetArrayKeysSorted(SEPARATOR_REGEX_NAMES_TO_VALUES)
|
||||
for _, alias := range aliases {
|
||||
// Really absurd level of indent needed to get fixed-width font in mkdocs here,
|
||||
// I don't know why. Usually it only takes 4, not 10.
|
||||
fmt.Printf("%-10s = \"%s\"\n", alias, SEPARATOR_REGEX_NAMES_TO_VALUES[alias])
|
||||
}
|
||||
}
|
||||
|
||||
func init() { SeparatorFlagSection.Sort() }
|
||||
|
||||
var SeparatorFlagSection = FlagSection{
|
||||
|
|
|
|||
|
|
@ -36,14 +36,14 @@ type TGeneratorOptions struct {
|
|||
}
|
||||
|
||||
type TReaderOptions struct {
|
||||
InputFileFormat string
|
||||
IFS string
|
||||
IPS string
|
||||
IRS string
|
||||
AllowRepeatIFS bool
|
||||
IFSRegex *regexp.Regexp
|
||||
IPSRegex *regexp.Regexp
|
||||
DedupeFieldNames bool
|
||||
InputFileFormat string
|
||||
IFS string
|
||||
IPS string
|
||||
IRS string
|
||||
AllowRepeatIFS bool
|
||||
IFSRegex *regexp.Regexp
|
||||
IPSRegex *regexp.Regexp
|
||||
DedupeFieldNames bool
|
||||
|
||||
// If unspecified on the command line, these take input-format-dependent
|
||||
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ func newFieldSplitter(options *cli.TReaderOptions) iFieldSplitter {
|
|||
}
|
||||
|
||||
type tIFSSplitter struct {
|
||||
ifs string
|
||||
ifs string
|
||||
allowRepeatIFS bool
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ type RecordReaderCSVLite struct {
|
|||
readerOptions *cli.TReaderOptions
|
||||
recordsPerBatch int // distinct from readerOptions.RecordsPerBatch for join/repl
|
||||
|
||||
fieldSplitter iFieldSplitter
|
||||
fieldSplitter iFieldSplitter
|
||||
recordBatchGetter recordBatchGetterCSV
|
||||
|
||||
inputLineNumber int
|
||||
|
|
@ -63,7 +63,7 @@ func NewRecordReaderCSVLite(
|
|||
reader := &RecordReaderCSVLite{
|
||||
readerOptions: readerOptions,
|
||||
recordsPerBatch: recordsPerBatch,
|
||||
fieldSplitter: newFieldSplitter(readerOptions),
|
||||
fieldSplitter: newFieldSplitter(readerOptions),
|
||||
}
|
||||
if reader.readerOptions.UseImplicitCSVHeader {
|
||||
reader.recordBatchGetter = getRecordBatchImplicitCSVHeader
|
||||
|
|
@ -80,7 +80,7 @@ func NewRecordReaderPPRINT(
|
|||
reader := &RecordReaderCSVLite{
|
||||
readerOptions: readerOptions,
|
||||
recordsPerBatch: recordsPerBatch,
|
||||
fieldSplitter: newFieldSplitter(readerOptions),
|
||||
fieldSplitter: newFieldSplitter(readerOptions),
|
||||
}
|
||||
if reader.readerOptions.UseImplicitCSVHeader {
|
||||
reader.recordBatchGetter = getRecordBatchImplicitCSVHeader
|
||||
|
|
|
|||
|
|
@ -314,7 +314,7 @@ func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
|
|||
return "", "", errors.New("mlr: internal coding error in XTAB reader")
|
||||
}
|
||||
|
||||
// ' a 1' splits as key '', value 'a 1' for compatibility with Miller 5.
|
||||
// ' abc 123' splits as key '', value 'abc 123'.
|
||||
if strings.HasPrefix(input, s.ips) {
|
||||
keyStart := 0
|
||||
for keyStart < n && strings.HasPrefix(input[keyStart:], s.ips) {
|
||||
|
|
|
|||
|
|
@ -115,7 +115,6 @@ func transformerJoinUsage(
|
|||
fmt.Fprintf(o, " --ifs {field-separator character}\n")
|
||||
fmt.Fprintf(o, " --ips {pair-separator character}\n")
|
||||
fmt.Fprintf(o, " --repifs\n")
|
||||
fmt.Fprintf(o, " --repips\n")
|
||||
fmt.Fprintf(o, " --implicit-csv-header\n")
|
||||
fmt.Fprintf(o, " --no-implicit-csv-header\n")
|
||||
fmt.Fprintf(o, "For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will\n")
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ HELP OPTIONS
|
|||
Flags:
|
||||
mlr help flags
|
||||
mlr help list-separator-aliases
|
||||
mlr help list-separator-regex-aliases
|
||||
mlr help comments-in-data-flags
|
||||
mlr help compressed-data-flags
|
||||
mlr help csv-only-flags
|
||||
|
|
@ -688,11 +689,14 @@ SEPARATOR FLAGS
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\t"
|
||||
tabs = "(\t)+"
|
||||
usv_fs = "\xe2\x90\x9f"
|
||||
usv_rs = "\xe2\x90\x9e"
|
||||
|
||||
- Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
spaces = "( )+"
|
||||
tabs = "(\t)+"
|
||||
whitespace = "([ \t])+"
|
||||
|
||||
* Default separators by format:
|
||||
|
|
@ -709,12 +713,10 @@ SEPARATOR FLAGS
|
|||
|
||||
--fs {string} Specify FS for input and output.
|
||||
--ifs {string} Specify FS for input.
|
||||
--ifs-regex {string} Specify FS for input as a regular expression.
|
||||
--ips {string} Specify PS for input.
|
||||
--ips-regex {string} Specify PS for input as a regular expression.
|
||||
--irs {string} Specify RS for input.
|
||||
--no-ifs-regex Don't treat IFS value as a regular expression. Useful
|
||||
if your IFS is ".".
|
||||
--no-ips-regex Don't treat IPS value as a regular expression. Useful
|
||||
if your IPS is ".".
|
||||
--ofs {string} Specify FS for output.
|
||||
--ops {string} Specify PS for output.
|
||||
--ors {string} Specify RS for output.
|
||||
|
|
@ -1229,7 +1231,6 @@ VERBS
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
@ -2996,4 +2997,4 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-23 MILLER(1)
|
||||
2021-12-25 MILLER(1)
|
||||
|
|
|
|||
19
man/mlr.1
19
man/mlr.1
|
|
@ -2,12 +2,12 @@
|
|||
.\" Title: mlr
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: ./mkman.rb
|
||||
.\" Date: 2021-12-23
|
||||
.\" Date: 2021-12-25
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "MILLER" "1" "2021-12-23" "\ \&" "\ \&"
|
||||
.TH "MILLER" "1" "2021-12-25" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Portability definitions
|
||||
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
@ -146,6 +146,7 @@ Essentials:
|
|||
Flags:
|
||||
mlr help flags
|
||||
mlr help list-separator-aliases
|
||||
mlr help list-separator-regex-aliases
|
||||
mlr help comments-in-data-flags
|
||||
mlr help compressed-data-flags
|
||||
mlr help csv-only-flags
|
||||
|
|
@ -831,11 +832,14 @@ Notes about all other separators:
|
|||
semicolon = ";"
|
||||
slash = "/"
|
||||
space = " "
|
||||
spaces = "( )+"
|
||||
tab = "\et"
|
||||
tabs = "(\et)+"
|
||||
usv_fs = "\exe2\ex90\ex9f"
|
||||
usv_rs = "\exe2\ex90\ex9e"
|
||||
|
||||
- Similarly, you can use the following for `--ifs-regex` and `--ips-regex`:
|
||||
|
||||
spaces = "( )+"
|
||||
tabs = "(\et)+"
|
||||
whitespace = "([ \et])+"
|
||||
|
||||
* Default separators by format:
|
||||
|
|
@ -852,12 +856,10 @@ Notes about all other separators:
|
|||
|
||||
--fs {string} Specify FS for input and output.
|
||||
--ifs {string} Specify FS for input.
|
||||
--ifs-regex {string} Specify FS for input as a regular expression.
|
||||
--ips {string} Specify PS for input.
|
||||
--ips-regex {string} Specify PS for input as a regular expression.
|
||||
--irs {string} Specify RS for input.
|
||||
--no-ifs-regex Don't treat IFS value as a regular expression. Useful
|
||||
if your IFS is ".".
|
||||
--no-ips-regex Don't treat IPS value as a regular expression. Useful
|
||||
if your IPS is ".".
|
||||
--ofs {string} Specify FS for output.
|
||||
--ops {string} Specify PS for output.
|
||||
--ors {string} Specify RS for output.
|
||||
|
|
@ -1554,7 +1556,6 @@ the main "mlr --help" for more information on syntax for these arguments:
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
|
|||
|
|
@ -455,7 +455,6 @@ the main "mlr --help" for more information on syntax for these arguments:
|
|||
--ifs {field-separator character}
|
||||
--ips {pair-separator character}
|
||||
--repifs
|
||||
--repips
|
||||
--implicit-csv-header
|
||||
--no-implicit-csv-header
|
||||
For example, if you have 'mlr --csv ... join -l foo ... ' then the left-file format will
|
||||
|
|
|
|||
13
todo.txt
13
todo.txt
|
|
@ -2,17 +2,6 @@
|
|||
PUNCHDOWN LIST
|
||||
|
||||
* blockers:
|
||||
! --ifs-regex & --ips-regex -- guessing is not safe as evidenced by '.' and '|'
|
||||
> xtab ips space by default; 3x faster
|
||||
* incl webdoc
|
||||
> xtab splitter UT; nidx too
|
||||
> regex aliases too
|
||||
k make a helper class to encapsulate
|
||||
k --ixs vs --ixs-regex at cli-parser
|
||||
k remove SuppressIFSRegexing
|
||||
k remove lib.IsRegexString
|
||||
d olh/webdoc updates
|
||||
|
||||
- allow-repeat-ixs nidx perf mod w/o regex split string ...
|
||||
|
||||
- fractional-strptime
|
||||
|
|
@ -117,6 +106,8 @@ PUNCHDOWN LIST
|
|||
================================================================
|
||||
NON-BLOCKERS
|
||||
|
||||
* xtab splitter UT; nidx too
|
||||
|
||||
* integrate:
|
||||
o https://www.libhunt.com/r/miller
|
||||
o https://repology.org/project/miller/information
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue