mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Dedupe field names by default (#794)
This commit is contained in:
parent
6b87a121b0
commit
157e567909
75 changed files with 352 additions and 195 deletions
8
.vimrc
8
.vimrc
|
|
@ -1,8 +1,2 @@
|
|||
map \d :w<C-m>:!clear;echo Building ...; echo; make mlr<C-m>
|
||||
map \f :w<C-m>:!clear;echo Building ...; echo; make tests-in-order<C-m>
|
||||
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrval-tests<C-m>
|
||||
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrmap-tests<C-m>
|
||||
"map \r :w<C-m>:!clear;echo Building ...; echo; make input-tests<C-m>
|
||||
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrval-format-test<C-m>
|
||||
"map \r :w<C-m>:!clear;echo Building ...; echo; make bifs-tests<C-m>
|
||||
map \r :w<C-m>:!clear;echo Building ...; echo; make bifs-collections-test<C-m>
|
||||
map \f :w<C-m>:!clear;echo Building ...; echo; make ut<C-m>
|
||||
|
|
|
|||
120
Makefile
120
Makefile
|
|
@ -31,126 +31,6 @@ install: build
|
|||
unit-test ut:
|
||||
go test github.com/johnkerl/miller/internal/pkg/...
|
||||
|
||||
# Keystroke-savers
|
||||
lib-unbackslash-test:
|
||||
go test internal/pkg/lib/unbackslash_test.go internal/pkg/lib/unbackslash.go
|
||||
lib_regex_test:
|
||||
go test internal/pkg/lib/regex_test.go internal/pkg/lib/regex.go
|
||||
lib-tests:
|
||||
go test github.com/johnkerl/miller/internal/pkg/lib/...
|
||||
|
||||
mlrval-new-test:
|
||||
go test internal/pkg/mlrval/new_test.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go
|
||||
mlrval-is-test:
|
||||
go test internal/pkg/mlrval/is_test.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go
|
||||
mlrval-get-test:
|
||||
go test internal/pkg/mlrval/get_test.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go \
|
||||
internal/pkg/mlrval/mlrval_get.go
|
||||
mlrval-output-test:
|
||||
go test internal/pkg/mlrval/output_test.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go \
|
||||
internal/pkg/mlrval/mlrval_get.go \
|
||||
internal/pkg/mlrval/mlrval_output.go \
|
||||
internal/pkg/mlrval/mlrval_format.go
|
||||
mlrval-format-test:
|
||||
go test internal/pkg/mlrval/format_test.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go \
|
||||
internal/pkg/mlrval/mlrval_get.go \
|
||||
internal/pkg/mlrval/mlrval_output.go \
|
||||
internal/pkg/mlrval/mlrval_format.go
|
||||
mlrval-tests:
|
||||
go test github.com/johnkerl/miller/internal/pkg/mlrval/...
|
||||
|
||||
mlrmap-new-test:
|
||||
go test internal/pkg/mlrval/mlrmap_new_test.go \
|
||||
internal/pkg/mlrval/mlrmap.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go \
|
||||
internal/pkg/mlrval/mlrval_get.go \
|
||||
internal/pkg/mlrval/mlrval_output.go \
|
||||
internal/pkg/mlrval/mlrval_format.go
|
||||
mlrmap-accessors-test:
|
||||
go test internal/pkg/mlrval/mlrmap_accessors_test.go \
|
||||
internal/pkg/mlrval/mlrmap.go \
|
||||
internal/pkg/mlrval/mlrmap_accessors.go \
|
||||
internal/pkg/mlrval/mlrval_type.go \
|
||||
internal/pkg/mlrval/mlrval_constants.go \
|
||||
internal/pkg/mlrval/mlrval_new.go \
|
||||
internal/pkg/mlrval/mlrval_cmp.go \
|
||||
internal/pkg/mlrval/mlrval_copy.go \
|
||||
internal/pkg/mlrval/mlrval_infer.go \
|
||||
internal/pkg/mlrval/mlrval_is.go \
|
||||
internal/pkg/mlrval/mlrval_get.go \
|
||||
internal/pkg/mlrval/mlrval_output.go \
|
||||
internal/pkg/mlrval/mlrval_format.go
|
||||
|
||||
mlrmap-tests: mlrmap-new-test mlrmap-accessors-test
|
||||
|
||||
input-dkvp-test:
|
||||
go test internal/pkg/input/record_reader_dkvp_test.go \
|
||||
internal/pkg/input/record_reader.go \
|
||||
internal/pkg/input/record_reader_dkvp_nidx.go
|
||||
input-tests: input-dkvp-test
|
||||
|
||||
bifs-arithmetic-test:
|
||||
go test internal/pkg/bifs/arithmetic_test.go \
|
||||
internal/pkg/bifs/base.go \
|
||||
internal/pkg/bifs/arithmetic.go
|
||||
bifs-bits-test:
|
||||
go test internal/pkg/bifs/bits_test.go \
|
||||
internal/pkg/bifs/base.go \
|
||||
internal/pkg/bifs/arithmetic.go \
|
||||
internal/pkg/bifs/bits.go
|
||||
bifs-collections-test:
|
||||
go test internal/pkg/bifs/collections_test.go \
|
||||
internal/pkg/bifs/base.go \
|
||||
internal/pkg/bifs/arithmetic.go \
|
||||
internal/pkg/bifs/collections.go
|
||||
bifs-hashing-test:
|
||||
go test internal/pkg/bifs/hashing_test.go \
|
||||
internal/pkg/bifs/base.go \
|
||||
internal/pkg/bifs/arithmetic.go \
|
||||
internal/pkg/bifs/hashing.go
|
||||
bifs-sort-test:
|
||||
go test internal/pkg/bifs/sort_test.go \
|
||||
internal/pkg/bifs/base.go \
|
||||
internal/pkg/bifs/arithmetic.go \
|
||||
internal/pkg/bifs/sort.go
|
||||
|
||||
bifs-tests: bifs-arithmetic-test bifs-bits-test bifs-collections-test bifs-hashing-test bifs-sort-test
|
||||
|
||||
#mlrval_functions_test:
|
||||
# go test internal/pkg/mlrval/mlrval_functions_test.go $(ls internal/pkg/types/*.go | grep -v test)
|
||||
#mlrval_format_test:
|
||||
# go test internal/pkg/mlrval/mlrval_format_test.go $(ls internal/pkg/types/*.go|grep -v test)
|
||||
|
||||
tests-in-order: mlrval-tests mlrmap-tests input-tests bifs-tests
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Regression tests (large number)
|
||||
#
|
||||
|
|
|
|||
|
|
@ -121,31 +121,31 @@ Here's some sample CSV data which is values-only, i.e. headerless:
|
|||
</pre>
|
||||
|
||||
There are clearly nine fields here, but if we try to have Miller parse it as CSV, we
|
||||
see there are fewer than nine columns:
|
||||
see that something unexpected happened:
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --csv cat data/nas.csv</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
-349801.10097848,4537221.43295653,2,1,NA
|
||||
-338681.59578181,4537221.43295653,14,1,0.964
|
||||
-334975.09404959,4537221.43295653,18,1,NA
|
||||
-332195.21775042,4537221.43295653,21,1,0.96
|
||||
-331268.59231736,4537221.43295653,22,1,0.962
|
||||
-330341.96688431,4537221.43295653,23,1,0.962
|
||||
-326635.46515209,4537221.43295653,27,1,0.958
|
||||
-349801.10097848,4537221.43295653,2,1,NA,NA_2,NA_3,NA_4,NA_5
|
||||
-338681.59578181,4537221.43295653,14,1,13.1,1,0.978,0.964,0.964
|
||||
-334975.09404959,4537221.43295653,18,1,13.1,1,NA,NA,NA
|
||||
-332195.21775042,4537221.43295653,21,1,13.1,1,0.978,0.974,0.96
|
||||
-331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962
|
||||
-330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962
|
||||
-326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958
|
||||
</pre>
|
||||
|
||||
What happened?
|
||||
|
||||
Miller is (by central design) a mapping from name to value, rather than integer
|
||||
position to value as in most tools in the Unix toolkit such as `sort`, `cut`,
|
||||
`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
|
||||
is stored, then updated with `Yea=2`. This is in the input-parser and the value
|
||||
`Yea=1` is unavailable to any further processing.
|
||||
`awk`, etc. And its default behavior with repeated column/field names is to append `_2`, `_3`, etc. to dedupe them.
|
||||
So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
|
||||
is stored, then `Yea=2` is stored under the deduplicated name `Yea_2`. This happens in the input parser.
|
||||
|
||||
Here, the first data line is being seen as a header ine, and the repeated `NA`
|
||||
values are being seen as duplicate keys.
|
||||
Here, the first data line is being seen as a header line, and the repeated `NA`
|
||||
values are being seen as duplicate keys that need to be deduplicated.
|
||||
|
||||
One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`:
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ cat data/nas.csv
|
|||
GENMD-EOF
|
||||
|
||||
There are clearly nine fields here, but if we try to have Miller parse it as CSV, we
|
||||
see there are fewer than nine columns:
|
||||
see that something unexpected happened:
|
||||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --csv cat data/nas.csv
|
||||
|
|
@ -58,12 +58,13 @@ What happened?
|
|||
|
||||
Miller is (by central design) a mapping from name to value, rather than integer
|
||||
position to value as in most tools in the Unix toolkit such as `sort`, `cut`,
|
||||
`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
|
||||
is stored, then updated with `Yea=2`. This is in the input-parser and the value
|
||||
`Yea=1` is unavailable to any further processing.
|
||||
`awk`, etc. And its default behavior with repeated column/field names is to
|
||||
append `_2`, `_3`, etc to dedupe them. So given input `Yea=1,Yea=2` on the
|
||||
same input line, first `Yea=1` is stored, then `Yea_2=2`. This is in the
|
||||
input-parser.
|
||||
|
||||
Here, the first data line is being seen as a header ine, and the repeated `NA`
|
||||
values are being seen as duplicate keys.
|
||||
Here, the first data line is being seen as a header line, and the repeated `NA`
|
||||
values are being seen as duplicate keys that need to be deduplicated.
|
||||
|
||||
One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`:
|
||||
|
||||
|
|
|
|||
|
|
@ -507,6 +507,13 @@ MISCELLANEOUS FLAGS
|
|||
what you might hope but `--mfrom *.csv --` does.
|
||||
--mload {filenames} Like `--load` but works with more than one filename,
|
||||
e.g. `--mload *.mlr --`.
|
||||
--no-dedupe-field-names By default, if an input record has a field name x and
|
||||
another also named x, the second will be renamed x_2,
|
||||
and so on. With this flag provided, the second x's
|
||||
value will replace the first x's value when the
|
||||
record is read. This flag has no effect on JSON input
|
||||
records, where duplicate keys always result in the
|
||||
last one's value being retained.
|
||||
--no-fflush Let buffered output not be written after every output
|
||||
record. The default is flush output after every
|
||||
record if the output is to the terminal, or less
|
||||
|
|
@ -3010,5 +3017,5 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-22 MILLER(1)
|
||||
2021-12-23 MILLER(1)
|
||||
</pre>
|
||||
|
|
|
|||
|
|
@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS
|
|||
what you might hope but `--mfrom *.csv --` does.
|
||||
--mload {filenames} Like `--load` but works with more than one filename,
|
||||
e.g. `--mload *.mlr --`.
|
||||
--no-dedupe-field-names By default, if an input record has a field name x and
|
||||
another also named x, the second will be renamed x_2,
|
||||
and so on. With this flag provided, the second x's
|
||||
value will replace the first x's value when the
|
||||
record is read. This flag has no effect on JSON input
|
||||
records, where duplicate keys always result in the
|
||||
last one's value being retained.
|
||||
--no-fflush Let buffered output not be written after every output
|
||||
record. The default is flush output after every
|
||||
record if the output is to the terminal, or less
|
||||
|
|
@ -2989,4 +2996,4 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-22 MILLER(1)
|
||||
2021-12-23 MILLER(1)
|
||||
|
|
|
|||
|
|
@ -264,3 +264,4 @@ The following differences are rather technical. If they don't sound familiar to
|
|||
* See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags).
|
||||
* Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the
|
||||
[page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information.
|
||||
* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`.
|
||||
|
|
|
|||
|
|
@ -222,3 +222,4 @@ The following differences are rather technical. If they don't sound familiar to
|
|||
* See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags).
|
||||
* Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the
|
||||
[page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information.
|
||||
* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`.
|
||||
|
|
|
|||
|
|
@ -355,6 +355,8 @@ These are flags which don't fit into any other category.
|
|||
`: Use this to specify one or more input files before the verb(s), rather than after. May be used more than once. The list of filenames must end with `--`. This is useful, for example, since `--from *.csv` doesn't do what you might hope but `--mfrom *.csv --` does.
|
||||
* `--mload {filenames}
|
||||
`: Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`.
|
||||
* `--no-dedupe-field-names
|
||||
`: By default, if an input record has a field name x and another also named x, the second will be renamed x_2, and so on. With this flag provided, the second x's value will replace the first x's value when the record is read. This flag has no effect on JSON input records, where duplicate keys always result in the last one's value being retained.
|
||||
* `--no-fflush
|
||||
`: Let buffered output not be written after every output record. The default is to flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. The default is a significant performance optimization for large files. Use this flag to allow less-frequent updates when output is to the terminal. This is unlikely to be a noticeable performance improvement, since direct-to-screen output for large files has its own overhead.
|
||||
* `--no-hash-records
|
||||
|
|
|
|||
|
|
@ -2557,6 +2557,18 @@ var MiscFlagSection = FlagSection{
|
|||
},
|
||||
},
|
||||
|
||||
{
|
||||
name: "--no-dedupe-field-names",
|
||||
help: `By default, if an input record has a field name x and
|
||||
another also named x, the second will be renamed x_2, and so on. With this flag provided, the
|
||||
second x's value will replace the first x's value when the record is read. This flag has no effect
|
||||
on JSON input records, where duplicate keys always result in the last one's value being retained.`,
|
||||
parser: func(args []string, argc int, pargi *int, options *TOptions) {
|
||||
options.ReaderOptions.DedupeFieldNames = false
|
||||
*pargi += 1
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
name: "--records-per-batch",
|
||||
arg: "{n}",
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ type TReaderOptions struct {
|
|||
IPSRegex *regexp.Regexp
|
||||
SuppressIFSRegexing bool // e.g. if they want to do '--ifs .' since '.' is a regex metacharacter
|
||||
SuppressIPSRegexing bool // e.g. if they want to do '--ips .' since '.' is a regex metacharacter
|
||||
DedupeFieldNames bool
|
||||
|
||||
// If unspecified on the command line, these take input-format-dependent
|
||||
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
|
||||
|
|
@ -186,6 +187,7 @@ func DefaultReaderOptions() TReaderOptions {
|
|||
StepAsString: DEFAULT_GEN_STEP_AS_STRING,
|
||||
StopAsString: DEFAULT_GEN_STOP_AS_STRING,
|
||||
},
|
||||
DedupeFieldNames: true,
|
||||
|
||||
// TODO: comment
|
||||
RecordsPerBatch: DEFAULT_RECORDS_PER_BATCH,
|
||||
|
|
|
|||
|
|
@ -182,6 +182,7 @@ func (reader *RecordReaderCSV) getRecordBatch(
|
|||
eof bool,
|
||||
) {
|
||||
recordsAndContexts = list.New()
|
||||
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
|
||||
|
||||
csvRecords, more := <-csvRecordsChannel
|
||||
if !more {
|
||||
|
|
@ -226,7 +227,11 @@ func (reader *RecordReaderCSV) getRecordBatch(
|
|||
for i := 0; i < nh; i++ {
|
||||
key := reader.header[i]
|
||||
value := mlrval.FromDeferredType(csvRecord[i])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
|
|
@ -246,18 +251,30 @@ func (reader *RecordReaderCSV) getRecordBatch(
|
|||
for i = 0; i < n; i++ {
|
||||
key := reader.header[i]
|
||||
value := mlrval.FromDeferredType(csvRecord[i])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
if nh < nd {
|
||||
// if header shorter than data: use 1-up itoa keys
|
||||
key := strconv.Itoa(i + 1)
|
||||
value := mlrval.FromDeferredType(csvRecord[i])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
if nh > nd {
|
||||
// if header longer than data: use "" values
|
||||
for i = nd; i < nh; i++ {
|
||||
record.PutCopy(reader.header[i], mlrval.VOID)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.header[i], mlrval.VOID.Copy(), dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -179,6 +179,7 @@ func getRecordBatchExplicitCSVHeader(
|
|||
eof bool,
|
||||
) {
|
||||
recordsAndContexts = list.New()
|
||||
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
|
||||
|
||||
lines, more := <-linesChannel
|
||||
if !more {
|
||||
|
|
@ -247,7 +248,11 @@ func getRecordBatchExplicitCSVHeader(
|
|||
if !reader.readerOptions.AllowRaggedCSVInput {
|
||||
for i, field := range fields {
|
||||
value := mlrval.FromDeferredType(field)
|
||||
record.PutReference(reader.headerStrings[i], value)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nh := len(reader.headerStrings)
|
||||
|
|
@ -256,14 +261,22 @@ func getRecordBatchExplicitCSVHeader(
|
|||
var i int
|
||||
for i = 0; i < n; i++ {
|
||||
value := mlrval.FromDeferredType(fields[i])
|
||||
record.PutReference(reader.headerStrings[i], value)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
if nh < nd {
|
||||
// if header shorter than data: use 1-up itoa keys
|
||||
for i = nh; i < nd; i++ {
|
||||
key := strconv.Itoa(i + 1)
|
||||
value := mlrval.FromDeferredType(fields[i])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
if nh > nd {
|
||||
|
|
@ -293,6 +306,7 @@ func getRecordBatchImplicitCSVHeader(
|
|||
eof bool,
|
||||
) {
|
||||
recordsAndContexts = list.New()
|
||||
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
|
||||
|
||||
lines, more := <-linesChannel
|
||||
if !more {
|
||||
|
|
@ -364,7 +378,11 @@ func getRecordBatchImplicitCSVHeader(
|
|||
if !reader.readerOptions.AllowRaggedCSVInput {
|
||||
for i, field := range fields {
|
||||
value := mlrval.FromDeferredType(field)
|
||||
record.PutReference(reader.headerStrings[i], value)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nh := len(reader.headerStrings)
|
||||
|
|
@ -373,18 +391,30 @@ func getRecordBatchImplicitCSVHeader(
|
|||
var i int
|
||||
for i = 0; i < n; i++ {
|
||||
value := mlrval.FromDeferredType(fields[i])
|
||||
record.PutReference(reader.headerStrings[i], value)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
if nh < nd {
|
||||
// if header shorter than data: use 1-up itoa keys
|
||||
key := strconv.Itoa(i + 1)
|
||||
value := mlrval.FromDeferredType(fields[i])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
if nh > nd {
|
||||
// if header longer than data: use "" values
|
||||
for i = nd; i < nh; i++ {
|
||||
record.PutCopy(reader.headerStrings[i], mlrval.VOID)
|
||||
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], mlrval.VOID.Copy(), dedupeFieldNames)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ import (
|
|||
|
||||
// splitter_DKVP_NIDX is a function type for the one bit of code differing
|
||||
// between the DKVP reader and the NIDX reader, namely, how it splits lines.
|
||||
type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap
|
||||
type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error)
|
||||
|
||||
type RecordReaderDKVPNIDX struct {
|
||||
readerOptions *cli.TReaderOptions
|
||||
|
|
@ -100,7 +100,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle(
|
|||
go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch)
|
||||
|
||||
for {
|
||||
recordsAndContexts, eof := reader.getRecordBatch(linesChannel, context)
|
||||
recordsAndContexts, eof := reader.getRecordBatch(linesChannel, errorChannel, context)
|
||||
if recordsAndContexts.Len() > 0 {
|
||||
readerChannel <- recordsAndContexts
|
||||
}
|
||||
|
|
@ -113,6 +113,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle(
|
|||
// TODO: comment copiously we're trying to handle slow/fast/short/long reads: tail -f, smallfile, bigfile.
|
||||
func (reader *RecordReaderDKVPNIDX) getRecordBatch(
|
||||
linesChannel <-chan *list.List,
|
||||
errorChannel chan<- error,
|
||||
context *types.Context,
|
||||
) (
|
||||
recordsAndContexts *list.List,
|
||||
|
|
@ -142,7 +143,11 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch(
|
|||
}
|
||||
}
|
||||
|
||||
record := reader.splitter(reader, line)
|
||||
record, err := reader.splitter(reader, line)
|
||||
if err != nil {
|
||||
errorChannel <- err
|
||||
return
|
||||
}
|
||||
context.UpdateForInputRecord()
|
||||
recordAndContext := types.NewRecordAndContext(record, context)
|
||||
recordsAndContexts.PushBack(recordAndContext)
|
||||
|
|
@ -151,8 +156,9 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch(
|
|||
return recordsAndContexts, false
|
||||
}
|
||||
|
||||
func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap {
|
||||
func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) {
|
||||
record := mlrval.NewMlrmapAsRecord()
|
||||
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
|
||||
|
||||
var pairs []string
|
||||
// TODO: func-pointer this away
|
||||
|
|
@ -181,17 +187,23 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma
|
|||
// DKVP is a generalization of NIDX.
|
||||
key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up
|
||||
value := mlrval.FromDeferredType(kv[0])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
key := kv[0]
|
||||
value := mlrval.FromDeferredType(kv[1])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return record
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap {
|
||||
func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) {
|
||||
record := mlrval.NewMlrmapAsRecord()
|
||||
|
||||
var values []string
|
||||
|
|
@ -212,5 +224,5 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma
|
|||
mval := mlrval.FromDeferredType(value)
|
||||
record.PutReference(key, mval)
|
||||
}
|
||||
return record
|
||||
return record, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,13 +16,15 @@ func TestRecordFromDKVPLine(t *testing.T) {
|
|||
assert.Nil(t, err)
|
||||
|
||||
line := ""
|
||||
record := recordFromDKVPLine(reader, line)
|
||||
record, err := recordFromDKVPLine(reader, line)
|
||||
assert.NotNil(t, record)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 0, record.FieldCount)
|
||||
|
||||
line = "a=1,b=2,c=3"
|
||||
record = recordFromDKVPLine(reader, line)
|
||||
record, err = recordFromDKVPLine(reader, line)
|
||||
assert.NotNil(t, record)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 3, record.FieldCount)
|
||||
|
||||
assert.NotNil(t, record.Head)
|
||||
|
|
@ -33,20 +35,25 @@ func TestRecordFromDKVPLine(t *testing.T) {
|
|||
assert.Equal(t, record.Head.Next.Key, "b")
|
||||
assert.Equal(t, record.Head.Next.Next.Key, "c")
|
||||
|
||||
// Default is to dedupe to a=1,b=2,b_2=3
|
||||
line = "a=1,b=2,b=3"
|
||||
record = recordFromDKVPLine(reader, line)
|
||||
record, err = recordFromDKVPLine(reader, line)
|
||||
assert.NotNil(t, record)
|
||||
assert.Equal(t, 2, record.FieldCount)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 3, record.FieldCount)
|
||||
|
||||
assert.NotNil(t, record.Head)
|
||||
assert.NotNil(t, record.Head.Next)
|
||||
assert.Nil(t, record.Head.Next.Next)
|
||||
assert.NotNil(t, record.Head.Next.Next)
|
||||
assert.Nil(t, record.Head.Next.Next.Next)
|
||||
assert.Equal(t, record.Head.Key, "a")
|
||||
assert.Equal(t, record.Head.Next.Key, "b")
|
||||
assert.Equal(t, record.Head.Next.Next.Key, "b_2")
|
||||
|
||||
line = "a,b,c"
|
||||
record = recordFromDKVPLine(reader, line)
|
||||
record, err = recordFromDKVPLine(reader, line)
|
||||
assert.NotNil(t, record)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 3, record.FieldCount)
|
||||
|
||||
assert.NotNil(t, record.Head)
|
||||
|
|
|
|||
|
|
@ -257,6 +257,7 @@ func (reader *RecordReaderXTAB) recordFromXTABLines(
|
|||
stanza *list.List,
|
||||
) (*mlrval.Mlrmap, error) {
|
||||
record := mlrval.NewMlrmapAsRecord()
|
||||
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
|
||||
|
||||
for e := stanza.Front(); e != nil; e = e.Next() {
|
||||
line := e.Value.(string)
|
||||
|
|
@ -274,10 +275,16 @@ func (reader *RecordReaderXTAB) recordFromXTABLines(
|
|||
key := kv[0]
|
||||
if len(kv) == 1 {
|
||||
value := mlrval.VOID
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
value := mlrval.FromDeferredType(kv[1])
|
||||
record.PutReference(key, value)
|
||||
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package mlrval
|
|||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/johnkerl/miller/internal/pkg/lib"
|
||||
)
|
||||
|
|
@ -28,30 +30,65 @@ func (mlrmap *Mlrmap) Get(key string) *Mlrval {
|
|||
|
||||
// PutReference copies the key but not the value. This is not safe for DSL use,
|
||||
// where we could create undesired references between different objects. Only
|
||||
// intended to be used at callsites which allocate a mlrval solely for the
|
||||
// purpose of putting into a map, e.g. input-record readers.
|
||||
// intended to be used at callsites which allocate a mlrval on the spot, solely
|
||||
// for the purpose of putting into the map.
|
||||
func (mlrmap *Mlrmap) PutReference(key string, value *Mlrval) {
|
||||
pe := mlrmap.findEntry(key)
|
||||
if pe == nil {
|
||||
pe = newMlrmapEntry(key, value)
|
||||
if mlrmap.Head == nil {
|
||||
mlrmap.Head = pe
|
||||
mlrmap.Tail = pe
|
||||
} else {
|
||||
pe.Prev = mlrmap.Tail
|
||||
pe.Next = nil
|
||||
mlrmap.Tail.Next = pe
|
||||
mlrmap.Tail = pe
|
||||
}
|
||||
if mlrmap.keysToEntries != nil {
|
||||
mlrmap.keysToEntries[key] = pe
|
||||
}
|
||||
mlrmap.FieldCount++
|
||||
mlrmap.putReferenceNewAux(key, value)
|
||||
} else {
|
||||
pe.Value = value
|
||||
}
|
||||
}
|
||||
|
||||
// putReferenceNewAux is a helper function for code shared between PutReference
|
||||
// and PutReferenceMaybeDedupe. It should not be invoked from anywhere else --
|
||||
// it doesn't do its own check if the key already exists in the record or not.
|
||||
func (mlrmap *Mlrmap) putReferenceNewAux(key string, value *Mlrval) {
|
||||
pe := newMlrmapEntry(key, value)
|
||||
if mlrmap.Head == nil {
|
||||
mlrmap.Head = pe
|
||||
mlrmap.Tail = pe
|
||||
} else {
|
||||
pe.Prev = mlrmap.Tail
|
||||
pe.Next = nil
|
||||
mlrmap.Tail.Next = pe
|
||||
mlrmap.Tail = pe
|
||||
}
|
||||
if mlrmap.keysToEntries != nil {
|
||||
mlrmap.keysToEntries[key] = pe
|
||||
}
|
||||
mlrmap.FieldCount++
|
||||
}
|
||||
|
||||
// PutReferenceMaybeDedupe is the default inserter for key-value pairs in input records --
|
||||
// if the input is 'x=8,x=9` then we make a record with x=8 and x_2=9. This can be suppressed
|
||||
// via a command-line flag which this method's dedupe flag respects.
|
||||
func (mlrmap *Mlrmap) PutReferenceMaybeDedupe(key string, value *Mlrval, dedupe bool) (string, error) {
|
||||
if !dedupe {
|
||||
mlrmap.PutReference(key, value)
|
||||
return key, nil
|
||||
}
|
||||
|
||||
pe := mlrmap.findEntry(key)
|
||||
if pe == nil {
|
||||
mlrmap.putReferenceNewAux(key, value)
|
||||
return key, nil
|
||||
}
|
||||
|
||||
for i := 2; i < 1000; i++ {
|
||||
newKey := key + "_" + strconv.Itoa(i)
|
||||
pe := mlrmap.findEntry(newKey)
|
||||
if pe == nil {
|
||||
mlrmap.putReferenceNewAux(newKey, value)
|
||||
return newKey, nil
|
||||
}
|
||||
}
|
||||
return key, errors.New(
|
||||
fmt.Sprintf("record has too many input fields named \"%s\"", key),
|
||||
)
|
||||
}
|
||||
|
||||
// PutCopy copies the key and value (deep-copying in case the value is array/map).
|
||||
// This is safe for DSL use. See also PutReference.
|
||||
func (mlrmap *Mlrmap) PutCopy(key string, value *Mlrval) {
|
||||
|
|
|
|||
|
|
@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS
|
|||
what you might hope but `--mfrom *.csv --` does.
|
||||
--mload {filenames} Like `--load` but works with more than one filename,
|
||||
e.g. `--mload *.mlr --`.
|
||||
--no-dedupe-field-names By default, if an input record has a field name x and
|
||||
another also named x, the second will be renamed x_2,
|
||||
and so on. With this flag provided, the second x's
|
||||
value will replace the first x's value when the
|
||||
record is read. This flag has no effect on JSON input
|
||||
records, where duplicate keys always result in the
|
||||
last one's value being retained.
|
||||
--no-fflush Let buffered output not be written after every output
|
||||
record. The default is to flush output after every
|
||||
record if the output is to the terminal, or less
|
||||
|
|
@ -2989,4 +2996,4 @@ SEE ALSO
|
|||
|
||||
|
||||
|
||||
2021-12-22 MILLER(1)
|
||||
2021-12-23 MILLER(1)
|
||||
|
|
|
|||
11
man/mlr.1
11
man/mlr.1
|
|
@ -2,12 +2,12 @@
|
|||
.\" Title: mlr
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: ./mkman.rb
|
||||
.\" Date: 2021-12-22
|
||||
.\" Date: 2021-12-23
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "MILLER" "1" "2021-12-22" "\ \&" "\ \&"
|
||||
.TH "MILLER" "1" "2021-12-23" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Portability definitions
|
||||
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
@ -605,6 +605,13 @@ These are flags which don't fit into any other category.
|
|||
what you might hope but `--mfrom *.csv --` does.
|
||||
--mload {filenames} Like `--load` but works with more than one filename,
|
||||
e.g. `--mload *.mlr --`.
|
||||
--no-dedupe-field-names By default, if an input record has a field name x and
|
||||
another also named x, the second will be renamed x_2,
|
||||
and so on. With this flag provided, the second x's
|
||||
value will replace the first x's value when the
|
||||
record is read. This flag has no effect on JSON input
|
||||
records, where duplicate keys always result in the
|
||||
last one's value being retained.
|
||||
--no-fflush Let buffered output not be written after every output
|
||||
record. The default is to flush output after every
|
||||
record if the output is to the terminal, or less
|
||||
|
|
|
|||
1
test/cases/io-dedupe-field-names/csv-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/csv-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --icsv --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/csv-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/csv-no-dedupe/experr
Normal file
5
test/cases/io-dedupe-field-names/csv-no-dedupe/expout
Normal file
5
test/cases/io-dedupe-field-names/csv-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/csv-no-dedupe/input
Normal file
2
test/cases/io-dedupe-field-names/csv-no-dedupe/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a,b,b,c,b
|
||||
1,2,3,4,5
|
||||
1
test/cases/io-dedupe-field-names/csv/cmd
Normal file
1
test/cases/io-dedupe-field-names/csv/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --icsv --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/csv/experr
Normal file
0
test/cases/io-dedupe-field-names/csv/experr
Normal file
7
test/cases/io-dedupe-field-names/csv/expout
Normal file
7
test/cases/io-dedupe-field-names/csv/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
"b_2": 3,
|
||||
"c": 4,
|
||||
"b_3": 5
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/csv/input
Normal file
2
test/cases/io-dedupe-field-names/csv/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a,b,b,c,b
|
||||
1,2,3,4,5
|
||||
1
test/cases/io-dedupe-field-names/csvlite-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/csvlite-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --icsvlite --ojson cat ${CASEDIR}/input
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/csvlite-no-dedupe/input
Normal file
2
test/cases/io-dedupe-field-names/csvlite-no-dedupe/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a,b,b,c,b
|
||||
1,2,3,4,5
|
||||
1
test/cases/io-dedupe-field-names/csvlite/cmd
Normal file
1
test/cases/io-dedupe-field-names/csvlite/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --icsvlite --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/csvlite/experr
Normal file
0
test/cases/io-dedupe-field-names/csvlite/experr
Normal file
7
test/cases/io-dedupe-field-names/csvlite/expout
Normal file
7
test/cases/io-dedupe-field-names/csvlite/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
"b_2": 3,
|
||||
"c": 4,
|
||||
"b_3": 5
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/csvlite/input
Normal file
2
test/cases/io-dedupe-field-names/csvlite/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a,b,b,c,b
|
||||
1,2,3,4,5
|
||||
1
test/cases/io-dedupe-field-names/dkvp-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/dkvp-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --idkvp --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/dkvp-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/dkvp-no-dedupe/experr
Normal file
5
test/cases/io-dedupe-field-names/dkvp-no-dedupe/expout
Normal file
5
test/cases/io-dedupe-field-names/dkvp-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/dkvp-no-dedupe/input
Normal file
1
test/cases/io-dedupe-field-names/dkvp-no-dedupe/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
a=1,b=2,b=3,c=4,b=5
|
||||
1
test/cases/io-dedupe-field-names/dkvp/cmd
Normal file
1
test/cases/io-dedupe-field-names/dkvp/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --idkvp --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/dkvp/experr
Normal file
0
test/cases/io-dedupe-field-names/dkvp/experr
Normal file
7
test/cases/io-dedupe-field-names/dkvp/expout
Normal file
7
test/cases/io-dedupe-field-names/dkvp/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
"b_2": 3,
|
||||
"c": 4,
|
||||
"b_3": 5
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/dkvp/input
Normal file
1
test/cases/io-dedupe-field-names/dkvp/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
a=1,b=2,b=3,c=4,b=5
|
||||
1
test/cases/io-dedupe-field-names/json-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/json-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --ijson --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/json-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/json-no-dedupe/experr
Normal file
5
test/cases/io-dedupe-field-names/json-no-dedupe/expout
Normal file
5
test/cases/io-dedupe-field-names/json-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/json-no-dedupe/input
Normal file
1
test/cases/io-dedupe-field-names/json-no-dedupe/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"a":1,"b":2,"b":3,"c":4,"b":5}
|
||||
1
test/cases/io-dedupe-field-names/json/cmd
Normal file
1
test/cases/io-dedupe-field-names/json/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --ijson --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/json/experr
Normal file
0
test/cases/io-dedupe-field-names/json/experr
Normal file
5
test/cases/io-dedupe-field-names/json/expout
Normal file
5
test/cases/io-dedupe-field-names/json/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/json/input
Normal file
1
test/cases/io-dedupe-field-names/json/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"a":1,"b":2,"b":3,"c":4,"b":5}
|
||||
1
test/cases/io-dedupe-field-names/nidx-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/nidx-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --inidx --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/nidx-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/nidx-no-dedupe/experr
Normal file
7
test/cases/io-dedupe-field-names/nidx-no-dedupe/expout
Normal file
7
test/cases/io-dedupe-field-names/nidx-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"1": 1,
|
||||
"2": 2,
|
||||
"3": 3,
|
||||
"4": 4,
|
||||
"5": 5
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/nidx-no-dedupe/input
Normal file
1
test/cases/io-dedupe-field-names/nidx-no-dedupe/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
1 2 3 4 5
|
||||
1
test/cases/io-dedupe-field-names/nidx/cmd
Normal file
1
test/cases/io-dedupe-field-names/nidx/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --inidx --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/nidx/experr
Normal file
0
test/cases/io-dedupe-field-names/nidx/experr
Normal file
7
test/cases/io-dedupe-field-names/nidx/expout
Normal file
7
test/cases/io-dedupe-field-names/nidx/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"1": 1,
|
||||
"2": 2,
|
||||
"3": 3,
|
||||
"4": 4,
|
||||
"5": 5
|
||||
}
|
||||
1
test/cases/io-dedupe-field-names/nidx/input
Normal file
1
test/cases/io-dedupe-field-names/nidx/input
Normal file
|
|
@ -0,0 +1 @@
|
|||
1 2 3 4 5
|
||||
1
test/cases/io-dedupe-field-names/pprint-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/pprint-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --ipprint --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/pprint-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/pprint-no-dedupe/experr
Normal file
5
test/cases/io-dedupe-field-names/pprint-no-dedupe/expout
Normal file
5
test/cases/io-dedupe-field-names/pprint-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/pprint-no-dedupe/input
Normal file
2
test/cases/io-dedupe-field-names/pprint-no-dedupe/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a b b c b
|
||||
1 2 3 4 5
|
||||
1
test/cases/io-dedupe-field-names/pprint/cmd
Normal file
1
test/cases/io-dedupe-field-names/pprint/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --ipprint --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/pprint/experr
Normal file
0
test/cases/io-dedupe-field-names/pprint/experr
Normal file
7
test/cases/io-dedupe-field-names/pprint/expout
Normal file
7
test/cases/io-dedupe-field-names/pprint/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
"b_2": 3,
|
||||
"c": 4,
|
||||
"b_3": 5
|
||||
}
|
||||
2
test/cases/io-dedupe-field-names/pprint/input
Normal file
2
test/cases/io-dedupe-field-names/pprint/input
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a b b c b
|
||||
1 2 3 4 5
|
||||
1
test/cases/io-dedupe-field-names/xtab-no-dedupe/cmd
Normal file
1
test/cases/io-dedupe-field-names/xtab-no-dedupe/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --no-dedupe-field-names --ixtab --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/xtab-no-dedupe/experr
Normal file
0
test/cases/io-dedupe-field-names/xtab-no-dedupe/experr
Normal file
5
test/cases/io-dedupe-field-names/xtab-no-dedupe/expout
Normal file
5
test/cases/io-dedupe-field-names/xtab-no-dedupe/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 5,
|
||||
"c": 4
|
||||
}
|
||||
5
test/cases/io-dedupe-field-names/xtab-no-dedupe/input
Normal file
5
test/cases/io-dedupe-field-names/xtab-no-dedupe/input
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
a 1
|
||||
b 2
|
||||
b 3
|
||||
c 4
|
||||
b 5
|
||||
1
test/cases/io-dedupe-field-names/xtab/cmd
Normal file
1
test/cases/io-dedupe-field-names/xtab/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --ixtab --ojson cat ${CASEDIR}/input
|
||||
0
test/cases/io-dedupe-field-names/xtab/experr
Normal file
0
test/cases/io-dedupe-field-names/xtab/experr
Normal file
7
test/cases/io-dedupe-field-names/xtab/expout
Normal file
7
test/cases/io-dedupe-field-names/xtab/expout
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
"b_2": 3,
|
||||
"c": 4,
|
||||
"b_3": 5
|
||||
}
|
||||
5
test/cases/io-dedupe-field-names/xtab/input
Normal file
5
test/cases/io-dedupe-field-names/xtab/input
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
a 1
|
||||
b 2
|
||||
b 3
|
||||
c 4
|
||||
b 5
|
||||
Loading…
Add table
Add a link
Reference in a new issue