diff --git a/.vimrc b/.vimrc index bef8ce9ef..7d420eb5a 100644 --- a/.vimrc +++ b/.vimrc @@ -1,8 +1,2 @@ map \d :w:!clear;echo Building ...; echo; make mlr -map \f :w:!clear;echo Building ...; echo; make tests-in-order -"map \r :w:!clear;echo Building ...; echo; make mlrval-tests -"map \r :w:!clear;echo Building ...; echo; make mlrmap-tests -"map \r :w:!clear;echo Building ...; echo; make input-tests -"map \r :w:!clear;echo Building ...; echo; make mlrval-format-test -"map \r :w:!clear;echo Building ...; echo; make bifs-tests -map \r :w:!clear;echo Building ...; echo; make bifs-collections-test +map \f :w:!clear;echo Building ...; echo; make ut diff --git a/Makefile b/Makefile index dd8f9d228..3b7e8635d 100644 --- a/Makefile +++ b/Makefile @@ -31,126 +31,6 @@ install: build unit-test ut: go test github.com/johnkerl/miller/internal/pkg/... -# Keystroke-savers -lib-unbackslash-test: - go test internal/pkg/lib/unbackslash_test.go internal/pkg/lib/unbackslash.go -lib_regex_test: - go test internal/pkg/lib/regex_test.go internal/pkg/lib/regex.go -lib-tests: - go test github.com/johnkerl/miller/internal/pkg/lib/... 
- -mlrval-new-test: - go test internal/pkg/mlrval/new_test.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go -mlrval-is-test: - go test internal/pkg/mlrval/is_test.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go -mlrval-get-test: - go test internal/pkg/mlrval/get_test.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go \ - internal/pkg/mlrval/mlrval_get.go -mlrval-output-test: - go test internal/pkg/mlrval/output_test.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go \ - internal/pkg/mlrval/mlrval_get.go \ - internal/pkg/mlrval/mlrval_output.go \ - internal/pkg/mlrval/mlrval_format.go -mlrval-format-test: - go test internal/pkg/mlrval/format_test.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go \ - internal/pkg/mlrval/mlrval_get.go \ - internal/pkg/mlrval/mlrval_output.go \ - internal/pkg/mlrval/mlrval_format.go -mlrval-tests: - go test github.com/johnkerl/miller/internal/pkg/mlrval/... 
- -mlrmap-new-test: - go test internal/pkg/mlrval/mlrmap_new_test.go \ - internal/pkg/mlrval/mlrmap.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go \ - internal/pkg/mlrval/mlrval_get.go \ - internal/pkg/mlrval/mlrval_output.go \ - internal/pkg/mlrval/mlrval_format.go -mlrmap-accessors-test: - go test internal/pkg/mlrval/mlrmap_accessors_test.go \ - internal/pkg/mlrval/mlrmap.go \ - internal/pkg/mlrval/mlrmap_accessors.go \ - internal/pkg/mlrval/mlrval_type.go \ - internal/pkg/mlrval/mlrval_constants.go \ - internal/pkg/mlrval/mlrval_new.go \ - internal/pkg/mlrval/mlrval_cmp.go \ - internal/pkg/mlrval/mlrval_copy.go \ - internal/pkg/mlrval/mlrval_infer.go \ - internal/pkg/mlrval/mlrval_is.go \ - internal/pkg/mlrval/mlrval_get.go \ - internal/pkg/mlrval/mlrval_output.go \ - internal/pkg/mlrval/mlrval_format.go - -mlrmap-tests: mlrmap-new-test mlrmap-accessors-test - -input-dkvp-test: - go test internal/pkg/input/record_reader_dkvp_test.go \ - internal/pkg/input/record_reader.go \ - internal/pkg/input/record_reader_dkvp_nidx.go -input-tests: input-dkvp-test - -bifs-arithmetic-test: - go test internal/pkg/bifs/arithmetic_test.go \ - internal/pkg/bifs/base.go \ - internal/pkg/bifs/arithmetic.go -bifs-bits-test: - go test internal/pkg/bifs/bits_test.go \ - internal/pkg/bifs/base.go \ - internal/pkg/bifs/arithmetic.go \ - internal/pkg/bifs/bits.go -bifs-collections-test: - go test internal/pkg/bifs/collections_test.go \ - internal/pkg/bifs/base.go \ - internal/pkg/bifs/arithmetic.go \ - internal/pkg/bifs/collections.go -bifs-hashing-test: - go test internal/pkg/bifs/hashing_test.go \ - internal/pkg/bifs/base.go \ - internal/pkg/bifs/arithmetic.go \ - internal/pkg/bifs/hashing.go -bifs-sort-test: - go test internal/pkg/bifs/sort_test.go \ - internal/pkg/bifs/base.go \ - internal/pkg/bifs/arithmetic.go \ - 
internal/pkg/bifs/sort.go - -bifs-tests: bifs-arithmetic-test bifs-bits-test bifs-collections-test bifs-hashing-test bifs-sort-test - -#mlrval_functions_test: -# go test internal/pkg/mlrval/mlrval_functions_test.go $(ls internal/pkg/types/*.go | grep -v test) -#mlrval_format_test: -# go test internal/pkg/mlrval/mlrval_format_test.go $(ls internal/pkg/types/*.go|grep -v test) - -tests-in-order: mlrval-tests mlrmap-tests input-tests bifs-tests - # ---------------------------------------------------------------- # Regression tests (large number) # diff --git a/docs/src/csv-with-and-without-headers.md b/docs/src/csv-with-and-without-headers.md index cfe8f02c3..7db6853b2 100644 --- a/docs/src/csv-with-and-without-headers.md +++ b/docs/src/csv-with-and-without-headers.md @@ -121,31 +121,31 @@ Here's some sample CSV data which is values-only, i.e. headerless: There are clearly nine fields here, but if we try to have Miller parse it as CSV, we -see there are fewer than nine columns: +see something happened:
 mlr --csv cat data/nas.csv
 
--349801.10097848,4537221.43295653,2,1,NA
--338681.59578181,4537221.43295653,14,1,0.964
--334975.09404959,4537221.43295653,18,1,NA
--332195.21775042,4537221.43295653,21,1,0.96
--331268.59231736,4537221.43295653,22,1,0.962
--330341.96688431,4537221.43295653,23,1,0.962
--326635.46515209,4537221.43295653,27,1,0.958
+-349801.10097848,4537221.43295653,2,1,NA,NA_2,NA_3,NA_4,NA_5
+-338681.59578181,4537221.43295653,14,1,13.1,1,0.978,0.964,0.964
+-334975.09404959,4537221.43295653,18,1,13.1,1,NA,NA,NA
+-332195.21775042,4537221.43295653,21,1,13.1,1,0.978,0.974,0.96
+-331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962
+-330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962
+-326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958
 
What happened? Miller is (by central design) a mapping from name to value, rather than integer position to value as in most tools in the Unix toolkit such as `sort`, `cut`, -`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1` -is stored, then updated with `Yea=2`. This is in the input-parser and the value -`Yea=1` is unavailable to any further processing. +`awk`, etc. And its default behavior with repeated column/field names is to append `_2`, `_3`, etc to dedupe them. +So given input `Yea=1,Yea=2` on the same input line, first `Yea=1` +is stored, then `Yea_2=2` is stored alongside it. This is in the input-parser. -Here, the first data line is being seen as a header ine, and the repeated `NA` -values are being seen as duplicate keys. +Here, the first data line is being seen as a header line, and the repeated `NA` +values are being seen as duplicate keys that need to be deduplicated. One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`: diff --git a/docs/src/csv-with-and-without-headers.md.in b/docs/src/csv-with-and-without-headers.md.in index 0d9ece9e7..9096a605c 100644 --- a/docs/src/csv-with-and-without-headers.md.in +++ b/docs/src/csv-with-and-without-headers.md.in @@ -48,7 +48,7 @@ cat data/nas.csv GENMD-EOF There are clearly nine fields here, but if we try to have Miller parse it as CSV, we -see there are fewer than nine columns: +see something happened: GENMD-RUN-COMMAND mlr --csv cat data/nas.csv @@ -58,12 +58,13 @@ What happened? Miller is (by central design) a mapping from name to value, rather than integer position to value as in most tools in the Unix toolkit such as `sort`, `cut`, -`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1` -is stored, then updated with `Yea=2`. This is in the input-parser and the value -`Yea=1` is unavailable to any further processing. +`awk`, etc. And its default behavior with repeated column/field names is to +append `_2`, `_3`, etc to dedupe them. 
So given input `Yea=1,Yea=2` on the +same input line, first `Yea=1` is stored, then `Yea_2=2`. This is in the +input-parser. -Here, the first data line is being seen as a header ine, and the repeated `NA` -values are being seen as duplicate keys. +Here, the first data line is being seen as a header line, and the repeated `NA` +values are being seen as duplicate keys that need to be deduplicated. One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 4349c61d5..348668e44 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -507,6 +507,13 @@ MISCELLANEOUS FLAGS what you might hope but `--mfrom *.csv --` does. --mload {filenames} Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`. + --no-dedupe-field-names By default, if an input record has a field name x and + another also named x, the second will be renamed x_2, + and so on. With this flag provided, the second x's + value will replace the first x's value when the + record is read. This flag has no effect on JSON input + records, where duplicate keys always result in the + last one's value being retained. --no-fflush Let buffered output not be written after every output record. The default is flush output after every record if the output is to the terminal, or less @@ -3010,5 +3017,5 @@ SEE ALSO - 2021-12-22 MILLER(1) + 2021-12-23 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index e4fdf8c0a..de8ce9da5 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS what you might hope but `--mfrom *.csv --` does. --mload {filenames} Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`. + --no-dedupe-field-names By default, if an input record has a field name x and + another also named x, the second will be renamed x_2, + and so on. 
With this flag provided, the second x's + value will replace the first x's value when the + record is read. This flag has no effect on JSON input + records, where duplicate keys always result in the + last one's value being retained. --no-fflush Let buffered output not be written after every output record. The default is flush output after every record if the output is to the terminal, or less @@ -2989,4 +2996,4 @@ SEE ALSO - 2021-12-22 MILLER(1) + 2021-12-23 MILLER(1) diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md index 4e2df6034..40bbb5039 100644 --- a/docs/src/new-in-miller-6.md +++ b/docs/src/new-in-miller-6.md @@ -264,3 +264,4 @@ The following differences are rather technical. If they don't sound familiar to * See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags). * Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the [page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information. +* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`. diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in index 9a0206345..051670970 100644 --- a/docs/src/new-in-miller-6.md.in +++ b/docs/src/new-in-miller-6.md.in @@ -222,3 +222,4 @@ The following differences are rather technical. If they don't sound familiar to * See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags). 
* Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the [page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information. +* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`. diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 3aa2eeda8..065e09d7a 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -355,6 +355,8 @@ These are flags which don't fit into any other category. `: Use this to specify one of more input files before the verb(s), rather than after. May be used more than once. The list of filename must end with `--`. This is useful for example since `--from *.csv` doesn't do what you might hope but `--mfrom *.csv --` does. * `--mload {filenames} `: Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`. +* `--no-dedupe-field-names +`: By default, if an input record has a field name x and another also named x, the second will be renamed x_2, and so on. With this flag provided, the second x's value will replace the first x's value when the record is read. This flag has no effect on JSON input records, where duplicate keys always result in the last one's value being retained. * `--no-fflush `: Let buffered output not be written after every output record. The default is flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. 
The default is a significant performance optimization for large files. Use this flag to allow less-frequent updates when output is to the terminal. This is unlikely to be a noticeable performance improvement, since direct-to-screen output for large files has its own overhead. * `--no-hash-records diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go index c88a7bd4c..6ad78339f 100644 --- a/internal/pkg/cli/option_parse.go +++ b/internal/pkg/cli/option_parse.go @@ -2557,6 +2557,18 @@ var MiscFlagSection = FlagSection{ }, }, + { + name: "--no-dedupe-field-names", + help: `By default, if an input record has a field name x and +another also named x, the second will be renamed x_2, and so on. With this flag provided, the +second x's value will replace the first x's value when the record is read. This flag has no effect +on JSON input records, where duplicate keys always result in the last one's value being retained.`, + parser: func(args []string, argc int, pargi *int, options *TOptions) { + options.ReaderOptions.DedupeFieldNames = false + *pargi += 1 + }, + }, + { name: "--records-per-batch", arg: "{n}", diff --git a/internal/pkg/cli/option_types.go b/internal/pkg/cli/option_types.go index 86bcf453c..c008c9553 100644 --- a/internal/pkg/cli/option_types.go +++ b/internal/pkg/cli/option_types.go @@ -46,6 +46,7 @@ type TReaderOptions struct { IPSRegex *regexp.Regexp SuppressIFSRegexing bool // e.g. if they want to do '--ifs .' since '.' is a regex metacharacter SuppressIPSRegexing bool // e.g. if they want to do '--ips .' since '.' is a regex metacharacter + DedupeFieldNames bool // If unspecified on the command line, these take input-format-dependent // defaults. E.g. 
default FS is comma for DKVP but space for NIDX; @@ -186,6 +187,7 @@ func DefaultReaderOptions() TReaderOptions { StepAsString: DEFAULT_GEN_STEP_AS_STRING, StopAsString: DEFAULT_GEN_STOP_AS_STRING, }, + DedupeFieldNames: true, // TODO: comment RecordsPerBatch: DEFAULT_RECORDS_PER_BATCH, diff --git a/internal/pkg/input/record_reader_csv.go b/internal/pkg/input/record_reader_csv.go index a5f89b5a5..1aef46b8d 100644 --- a/internal/pkg/input/record_reader_csv.go +++ b/internal/pkg/input/record_reader_csv.go @@ -182,6 +182,7 @@ func (reader *RecordReaderCSV) getRecordBatch( eof bool, ) { recordsAndContexts = list.New() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames csvRecords, more := <-csvRecordsChannel if !more { @@ -226,7 +227,11 @@ func (reader *RecordReaderCSV) getRecordBatch( for i := 0; i < nh; i++ { key := reader.header[i] value := mlrval.FromDeferredType(csvRecord[i]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } else { @@ -246,18 +251,30 @@ func (reader *RecordReaderCSV) getRecordBatch( for i = 0; i < n; i++ { key := reader.header[i] value := mlrval.FromDeferredType(csvRecord[i]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } if nh < nd { // if header shorter than data: use 1-up itoa keys key := strconv.Itoa(i + 1) value := mlrval.FromDeferredType(csvRecord[i]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } if nh > nd { // if header longer than data: use "" values for i = nd; i < nh; i++ { - record.PutCopy(reader.header[i], mlrval.VOID) + _, err := record.PutReferenceMaybeDedupe(reader.header[i], mlrval.VOID.Copy(), dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } } diff --git 
a/internal/pkg/input/record_reader_csvlite.go b/internal/pkg/input/record_reader_csvlite.go index eaf26b5f3..759363386 100644 --- a/internal/pkg/input/record_reader_csvlite.go +++ b/internal/pkg/input/record_reader_csvlite.go @@ -179,6 +179,7 @@ func getRecordBatchExplicitCSVHeader( eof bool, ) { recordsAndContexts = list.New() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames lines, more := <-linesChannel if !more { @@ -247,7 +248,11 @@ func getRecordBatchExplicitCSVHeader( if !reader.readerOptions.AllowRaggedCSVInput { for i, field := range fields { value := mlrval.FromDeferredType(field) - record.PutReference(reader.headerStrings[i], value) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } else { nh := len(reader.headerStrings) @@ -256,14 +261,22 @@ func getRecordBatchExplicitCSVHeader( var i int for i = 0; i < n; i++ { value := mlrval.FromDeferredType(fields[i]) - record.PutReference(reader.headerStrings[i], value) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } if nh < nd { // if header shorter than data: use 1-up itoa keys for i = nh; i < nd; i++ { key := strconv.Itoa(i + 1) value := mlrval.FromDeferredType(fields[i]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } if nh > nd { @@ -293,6 +306,7 @@ func getRecordBatchImplicitCSVHeader( eof bool, ) { recordsAndContexts = list.New() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames lines, more := <-linesChannel if !more { @@ -364,7 +378,11 @@ func getRecordBatchImplicitCSVHeader( if !reader.readerOptions.AllowRaggedCSVInput { for i, field := range fields { value := mlrval.FromDeferredType(field) - record.PutReference(reader.headerStrings[i], value) + _, err := 
record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } else { nh := len(reader.headerStrings) @@ -373,18 +391,30 @@ func getRecordBatchImplicitCSVHeader( var i int for i = 0; i < n; i++ { value := mlrval.FromDeferredType(fields[i]) - record.PutReference(reader.headerStrings[i], value) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } if nh < nd { // if header shorter than data: use 1-up itoa keys key := strconv.Itoa(i + 1) value := mlrval.FromDeferredType(fields[i]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } if nh > nd { // if header longer than data: use "" values for i = nd; i < nh; i++ { - record.PutCopy(reader.headerStrings[i], mlrval.VOID) + _, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], mlrval.VOID.Copy(), dedupeFieldNames) + if err != nil { + errorChannel <- err + return + } } } } diff --git a/internal/pkg/input/record_reader_dkvp_nidx.go b/internal/pkg/input/record_reader_dkvp_nidx.go index 6c6039f9f..ebcbff987 100644 --- a/internal/pkg/input/record_reader_dkvp_nidx.go +++ b/internal/pkg/input/record_reader_dkvp_nidx.go @@ -16,7 +16,7 @@ import ( // splitter_DKVP_NIDX is a function type for the one bit of code differing // between the DKVP reader and the NIDX reader, namely, how it splits lines. 
-type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap +type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) type RecordReaderDKVPNIDX struct { readerOptions *cli.TReaderOptions @@ -100,7 +100,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle( go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch) for { - recordsAndContexts, eof := reader.getRecordBatch(linesChannel, context) + recordsAndContexts, eof := reader.getRecordBatch(linesChannel, errorChannel, context) if recordsAndContexts.Len() > 0 { readerChannel <- recordsAndContexts } @@ -113,6 +113,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle( // TODO: comment copiously we're trying to handle slow/fast/short/long reads: tail -f, smallfile, bigfile. func (reader *RecordReaderDKVPNIDX) getRecordBatch( linesChannel <-chan *list.List, + errorChannel chan<- error, context *types.Context, ) ( recordsAndContexts *list.List, @@ -142,7 +143,11 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch( } } - record := reader.splitter(reader, line) + record, err := reader.splitter(reader, line) + if err != nil { + errorChannel <- err + return + } context.UpdateForInputRecord() recordAndContext := types.NewRecordAndContext(record, context) recordsAndContexts.PushBack(recordAndContext) @@ -151,8 +156,9 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch( return recordsAndContexts, false } -func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap { +func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) { record := mlrval.NewMlrmapAsRecord() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames var pairs []string // TODO: func-pointer this away @@ -181,17 +187,23 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma // DKVP is a generalization of NIDX. 
key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up value := mlrval.FromDeferredType(kv[0]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + return nil, err + } } else { key := kv[0] value := mlrval.FromDeferredType(kv[1]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + return nil, err + } } } - return record + return record, nil } -func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap { +func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) { record := mlrval.NewMlrmapAsRecord() var values []string @@ -212,5 +224,5 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma mval := mlrval.FromDeferredType(value) record.PutReference(key, mval) } - return record + return record, nil } diff --git a/internal/pkg/input/record_reader_dkvp_test.go b/internal/pkg/input/record_reader_dkvp_test.go index 33e9aa194..d4ab45daf 100644 --- a/internal/pkg/input/record_reader_dkvp_test.go +++ b/internal/pkg/input/record_reader_dkvp_test.go @@ -16,13 +16,15 @@ func TestRecordFromDKVPLine(t *testing.T) { assert.Nil(t, err) line := "" - record := recordFromDKVPLine(reader, line) + record, err := recordFromDKVPLine(reader, line) assert.NotNil(t, record) + assert.Nil(t, err) assert.Equal(t, 0, record.FieldCount) line = "a=1,b=2,c=3" - record = recordFromDKVPLine(reader, line) + record, err = recordFromDKVPLine(reader, line) assert.NotNil(t, record) + assert.Nil(t, err) assert.Equal(t, 3, record.FieldCount) assert.NotNil(t, record.Head) @@ -33,20 +35,25 @@ func TestRecordFromDKVPLine(t *testing.T) { assert.Equal(t, record.Head.Next.Key, "b") assert.Equal(t, record.Head.Next.Next.Key, "c") + // Default is to dedupe to a=1,b=2,b_2=3 line = "a=1,b=2,b=3" - record = recordFromDKVPLine(reader, line) + record, err = 
recordFromDKVPLine(reader, line) assert.NotNil(t, record) - assert.Equal(t, 2, record.FieldCount) + assert.Nil(t, err) + assert.Equal(t, 3, record.FieldCount) assert.NotNil(t, record.Head) assert.NotNil(t, record.Head.Next) - assert.Nil(t, record.Head.Next.Next) + assert.NotNil(t, record.Head.Next.Next) + assert.Nil(t, record.Head.Next.Next.Next) assert.Equal(t, record.Head.Key, "a") assert.Equal(t, record.Head.Next.Key, "b") + assert.Equal(t, record.Head.Next.Next.Key, "b_2") line = "a,b,c" - record = recordFromDKVPLine(reader, line) + record, err = recordFromDKVPLine(reader, line) assert.NotNil(t, record) + assert.Nil(t, err) assert.Equal(t, 3, record.FieldCount) assert.NotNil(t, record.Head) diff --git a/internal/pkg/input/record_reader_xtab.go b/internal/pkg/input/record_reader_xtab.go index ec7530e58..65ceabefb 100644 --- a/internal/pkg/input/record_reader_xtab.go +++ b/internal/pkg/input/record_reader_xtab.go @@ -257,6 +257,7 @@ func (reader *RecordReaderXTAB) recordFromXTABLines( stanza *list.List, ) (*mlrval.Mlrmap, error) { record := mlrval.NewMlrmapAsRecord() + dedupeFieldNames := reader.readerOptions.DedupeFieldNames for e := stanza.Front(); e != nil; e = e.Next() { line := e.Value.(string) @@ -274,10 +275,16 @@ func (reader *RecordReaderXTAB) recordFromXTABLines( key := kv[0] if len(kv) == 1 { value := mlrval.VOID - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + return nil, err + } } else { value := mlrval.FromDeferredType(kv[1]) - record.PutReference(key, value) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + return nil, err + } } } diff --git a/internal/pkg/mlrval/mlrmap_accessors.go b/internal/pkg/mlrval/mlrmap_accessors.go index 5fccacbae..9a02bae9e 100644 --- a/internal/pkg/mlrval/mlrmap_accessors.go +++ b/internal/pkg/mlrval/mlrmap_accessors.go @@ -3,6 +3,8 @@ package mlrval import ( "bytes" "errors" + "fmt" + "strconv" 
"github.com/johnkerl/miller/internal/pkg/lib" ) @@ -28,30 +30,65 @@ func (mlrmap *Mlrmap) Get(key string) *Mlrval { // PutReference copies the key but not the value. This is not safe for DSL use, // where we could create undesired references between different objects. Only -// intended to be used at callsites which allocate a mlrval solely for the -// purpose of putting into a map, e.g. input-record readers. +// intended to be used at callsites which allocate a mlrval on the spot, solely +// for the purpose of putting into the map. func (mlrmap *Mlrmap) PutReference(key string, value *Mlrval) { pe := mlrmap.findEntry(key) if pe == nil { - pe = newMlrmapEntry(key, value) - if mlrmap.Head == nil { - mlrmap.Head = pe - mlrmap.Tail = pe - } else { - pe.Prev = mlrmap.Tail - pe.Next = nil - mlrmap.Tail.Next = pe - mlrmap.Tail = pe - } - if mlrmap.keysToEntries != nil { - mlrmap.keysToEntries[key] = pe - } - mlrmap.FieldCount++ + mlrmap.putReferenceNewAux(key, value) } else { pe.Value = value } } +// putReferenceNewAux is a helper function for code shared between PutReference +// and PutReferenceMaybeDedupe. It should not be invoked from anywhere else -- +// it doesn't do its own check if the key already exists in the record or not. +func (mlrmap *Mlrmap) putReferenceNewAux(key string, value *Mlrval) { + pe := newMlrmapEntry(key, value) + if mlrmap.Head == nil { + mlrmap.Head = pe + mlrmap.Tail = pe + } else { + pe.Prev = mlrmap.Tail + pe.Next = nil + mlrmap.Tail.Next = pe + mlrmap.Tail = pe + } + if mlrmap.keysToEntries != nil { + mlrmap.keysToEntries[key] = pe + } + mlrmap.FieldCount++ +} + +// PutReferenceMaybeDedupe is the default inserter for key-value pairs in input records -- +// if the input is 'x=8,x=9` then we make a record with x=8 and x_2=9. This can be suppressed +// via a command-line flag which this method's dedupe flag respects. 
+func (mlrmap *Mlrmap) PutReferenceMaybeDedupe(key string, value *Mlrval, dedupe bool) (string, error) { + if !dedupe { + mlrmap.PutReference(key, value) + return key, nil + } + + pe := mlrmap.findEntry(key) + if pe == nil { + mlrmap.putReferenceNewAux(key, value) + return key, nil + } + + for i := 2; i < 1000; i++ { + newKey := key + "_" + strconv.Itoa(i) + pe := mlrmap.findEntry(newKey) + if pe == nil { + mlrmap.putReferenceNewAux(newKey, value) + return newKey, nil + } + } + return key, errors.New( + fmt.Sprintf("record has too many input fields named \"%s\"", key), + ) +} + // PutCopy copies the key and value (deep-copying in case the value is array/map). // This is safe for DSL use. See also PutReference. func (mlrmap *Mlrmap) PutCopy(key string, value *Mlrval) { diff --git a/man/manpage.txt b/man/manpage.txt index e4fdf8c0a..de8ce9da5 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS what you might hope but `--mfrom *.csv --` does. --mload {filenames} Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`. + --no-dedupe-field-names By default, if an input record has a field name x and + another also named x, the second will be renamed x_2, + and so on. With this flag provided, the second x's + value will replace the first x's value when the + record is read. This flag has no effect on JSON input + records, where duplicate keys always result in the + last one's value being retained. --no-fflush Let buffered output not be written after every output record. 
The default is flush output after every record if the output is to the terminal, or less @@ -2989,4 +2996,4 @@ SEE ALSO - 2021-12-22 MILLER(1) + 2021-12-23 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index e51890ebc..092ffacee 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2021-12-22 +.\" Date: 2021-12-23 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2021-12-22" "\ \&" "\ \&" +.TH "MILLER" "1" "2021-12-23" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -605,6 +605,13 @@ These are flags which don't fit into any other category. what you might hope but `--mfrom *.csv --` does. --mload {filenames} Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`. +--no-dedupe-field-names By default, if an input record has a field name x and + another also named x, the second will be renamed x_2, + and so on. With this flag provided, the second x's + value will replace the first x's value when the + record is read. This flag has no effect on JSON input + records, where duplicate keys always result in the + last one's value being retained. --no-fflush Let buffered output not be written after every output record. 
The default is flush output after every record if the output is to the terminal, or less diff --git a/test/cases/io-dedupe-field-names/csv-no-dedupe/cmd b/test/cases/io-dedupe-field-names/csv-no-dedupe/cmd new file mode 100644 index 000000000..92502cae9 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --icsv --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/csv-no-dedupe/experr b/test/cases/io-dedupe-field-names/csv-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/csv-no-dedupe/expout b/test/cases/io-dedupe-field-names/csv-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/csv-no-dedupe/input b/test/cases/io-dedupe-field-names/csv-no-dedupe/input new file mode 100644 index 000000000..c14a15065 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv-no-dedupe/input @@ -0,0 +1,2 @@ +a,b,b,c,b +1,2,3,4,5 diff --git a/test/cases/io-dedupe-field-names/csv/cmd b/test/cases/io-dedupe-field-names/csv/cmd new file mode 100644 index 000000000..4767dec35 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv/cmd @@ -0,0 +1 @@ +mlr --icsv --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/csv/experr b/test/cases/io-dedupe-field-names/csv/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/csv/expout b/test/cases/io-dedupe-field-names/csv/expout new file mode 100644 index 000000000..d688c7b6d --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv/expout @@ -0,0 +1,7 @@ +{ + "a": 1, + "b": 2, + "b_2": 3, + "c": 4, + "b_3": 5 +} diff --git a/test/cases/io-dedupe-field-names/csv/input b/test/cases/io-dedupe-field-names/csv/input new file mode 100644 index 
000000000..c14a15065 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csv/input @@ -0,0 +1,2 @@ +a,b,b,c,b +1,2,3,4,5 diff --git a/test/cases/io-dedupe-field-names/csvlite-no-dedupe/cmd b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/cmd new file mode 100644 index 000000000..b6daa8365 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --icsvlite --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/csvlite-no-dedupe/experr b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/csvlite-no-dedupe/expout b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/csvlite-no-dedupe/input b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/input new file mode 100644 index 000000000..c14a15065 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite-no-dedupe/input @@ -0,0 +1,2 @@ +a,b,b,c,b +1,2,3,4,5 diff --git a/test/cases/io-dedupe-field-names/csvlite/cmd b/test/cases/io-dedupe-field-names/csvlite/cmd new file mode 100644 index 000000000..649be33a0 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite/cmd @@ -0,0 +1 @@ +mlr --icsvlite --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/csvlite/experr b/test/cases/io-dedupe-field-names/csvlite/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/csvlite/expout b/test/cases/io-dedupe-field-names/csvlite/expout new file mode 100644 index 000000000..d688c7b6d --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite/expout @@ -0,0 +1,7 @@ +{ + "a": 1, + "b": 2, + "b_2": 3, + "c": 4, + "b_3": 5 +} diff --git 
a/test/cases/io-dedupe-field-names/csvlite/input b/test/cases/io-dedupe-field-names/csvlite/input new file mode 100644 index 000000000..c14a15065 --- /dev/null +++ b/test/cases/io-dedupe-field-names/csvlite/input @@ -0,0 +1,2 @@ +a,b,b,c,b +1,2,3,4,5 diff --git a/test/cases/io-dedupe-field-names/dkvp-no-dedupe/cmd b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/cmd new file mode 100644 index 000000000..3e972e344 --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --idkvp --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/dkvp-no-dedupe/experr b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/dkvp-no-dedupe/expout b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/dkvp-no-dedupe/input b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/input new file mode 100644 index 000000000..1cb2bd92d --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp-no-dedupe/input @@ -0,0 +1 @@ +a=1,b=2,b=3,c=4,b=5 diff --git a/test/cases/io-dedupe-field-names/dkvp/cmd b/test/cases/io-dedupe-field-names/dkvp/cmd new file mode 100644 index 000000000..3e76139f6 --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp/cmd @@ -0,0 +1 @@ +mlr --idkvp --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/dkvp/experr b/test/cases/io-dedupe-field-names/dkvp/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/dkvp/expout b/test/cases/io-dedupe-field-names/dkvp/expout new file mode 100644 index 000000000..d688c7b6d --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp/expout @@ -0,0 +1,7 @@ +{ + "a": 1, + "b": 2, + 
"b_2": 3, + "c": 4, + "b_3": 5 +} diff --git a/test/cases/io-dedupe-field-names/dkvp/input b/test/cases/io-dedupe-field-names/dkvp/input new file mode 100644 index 000000000..1cb2bd92d --- /dev/null +++ b/test/cases/io-dedupe-field-names/dkvp/input @@ -0,0 +1 @@ +a=1,b=2,b=3,c=4,b=5 diff --git a/test/cases/io-dedupe-field-names/json-no-dedupe/cmd b/test/cases/io-dedupe-field-names/json-no-dedupe/cmd new file mode 100644 index 000000000..171db9908 --- /dev/null +++ b/test/cases/io-dedupe-field-names/json-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --ijson --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/json-no-dedupe/experr b/test/cases/io-dedupe-field-names/json-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/json-no-dedupe/expout b/test/cases/io-dedupe-field-names/json-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/json-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/json-no-dedupe/input b/test/cases/io-dedupe-field-names/json-no-dedupe/input new file mode 100644 index 000000000..47c4265dc --- /dev/null +++ b/test/cases/io-dedupe-field-names/json-no-dedupe/input @@ -0,0 +1 @@ +{"a":1,"b":2,"b":3,"c":4,"b":5} diff --git a/test/cases/io-dedupe-field-names/json/cmd b/test/cases/io-dedupe-field-names/json/cmd new file mode 100644 index 000000000..ef2d784e4 --- /dev/null +++ b/test/cases/io-dedupe-field-names/json/cmd @@ -0,0 +1 @@ +mlr --ijson --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/json/experr b/test/cases/io-dedupe-field-names/json/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/json/expout b/test/cases/io-dedupe-field-names/json/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ 
b/test/cases/io-dedupe-field-names/json/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/json/input b/test/cases/io-dedupe-field-names/json/input new file mode 100644 index 000000000..47c4265dc --- /dev/null +++ b/test/cases/io-dedupe-field-names/json/input @@ -0,0 +1 @@ +{"a":1,"b":2,"b":3,"c":4,"b":5} diff --git a/test/cases/io-dedupe-field-names/nidx-no-dedupe/cmd b/test/cases/io-dedupe-field-names/nidx-no-dedupe/cmd new file mode 100644 index 000000000..dface1c6b --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --inidx --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/nidx-no-dedupe/experr b/test/cases/io-dedupe-field-names/nidx-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/nidx-no-dedupe/expout b/test/cases/io-dedupe-field-names/nidx-no-dedupe/expout new file mode 100644 index 000000000..dac3aa847 --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx-no-dedupe/expout @@ -0,0 +1,7 @@ +{ + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5 +} diff --git a/test/cases/io-dedupe-field-names/nidx-no-dedupe/input b/test/cases/io-dedupe-field-names/nidx-no-dedupe/input new file mode 100644 index 000000000..aacb59525 --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx-no-dedupe/input @@ -0,0 +1 @@ +1 2 3 4 5 diff --git a/test/cases/io-dedupe-field-names/nidx/cmd b/test/cases/io-dedupe-field-names/nidx/cmd new file mode 100644 index 000000000..60236f669 --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx/cmd @@ -0,0 +1 @@ +mlr --inidx --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/nidx/experr b/test/cases/io-dedupe-field-names/nidx/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/nidx/expout b/test/cases/io-dedupe-field-names/nidx/expout new file mode 100644 index 
000000000..dac3aa847 --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx/expout @@ -0,0 +1,7 @@ +{ + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5 +} diff --git a/test/cases/io-dedupe-field-names/nidx/input b/test/cases/io-dedupe-field-names/nidx/input new file mode 100644 index 000000000..aacb59525 --- /dev/null +++ b/test/cases/io-dedupe-field-names/nidx/input @@ -0,0 +1 @@ +1 2 3 4 5 diff --git a/test/cases/io-dedupe-field-names/pprint-no-dedupe/cmd b/test/cases/io-dedupe-field-names/pprint-no-dedupe/cmd new file mode 100644 index 000000000..c15d700d4 --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --ipprint --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/pprint-no-dedupe/experr b/test/cases/io-dedupe-field-names/pprint-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/pprint-no-dedupe/expout b/test/cases/io-dedupe-field-names/pprint-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/pprint-no-dedupe/input b/test/cases/io-dedupe-field-names/pprint-no-dedupe/input new file mode 100644 index 000000000..ae356aa22 --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint-no-dedupe/input @@ -0,0 +1,2 @@ +a b b c b +1 2 3 4 5 diff --git a/test/cases/io-dedupe-field-names/pprint/cmd b/test/cases/io-dedupe-field-names/pprint/cmd new file mode 100644 index 000000000..1e0640dfa --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint/cmd @@ -0,0 +1 @@ +mlr --ipprint --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/pprint/experr b/test/cases/io-dedupe-field-names/pprint/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/pprint/expout 
b/test/cases/io-dedupe-field-names/pprint/expout new file mode 100644 index 000000000..d688c7b6d --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint/expout @@ -0,0 +1,7 @@ +{ + "a": 1, + "b": 2, + "b_2": 3, + "c": 4, + "b_3": 5 +} diff --git a/test/cases/io-dedupe-field-names/pprint/input b/test/cases/io-dedupe-field-names/pprint/input new file mode 100644 index 000000000..ae356aa22 --- /dev/null +++ b/test/cases/io-dedupe-field-names/pprint/input @@ -0,0 +1,2 @@ +a b b c b +1 2 3 4 5 diff --git a/test/cases/io-dedupe-field-names/xtab-no-dedupe/cmd b/test/cases/io-dedupe-field-names/xtab-no-dedupe/cmd new file mode 100644 index 000000000..80e85ea21 --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab-no-dedupe/cmd @@ -0,0 +1 @@ +mlr --no-dedupe-field-names --ixtab --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/xtab-no-dedupe/experr b/test/cases/io-dedupe-field-names/xtab-no-dedupe/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/io-dedupe-field-names/xtab-no-dedupe/expout b/test/cases/io-dedupe-field-names/xtab-no-dedupe/expout new file mode 100644 index 000000000..c403e0f33 --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab-no-dedupe/expout @@ -0,0 +1,5 @@ +{ + "a": 1, + "b": 5, + "c": 4 +} diff --git a/test/cases/io-dedupe-field-names/xtab-no-dedupe/input b/test/cases/io-dedupe-field-names/xtab-no-dedupe/input new file mode 100644 index 000000000..d717d1c01 --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab-no-dedupe/input @@ -0,0 +1,5 @@ +a 1 +b 2 +b 3 +c 4 +b 5 diff --git a/test/cases/io-dedupe-field-names/xtab/cmd b/test/cases/io-dedupe-field-names/xtab/cmd new file mode 100644 index 000000000..a60492a71 --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab/cmd @@ -0,0 +1 @@ +mlr --ixtab --ojson cat ${CASEDIR}/input diff --git a/test/cases/io-dedupe-field-names/xtab/experr b/test/cases/io-dedupe-field-names/xtab/experr new file mode 100644 index 000000000..e69de29bb diff 
--git a/test/cases/io-dedupe-field-names/xtab/expout b/test/cases/io-dedupe-field-names/xtab/expout new file mode 100644 index 000000000..d688c7b6d --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab/expout @@ -0,0 +1,7 @@ +{ + "a": 1, + "b": 2, + "b_2": 3, + "c": 4, + "b_3": 5 +} diff --git a/test/cases/io-dedupe-field-names/xtab/input b/test/cases/io-dedupe-field-names/xtab/input new file mode 100644 index 000000000..d717d1c01 --- /dev/null +++ b/test/cases/io-dedupe-field-names/xtab/input @@ -0,0 +1,5 @@ +a 1 +b 2 +b 3 +c 4 +b 5