Dedupe field names by default (#794)

This commit is contained in:
John Kerl 2021-12-22 21:07:29 -05:00 committed by GitHub
parent 6b87a121b0
commit 157e567909
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
75 changed files with 352 additions and 195 deletions

8
.vimrc
View file

@ -1,8 +1,2 @@
map \d :w<C-m>:!clear;echo Building ...; echo; make mlr<C-m>
map \f :w<C-m>:!clear;echo Building ...; echo; make tests-in-order<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrval-tests<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrmap-tests<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make input-tests<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make mlrval-format-test<C-m>
"map \r :w<C-m>:!clear;echo Building ...; echo; make bifs-tests<C-m>
map \r :w<C-m>:!clear;echo Building ...; echo; make bifs-collections-test<C-m>
map \f :w<C-m>:!clear;echo Building ...; echo; make ut<C-m>

120
Makefile
View file

@ -31,126 +31,6 @@ install: build
unit-test ut:
go test github.com/johnkerl/miller/internal/pkg/...
# Keystroke-savers
lib-unbackslash-test:
go test internal/pkg/lib/unbackslash_test.go internal/pkg/lib/unbackslash.go
lib_regex_test:
go test internal/pkg/lib/regex_test.go internal/pkg/lib/regex.go
lib-tests:
go test github.com/johnkerl/miller/internal/pkg/lib/...
mlrval-new-test:
go test internal/pkg/mlrval/new_test.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go
mlrval-is-test:
go test internal/pkg/mlrval/is_test.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go
mlrval-get-test:
go test internal/pkg/mlrval/get_test.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go \
internal/pkg/mlrval/mlrval_get.go
mlrval-output-test:
go test internal/pkg/mlrval/output_test.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go \
internal/pkg/mlrval/mlrval_get.go \
internal/pkg/mlrval/mlrval_output.go \
internal/pkg/mlrval/mlrval_format.go
mlrval-format-test:
go test internal/pkg/mlrval/format_test.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go \
internal/pkg/mlrval/mlrval_get.go \
internal/pkg/mlrval/mlrval_output.go \
internal/pkg/mlrval/mlrval_format.go
mlrval-tests:
go test github.com/johnkerl/miller/internal/pkg/mlrval/...
mlrmap-new-test:
go test internal/pkg/mlrval/mlrmap_new_test.go \
internal/pkg/mlrval/mlrmap.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go \
internal/pkg/mlrval/mlrval_get.go \
internal/pkg/mlrval/mlrval_output.go \
internal/pkg/mlrval/mlrval_format.go
mlrmap-accessors-test:
go test internal/pkg/mlrval/mlrmap_accessors_test.go \
internal/pkg/mlrval/mlrmap.go \
internal/pkg/mlrval/mlrmap_accessors.go \
internal/pkg/mlrval/mlrval_type.go \
internal/pkg/mlrval/mlrval_constants.go \
internal/pkg/mlrval/mlrval_new.go \
internal/pkg/mlrval/mlrval_cmp.go \
internal/pkg/mlrval/mlrval_copy.go \
internal/pkg/mlrval/mlrval_infer.go \
internal/pkg/mlrval/mlrval_is.go \
internal/pkg/mlrval/mlrval_get.go \
internal/pkg/mlrval/mlrval_output.go \
internal/pkg/mlrval/mlrval_format.go
mlrmap-tests: mlrmap-new-test mlrmap-accessors-test
input-dkvp-test:
go test internal/pkg/input/record_reader_dkvp_test.go \
internal/pkg/input/record_reader.go \
internal/pkg/input/record_reader_dkvp_nidx.go
input-tests: input-dkvp-test
bifs-arithmetic-test:
go test internal/pkg/bifs/arithmetic_test.go \
internal/pkg/bifs/base.go \
internal/pkg/bifs/arithmetic.go
bifs-bits-test:
go test internal/pkg/bifs/bits_test.go \
internal/pkg/bifs/base.go \
internal/pkg/bifs/arithmetic.go \
internal/pkg/bifs/bits.go
bifs-collections-test:
go test internal/pkg/bifs/collections_test.go \
internal/pkg/bifs/base.go \
internal/pkg/bifs/arithmetic.go \
internal/pkg/bifs/collections.go
bifs-hashing-test:
go test internal/pkg/bifs/hashing_test.go \
internal/pkg/bifs/base.go \
internal/pkg/bifs/arithmetic.go \
internal/pkg/bifs/hashing.go
bifs-sort-test:
go test internal/pkg/bifs/sort_test.go \
internal/pkg/bifs/base.go \
internal/pkg/bifs/arithmetic.go \
internal/pkg/bifs/sort.go
bifs-tests: bifs-arithmetic-test bifs-bits-test bifs-collections-test bifs-hashing-test bifs-sort-test
#mlrval_functions_test:
# go test internal/pkg/mlrval/mlrval_functions_test.go $(ls internal/pkg/types/*.go | grep -v test)
#mlrval_format_test:
# go test internal/pkg/mlrval/mlrval_format_test.go $(ls internal/pkg/types/*.go|grep -v test)
tests-in-order: mlrval-tests mlrmap-tests input-tests bifs-tests
# ----------------------------------------------------------------
# Regression tests (large number)
#

View file

@ -121,31 +121,31 @@ Here's some sample CSV data which is values-only, i.e. headerless:
</pre>
There are clearly nine fields here, but if we try to have Miller parse it as CSV, we
see there are fewer than nine columns:
see something happened:
<pre class="pre-highlight-in-pair">
<b>mlr --csv cat data/nas.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
-349801.10097848,4537221.43295653,2,1,NA
-338681.59578181,4537221.43295653,14,1,0.964
-334975.09404959,4537221.43295653,18,1,NA
-332195.21775042,4537221.43295653,21,1,0.96
-331268.59231736,4537221.43295653,22,1,0.962
-330341.96688431,4537221.43295653,23,1,0.962
-326635.46515209,4537221.43295653,27,1,0.958
-349801.10097848,4537221.43295653,2,1,NA,NA_2,NA_3,NA_4,NA_5
-338681.59578181,4537221.43295653,14,1,13.1,1,0.978,0.964,0.964
-334975.09404959,4537221.43295653,18,1,13.1,1,NA,NA,NA
-332195.21775042,4537221.43295653,21,1,13.1,1,0.978,0.974,0.96
-331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962
-330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962
-326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958
</pre>
What happened?
Miller is (by central design) a mapping from name to value, rather than integer
position to value as in most tools in the Unix toolkit such as `sort`, `cut`,
`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
is stored, then updated with `Yea=2`. This is in the input-parser and the value
`Yea=1` is unavailable to any further processing.
`awk`, etc. And its default behavior with repeated column/field names is to append `_2`, `_3`, etc. to dedupe them.
So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
is stored, then updated with `Yea_2=2`. This is in the input-parser.
Here, the first data line is being seen as a header ine, and the repeated `NA`
values are being seen as duplicate keys.
Here, the first data line is being seen as a header line, and the repeated `NA`
values are being seen as duplicate keys that need to be deduplicated.
One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`:

View file

@ -48,7 +48,7 @@ cat data/nas.csv
GENMD-EOF
There are clearly nine fields here, but if we try to have Miller parse it as CSV, we
see there are fewer than nine columns:
see something happened:
GENMD-RUN-COMMAND
mlr --csv cat data/nas.csv
@ -58,12 +58,13 @@ What happened?
Miller is (by central design) a mapping from name to value, rather than integer
position to value as in most tools in the Unix toolkit such as `sort`, `cut`,
`awk`, etc. So given input `Yea=1,Yea=2` on the same input line, first `Yea=1`
is stored, then updated with `Yea=2`. This is in the input-parser and the value
`Yea=1` is unavailable to any further processing.
`awk`, etc. And its default behavior with repeated column/field names is to
append `_2`, `_3`, etc. to dedupe them. So given input `Yea=1,Yea=2` on the
same input line, first `Yea=1` is stored, then `Yea_2=2`. This is in the
input-parser.
Here, the first data line is being seen as a header ine, and the repeated `NA`
values are being seen as duplicate keys.
Here, the first data line is being seen as a header line, and the repeated `NA`
values are being seen as duplicate keys that need to be deduplicated.
One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`:

View file

@ -507,6 +507,13 @@ MISCELLANEOUS FLAGS
what you might hope but `--mfrom *.csv --` does.
--mload {filenames} Like `--load` but works with more than one filename,
e.g. `--mload *.mlr --`.
--no-dedupe-field-names By default, if an input record has a field name x and
another also named x, the second will be renamed x_2,
and so on. With this flag provided, the second x's
value will replace the first x's value when the
record is read. This flag has no effect on JSON input
records, where duplicate keys always result in the
last one's value being retained.
--no-fflush Let buffered output not be written after every output
record. The default is flush output after every
record if the output is to the terminal, or less
@ -3010,5 +3017,5 @@ SEE ALSO
2021-12-22 MILLER(1)
2021-12-23 MILLER(1)
</pre>

View file

@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS
what you might hope but `--mfrom *.csv --` does.
--mload {filenames} Like `--load` but works with more than one filename,
e.g. `--mload *.mlr --`.
--no-dedupe-field-names By default, if an input record has a field name x and
another also named x, the second will be renamed x_2,
and so on. With this flag provided, the second x's
value will replace the first x's value when the
record is read. This flag has no effect on JSON input
records, where duplicate keys always result in the
last one's value being retained.
--no-fflush Let buffered output not be written after every output
record. The default is flush output after every
record if the output is to the terminal, or less
@ -2989,4 +2996,4 @@ SEE ALSO
2021-12-22 MILLER(1)
2021-12-23 MILLER(1)

View file

@ -264,3 +264,4 @@ The following differences are rather technical. If they don't sound familiar to
* See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags).
* Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the
[page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information.
* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`.

View file

@ -222,3 +222,4 @@ The following differences are rather technical. If they don't sound familiar to
* See also the [miscellaneous-flags reference](reference-main-flag-list.md#miscellaneous-flags).
* Emitting a map-valued expression now requires either a temporary variable or the new `emit1` keyword. Please see the
[page on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) for more information.
* By default, field names are deduped for all file formats except JSON. So if you have an input record with `x=8,x=9` then the second field's key is renamed to `x_2` and so on -- the record scans as `x=8,x_2=9`. Use `mlr --no-dedupe-field-names` to suppress this, and have the record be scanned as `x=9`. For JSON, the last duplicated key in an input record is always retained, regardless of `mlr --no-dedupe-field-names`: `{"x":8,"x":9}` scans as if it were `{"x":9}`.

View file

@ -355,6 +355,8 @@ These are flags which don't fit into any other category.
`: Use this to specify one of more input files before the verb(s), rather than after. May be used more than once. The list of filename must end with `--`. This is useful for example since `--from *.csv` doesn't do what you might hope but `--mfrom *.csv --` does.
* `--mload {filenames}
`: Like `--load` but works with more than one filename, e.g. `--mload *.mlr --`.
* `--no-dedupe-field-names
`: By default, if an input record has a field name x and another also named x, the second will be renamed x_2, and so on. With this flag provided, the second x's value will replace the first x's value when the record is read. This flag has no effect on JSON input records, where duplicate keys always result in the last one's value being retained.
* `--no-fflush
`: Let buffered output not be written after every output record. The default is flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. The default is a significant performance optimization for large files. Use this flag to allow less-frequent updates when output is to the terminal. This is unlikely to be a noticeable performance improvement, since direct-to-screen output for large files has its own overhead.
* `--no-hash-records

View file

@ -2557,6 +2557,18 @@ var MiscFlagSection = FlagSection{
},
},
{
name: "--no-dedupe-field-names",
help: `By default, if an input record has a field name x and
another also named x, the second will be renamed x_2, and so on. With this flag provided, the
second x's value will replace the first x's value when the record is read. This flag has no effect
on JSON input records, where duplicate keys always result in the last one's value being retained.`,
parser: func(args []string, argc int, pargi *int, options *TOptions) {
options.ReaderOptions.DedupeFieldNames = false
*pargi += 1
},
},
{
name: "--records-per-batch",
arg: "{n}",

View file

@ -46,6 +46,7 @@ type TReaderOptions struct {
IPSRegex *regexp.Regexp
SuppressIFSRegexing bool // e.g. if they want to do '--ifs .' since '.' is a regex metacharacter
SuppressIPSRegexing bool // e.g. if they want to do '--ips .' since '.' is a regex metacharacter
DedupeFieldNames bool
// If unspecified on the command line, these take input-format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
@ -186,6 +187,7 @@ func DefaultReaderOptions() TReaderOptions {
StepAsString: DEFAULT_GEN_STEP_AS_STRING,
StopAsString: DEFAULT_GEN_STOP_AS_STRING,
},
DedupeFieldNames: true,
// TODO: comment
RecordsPerBatch: DEFAULT_RECORDS_PER_BATCH,

View file

@ -182,6 +182,7 @@ func (reader *RecordReaderCSV) getRecordBatch(
eof bool,
) {
recordsAndContexts = list.New()
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
csvRecords, more := <-csvRecordsChannel
if !more {
@ -226,7 +227,11 @@ func (reader *RecordReaderCSV) getRecordBatch(
for i := 0; i < nh; i++ {
key := reader.header[i]
value := mlrval.FromDeferredType(csvRecord[i])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
} else {
@ -246,18 +251,30 @@ func (reader *RecordReaderCSV) getRecordBatch(
for i = 0; i < n; i++ {
key := reader.header[i]
value := mlrval.FromDeferredType(csvRecord[i])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
if nh < nd {
// if header shorter than data: use 1-up itoa keys
key := strconv.Itoa(i + 1)
value := mlrval.FromDeferredType(csvRecord[i])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
if nh > nd {
// if header longer than data: use "" values
for i = nd; i < nh; i++ {
record.PutCopy(reader.header[i], mlrval.VOID)
_, err := record.PutReferenceMaybeDedupe(reader.header[i], mlrval.VOID.Copy(), dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
}
}

View file

@ -179,6 +179,7 @@ func getRecordBatchExplicitCSVHeader(
eof bool,
) {
recordsAndContexts = list.New()
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
lines, more := <-linesChannel
if !more {
@ -247,7 +248,11 @@ func getRecordBatchExplicitCSVHeader(
if !reader.readerOptions.AllowRaggedCSVInput {
for i, field := range fields {
value := mlrval.FromDeferredType(field)
record.PutReference(reader.headerStrings[i], value)
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
} else {
nh := len(reader.headerStrings)
@ -256,14 +261,22 @@ func getRecordBatchExplicitCSVHeader(
var i int
for i = 0; i < n; i++ {
value := mlrval.FromDeferredType(fields[i])
record.PutReference(reader.headerStrings[i], value)
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
if nh < nd {
// if header shorter than data: use 1-up itoa keys
for i = nh; i < nd; i++ {
key := strconv.Itoa(i + 1)
value := mlrval.FromDeferredType(fields[i])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
}
if nh > nd {
@ -293,6 +306,7 @@ func getRecordBatchImplicitCSVHeader(
eof bool,
) {
recordsAndContexts = list.New()
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
lines, more := <-linesChannel
if !more {
@ -364,7 +378,11 @@ func getRecordBatchImplicitCSVHeader(
if !reader.readerOptions.AllowRaggedCSVInput {
for i, field := range fields {
value := mlrval.FromDeferredType(field)
record.PutReference(reader.headerStrings[i], value)
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
} else {
nh := len(reader.headerStrings)
@ -373,18 +391,30 @@ func getRecordBatchImplicitCSVHeader(
var i int
for i = 0; i < n; i++ {
value := mlrval.FromDeferredType(fields[i])
record.PutReference(reader.headerStrings[i], value)
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
if nh < nd {
// if header shorter than data: use 1-up itoa keys
key := strconv.Itoa(i + 1)
value := mlrval.FromDeferredType(fields[i])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
if nh > nd {
// if header longer than data: use "" values
for i = nd; i < nh; i++ {
record.PutCopy(reader.headerStrings[i], mlrval.VOID)
_, err := record.PutReferenceMaybeDedupe(reader.headerStrings[i], mlrval.VOID.Copy(), dedupeFieldNames)
if err != nil {
errorChannel <- err
return
}
}
}
}

View file

@ -16,7 +16,7 @@ import (
// splitter_DKVP_NIDX is a function type for the one bit of code differing
// between the DKVP reader and the NIDX reader, namely, how it splits lines.
type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap
type splitter_DKVP_NIDX func(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error)
type RecordReaderDKVPNIDX struct {
readerOptions *cli.TReaderOptions
@ -100,7 +100,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle(
go channelizedLineScanner(lineScanner, linesChannel, downstreamDoneChannel, recordsPerBatch)
for {
recordsAndContexts, eof := reader.getRecordBatch(linesChannel, context)
recordsAndContexts, eof := reader.getRecordBatch(linesChannel, errorChannel, context)
if recordsAndContexts.Len() > 0 {
readerChannel <- recordsAndContexts
}
@ -113,6 +113,7 @@ func (reader *RecordReaderDKVPNIDX) processHandle(
// TODO: comment copiously we're trying to handle slow/fast/short/long reads: tail -f, smallfile, bigfile.
func (reader *RecordReaderDKVPNIDX) getRecordBatch(
linesChannel <-chan *list.List,
errorChannel chan<- error,
context *types.Context,
) (
recordsAndContexts *list.List,
@ -142,7 +143,11 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch(
}
}
record := reader.splitter(reader, line)
record, err := reader.splitter(reader, line)
if err != nil {
errorChannel <- err
return
}
context.UpdateForInputRecord()
recordAndContext := types.NewRecordAndContext(record, context)
recordsAndContexts.PushBack(recordAndContext)
@ -151,8 +156,9 @@ func (reader *RecordReaderDKVPNIDX) getRecordBatch(
return recordsAndContexts, false
}
func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap {
func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) {
record := mlrval.NewMlrmapAsRecord()
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
var pairs []string
// TODO: func-pointer this away
@ -181,17 +187,23 @@ func recordFromDKVPLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma
// DKVP is a generalization of NIDX.
key := strconv.Itoa(i + 1) // Miller userspace indices are 1-up
value := mlrval.FromDeferredType(kv[0])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
} else {
key := kv[0]
value := mlrval.FromDeferredType(kv[1])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
}
}
return record
return record, nil
}
func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrmap {
func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) (*mlrval.Mlrmap, error) {
record := mlrval.NewMlrmapAsRecord()
var values []string
@ -212,5 +224,5 @@ func recordFromNIDXLine(reader *RecordReaderDKVPNIDX, line string) *mlrval.Mlrma
mval := mlrval.FromDeferredType(value)
record.PutReference(key, mval)
}
return record
return record, nil
}

View file

@ -16,13 +16,15 @@ func TestRecordFromDKVPLine(t *testing.T) {
assert.Nil(t, err)
line := ""
record := recordFromDKVPLine(reader, line)
record, err := recordFromDKVPLine(reader, line)
assert.NotNil(t, record)
assert.Nil(t, err)
assert.Equal(t, 0, record.FieldCount)
line = "a=1,b=2,c=3"
record = recordFromDKVPLine(reader, line)
record, err = recordFromDKVPLine(reader, line)
assert.NotNil(t, record)
assert.Nil(t, err)
assert.Equal(t, 3, record.FieldCount)
assert.NotNil(t, record.Head)
@ -33,20 +35,25 @@ func TestRecordFromDKVPLine(t *testing.T) {
assert.Equal(t, record.Head.Next.Key, "b")
assert.Equal(t, record.Head.Next.Next.Key, "c")
// Default is to dedupe to a=1,b=2,b_2=3
line = "a=1,b=2,b=3"
record = recordFromDKVPLine(reader, line)
record, err = recordFromDKVPLine(reader, line)
assert.NotNil(t, record)
assert.Equal(t, 2, record.FieldCount)
assert.Nil(t, err)
assert.Equal(t, 3, record.FieldCount)
assert.NotNil(t, record.Head)
assert.NotNil(t, record.Head.Next)
assert.Nil(t, record.Head.Next.Next)
assert.NotNil(t, record.Head.Next.Next)
assert.Nil(t, record.Head.Next.Next.Next)
assert.Equal(t, record.Head.Key, "a")
assert.Equal(t, record.Head.Next.Key, "b")
assert.Equal(t, record.Head.Next.Next.Key, "b_2")
line = "a,b,c"
record = recordFromDKVPLine(reader, line)
record, err = recordFromDKVPLine(reader, line)
assert.NotNil(t, record)
assert.Nil(t, err)
assert.Equal(t, 3, record.FieldCount)
assert.NotNil(t, record.Head)

View file

@ -257,6 +257,7 @@ func (reader *RecordReaderXTAB) recordFromXTABLines(
stanza *list.List,
) (*mlrval.Mlrmap, error) {
record := mlrval.NewMlrmapAsRecord()
dedupeFieldNames := reader.readerOptions.DedupeFieldNames
for e := stanza.Front(); e != nil; e = e.Next() {
line := e.Value.(string)
@ -274,10 +275,16 @@ func (reader *RecordReaderXTAB) recordFromXTABLines(
key := kv[0]
if len(kv) == 1 {
value := mlrval.VOID
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
} else {
value := mlrval.FromDeferredType(kv[1])
record.PutReference(key, value)
_, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames)
if err != nil {
return nil, err
}
}
}

View file

@ -3,6 +3,8 @@ package mlrval
import (
"bytes"
"errors"
"fmt"
"strconv"
"github.com/johnkerl/miller/internal/pkg/lib"
)
@ -28,30 +30,65 @@ func (mlrmap *Mlrmap) Get(key string) *Mlrval {
// PutReference copies the key but not the value. This is not safe for DSL use,
// where we could create undesired references between different objects. Only
// intended to be used at callsites which allocate a mlrval solely for the
// purpose of putting into a map, e.g. input-record readers.
// intended to be used at callsites which allocate a mlrval on the spot, solely
// for the purpose of putting into the map.
func (mlrmap *Mlrmap) PutReference(key string, value *Mlrval) {
pe := mlrmap.findEntry(key)
if pe == nil {
pe = newMlrmapEntry(key, value)
if mlrmap.Head == nil {
mlrmap.Head = pe
mlrmap.Tail = pe
} else {
pe.Prev = mlrmap.Tail
pe.Next = nil
mlrmap.Tail.Next = pe
mlrmap.Tail = pe
}
if mlrmap.keysToEntries != nil {
mlrmap.keysToEntries[key] = pe
}
mlrmap.FieldCount++
mlrmap.putReferenceNewAux(key, value)
} else {
pe.Value = value
}
}
// putReferenceNewAux is a helper function for code shared between PutReference
// and PutReferenceMaybeDedupe. It should not be invoked from anywhere else --
// it doesn't do its own check if the key already exists in the record or not.
func (mlrmap *Mlrmap) putReferenceNewAux(key string, value *Mlrval) {
pe := newMlrmapEntry(key, value)
if mlrmap.Head == nil {
mlrmap.Head = pe
mlrmap.Tail = pe
} else {
pe.Prev = mlrmap.Tail
pe.Next = nil
mlrmap.Tail.Next = pe
mlrmap.Tail = pe
}
if mlrmap.keysToEntries != nil {
mlrmap.keysToEntries[key] = pe
}
mlrmap.FieldCount++
}
// PutReferenceMaybeDedupe is the default inserter for key-value pairs in input records --
// if the input is `x=8,x=9` then we make a record with x=8 and x_2=9. This can be suppressed
// via a command-line flag which this method's dedupe flag respects.
func (mlrmap *Mlrmap) PutReferenceMaybeDedupe(key string, value *Mlrval, dedupe bool) (string, error) {
if !dedupe {
mlrmap.PutReference(key, value)
return key, nil
}
pe := mlrmap.findEntry(key)
if pe == nil {
mlrmap.putReferenceNewAux(key, value)
return key, nil
}
for i := 2; i < 1000; i++ {
newKey := key + "_" + strconv.Itoa(i)
pe := mlrmap.findEntry(newKey)
if pe == nil {
mlrmap.putReferenceNewAux(newKey, value)
return newKey, nil
}
}
return key, errors.New(
fmt.Sprintf("record has too many input fields named \"%s\"", key),
)
}
// PutCopy copies the key and value (deep-copying in case the value is array/map).
// This is safe for DSL use. See also PutReference.
func (mlrmap *Mlrmap) PutCopy(key string, value *Mlrval) {

View file

@ -486,6 +486,13 @@ MISCELLANEOUS FLAGS
what you might hope but `--mfrom *.csv --` does.
--mload {filenames} Like `--load` but works with more than one filename,
e.g. `--mload *.mlr --`.
--no-dedupe-field-names By default, if an input record has a field name x and
another also named x, the second will be renamed x_2,
and so on. With this flag provided, the second x's
value will replace the first x's value when the
record is read. This flag has no effect on JSON input
records, where duplicate keys always result in the
last one's value being retained.
--no-fflush Let buffered output not be written after every output
record. The default is flush output after every
record if the output is to the terminal, or less
@ -2989,4 +2996,4 @@ SEE ALSO
2021-12-22 MILLER(1)
2021-12-23 MILLER(1)

View file

@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
.\" Date: 2021-12-22
.\" Date: 2021-12-23
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MILLER" "1" "2021-12-22" "\ \&" "\ \&"
.TH "MILLER" "1" "2021-12-23" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -605,6 +605,13 @@ These are flags which don't fit into any other category.
what you might hope but `--mfrom *.csv --` does.
--mload {filenames} Like `--load` but works with more than one filename,
e.g. `--mload *.mlr --`.
--no-dedupe-field-names By default, if an input record has a field name x and
another also named x, the second will be renamed x_2,
and so on. With this flag provided, the second x's
value will replace the first x's value when the
record is read. This flag has no effect on JSON input
records, where duplicate keys always result in the
last one's value being retained.
--no-fflush Let buffered output not be written after every output
record. The default is flush output after every
record if the output is to the terminal, or less

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --icsv --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1,2 @@
a,b,b,c,b
1,2,3,4,5

View file

@ -0,0 +1 @@
mlr --icsv --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"a": 1,
"b": 2,
"b_2": 3,
"c": 4,
"b_3": 5
}

View file

@ -0,0 +1,2 @@
a,b,b,c,b
1,2,3,4,5

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --icsvlite --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1,2 @@
a,b,b,c,b
1,2,3,4,5

View file

@ -0,0 +1 @@
mlr --icsvlite --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"a": 1,
"b": 2,
"b_2": 3,
"c": 4,
"b_3": 5
}

View file

@ -0,0 +1,2 @@
a,b,b,c,b
1,2,3,4,5

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --idkvp --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1 @@
a=1,b=2,b=3,c=4,b=5

View file

@ -0,0 +1 @@
mlr --idkvp --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"a": 1,
"b": 2,
"b_2": 3,
"c": 4,
"b_3": 5
}

View file

@ -0,0 +1 @@
a=1,b=2,b=3,c=4,b=5

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --ijson --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1 @@
{"a":1,"b":2,"b":3,"c":4,"b":5}

View file

@ -0,0 +1 @@
mlr --ijson --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1 @@
{"a":1,"b":2,"b":3,"c":4,"b":5}

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --inidx --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"1": 1,
"2": 2,
"3": 3,
"4": 4,
"5": 5
}

View file

@ -0,0 +1 @@
1 2 3 4 5

View file

@ -0,0 +1 @@
mlr --inidx --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"1": 1,
"2": 2,
"3": 3,
"4": 4,
"5": 5
}

View file

@ -0,0 +1 @@
1 2 3 4 5

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --ipprint --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1,2 @@
a b b c b
1 2 3 4 5

View file

@ -0,0 +1 @@
mlr --ipprint --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"a": 1,
"b": 2,
"b_2": 3,
"c": 4,
"b_3": 5
}

View file

@ -0,0 +1,2 @@
a b b c b
1 2 3 4 5

View file

@ -0,0 +1 @@
mlr --no-dedupe-field-names --ixtab --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,5 @@
{
"a": 1,
"b": 5,
"c": 4
}

View file

@ -0,0 +1,5 @@
a 1
b 2
b 3
c 4
b 5

View file

@ -0,0 +1 @@
mlr --ixtab --ojson cat ${CASEDIR}/input

View file

@ -0,0 +1,7 @@
{
"a": 1,
"b": 2,
"b_2": 3,
"c": 4,
"b_3": 5
}

View file

@ -0,0 +1,5 @@
a 1
b 2
b 3
c 4
b 5