From 0e3a54ed68d7d77376c717f63e74a3c5bede085f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 23 Dec 2023 16:20:11 -0500 Subject: [PATCH] Implement `mlr uniq -x` (#1457) * mlr uniq -x * unit-test cases * make dev --- docs/src/kubectl-and-helm.md | 2 +- docs/src/manpage.md | 4 +- docs/src/manpage.txt | 4 +- docs/src/reference-dsl-time.md | 4 +- .../src/reference-main-regular-expressions.md | 2 +- docs/src/reference-main-strings.md | 2 +- docs/src/reference-verbs.md | 2 + docs/src/release-docs.md | 2 +- docs/src/shapes-of-data.md | 12 ++-- docs/src/statistics-examples.md | 4 +- docs/src/why.md | 2 +- man/manpage.txt | 4 +- man/mlr.1 | 6 +- pkg/lib/ordered_map.go | 23 +++++++ pkg/mlrval/mlrmap_accessors.go | 13 ++++ pkg/mlrval/mlrmap_accessors_test.go | 19 ++++++ pkg/transformers/uniq.go | 66 ++++++++++++++----- test/cases/cli-help/0001/expout | 2 + test/cases/verb-uniq/uniq-c-x-change/cmd | 1 + test/cases/verb-uniq/uniq-c-x-change/experr | 0 test/cases/verb-uniq/uniq-c-x-change/expout | 7 ++ test/cases/verb-uniq/uniq-c-x-het/cmd | 1 + test/cases/verb-uniq/uniq-c-x-het/experr | 0 test/cases/verb-uniq/uniq-c-x-het/expout | 6 ++ test/cases/verb-uniq/uniq-c-x-long/cmd | 1 + test/cases/verb-uniq/uniq-c-x-long/experr | 0 test/cases/verb-uniq/uniq-c-x-long/expout | 7 ++ test/cases/verb-uniq/uniq-c-x-short/cmd | 1 + test/cases/verb-uniq/uniq-c-x-short/experr | 0 test/cases/verb-uniq/uniq-c-x-short/expout | 6 ++ test/cases/verb-uniq/uniq-x-change/cmd | 1 + test/cases/verb-uniq/uniq-x-change/experr | 0 test/cases/verb-uniq/uniq-x-change/expout | 7 ++ test/cases/verb-uniq/uniq-x-het/cmd | 1 + test/cases/verb-uniq/uniq-x-het/experr | 0 test/cases/verb-uniq/uniq-x-het/expout | 6 ++ test/cases/verb-uniq/uniq-x-long/cmd | 1 + test/cases/verb-uniq/uniq-x-long/experr | 0 test/cases/verb-uniq/uniq-x-long/expout | 7 ++ test/cases/verb-uniq/uniq-x-short/cmd | 1 + test/cases/verb-uniq/uniq-x-short/experr | 0 test/cases/verb-uniq/uniq-x-short/expout | 6 ++ 
test/input/example-with-changed-keys.dkvp | 10 +++ test/input/example-with-extra-keys.dkvp | 10 +++ test/input/example-with-missing-keys.dkvp | 10 +++ test/input/example.dkvp | 10 +++ 46 files changed, 238 insertions(+), 35 deletions(-) create mode 100644 test/cases/verb-uniq/uniq-c-x-change/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-change/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-change/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-het/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-het/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-het/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-long/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-long/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-long/expout create mode 100644 test/cases/verb-uniq/uniq-c-x-short/cmd create mode 100644 test/cases/verb-uniq/uniq-c-x-short/experr create mode 100644 test/cases/verb-uniq/uniq-c-x-short/expout create mode 100644 test/cases/verb-uniq/uniq-x-change/cmd create mode 100644 test/cases/verb-uniq/uniq-x-change/experr create mode 100644 test/cases/verb-uniq/uniq-x-change/expout create mode 100644 test/cases/verb-uniq/uniq-x-het/cmd create mode 100644 test/cases/verb-uniq/uniq-x-het/experr create mode 100644 test/cases/verb-uniq/uniq-x-het/expout create mode 100644 test/cases/verb-uniq/uniq-x-long/cmd create mode 100644 test/cases/verb-uniq/uniq-x-long/experr create mode 100644 test/cases/verb-uniq/uniq-x-long/expout create mode 100644 test/cases/verb-uniq/uniq-x-short/cmd create mode 100644 test/cases/verb-uniq/uniq-x-short/experr create mode 100644 test/cases/verb-uniq/uniq-x-short/expout create mode 100644 test/input/example-with-changed-keys.dkvp create mode 100644 test/input/example-with-extra-keys.dkvp create mode 100644 test/input/example-with-missing-keys.dkvp create mode 100644 test/input/example.dkvp diff --git a/docs/src/kubectl-and-helm.md b/docs/src/kubectl-and-helm.md index 38bd31abf..5f53001be 100644 --- 
a/docs/src/kubectl-and-helm.md +++ b/docs/src/kubectl-and-helm.md @@ -152,7 +152,7 @@ $ helm list | mlr --itsv --ojson head -n 1 ] -A solution here is Miller's +A solution here is Miller's [clean-whitespace verb](reference-verbs.md#clean-whitespace):
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 369a7bbf6..28182f146 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -988,6 +988,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -f {a,b,c}    Field names for distinct count.
+       -x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
        -n            Show only the number of distinct values. Not compatible with -u.
        -o {name}     Field name for output count. Default "count".
                      Ignored with -u.
@@ -2154,6 +2155,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -g {d,e,f}    Group-by-field names for uniq counts.
+       -x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
        -c            Show repeat counts in addition to unique values.
        -n            Show only the number of distinct values.
        -o {name}     Field name for output count. Default "count".
@@ -3685,5 +3687,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-19                         MILLER(1)
+                                  2023-12-23                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index a7aec87a7..4262cc6c7 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -967,6 +967,7 @@ MILLER(1) MILLER(1) Options: -f {a,b,c} Field names for distinct count. + -x {a,b,c} Field names to exclude for distinct count: use each record's others instead. -n Show only the number of distinct values. Not compatible with -u. -o {name} Field name for output count. Default "count". Ignored with -u. @@ -2133,6 +2134,7 @@ MILLER(1) MILLER(1) Options: -g {d,e,f} Group-by-field names for uniq counts. + -x {a,b,c} Field names to exclude for uniq: use each record's others instead. -c Show repeat counts in addition to unique values. -n Show only the number of distinct values. -o {name} Field name for output count. Default "count". @@ -3664,4 +3666,4 @@ MILLER(1) MILLER(1) - 2023-12-19 MILLER(1) + 2023-12-23 MILLER(1) diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index 867bc8dc1..0a3aa721e 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -89,7 +89,7 @@ the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. This was the first (and initially only) human-readable date/time format supported by Miller going all the way back to Miller 1.0.0. -You can get these from epoch-seconds using the +You can get these from epoch-seconds using the [sec2gmt](reference-dsl-builtin-functions.md#sec2gmt) DSL function. (Note that the terms _UTC_ and _GMT_ are used interchangeably in Miller.) We also have [sec2gmtdate](reference-dsl-builtin-functions.md#sec2gmtdate) DSL function. @@ -200,7 +200,7 @@ mlr: TZ environment variable appears malformed: "This/Is/A/Typo" Note that for local times, Miller omits the `T` and the `Z` you see in GMT times. 
-We also have the +We also have the [gmt2localtime](reference-dsl-builtin-functions.md#gmt2localtime) and [localtime2gmt](reference-dsl-builtin-functions.md#localtime2gmt) convenience functions: diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index 982c60634..f679669b8 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -108,7 +108,7 @@ Regex captures of the form `\0` through `\9` are supported as follows: If you use `(...)` in your regular expression, then up to 9 matches are supported for the `=~` operator, and an arbitrary number of matches are supported for the `match` DSL function. -* Before any match is done, `"\1"` etc. in a string evaluate to themselves. +* Before any match is done, `"\1"` etc. in a string evaluate to themselves. * After a successful match is done, `"\1"` etc. in a string evaluate to the matched substring. * After an unsuccessful match is done, `"\1"` etc. in a string evaluate to the empty string. * You can match against `null` to reset to the original state. diff --git a/docs/src/reference-main-strings.md b/docs/src/reference-main-strings.md index df35284f4..b16b03483 100644 --- a/docs/src/reference-main-strings.md +++ b/docs/src/reference-main-strings.md @@ -197,4 +197,4 @@ See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipe These replacements apply only to strings you key in for the DSL expressions for `filter` and `put`: that is, if you type `\t` in a string literal for a `filter`/`put` expression, it will be turned into a tab character. If you want a backslash followed by a `t`, then please type `\\t`. -However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. 
If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. +However, these replacements are done automatically only for string literals within DSL expressions -- they are not done automatically to fields within your data stream. If you wish to make these replacements, you can do (for example) `mlr put '$field = gsub($field, "\\t", "\t")'`. If you need to make such a replacement for all fields in your data, you should probably use the system `sed` command instead. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 106ad4bf1..9a50a1dbb 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -596,6 +596,7 @@ Same as uniq -c. Options: -f {a,b,c} Field names for distinct count. +-x {a,b,c} Field names to exclude for distinct count: use each record's others instead. -n Show only the number of distinct values. Not compatible with -u. -o {name} Field name for output count. Default "count". Ignored with -u. @@ -4066,6 +4067,7 @@ count-distinct. For uniq, -f is a synonym for -g. Options: -g {d,e,f} Group-by-field names for uniq counts. +-x {a,b,c} Field names to exclude for uniq: use each record's others instead. -c Show repeat counts in addition to unique values. -n Show only the number of distinct values. -o {name} Field name for output count. Default "count". 
diff --git a/docs/src/release-docs.md b/docs/src/release-docs.md index 479be3f38..85a13d804 100644 --- a/docs/src/release-docs.md +++ b/docs/src/release-docs.md @@ -16,7 +16,7 @@ Quick links: # Documents for releases -If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- +If your `mlr version` says something like `mlr 6.0.0-dev`, with the `-dev` suffix, you're likely building from source, or you've obtained a recent artifact from GitHub Actions -- the page [https://miller.readthedocs.io/en/main](https://miller.readthedocs.io/en/main) contains information for the latest contributions to the [Miller repository](https://github.com/johnkerl/miller). If your `mlr version` says something like `Miller v5.10.2` or `mlr 6.0.0`, without the `-dev` suffix, you're likely using a Miller executable from a package manager -- please see below for the documentation for Miller as of the release you're using. diff --git a/docs/src/shapes-of-data.md b/docs/src/shapes-of-data.md index bab58b7f0..f97040543 100644 --- a/docs/src/shapes-of-data.md +++ b/docs/src/shapes-of-data.md @@ -33,7 +33,7 @@ Also try `od -xcv` and/or `cat -e` on your file to check for non-printable chara Use the `file` command to see if there are CR/LF terminators (in this case, there are not):
-file data/colours.csv 
+file data/colours.csv
 
 data/colours.csv: Unicode text, UTF-8 text
@@ -42,7 +42,7 @@ data/colours.csv: Unicode text, UTF-8 text
 Look at the file to find names of fields:
 
 
-cat data/colours.csv 
+cat data/colours.csv
 
 KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR
@@ -53,13 +53,13 @@ masterdata_colourcode_2;Schwarz;Black;Negro;Musta;Noir;Nero;Zwart;Czarny;Negru;S
 Extract a few fields:
 
 
-mlr --csv cut -f KEY,PL,TO data/colours.csv 
+mlr --csv cut -f KEY,PL,TO data/colours.csv
 
Use XTAB output format to get a sharper picture of where records/fields are being split:
-mlr --icsv --oxtab cat data/colours.csv 
+mlr --icsv --oxtab cat data/colours.csv
 
 KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_1;Weiß;White;Blanco;Valkoinen;Blanc;Bianco;Wit;Biały;Alb;Beyaz
@@ -70,7 +70,7 @@ KEY;DE;EN;ES;FI;FR;IT;NL;PL;TO;TR masterdata_colourcode_2;Schwarz;Black;Negro;Mu
 Using XTAB output format makes it clearer that `KEY;DE;...;TR` is being treated as a single field name in the CSV header, and likewise each subsequent line is being treated as a single field value. This is because the default field separator is a comma but we have semicolons here.  Use XTAB again with different field separator (`--fs semicolon`):
 
 
-mlr --icsv --ifs semicolon --oxtab cat data/colours.csv 
+mlr --icsv --ifs semicolon --oxtab cat data/colours.csv
 
 KEY masterdata_colourcode_1
@@ -101,7 +101,7 @@ TR  Siyah
 Using the new field-separator, retry the cut:
 
 
-mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv 
+mlr --csv --fs semicolon cut -f KEY,PL,TO data/colours.csv
 
 KEY;PL;TO
diff --git a/docs/src/statistics-examples.md b/docs/src/statistics-examples.md
index b1b7ea7b3..2e80e8a39 100644
--- a/docs/src/statistics-examples.md
+++ b/docs/src/statistics-examples.md
@@ -23,7 +23,7 @@ For one or more specified field names, simply compute p25 and p75, then write th
 
 mlr --oxtab stats1 -f x -a p25,p75 \
     then put '$x_iqr = $x_p75 - $x_p25' \
-    data/medium 
+    data/medium
 
 x_p25 0.24667037823231752
@@ -40,7 +40,7 @@ For wildcarded field names, first compute p25 and p75, then loop over field name
         $["\1_iqr"] = $["\1_p75"] - $["\1_p25"]
       }
     }' \
-    data/medium 
+    data/medium
 
 i_p25 2501
diff --git a/docs/src/why.md b/docs/src/why.md
index a8b2ed585..937bd0386 100644
--- a/docs/src/why.md
+++ b/docs/src/why.md
@@ -48,7 +48,7 @@ Eighth thing: It's an **awful lot of fun to write**. In my experience I didn't f
 
 Miller is command-line-only by design. People who want a graphical user interface won't find it here.  This is in part (a) accommodating my personal preferences, and in part (b) guided by my experience/belief that the command line is very expressive. Steeper learning curve than a GUI, yes. I consider that price worth paying for the tool-niche which Miller occupies.
 
-Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats. 
+Another tradeoff: supporting lists of records keeps me supporting only what can be expressed in *all* of those formats. For example, `[1,2,3,4,5]` is valid but unmillerable JSON: the list elements are not records.  So Miller can't (and won't) handle arbitrary JSON -- because Miller only handles tabular data which can be expressed in a variety of formats.
 
 A third tradeoff is doing build-from-scratch in a low-level language. It'd be quicker to write (but slower to run) if written in a high-level language. If Miller were written in Python, it would be implemented in significantly fewer lines of code than its current Go implementation. The DSL would just be an `eval` of Python code. And it would run slower, but maybe not enough slower to be a problem for most folks. Later I found out about the [rows](https://github.com/turicas/rows) tool -- if you find Miller useful, you should check out `rows` as well.
 
diff --git a/man/manpage.txt b/man/manpage.txt
index a7aec87a7..4262cc6c7 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -967,6 +967,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -f {a,b,c}    Field names for distinct count.
+       -x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
        -n            Show only the number of distinct values. Not compatible with -u.
        -o {name}     Field name for output count. Default "count".
                      Ignored with -u.
@@ -2133,6 +2134,7 @@ MILLER(1)                                                            MILLER(1)
 
        Options:
        -g {d,e,f}    Group-by-field names for uniq counts.
+       -x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
        -c            Show repeat counts in addition to unique values.
        -n            Show only the number of distinct values.
        -o {name}     Field name for output count. Default "count".
@@ -3664,4 +3666,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-19                         MILLER(1)
+                                  2023-12-23                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 92224547a..04c2151e3 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-19
+.\"      Date: 2023-12-23
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-19" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-23" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1186,6 +1186,7 @@ Same as uniq -c.
 
 Options:
 -f {a,b,c}    Field names for distinct count.
+-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
 -n            Show only the number of distinct values. Not compatible with -u.
 -o {name}     Field name for output count. Default "count".
               Ignored with -u.
@@ -2700,6 +2701,7 @@ count-distinct. For uniq, -f is a synonym for -g.
 
 Options:
 -g {d,e,f}    Group-by-field names for uniq counts.
+-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
 -c            Show repeat counts in addition to unique values.
 -n            Show only the number of distinct values.
 -o {name}     Field name for output count. Default "count".
diff --git a/pkg/lib/ordered_map.go b/pkg/lib/ordered_map.go
index 093c1ca84..a3d54bd50 100644
--- a/pkg/lib/ordered_map.go
+++ b/pkg/lib/ordered_map.go
@@ -111,6 +111,29 @@ func (omap *OrderedMap) GetWithCheck(key string) (interface{}, bool) {
 	}
 }
 
+func (omap *OrderedMap) GetKeys() []string {
+	keys := make([]string, omap.FieldCount)
+	i := 0
+	for pe := omap.Head; pe != nil; pe = pe.Next {
+		keys[i] = pe.Key
+		i++
+	}
+	return keys
+}
+
+// GetKeysExcept returns the map's keys, walking from Head, omitting any key
+// found in exceptions. Go has no hash-set type, so exceptions is a map used
+// as a set: only the presence of a key is checked, not its bool value.
+func (omap *OrderedMap) GetKeysExcept(exceptions map[string]bool) []string {
+	keys := make([]string, 0)
+	for pe := omap.Head; pe != nil; pe = pe.Next {
+		if _, present := exceptions[pe.Key]; !present {
+			keys = append(keys, pe.Key)
+		}
+	}
+	return keys
+}
+
 // ----------------------------------------------------------------
 func (omap *OrderedMap) Clear() {
 	omap.FieldCount = 0
diff --git a/pkg/mlrval/mlrmap_accessors.go b/pkg/mlrval/mlrmap_accessors.go
index befb5f726..8540ac5dc 100644
--- a/pkg/mlrval/mlrmap_accessors.go
+++ b/pkg/mlrval/mlrmap_accessors.go
@@ -281,6 +281,19 @@ func (mlrmap *Mlrmap) GetKeys() []string {
 	return keys
 }
 
+// GetKeysExcept returns the map's keys, walking from Head, omitting any key
+// found in exceptions. Go has no hash-set type, so exceptions is a map used
+// as a set: only the presence of a key is checked, not its bool value.
+func (mlrmap *Mlrmap) GetKeysExcept(exceptions map[string]bool) []string {
+	keys := make([]string, 0)
+	for pe := mlrmap.Head; pe != nil; pe = pe.Next {
+		if _, present := exceptions[pe.Key]; !present {
+			keys = append(keys, pe.Key)
+		}
+	}
+	return keys
+}
+
 // ----------------------------------------------------------------
 // TODO: put error-return into this API
 func (mlrmap *Mlrmap) PutNameWithPositionalIndex(position int64, name *Mlrval) {
diff --git a/pkg/mlrval/mlrmap_accessors_test.go b/pkg/mlrval/mlrmap_accessors_test.go
index 890ac6a9e..6bae83f89 100644
--- a/pkg/mlrval/mlrmap_accessors_test.go
+++ b/pkg/mlrval/mlrmap_accessors_test.go
@@ -47,3 +47,22 @@ func TestPutReference(t *testing.T) {
 }
 
 // TODO: TestPrependReference
+
+func TestGetKeysExcept(t *testing.T) {
+	mlrmap := NewMlrmap()
+	mlrmap.PutReference("a", FromInt(1))
+	mlrmap.PutReference("b", FromInt(2))
+
+	exceptions := make(map[string]bool)
+	exceptions["x"] = true
+	exceptions["y"] = true
+
+	assert.Equal(t, mlrmap.GetKeys(), []string{"a", "b"})
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{"a", "b"})
+
+	exceptions["a"] = true
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{"b"})
+
+	exceptions["b"] = true
+	assert.Equal(t, mlrmap.GetKeysExcept(exceptions), []string{})
+}
diff --git a/pkg/transformers/uniq.go b/pkg/transformers/uniq.go
index f28e6c854..5893b689a 100644
--- a/pkg/transformers/uniq.go
+++ b/pkg/transformers/uniq.go
@@ -43,6 +43,7 @@ func transformerCountDistinctUsage(
 	fmt.Fprintf(o, "\n")
 	fmt.Fprintf(o, "Options:\n")
 	fmt.Fprintf(o, "-f {a,b,c}    Field names for distinct count.\n")
+	fmt.Fprintf(o, "-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.\n")
 	fmt.Fprintf(o, "-n            Show only the number of distinct values. Not compatible with -u.\n")
 	fmt.Fprintf(o, "-o {name}     Field name for output count. Default \"%s\".\n", uniqDefaultOutputFieldName)
 	fmt.Fprintf(o, "              Ignored with -u.\n")
@@ -68,6 +69,7 @@ func transformerCountDistinctParseCLI(
 
 	// Parse local flags
 	var fieldNames []string = nil
+	invertFieldNames := false
 	showNumDistinctOnly := false
 	outputFieldName := uniqDefaultOutputFieldName
 	doLashed := true
@@ -89,6 +91,10 @@ func transformerCountDistinctParseCLI(
 		} else if opt == "-g" || opt == "-f" {
 			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
 
+		} else if opt == "-x" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			invertFieldNames = true
+
 		} else if opt == "-n" {
 			showNumDistinctOnly = true
 
@@ -123,6 +129,7 @@ func transformerCountDistinctParseCLI(
 
 	transformer, err := NewTransformerUniq(
 		fieldNames,
+		invertFieldNames,
 		showCounts,
 		showNumDistinctOnly,
 		outputFieldName,
@@ -149,6 +156,7 @@ func transformerUniqUsage(
 	fmt.Fprintf(o, "\n")
 	fmt.Fprintf(o, "Options:\n")
 	fmt.Fprintf(o, "-g {d,e,f}    Group-by-field names for uniq counts.\n")
+	fmt.Fprintf(o, "-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.\n")
 	fmt.Fprintf(o, "-c            Show repeat counts in addition to unique values.\n")
 	fmt.Fprintf(o, "-n            Show only the number of distinct values.\n")
 	fmt.Fprintf(o, "-o {name}     Field name for output count. Default \"%s\".\n", uniqDefaultOutputFieldName)
@@ -173,6 +181,7 @@ func transformerUniqParseCLI(
 
 	// Parse local flags
 	var fieldNames []string = nil
+	invertFieldNames := false
 	showCounts := false
 	showNumDistinctOnly := false
 	outputFieldName := uniqDefaultOutputFieldName
@@ -195,6 +204,10 @@ func transformerUniqParseCLI(
 		} else if opt == "-g" || opt == "-f" {
 			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
 
+		} else if opt == "-x" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			invertFieldNames = true
+
 		} else if opt == "-c" {
 			showCounts = true
 
@@ -238,6 +251,7 @@ func transformerUniqParseCLI(
 
 	transformer, _ := NewTransformerUniq(
 		fieldNames,
+		invertFieldNames,
 		showCounts,
 		showNumDistinctOnly,
 		outputFieldName,
@@ -250,9 +264,11 @@ func transformerUniqParseCLI(
 
 // ----------------------------------------------------------------
 type TransformerUniq struct {
-	fieldNames      []string
-	showCounts      bool
-	outputFieldName string
+	fieldNames       []string
+	fieldNamesSet    map[string]bool
+	invertFieldNames bool
+	showCounts       bool
+	outputFieldName  string
 
 	// Example:
 	// Input is:
@@ -280,6 +296,7 @@ type TransformerUniq struct {
 	//   "a" => "4" => 4
 	uniqifiedRecordCounts *lib.OrderedMap // record-as-string -> counts
 	uniqifiedRecords      *lib.OrderedMap // record-as-string -> records
+	keysByGroup           *lib.OrderedMap // grouping key -> array of field names used for grouping
 	countsByGroup         *lib.OrderedMap // grouping key -> count
 	valuesByGroup         *lib.OrderedMap // grouping key -> array of values
 	unlashedCounts        *lib.OrderedMap // field name -> string field value -> count
@@ -291,6 +308,7 @@ type TransformerUniq struct {
 // ----------------------------------------------------------------
 func NewTransformerUniq(
 	fieldNames []string,
+	invertFieldNames bool,
 	showCounts bool,
 	showNumDistinctOnly bool,
 	outputFieldName string,
@@ -299,12 +317,15 @@ func NewTransformerUniq(
 ) (*TransformerUniq, error) {
 
 	tr := &TransformerUniq{
-		fieldNames:      fieldNames,
-		showCounts:      showCounts,
-		outputFieldName: outputFieldName,
+		fieldNames:       fieldNames,
+		fieldNamesSet:    lib.StringListToSet(fieldNames),
+		invertFieldNames: invertFieldNames,
+		showCounts:       showCounts,
+		outputFieldName:  outputFieldName,
 
 		uniqifiedRecordCounts: lib.NewOrderedMap(),
 		uniqifiedRecords:      lib.NewOrderedMap(),
+		keysByGroup:           lib.NewOrderedMap(),
 		countsByGroup:         lib.NewOrderedMap(),
 		valuesByGroup:         lib.NewOrderedMap(),
 		unlashedCounts:        lib.NewOrderedMap(),
@@ -334,6 +355,16 @@ func NewTransformerUniq(
 
 // ----------------------------------------------------------------
 
+func (tr *TransformerUniq) getFieldNamesForGrouping(
+	inrec *mlrval.Mlrmap,
+) []string {
+	if tr.invertFieldNames {
+		return inrec.GetKeysExcept(tr.fieldNamesSet)
+	} else {
+		return tr.fieldNames
+	}
+}
+
 func (tr *TransformerUniq) Transform(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
@@ -441,7 +472,7 @@ func (tr *TransformerUniq) transformUnlashed(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		for _, fieldName := range tr.fieldNames {
+		for _, fieldName := range tr.getFieldNamesForGrouping(inrec) {
 			var countsForFieldName *lib.OrderedMap = nil
 			iCountsForFieldName, present := tr.unlashedCounts.GetWithCheck(fieldName)
 			if !present {
@@ -496,7 +527,7 @@ func (tr *TransformerUniq) transformNumDistinctOnly(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, ok := inrec.GetSelectedValuesJoined(tr.fieldNames)
+		groupingKey, ok := inrec.GetSelectedValuesJoined(tr.getFieldNamesForGrouping(inrec))
 		if ok {
 			iCount, present := tr.countsByGroup.GetWithCheck(groupingKey)
 			if !present {
@@ -528,28 +559,33 @@ func (tr *TransformerUniq) transformWithCounts(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.fieldNames)
+		fieldNamesForGrouping := tr.getFieldNamesForGrouping(inrec)
+
+		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(fieldNamesForGrouping)
 		if ok {
 			iCount, present := tr.countsByGroup.GetWithCheck(groupingKey)
 			if !present {
 				tr.countsByGroup.Put(groupingKey, int64(1))
 				tr.valuesByGroup.Put(groupingKey, selectedValues)
+				tr.keysByGroup.Put(groupingKey, fieldNamesForGrouping)
 			} else {
 				tr.countsByGroup.Put(groupingKey, iCount.(int64)+1)
 			}
 		}
 
 	} else { // end of record stream
-
 		for pa := tr.countsByGroup.Head; pa != nil; pa = pa.Next {
 			outrec := mlrval.NewMlrmapAsRecord()
 			valuesForGroup := tr.valuesByGroup.Get(pa.Key).([]*mlrval.Mlrval)
-			for i, fieldName := range tr.fieldNames {
+			keysForGroup := tr.keysByGroup.Get(pa.Key).([]string)
+
+			for i, fieldNameForGrouping := range keysForGroup {
 				outrec.PutCopy(
-					fieldName,
+					fieldNameForGrouping,
 					valuesForGroup[i],
 				)
 			}
+
 			if tr.showCounts {
 				outrec.PutReference(
 					tr.outputFieldName,
@@ -573,7 +609,7 @@ func (tr *TransformerUniq) transformWithoutCounts(
 	if !inrecAndContext.EndOfStream {
 		inrec := inrecAndContext.Record
 
-		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.fieldNames)
+		groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(tr.getFieldNamesForGrouping(inrec))
 		if !ok {
 			return
 		}
@@ -584,9 +620,9 @@ func (tr *TransformerUniq) transformWithoutCounts(
 			tr.valuesByGroup.Put(groupingKey, selectedValues)
 			outrec := mlrval.NewMlrmapAsRecord()
 
-			for i, fieldName := range tr.fieldNames {
+			for i, fieldNameForGrouping := range tr.getFieldNamesForGrouping(inrec) {
 				outrec.PutCopy(
-					fieldName,
+					fieldNameForGrouping,
 					selectedValues[i],
 				)
 			}
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index bdb23ad6c..6dfaf2b80 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -96,6 +96,7 @@ Same as uniq -c.
 
 Options:
 -f {a,b,c}    Field names for distinct count.
+-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.
 -n            Show only the number of distinct values. Not compatible with -u.
 -o {name}     Field name for output count. Default "count".
               Ignored with -u.
@@ -1320,6 +1321,7 @@ count-distinct. For uniq, -f is a synonym for -g.
 
 Options:
 -g {d,e,f}    Group-by-field names for uniq counts.
+-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.
 -c            Show repeat counts in addition to unique values.
 -n            Show only the number of distinct values.
 -o {name}     Field name for output count. Default "count".
diff --git a/test/cases/verb-uniq/uniq-c-x-change/cmd b/test/cases/verb-uniq/uniq-c-x-change/cmd
new file mode 100644
index 000000000..2f3418461
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-change/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-changed-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-change/experr b/test/cases/verb-uniq/uniq-c-x-change/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-change/expout b/test/cases/verb-uniq/uniq-c-x-change/expout
new file mode 100644
index 000000000..a61ce84e8
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-change/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=2
+weird=red,shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=red,shape=square,odd=77.19910000,count=1
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-het/cmd b/test/cases/verb-uniq/uniq-c-x-het/cmd
new file mode 100644
index 000000000..051906fe1
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-het/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-het/experr b/test/cases/verb-uniq/uniq-c-x-het/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-het/expout b/test/cases/verb-uniq/uniq-c-x-het/expout
new file mode 100644
index 000000000..5392f140e
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-het/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+color=red,shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-long/cmd b/test/cases/verb-uniq/uniq-c-x-long/cmd
new file mode 100644
index 000000000..38fe9e5c3
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-long/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-extra-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-long/experr b/test/cases/verb-uniq/uniq-c-x-long/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-long/expout b/test/cases/verb-uniq/uniq-c-x-long/expout
new file mode 100644
index 000000000..d77e08b27
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-long/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+color=red,shape=circle,count=1
+color=purple,shape=triangle,extra=here,count=1
+color=purple,shape=triangle,count=1
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-c-x-short/cmd b/test/cases/verb-uniq/uniq-c-x-short/cmd
new file mode 100644
index 000000000..9561cc361
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-short/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -c -x flag,k,index,quantity,rate test/input/example-with-missing-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-c-x-short/experr b/test/cases/verb-uniq/uniq-c-x-short/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-c-x-short/expout b/test/cases/verb-uniq/uniq-c-x-short/expout
new file mode 100644
index 000000000..7e7269533
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-c-x-short/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle,count=1
+color=red,shape=square,count=3
+shape=circle,count=1
+color=purple,shape=triangle,count=2
+color=yellow,shape=circle,count=2
+color=purple,shape=square,count=1
diff --git a/test/cases/verb-uniq/uniq-x-change/cmd b/test/cases/verb-uniq/uniq-x-change/cmd
new file mode 100644
index 000000000..43006f390
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-change/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-changed-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-change/experr b/test/cases/verb-uniq/uniq-x-change/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-change/expout b/test/cases/verb-uniq/uniq-x-change/expout
new file mode 100644
index 000000000..67f9598af
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-change/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle
+color=red,shape=square
+weird=red,shape=circle
+color=purple,shape=triangle
+color=red,shape=square,odd=77.19910000
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-het/cmd b/test/cases/verb-uniq/uniq-x-het/cmd
new file mode 100644
index 000000000..326412e62
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-het/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-het/experr b/test/cases/verb-uniq/uniq-x-het/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-het/expout b/test/cases/verb-uniq/uniq-x-het/expout
new file mode 100644
index 000000000..ddc9002b1
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-het/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle
+color=red,shape=square
+color=red,shape=circle
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-long/cmd b/test/cases/verb-uniq/uniq-x-long/cmd
new file mode 100644
index 000000000..bcdfe98e0
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-long/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-extra-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-long/experr b/test/cases/verb-uniq/uniq-x-long/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-long/expout b/test/cases/verb-uniq/uniq-x-long/expout
new file mode 100644
index 000000000..d5b3f26eb
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-long/expout
@@ -0,0 +1,7 @@
+color=yellow,shape=triangle
+color=red,shape=square
+color=red,shape=circle
+color=purple,shape=triangle,extra=here
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/cases/verb-uniq/uniq-x-short/cmd b/test/cases/verb-uniq/uniq-x-short/cmd
new file mode 100644
index 000000000..5c2f73021
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-short/cmd
@@ -0,0 +1 @@
+mlr --dkvp uniq -x flag,k,index,quantity,rate test/input/example-with-missing-keys.dkvp
diff --git a/test/cases/verb-uniq/uniq-x-short/experr b/test/cases/verb-uniq/uniq-x-short/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-uniq/uniq-x-short/expout b/test/cases/verb-uniq/uniq-x-short/expout
new file mode 100644
index 000000000..b566d5536
--- /dev/null
+++ b/test/cases/verb-uniq/uniq-x-short/expout
@@ -0,0 +1,6 @@
+color=yellow,shape=triangle
+color=red,shape=square
+shape=circle
+color=purple,shape=triangle
+color=yellow,shape=circle
+color=purple,shape=square
diff --git a/test/input/example-with-changed-keys.dkvp b/test/input/example-with-changed-keys.dkvp
new file mode 100644
index 000000000..4ec2ac863
--- /dev/null
+++ b/test/input/example-with-changed-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+weird=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,odd=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example-with-extra-keys.dkvp b/test/input/example-with-extra-keys.dkvp
new file mode 100644
index 000000000..54ecf74e2
--- /dev/null
+++ b/test/input/example-with-extra-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+color=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910,extra=here
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example-with-missing-keys.dkvp b/test/input/example-with-missing-keys.dkvp
new file mode 100644
index 000000000..ae8632ec0
--- /dev/null
+++ b/test/input/example-with-missing-keys.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430
diff --git a/test/input/example.dkvp b/test/input/example.dkvp
new file mode 100644
index 000000000..73bc10242
--- /dev/null
+++ b/test/input/example.dkvp
@@ -0,0 +1,10 @@
+color=yellow,shape=triangle,flag=true,k=1,index=11,quantity=43.6498,rate=9.8870
+color=red,shape=square,flag=true,k=2,index=15,quantity=79.2778,rate=0.0130
+color=red,shape=circle,flag=true,k=3,index=16,quantity=13.8103,rate=2.9010
+color=red,shape=square,flag=false,k=4,index=48,quantity=77.5542,rate=7.4670
+color=purple,shape=triangle,flag=false,k=5,index=51,quantity=81.2290,rate=8.5910
+color=red,shape=square,flag=false,k=6,index=64,quantity=77.1991,rate=9.5310
+color=purple,shape=triangle,flag=false,k=7,index=65,quantity=80.1405,rate=5.8240
+color=yellow,shape=circle,flag=true,k=8,index=73,quantity=63.9785,rate=4.2370
+color=yellow,shape=circle,flag=true,k=9,index=87,quantity=63.5058,rate=8.3350
+color=purple,shape=square,flag=false,k=10,index=91,quantity=72.3735,rate=8.2430