From 6ea94a002b5f2dc2fbe06b24db0bc8caef6a145c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 4 Mar 2023 01:08:19 -0500 Subject: [PATCH] Add new `upcase` and `downcase` verbs (#1217) * upcase/downcase verbs * unit-test cases * doc files * remaining `make dev` artifacts: manpage --- docs/src/manpage.md | 31 ++- docs/src/manpage.txt | 31 ++- docs/src/reference-verbs.md | 27 +++ docs/src/reference-verbs.md.in | 11 ++ .../pkg/transformers/aaa_transformer_table.go | 2 + internal/pkg/transformers/downcase.go | 178 ++++++++++++++++++ internal/pkg/transformers/upcase.go | 178 ++++++++++++++++++ man/manpage.txt | 31 ++- man/mlr.1 | 43 ++++- test/cases/cli-help/0001/expout | 18 ++ test/cases/verb-downcase/0001/cmd | 1 + test/cases/verb-downcase/0001/experr | 0 test/cases/verb-downcase/0001/expout | 10 + test/cases/verb-downcase/0002/cmd | 1 + test/cases/verb-downcase/0002/experr | 0 test/cases/verb-downcase/0002/expout | 10 + test/cases/verb-downcase/0003/cmd | 1 + test/cases/verb-downcase/0003/experr | 0 test/cases/verb-downcase/0003/expout | 10 + test/cases/verb-downcase/0004/cmd | 1 + test/cases/verb-downcase/0004/experr | 0 test/cases/verb-downcase/0004/expout | 10 + test/cases/verb-downcase/0005/cmd | 1 + test/cases/verb-downcase/0005/experr | 0 test/cases/verb-downcase/0005/expout | 10 + test/cases/verb-downcase/0006/cmd | 1 + test/cases/verb-downcase/0006/experr | 0 test/cases/verb-downcase/0006/expout | 10 + test/cases/verb-upcase/0001/cmd | 1 + test/cases/verb-upcase/0001/experr | 0 test/cases/verb-upcase/0001/expout | 10 + test/cases/verb-upcase/0002/cmd | 1 + test/cases/verb-upcase/0002/experr | 0 test/cases/verb-upcase/0002/expout | 10 + test/cases/verb-upcase/0003/cmd | 1 + test/cases/verb-upcase/0003/experr | 0 test/cases/verb-upcase/0003/expout | 10 + test/cases/verb-upcase/0004/cmd | 1 + test/cases/verb-upcase/0004/experr | 0 test/cases/verb-upcase/0004/expout | 10 + test/cases/verb-upcase/0005/cmd | 1 + test/cases/verb-upcase/0005/experr | 0 test/cases/verb-upcase/0005/expout | 10 + test/cases/verb-upcase/0006/cmd | 1 + test/cases/verb-upcase/0006/experr | 0 test/cases/verb-upcase/0006/expout | 10 + test/input/abixy-upper | 10 + 47 files changed, 664 insertions(+), 28 deletions(-) create mode 100644 internal/pkg/transformers/downcase.go create mode 100644 internal/pkg/transformers/upcase.go create mode 100644 test/cases/verb-downcase/0001/cmd create mode 100644 test/cases/verb-downcase/0001/experr create mode 100644 test/cases/verb-downcase/0001/expout create mode 100644 test/cases/verb-downcase/0002/cmd create mode 100644 test/cases/verb-downcase/0002/experr create mode 100644 test/cases/verb-downcase/0002/expout create mode 100644 test/cases/verb-downcase/0003/cmd create mode 100644 test/cases/verb-downcase/0003/experr create mode 100644 test/cases/verb-downcase/0003/expout create mode 100644 test/cases/verb-downcase/0004/cmd create mode 100644 test/cases/verb-downcase/0004/experr create mode 100644 test/cases/verb-downcase/0004/expout create mode 100644 test/cases/verb-downcase/0005/cmd create mode 100644 test/cases/verb-downcase/0005/experr create mode 100644 test/cases/verb-downcase/0005/expout create mode 100644 test/cases/verb-downcase/0006/cmd create mode 100644 test/cases/verb-downcase/0006/experr create mode 100644 test/cases/verb-downcase/0006/expout create mode 100644 test/cases/verb-upcase/0001/cmd create mode 100644 test/cases/verb-upcase/0001/experr create mode 100644 test/cases/verb-upcase/0001/expout create mode 100644 test/cases/verb-upcase/0002/cmd create mode 100644 test/cases/verb-upcase/0002/experr create mode 100644 test/cases/verb-upcase/0002/expout create mode 100644 test/cases/verb-upcase/0003/cmd create mode 100644 test/cases/verb-upcase/0003/experr create mode 100644 test/cases/verb-upcase/0003/expout create mode 100644 test/cases/verb-upcase/0004/cmd create mode 100644 test/cases/verb-upcase/0004/experr create mode 100644 test/cases/verb-upcase/0004/expout create mode 100644 test/cases/verb-upcase/0005/cmd create mode 100644 test/cases/verb-upcase/0005/experr create mode 100644 test/cases/verb-upcase/0005/expout create mode 100644 test/cases/verb-upcase/0006/cmd create mode 100644 test/cases/verb-upcase/0006/experr create mode 100644 test/cases/verb-upcase/0006/expout create mode 100644 test/input/abixy-upper diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 1e31d4269..deef1345a 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -191,13 +191,14 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count - count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + count-similar cut decimate downcase fill-down fill-empty filter flatten + format-values fraction gap grep group-by group-like having-fields head + histogram json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split stats1 stats2 step summary + tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify + upcase 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1002,6 +1003,14 @@ MILLER(1) MILLER(1) -n {n} Decimation factor (default 10). -h|--help Show this message. + 1mdowncase0m + Usage: mlr downcase [options] + Lowercases strings in record keys and/or values. + Options: + -k Downcase only keys, not keys and values. + -v Downcase only values, not keys and values. + -h|--help Show this message. + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from @@ -2111,6 +2120,14 @@ MILLER(1) MILLER(1) being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. + 1mupcase0m + Usage: mlr upcase [options] + Uppercases strings in record keys and/or values. + Options: + -k Upcase only keys, not keys and values. + -v Upcase only values, not keys and values. + -h|--help Show this message. + 1mFUNCTIONS FOR FILTER/PUT0m 1mabs0m (class=math #args=1) Absolute value. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 9651b66a1..36c4362e6 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -170,13 +170,14 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count - count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + count-similar cut decimate downcase fill-down fill-empty filter flatten + format-values fraction gap grep group-by group-like having-fields head + histogram json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split stats1 stats2 step summary + tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify + upcase 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -981,6 +982,14 @@ MILLER(1) MILLER(1) -n {n} Decimation factor (default 10). -h|--help Show this message. + 1mdowncase0m + Usage: mlr downcase [options] + Lowercases strings in record keys and/or values. + Options: + -k Downcase only keys, not keys and values. + -v Downcase only values, not keys and values. + -h|--help Show this message. + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from @@ -2090,6 +2099,14 @@ MILLER(1) MILLER(1) being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. + 1mupcase0m + Usage: mlr upcase [options] + Uppercases strings in record keys and/or values. + Options: + -k Upcase only keys, not keys and values. + -v Upcase only values, not keys and values. + -h|--help Show this message. + 1mFUNCTIONS FOR FILTER/PUT0m 1mabs0m (class=math #args=1) Absolute value. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 853adfc33..6d4cf6731 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -824,6 +824,20 @@ Options: -h|--help Show this message. +## downcase + +
+mlr downcase --help
+
+
+Usage: mlr downcase [options]
+Lowercases strings in record keys and/or values.
+Options:
+-k        Downcase only keys, not keys and values.
+-v        Downcase only values, not keys and values.
+-h|--help Show this message.
+
+ ## fill-down
@@ -4271,3 +4285,16 @@ a b v u w x
 - - 1 - 2 -
 
+## upcase + +
+mlr upcase --help
+
+
+Usage: mlr upcase [options]
+Uppercases strings in record keys and/or values.
+Options:
+-k        Upcase only keys, not keys and values.
+-v        Upcase only values, not keys and values.
+-h|--help Show this message.
+
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in index 40fbbe4f1..994c97155 100644 --- a/docs/src/reference-verbs.md.in +++ b/docs/src/reference-verbs.md.in @@ -297,6 +297,12 @@ GENMD-RUN-COMMAND mlr decimate --help GENMD-EOF +## downcase + +GENMD-RUN-COMMAND +mlr downcase --help +GENMD-EOF + ## fill-down GENMD-RUN-COMMAND @@ -1289,3 +1295,8 @@ GENMD-RUN-COMMAND mlr --ijson --opprint unsparsify -f a,b,u,v,w,x then regularize data/sparse.json GENMD-EOF +## upcase + +GENMD-RUN-COMMAND +mlr upcase --help +GENMD-EOF diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go index 50f8d0a63..995cf54a4 100644 --- a/internal/pkg/transformers/aaa_transformer_table.go +++ b/internal/pkg/transformers/aaa_transformer_table.go @@ -22,6 +22,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ CountSimilarSetup, CutSetup, DecimateSetup, + DowncaseSetup, FillDownSetup, FillEmptySetup, FilterSetup, @@ -75,6 +76,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ UniqSetup, UnspaceSetup, UnsparsifySetup, + UpcaseSetup, } func ShowHelpForTransformer(verb string) bool { diff --git a/internal/pkg/transformers/downcase.go b/internal/pkg/transformers/downcase.go new file mode 100644 index 000000000..de7879343 --- /dev/null +++ b/internal/pkg/transformers/downcase.go @@ -0,0 +1,178 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameDowncase = "downcase" + +var DowncaseSetup = TransformerSetup{ + Verb: verbNameDowncase, + UsageFunc: transformerDowncaseUsage, + ParseCLIFunc: transformerDowncaseParseCLI, + IgnoresInput: false, +} + +func transformerDowncaseUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameDowncase) + fmt.Fprintf(o, "Lowercases strings in record keys and/or values.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-k Downcase only keys, not keys and values.\n") + fmt.Fprintf(o, "-v Downcase only values, not keys and values.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerDowncaseParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + argi++ + + which := "keys_and_values" + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerDowncaseUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-k" { + which = "keys_only" + + } else if opt == "-v" { + which = "values_only" + + } else { + transformerDowncaseUsage(os.Stderr) + os.Exit(1) + } + } + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerDowncase(which) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerDowncase struct { + recordTransformerFunc RecordTransformerFunc +} + +func NewTransformerDowncase( + which string, +) (*TransformerDowncase, error) { + tr := &TransformerDowncase{} + if which == "keys_only" { + tr.recordTransformerFunc = tr.transformKeysOnly + } else if which == "values_only" { + tr.recordTransformerFunc = tr.transformValuesOnly + } else { + tr.recordTransformerFunc = tr.transformKeysAndValues + } + return tr, nil +} + +func (tr *TransformerDowncase) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + if !inrecAndContext.EndOfStream { + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + inputDownstreamDoneChannel, + outputDownstreamDoneChannel, + ) + } else { // end of record stream + outputRecordsAndContexts.PushBack(inrecAndContext) + } +} + +func (tr *TransformerDowncase) transformKeysOnly( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + newrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + newkey := strings.ToLower(pe.Key) + // Reference not copy since this is ownership transfer of the value from the now-abandoned inrec + newrec.PutReference(newkey, pe.Value) + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) +} + +func (tr *TransformerDowncase) transformValuesOnly( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + for pe := inrec.Head; pe != nil; pe = pe.Next { + stringval, ok := pe.Value.GetStringValue() + if ok { + pe.Value = mlrval.FromString(strings.ToLower(stringval)) + } + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(inrec, &inrecAndContext.Context)) +} + +func (tr *TransformerDowncase) transformKeysAndValues( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + newrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + newkey := strings.ToLower(pe.Key) + stringval, ok := pe.Value.GetStringValue() + if ok { + stringval = strings.ToLower(stringval) + newrec.PutReference(newkey, mlrval.FromString(stringval)) + } else { + newrec.PutReference(newkey, pe.Value) + } + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) +} diff --git a/internal/pkg/transformers/upcase.go b/internal/pkg/transformers/upcase.go new file mode 100644 index 000000000..3610a0a46 --- /dev/null +++ b/internal/pkg/transformers/upcase.go @@ -0,0 +1,178 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameUpcase = "upcase" + +var UpcaseSetup = TransformerSetup{ + Verb: verbNameUpcase, + UsageFunc: transformerUpcaseUsage, + ParseCLIFunc: transformerUpcaseParseCLI, + IgnoresInput: false, +} + +func transformerUpcaseUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameUpcase) + fmt.Fprintf(o, "Uppercases strings in record keys and/or values.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-k Upcase only keys, not keys and values.\n") + fmt.Fprintf(o, "-v Upcase only values, not keys and values.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerUpcaseParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + argi++ + + which := "keys_and_values" + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerUpcaseUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-k" { + which = "keys_only" + + } else if opt == "-v" { + which = "values_only" + + } else { + transformerUpcaseUsage(os.Stderr) + os.Exit(1) + } + } + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerUpcase(which) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerUpcase struct { + recordTransformerFunc RecordTransformerFunc +} + +func NewTransformerUpcase( + which string, +) (*TransformerUpcase, error) { + tr := &TransformerUpcase{} + if which == "keys_only" { + tr.recordTransformerFunc = tr.transformKeysOnly + } else if which == "values_only" { + tr.recordTransformerFunc = tr.transformValuesOnly + } else { + tr.recordTransformerFunc = tr.transformKeysAndValues + } + return tr, nil +} + +func (tr *TransformerUpcase) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + if !inrecAndContext.EndOfStream { + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + inputDownstreamDoneChannel, + outputDownstreamDoneChannel, + ) + } else { // end of record stream + outputRecordsAndContexts.PushBack(inrecAndContext) + } +} + +func (tr *TransformerUpcase) transformKeysOnly( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + newrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + newkey := strings.ToUpper(pe.Key) + // Reference not copy since this is ownership transfer of the value from the now-abandoned inrec + newrec.PutReference(newkey, pe.Value) + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) +} + +func (tr *TransformerUpcase) transformValuesOnly( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + for pe := inrec.Head; pe != nil; pe = pe.Next { + stringval, ok := pe.Value.GetStringValue() + if ok { + pe.Value = mlrval.FromString(strings.ToUpper(stringval)) + } + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(inrec, &inrecAndContext.Context)) +} + +func (tr *TransformerUpcase) transformKeysAndValues( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + _ <-chan bool, + __ chan<- bool, +) { + inrec := inrecAndContext.Record + newrec := mlrval.NewMlrmapAsRecord() + for pe := inrec.Head; pe != nil; pe = pe.Next { + newkey := strings.ToUpper(pe.Key) + stringval, ok := pe.Value.GetStringValue() + if ok { + stringval = strings.ToUpper(stringval) + newrec.PutReference(newkey, mlrval.FromString(stringval)) + } else { + newrec.PutReference(newkey, pe.Value) + } + } + outputRecordsAndContexts.PushBack(types.NewRecordAndContext(newrec, &inrecAndContext.Context)) +} diff --git a/man/manpage.txt b/man/manpage.txt index 9651b66a1..36c4362e6 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -170,13 +170,14 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count - count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + count-similar cut decimate downcase fill-down fill-empty filter flatten + format-values fraction gap grep group-by group-like having-fields head + histogram json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split stats1 stats2 step summary + tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify + upcase 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -981,6 +982,14 @@ MILLER(1) MILLER(1) -n {n} Decimation factor (default 10). -h|--help Show this message. + 1mdowncase0m + Usage: mlr downcase [options] + Lowercases strings in record keys and/or values. + Options: + -k Downcase only keys, not keys and values. + -v Downcase only values, not keys and values. + -h|--help Show this message. + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from @@ -2090,6 +2099,14 @@ MILLER(1) MILLER(1) being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. + 1mupcase0m + Usage: mlr upcase [options] + Uppercases strings in record keys and/or values. + Options: + -k Upcase only keys, not keys and values. + -v Upcase only values, not keys and values. + -h|--help Show this message. + 1mFUNCTIONS FOR FILTER/PUT0m 1mabs0m (class=math #args=1) Absolute value. diff --git a/man/mlr.1 b/man/mlr.1 index ff9da5df7..d963689c2 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -211,13 +211,14 @@ for all things with "map" in their names. .\} .nf altkv bar bootstrap cat check clean-whitespace count-distinct count -count-similar cut decimate fill-down fill-empty filter flatten format-values -fraction gap grep group-by group-like having-fields head histogram json-parse -json-stringify join label latin1-to-utf8 least-frequent merge-fields -most-frequent nest nothing put regularize remove-empty-columns rename reorder -repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records -sort sort-within-records split stats1 stats2 step summary tac tail tee -template top utf8-to-latin1 unflatten uniq unspace unsparsify +count-similar cut decimate downcase fill-down fill-empty filter flatten +format-values fraction gap grep group-by group-like having-fields head +histogram json-parse json-stringify join label latin1-to-utf8 least-frequent +merge-fields most-frequent nest nothing put regularize remove-empty-columns +rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle +skip-trivial-records sort sort-within-records split stats1 stats2 step summary +tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify +upcase .fi .if n \{\ .RE @@ -1220,6 +1221,20 @@ Options: .fi .if n \{\ .RE +.SS "downcase" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr downcase [options] +Lowercases strings in record keys and/or values. +Options: +-k Downcase only keys, not keys and values. +-v Downcase only values, not keys and values. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "fill-down" .if n \{\ .RS 0 @@ -2647,6 +2662,20 @@ being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and .fi .if n \{\ .RE +.SS "upcase" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr upcase [options] +Uppercases strings in record keys and/or values. +Options: +-k Upcase only keys, not keys and values. +-v Upcase only values, not keys and values. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SH "FUNCTIONS FOR FILTER/PUT" .sp diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index 100f50b19..46edcff11 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -139,6 +139,15 @@ Options: -n {n} Decimation factor (default 10). -h|--help Show this message. +================================================================ +downcase +Usage: mlr downcase [options] +Lowercases strings in record keys and/or values. +Options: +-k Downcase only keys, not keys and values. +-v Downcase only values, not keys and values. +-h|--help Show this message. + ================================================================ fill-down Usage: mlr fill-down [options] @@ -1300,4 +1309,13 @@ Options: Example: if the input is two records, one being 'a=1,b=2' and the other being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. + +================================================================ +upcase +Usage: mlr upcase [options] +Uppercases strings in record keys and/or values. +Options: +-k Upcase only keys, not keys and values. +-v Upcase only values, not keys and values. +-h|--help Show this message. ================================================================ diff --git a/test/cases/verb-downcase/0001/cmd b/test/cases/verb-downcase/0001/cmd new file mode 100644 index 000000000..80bd342ae --- /dev/null +++ b/test/cases/verb-downcase/0001/cmd @@ -0,0 +1 @@ +mlr downcase test/input/abixy diff --git a/test/cases/verb-downcase/0001/experr b/test/cases/verb-downcase/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0001/expout b/test/cases/verb-downcase/0001/expout new file mode 100644 index 000000000..940df3d11 --- /dev/null +++ b/test/cases/verb-downcase/0001/expout @@ -0,0 +1,10 @@ +a=pan,b=pan,i=1,x=0.34679014,y=0.72680286 +a=eks,b=pan,i=2,x=0.75867996,y=0.52215111 +a=wye,b=wye,i=3,x=0.20460331,y=0.33831853 +a=eks,b=wye,i=4,x=0.38139939,y=0.13418874 +a=wye,b=pan,i=5,x=0.57328892,y=0.86362447 +a=zee,b=pan,i=6,x=0.52712616,y=0.49322129 +a=eks,b=zee,i=7,x=0.61178406,y=0.18788492 +a=zee,b=wye,i=8,x=0.59855401,y=0.97618139 +a=hat,b=wye,i=9,x=0.03144188,y=0.74955076 +a=pan,b=wye,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-downcase/0002/cmd b/test/cases/verb-downcase/0002/cmd new file mode 100644 index 000000000..ae2356962 --- /dev/null +++ b/test/cases/verb-downcase/0002/cmd @@ -0,0 +1 @@ +mlr downcase -k test/input/abixy diff --git a/test/cases/verb-downcase/0002/experr b/test/cases/verb-downcase/0002/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0002/expout b/test/cases/verb-downcase/0002/expout new file mode 100644 index 000000000..940df3d11 --- /dev/null +++ b/test/cases/verb-downcase/0002/expout @@ -0,0 +1,10 @@ +a=pan,b=pan,i=1,x=0.34679014,y=0.72680286 +a=eks,b=pan,i=2,x=0.75867996,y=0.52215111 +a=wye,b=wye,i=3,x=0.20460331,y=0.33831853 +a=eks,b=wye,i=4,x=0.38139939,y=0.13418874 +a=wye,b=pan,i=5,x=0.57328892,y=0.86362447 +a=zee,b=pan,i=6,x=0.52712616,y=0.49322129 +a=eks,b=zee,i=7,x=0.61178406,y=0.18788492 +a=zee,b=wye,i=8,x=0.59855401,y=0.97618139 +a=hat,b=wye,i=9,x=0.03144188,y=0.74955076 +a=pan,b=wye,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-downcase/0003/cmd b/test/cases/verb-downcase/0003/cmd new file mode 100644 index 000000000..4414236c5 --- /dev/null +++ b/test/cases/verb-downcase/0003/cmd @@ -0,0 +1 @@ +mlr downcase -v test/input/abixy diff --git a/test/cases/verb-downcase/0003/experr b/test/cases/verb-downcase/0003/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0003/expout b/test/cases/verb-downcase/0003/expout new file mode 100644 index 000000000..940df3d11 --- /dev/null +++ b/test/cases/verb-downcase/0003/expout @@ -0,0 +1,10 @@ +a=pan,b=pan,i=1,x=0.34679014,y=0.72680286 +a=eks,b=pan,i=2,x=0.75867996,y=0.52215111 +a=wye,b=wye,i=3,x=0.20460331,y=0.33831853 +a=eks,b=wye,i=4,x=0.38139939,y=0.13418874 +a=wye,b=pan,i=5,x=0.57328892,y=0.86362447 +a=zee,b=pan,i=6,x=0.52712616,y=0.49322129 +a=eks,b=zee,i=7,x=0.61178406,y=0.18788492 +a=zee,b=wye,i=8,x=0.59855401,y=0.97618139 +a=hat,b=wye,i=9,x=0.03144188,y=0.74955076 +a=pan,b=wye,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-downcase/0004/cmd b/test/cases/verb-downcase/0004/cmd new file mode 100644 index 000000000..9f249fac6 --- /dev/null +++ b/test/cases/verb-downcase/0004/cmd @@ -0,0 +1 @@ +mlr downcase test/input/abixy-upper diff --git a/test/cases/verb-downcase/0004/experr b/test/cases/verb-downcase/0004/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0004/expout b/test/cases/verb-downcase/0004/expout new file mode 100644 index 000000000..940df3d11 --- /dev/null +++ b/test/cases/verb-downcase/0004/expout @@ -0,0 +1,10 @@ +a=pan,b=pan,i=1,x=0.34679014,y=0.72680286 +a=eks,b=pan,i=2,x=0.75867996,y=0.52215111 +a=wye,b=wye,i=3,x=0.20460331,y=0.33831853 +a=eks,b=wye,i=4,x=0.38139939,y=0.13418874 +a=wye,b=pan,i=5,x=0.57328892,y=0.86362447 +a=zee,b=pan,i=6,x=0.52712616,y=0.49322129 +a=eks,b=zee,i=7,x=0.61178406,y=0.18788492 +a=zee,b=wye,i=8,x=0.59855401,y=0.97618139 +a=hat,b=wye,i=9,x=0.03144188,y=0.74955076 +a=pan,b=wye,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-downcase/0005/cmd b/test/cases/verb-downcase/0005/cmd new file mode 100644 index 000000000..4ffd2e10a --- /dev/null +++ b/test/cases/verb-downcase/0005/cmd @@ -0,0 +1 @@ +mlr downcase -k test/input/abixy-upper diff --git a/test/cases/verb-downcase/0005/experr b/test/cases/verb-downcase/0005/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0005/expout b/test/cases/verb-downcase/0005/expout new file mode 100644 index 000000000..38ec42e3a --- /dev/null +++ b/test/cases/verb-downcase/0005/expout @@ -0,0 +1,10 @@ +a=PAN,b=PAN,i=1,x=0.34679014,y=0.72680286 +a=EKS,b=PAN,i=2,x=0.75867996,y=0.52215111 +a=WYE,b=WYE,i=3,x=0.20460331,y=0.33831853 +a=EKS,b=WYE,i=4,x=0.38139939,y=0.13418874 +a=WYE,b=PAN,i=5,x=0.57328892,y=0.86362447 +a=ZEE,b=PAN,i=6,x=0.52712616,y=0.49322129 +a=EKS,b=ZEE,i=7,x=0.61178406,y=0.18788492 +a=ZEE,b=WYE,i=8,x=0.59855401,y=0.97618139 +a=HAT,b=WYE,i=9,x=0.03144188,y=0.74955076 +a=PAN,b=WYE,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-downcase/0006/cmd b/test/cases/verb-downcase/0006/cmd new file mode 100644 index 000000000..47d9c872d --- /dev/null +++ b/test/cases/verb-downcase/0006/cmd @@ -0,0 +1 @@ +mlr downcase -v test/input/abixy-upper diff --git a/test/cases/verb-downcase/0006/experr b/test/cases/verb-downcase/0006/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-downcase/0006/expout b/test/cases/verb-downcase/0006/expout new file mode 100644 index 000000000..db7c659b4 --- /dev/null +++ b/test/cases/verb-downcase/0006/expout @@ -0,0 +1,10 @@ +A=pan,B=pan,I=1,X=0.34679014,Y=0.72680286 +A=eks,B=pan,I=2,X=0.75867996,Y=0.52215111 +A=wye,B=wye,I=3,X=0.20460331,Y=0.33831853 +A=eks,B=wye,I=4,X=0.38139939,Y=0.13418874 +A=wye,B=pan,I=5,X=0.57328892,Y=0.86362447 +A=zee,B=pan,I=6,X=0.52712616,Y=0.49322129 +A=eks,B=zee,I=7,X=0.61178406,Y=0.18788492 +A=zee,B=wye,I=8,X=0.59855401,Y=0.97618139 +A=hat,B=wye,I=9,X=0.03144188,Y=0.74955076 +A=pan,B=wye,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/cases/verb-upcase/0001/cmd b/test/cases/verb-upcase/0001/cmd new file mode 100644 index 000000000..964df9d6d --- /dev/null +++ b/test/cases/verb-upcase/0001/cmd @@ -0,0 +1 @@ +mlr upcase test/input/abixy diff --git a/test/cases/verb-upcase/0001/experr b/test/cases/verb-upcase/0001/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0001/expout b/test/cases/verb-upcase/0001/expout new file mode 100644 index 000000000..e38b25f22 --- /dev/null +++ b/test/cases/verb-upcase/0001/expout @@ -0,0 +1,10 @@ +A=PAN,B=PAN,I=1,X=0.34679014,Y=0.72680286 +A=EKS,B=PAN,I=2,X=0.75867996,Y=0.52215111 +A=WYE,B=WYE,I=3,X=0.20460331,Y=0.33831853 +A=EKS,B=WYE,I=4,X=0.38139939,Y=0.13418874 +A=WYE,B=PAN,I=5,X=0.57328892,Y=0.86362447 +A=ZEE,B=PAN,I=6,X=0.52712616,Y=0.49322129 +A=EKS,B=ZEE,I=7,X=0.61178406,Y=0.18788492 +A=ZEE,B=WYE,I=8,X=0.59855401,Y=0.97618139 +A=HAT,B=WYE,I=9,X=0.03144188,Y=0.74955076 +A=PAN,B=WYE,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/cases/verb-upcase/0002/cmd b/test/cases/verb-upcase/0002/cmd new file mode 100644 index 000000000..8a1abf078 --- /dev/null +++ b/test/cases/verb-upcase/0002/cmd @@ -0,0 +1 @@ +mlr upcase -k test/input/abixy diff --git a/test/cases/verb-upcase/0002/experr b/test/cases/verb-upcase/0002/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0002/expout b/test/cases/verb-upcase/0002/expout new file mode 100644 index 000000000..db7c659b4 --- /dev/null +++ b/test/cases/verb-upcase/0002/expout @@ -0,0 +1,10 @@ +A=pan,B=pan,I=1,X=0.34679014,Y=0.72680286 +A=eks,B=pan,I=2,X=0.75867996,Y=0.52215111 +A=wye,B=wye,I=3,X=0.20460331,Y=0.33831853 +A=eks,B=wye,I=4,X=0.38139939,Y=0.13418874 +A=wye,B=pan,I=5,X=0.57328892,Y=0.86362447 +A=zee,B=pan,I=6,X=0.52712616,Y=0.49322129 +A=eks,B=zee,I=7,X=0.61178406,Y=0.18788492 +A=zee,B=wye,I=8,X=0.59855401,Y=0.97618139 +A=hat,B=wye,I=9,X=0.03144188,Y=0.74955076 +A=pan,B=wye,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/cases/verb-upcase/0003/cmd b/test/cases/verb-upcase/0003/cmd new file mode 100644 index 000000000..019bef21f --- /dev/null +++ b/test/cases/verb-upcase/0003/cmd @@ -0,0 +1 @@ +mlr upcase -v test/input/abixy diff --git a/test/cases/verb-upcase/0003/experr b/test/cases/verb-upcase/0003/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0003/expout b/test/cases/verb-upcase/0003/expout new file mode 100644 index 000000000..38ec42e3a --- /dev/null +++ b/test/cases/verb-upcase/0003/expout @@ -0,0 +1,10 @@ +a=PAN,b=PAN,i=1,x=0.34679014,y=0.72680286 +a=EKS,b=PAN,i=2,x=0.75867996,y=0.52215111 +a=WYE,b=WYE,i=3,x=0.20460331,y=0.33831853 +a=EKS,b=WYE,i=4,x=0.38139939,y=0.13418874 +a=WYE,b=PAN,i=5,x=0.57328892,y=0.86362447 +a=ZEE,b=PAN,i=6,x=0.52712616,y=0.49322129 +a=EKS,b=ZEE,i=7,x=0.61178406,y=0.18788492 +a=ZEE,b=WYE,i=8,x=0.59855401,y=0.97618139 +a=HAT,b=WYE,i=9,x=0.03144188,y=0.74955076 +a=PAN,b=WYE,i=10,x=0.50262601,y=0.95261836 diff --git a/test/cases/verb-upcase/0004/cmd b/test/cases/verb-upcase/0004/cmd new file mode 100644 index 000000000..ddbc84c79 --- /dev/null +++ b/test/cases/verb-upcase/0004/cmd @@ -0,0 +1 @@ +mlr upcase test/input/abixy-upper diff --git a/test/cases/verb-upcase/0004/experr b/test/cases/verb-upcase/0004/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0004/expout b/test/cases/verb-upcase/0004/expout new file mode 100644 index 000000000..e38b25f22 --- /dev/null +++ b/test/cases/verb-upcase/0004/expout @@ -0,0 +1,10 @@ +A=PAN,B=PAN,I=1,X=0.34679014,Y=0.72680286 +A=EKS,B=PAN,I=2,X=0.75867996,Y=0.52215111 +A=WYE,B=WYE,I=3,X=0.20460331,Y=0.33831853 +A=EKS,B=WYE,I=4,X=0.38139939,Y=0.13418874 +A=WYE,B=PAN,I=5,X=0.57328892,Y=0.86362447 +A=ZEE,B=PAN,I=6,X=0.52712616,Y=0.49322129 +A=EKS,B=ZEE,I=7,X=0.61178406,Y=0.18788492 +A=ZEE,B=WYE,I=8,X=0.59855401,Y=0.97618139 +A=HAT,B=WYE,I=9,X=0.03144188,Y=0.74955076 +A=PAN,B=WYE,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/cases/verb-upcase/0005/cmd b/test/cases/verb-upcase/0005/cmd new file mode 100644 index 000000000..177ef762b --- /dev/null +++ b/test/cases/verb-upcase/0005/cmd @@ -0,0 +1 @@ +mlr upcase -k test/input/abixy-upper diff --git a/test/cases/verb-upcase/0005/experr b/test/cases/verb-upcase/0005/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0005/expout b/test/cases/verb-upcase/0005/expout new file mode 100644 index 000000000..e38b25f22 --- /dev/null +++ b/test/cases/verb-upcase/0005/expout @@ -0,0 +1,10 @@ +A=PAN,B=PAN,I=1,X=0.34679014,Y=0.72680286 +A=EKS,B=PAN,I=2,X=0.75867996,Y=0.52215111 +A=WYE,B=WYE,I=3,X=0.20460331,Y=0.33831853 +A=EKS,B=WYE,I=4,X=0.38139939,Y=0.13418874 +A=WYE,B=PAN,I=5,X=0.57328892,Y=0.86362447 +A=ZEE,B=PAN,I=6,X=0.52712616,Y=0.49322129 +A=EKS,B=ZEE,I=7,X=0.61178406,Y=0.18788492 +A=ZEE,B=WYE,I=8,X=0.59855401,Y=0.97618139 +A=HAT,B=WYE,I=9,X=0.03144188,Y=0.74955076 +A=PAN,B=WYE,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/cases/verb-upcase/0006/cmd b/test/cases/verb-upcase/0006/cmd new file mode 100644 index 000000000..a92082c3a --- /dev/null +++ b/test/cases/verb-upcase/0006/cmd @@ -0,0 +1 @@ +mlr upcase -v test/input/abixy-upper diff --git a/test/cases/verb-upcase/0006/experr b/test/cases/verb-upcase/0006/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-upcase/0006/expout b/test/cases/verb-upcase/0006/expout new file mode 100644 index 000000000..e38b25f22 --- /dev/null +++ b/test/cases/verb-upcase/0006/expout @@ -0,0 +1,10 @@ +A=PAN,B=PAN,I=1,X=0.34679014,Y=0.72680286 +A=EKS,B=PAN,I=2,X=0.75867996,Y=0.52215111 +A=WYE,B=WYE,I=3,X=0.20460331,Y=0.33831853 +A=EKS,B=WYE,I=4,X=0.38139939,Y=0.13418874 +A=WYE,B=PAN,I=5,X=0.57328892,Y=0.86362447 +A=ZEE,B=PAN,I=6,X=0.52712616,Y=0.49322129 +A=EKS,B=ZEE,I=7,X=0.61178406,Y=0.18788492 +A=ZEE,B=WYE,I=8,X=0.59855401,Y=0.97618139 +A=HAT,B=WYE,I=9,X=0.03144188,Y=0.74955076 +A=PAN,B=WYE,I=10,X=0.50262601,Y=0.95261836 diff --git a/test/input/abixy-upper b/test/input/abixy-upper new file mode 100644 index 000000000..0abb59eda --- /dev/null +++ b/test/input/abixy-upper @@ -0,0 +1,10 @@ +A=PAN,B=PAN,I=1,X=0.3467901443380824,Y=0.7268028627434533 +A=EKS,B=PAN,I=2,X=0.7586799647899636,Y=0.5221511083334797 +A=WYE,B=WYE,I=3,X=0.20460330576630303,Y=0.33831852551664776 +A=EKS,B=WYE,I=4,X=0.38139939387114097,Y=0.13418874328430463 +A=WYE,B=PAN,I=5,X=0.5732889198020006,Y=0.8636244699032729 +A=ZEE,B=PAN,I=6,X=0.5271261600918548,Y=0.49322128674835697 +A=EKS,B=ZEE,I=7,X=0.6117840605678454,Y=0.1878849191181694 +A=ZEE,B=WYE,I=8,X=0.5985540091064224,Y=0.976181385699006 +A=HAT,B=WYE,I=9,X=0.03144187646093577,Y=0.7495507603507059 +A=PAN,B=WYE,I=10,X=0.5026260055412137,Y=0.9526183602969864