From dafec6dc0917dd0f2d10006a90181466e635e1b5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 20 Aug 2022 08:42:15 -0400 Subject: [PATCH] mlr cat --filename / --filenum (#1080) * mlr cat --filename / --filenum * codespell --- .codespellignore | 1 + docs/src/data-diving-examples.md | 2 +- docs/src/data-diving-examples.md.in | 2 +- docs/src/manpage.md | 4 +- docs/src/manpage.txt | 4 +- docs/src/new-in-miller-6.md | 2 +- docs/src/new-in-miller-6.md.in | 2 +- docs/src/online-help.md | 2 + docs/src/reference-dsl-output-statements.md | 2 +- .../src/reference-dsl-output-statements.md.in | 2 +- docs/src/reference-verbs.md | 2 + internal/pkg/transformers/cat.go | 48 ++++++++++++++++++- man/manpage.txt | 4 +- man/mlr.1 | 6 ++- test/cases/cli-help/0001/expout | 2 + test/cases/verb-cat/0015/cmd | 1 + test/cases/verb-cat/0015/experr | 0 test/cases/verb-cat/0015/expout | 10 ++++ 18 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 test/cases/verb-cat/0015/cmd create mode 100644 test/cases/verb-cat/0015/experr create mode 100644 test/cases/verb-cat/0015/expout diff --git a/.codespellignore b/.codespellignore index b25517b0f..1279206ca 100644 --- a/.codespellignore +++ b/.codespellignore @@ -8,3 +8,4 @@ numer Wit te wee +RO diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 99bcf588c..4a6275403 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -286,7 +286,7 @@ bin_lo bin_hi flag_count u_count v_count 1.0900000000000003 1.1900000000000002 0 0 25 -Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probablities from the data-generator script: +Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probabilities from the data-generator script:
 mlr --opprint stats1 -a min,mean,max -f flag,u,v -g color \
diff --git a/docs/src/data-diving-examples.md.in b/docs/src/data-diving-examples.md.in
index 2b63c97a1..d83a41eaa 100644
--- a/docs/src/data-diving-examples.md.in
+++ b/docs/src/data-diving-examples.md.in
@@ -93,7 +93,7 @@ GENMD-RUN-COMMAND
 mlr --opprint histogram -f flag,u,v --lo -0.1 --hi 1.1 --nbins 12 data/colored-shapes.dkvp
 GENMD-EOF
 
-Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probablities from the data-generator script:
+Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probabilities from the data-generator script:
 
 GENMD-RUN-COMMAND
 mlr --opprint stats1 -a min,mean,max -f flag,u,v -g color \
diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index e3a9d6212..59ee1aebb 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -905,6 +905,8 @@ VERBS
        -n         Prepend field "n" to each record with record-counter starting at 1.
        -N {name}  Prepend field {name} to each record with record-counter starting at 1.
        -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c
+       --filename Prepend current filename to each record.
+       --filenum  Prepend current filenum (1-up) to each record.
        -h|--help Show this message.
 
    check
@@ -3280,5 +3282,5 @@ SEE ALSO
 
 
 
-                                  2022-08-14                         MILLER(1)
+                                  2022-08-20                         MILLER(1)
 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index dcdc20398..8fb88d820 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -884,6 +884,8 @@ VERBS -n Prepend field "n" to each record with record-counter starting at 1. -N {name} Prepend field {name} to each record with record-counter starting at 1. -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c + --filename Prepend current filename to each record. + --filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. check @@ -3259,4 +3261,4 @@ SEE ALSO - 2022-08-14 MILLER(1) + 2022-08-20 MILLER(1) diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md index e0cbbbe9c..2620dea51 100644 --- a/docs/src/new-in-miller-6.md +++ b/docs/src/new-in-miller-6.md @@ -84,7 +84,7 @@ For `mlr put` and `mlr filter`, parse-error messages now include location inform
 mlr: cannot parse DSL expression.
-Parse error on token ">" at line 63 columnn 7.
+Parse error on token ">" at line 63 column 7.
 
### Scripting diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in index cae4192db..a314ea24a 100644 --- a/docs/src/new-in-miller-6.md.in +++ b/docs/src/new-in-miller-6.md.in @@ -68,7 +68,7 @@ For `mlr put` and `mlr filter`, parse-error messages now include location inform GENMD-CARDIFY mlr: cannot parse DSL expression. -Parse error on token ">" at line 63 columnn 7. +Parse error on token ">" at line 63 column 7. GENMD-EOF ### Scripting diff --git a/docs/src/online-help.md b/docs/src/online-help.md index 08111629d..6fec75c6c 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -179,6 +179,8 @@ Options: -n Prepend field "n" to each record with record-counter starting at 1. -N {name} Prepend field {name} to each record with record-counter starting at 1. -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c +--filename Prepend current filename to each record. +--filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. diff --git a/docs/src/reference-dsl-output-statements.md b/docs/src/reference-dsl-output-statements.md index 46e7f89d2..0984b1fd5 100644 --- a/docs/src/reference-dsl-output-statements.md +++ b/docs/src/reference-dsl-output-statements.md @@ -342,7 +342,7 @@ JSON vs. tabular formats](flatten-unflatten.md) for more information. The reason for this is part historical and part technical. As we'll see below, you can do lots of syntactical things with `emit`, `emitp`, and `emitf`, including printing them side-by-side, index them, redirect the output to files, -etc. What this means syntatically is that Miller's parser needs to handle all +etc. What this means syntactically is that Miller's parser needs to handle all sorts of commas, parentheses, and so on:
diff --git a/docs/src/reference-dsl-output-statements.md.in b/docs/src/reference-dsl-output-statements.md.in
index dabf0f433..3b42c2bc7 100644
--- a/docs/src/reference-dsl-output-statements.md.in
+++ b/docs/src/reference-dsl-output-statements.md.in
@@ -153,7 +153,7 @@ JSON vs. tabular formats](flatten-unflatten.md) for more information.
 The reason for this is part historical and part technical. As we'll see below,
 you can do lots of syntactical things with `emit`, `emitp`, and `emitf`,
 including printing them side-by-side, index them, redirect the output to files,
-etc. What this means syntatically is that Miller's parser needs to handle all
+etc. What this means syntactically is that Miller's parser needs to handle all
 sorts of commas, parentheses, and so on:
 
 GENMD-CARDIFY
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index c9dab6202..9cc5b7f84 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -247,6 +247,8 @@ Options:
 -n         Prepend field "n" to each record with record-counter starting at 1.
 -N {name}  Prepend field {name} to each record with record-counter starting at 1.
 -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c
+--filename Prepend current filename to each record.
+--filenum  Prepend current filenum (1-up) to each record.
 -h|--help Show this message.
 
diff --git a/internal/pkg/transformers/cat.go b/internal/pkg/transformers/cat.go index ee1f74d0d..045b57a73 100644 --- a/internal/pkg/transformers/cat.go +++ b/internal/pkg/transformers/cat.go @@ -30,6 +30,8 @@ func transformerCatUsage( fmt.Fprintf(o, "-n Prepend field \"n\" to each record with record-counter starting at 1.\n") fmt.Fprintf(o, "-N {name} Prepend field {name} to each record with record-counter starting at 1.\n") fmt.Fprintf(o, "-g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c\n") + fmt.Fprintf(o, "--filename Prepend current filename to each record.\n") + fmt.Fprintf(o, "--filenum Prepend current filenum (1-up) to each record.\n") fmt.Fprintf(o, "-h|--help Show this message.\n") } @@ -50,6 +52,8 @@ func transformerCatParseCLI( doCounters := false counterFieldName := "" var groupByFieldNames []string = nil + doFileName := false + doFileNum := false for argi < argc /* variable increment: 1 or 2 depending on flag */ { opt := args[argi] @@ -74,6 +78,12 @@ func transformerCatParseCLI( } else if opt == "-g" { groupByFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else if opt == "--filename" { + doFileName = true + + } else if opt == "--filenum" { + doFileNum = true + } else { transformerCatUsage(os.Stderr) os.Exit(1) @@ -89,6 +99,8 @@ func transformerCatParseCLI( doCounters, counterFieldName, groupByFieldNames, + doFileName, + doFileNum, ) if err != nil { fmt.Fprintln(os.Stderr, err) @@ -107,6 +119,9 @@ type TransformerCat struct { countsByGroup map[string]int64 counterFieldName string + doFileName bool + doFileNum bool + recordTransformerFunc RecordTransformerFunc } @@ -115,6 +130,8 @@ func NewTransformerCat( doCounters bool, counterFieldName string, groupByFieldNames []string, + doFileName bool, + doFileNum bool, ) (*TransformerCat, error) { if counterFieldName != "" { @@ -127,6 +144,8 @@ func NewTransformerCat( counter: 0, countsByGroup: make(map[string]int64), counterFieldName: counterFieldName, + doFileName: doFileName, + doFileNum: doFileNum, } if !doCounters { @@ -151,7 +170,12 @@ func (tr *TransformerCat) Transform( outputDownstreamDoneChannel chan<- bool, ) { HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) - tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel) + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + inputDownstreamDoneChannel, + outputDownstreamDoneChannel, + ) } // ---------------------------------------------------------------- @@ -161,6 +185,14 @@ func (tr *TransformerCat) simpleCat( inputDownstreamDoneChannel <-chan bool, outputDownstreamDoneChannel chan<- bool, ) { + if !inrecAndContext.EndOfStream { + if tr.doFileName { + inrecAndContext.Record.PrependCopy("filename", mlrval.FromString(inrecAndContext.Context.FILENAME)) + } + if tr.doFileNum { + inrecAndContext.Record.PrependCopy("filenum", mlrval.FromInt(inrecAndContext.Context.FILENUM)) + } + } outputRecordsAndContexts.PushBack(inrecAndContext) } @@ -176,6 +208,13 @@ func (tr *TransformerCat) countersUngrouped( tr.counter++ key := tr.counterFieldName inrec.PrependCopy(key, mlrval.FromInt(tr.counter)) + + if tr.doFileName { + inrec.PrependCopy("filename", mlrval.FromString(inrecAndContext.Context.FILENAME)) + } + if tr.doFileNum { + inrec.PrependCopy("filenum", mlrval.FromInt(inrecAndContext.Context.FILENUM)) + } } outputRecordsAndContexts.PushBack(inrecAndContext) } @@ -208,6 +247,13 @@ func (tr *TransformerCat) countersGrouped( key := tr.counterFieldName inrec.PrependCopy(key, mlrval.FromInt(counter)) + + if tr.doFileName { + inrec.PrependCopy("filename", mlrval.FromString(inrecAndContext.Context.FILENAME)) + } + if tr.doFileNum { + inrec.PrependCopy("filenum", mlrval.FromInt(inrecAndContext.Context.FILENUM)) + } } outputRecordsAndContexts.PushBack(inrecAndContext) } diff --git a/man/manpage.txt b/man/manpage.txt index dcdc20398..8fb88d820 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -884,6 +884,8 @@ VERBS -n Prepend field "n" to each record with record-counter starting at 1. -N {name} Prepend field {name} to each record with record-counter starting at 1. -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c + --filename Prepend current filename to each record. + --filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. check @@ -3259,4 +3261,4 @@ SEE ALSO - 2022-08-14 MILLER(1) + 2022-08-20 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 7f963420b..7c405657b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2022-08-14 +.\" Date: 2022-08-20 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2022-08-14" "\ \&" "\ \&" +.TH "MILLER" "1" "2022-08-20" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1071,6 +1071,8 @@ Options: -n Prepend field "n" to each record with record-counter starting at 1. -N {name} Prepend field {name} to each record with record-counter starting at 1. -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c +--filename Prepend current filename to each record. +--filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. .fi .if n \{\ diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index a3cae902f..861ed329c 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -42,6 +42,8 @@ Options: -n Prepend field "n" to each record with record-counter starting at 1. -N {name} Prepend field {name} to each record with record-counter starting at 1. -g {a,b,c} Optional group-by-field names for counters, e.g. a,b,c +--filename Prepend current filename to each record. +--filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. ================================================================ diff --git a/test/cases/verb-cat/0015/cmd b/test/cases/verb-cat/0015/cmd new file mode 100644 index 000000000..4e4f0a18c --- /dev/null +++ b/test/cases/verb-cat/0015/cmd @@ -0,0 +1 @@ +mlr cat --filename --filenum test/input/abixy diff --git a/test/cases/verb-cat/0015/experr b/test/cases/verb-cat/0015/experr new file mode 100644 index 000000000..e69de29bb diff --git a/test/cases/verb-cat/0015/expout b/test/cases/verb-cat/0015/expout new file mode 100644 index 000000000..eb75b16c0 --- /dev/null +++ b/test/cases/verb-cat/0015/expout @@ -0,0 +1,10 @@ +filenum=1,filename=test/input/abixy,a=pan,b=pan,i=1,x=0.34679014,y=0.72680286 +filenum=1,filename=test/input/abixy,a=eks,b=pan,i=2,x=0.75867996,y=0.52215111 +filenum=1,filename=test/input/abixy,a=wye,b=wye,i=3,x=0.20460331,y=0.33831853 +filenum=1,filename=test/input/abixy,a=eks,b=wye,i=4,x=0.38139939,y=0.13418874 +filenum=1,filename=test/input/abixy,a=wye,b=pan,i=5,x=0.57328892,y=0.86362447 +filenum=1,filename=test/input/abixy,a=zee,b=pan,i=6,x=0.52712616,y=0.49322129 +filenum=1,filename=test/input/abixy,a=eks,b=zee,i=7,x=0.61178406,y=0.18788492 +filenum=1,filename=test/input/abixy,a=zee,b=wye,i=8,x=0.59855401,y=0.97618139 +filenum=1,filename=test/input/abixy,a=hat,b=wye,i=9,x=0.03144188,y=0.74955076 +filenum=1,filename=test/input/abixy,a=pan,b=wye,i=10,x=0.50262601,y=0.95261836