mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Add --left-keep-fields option for mlr join (#967)
* Add --left-keep-fields option for mlr join * on-line help for mlr join --lk * doc-build artifacts * test cases
This commit is contained in:
parent
f6d897bf7d
commit
9b806f9561
35 changed files with 115 additions and 14 deletions
|
|
@ -1269,6 +1269,9 @@ VERBS
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
|
|
@ -1248,6 +1248,9 @@ VERBS
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
|
|
@ -1572,6 +1572,9 @@ Options:
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ type tJoinOptions struct {
|
|||
rightPrefix string
|
||||
|
||||
outputJoinFieldNames []string
|
||||
leftKeepFieldNames []string
|
||||
leftJoinFieldNames []string
|
||||
rightJoinFieldNames []string
|
||||
|
||||
|
|
@ -57,6 +58,7 @@ func newJoinOptions() *tJoinOptions {
|
|||
rightPrefix: "",
|
||||
|
||||
outputJoinFieldNames: nil,
|
||||
leftKeepFieldNames: nil,
|
||||
leftJoinFieldNames: nil,
|
||||
rightJoinFieldNames: nil,
|
||||
|
||||
|
|
@ -89,6 +91,9 @@ func transformerJoinUsage(
|
|||
fmt.Fprintf(o, " defaults to -j values if omitted.\n")
|
||||
fmt.Fprintf(o, " -r {a,b,c} Comma-separated join-field names for right input file(s);\n")
|
||||
fmt.Fprintf(o, " defaults to -j values if omitted.\n")
|
||||
fmt.Fprintf(o, " --lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field\n")
|
||||
fmt.Fprintf(o, " names from the left file. Automatically includes the join-field name(s). Helpful\n")
|
||||
fmt.Fprintf(o, " for when you only want a limited subset of information from the left file.\n")
|
||||
fmt.Fprintf(o, " --lp {text} Additional prefix for non-join output field names from\n")
|
||||
fmt.Fprintf(o, " the left file\n")
|
||||
fmt.Fprintf(o, " --rp {text} Additional prefix for non-join output field names from\n")
|
||||
|
|
@ -185,6 +190,9 @@ func transformerJoinParseCLI(
|
|||
} else if opt == "-l" {
|
||||
opts.leftJoinFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "--lk" || opt == "--left-keep-field-names" {
|
||||
opts.leftKeepFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
} else if opt == "-r" {
|
||||
opts.rightJoinFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
|
||||
|
||||
|
|
@ -280,8 +288,9 @@ func transformerJoinParseCLI(
|
|||
type TransformerJoin struct {
|
||||
opts *tJoinOptions
|
||||
|
||||
leftFieldNameSet map[string]bool
|
||||
rightFieldNameSet map[string]bool
|
||||
leftFieldNameSet map[string]bool
|
||||
rightFieldNameSet map[string]bool
|
||||
leftKeepFieldNameSet map[string]bool
|
||||
|
||||
// For unsorted/half-streaming input
|
||||
ingested bool
|
||||
|
|
@ -302,14 +311,23 @@ func NewTransformerJoin(
|
|||
tr := &TransformerJoin{
|
||||
opts: opts,
|
||||
|
||||
leftFieldNameSet: lib.StringListToSet(opts.leftJoinFieldNames),
|
||||
rightFieldNameSet: lib.StringListToSet(opts.rightJoinFieldNames),
|
||||
leftFieldNameSet: lib.StringListToSet(opts.leftJoinFieldNames),
|
||||
rightFieldNameSet: lib.StringListToSet(opts.rightJoinFieldNames),
|
||||
leftKeepFieldNameSet: lib.StringListToSet(opts.leftKeepFieldNames),
|
||||
|
||||
ingested: false,
|
||||
leftBucketsByJoinFieldValues: nil,
|
||||
leftUnpairableRecordsAndContexts: nil,
|
||||
joinBucketKeeper: nil,
|
||||
}
|
||||
// Suppose left file has "id,foo,bar" and right has "id,baz,quux" and the join field name is
|
||||
// "id". If they ask for --lk id,foo we should keep only id,foo from the left file. But if
|
||||
// they ask for --lk foo we should keep id *and* foo from the left file.
|
||||
if tr.leftKeepFieldNameSet != nil {
|
||||
for _, name := range opts.leftJoinFieldNames {
|
||||
tr.leftKeepFieldNameSet[name] = true
|
||||
}
|
||||
}
|
||||
|
||||
if opts.allowUnsortedInput {
|
||||
// Half-streaming (default) case: ingest entire left file first.
|
||||
|
|
@ -325,10 +343,11 @@ func NewTransformerJoin(
|
|||
// too much RAM.
|
||||
|
||||
tr.joinBucketKeeper = utils.NewJoinBucketKeeper(
|
||||
// opts.prepipe,
|
||||
// opts.prepipe,
|
||||
opts.leftFileName,
|
||||
&opts.joinFlagOptions.ReaderOptions,
|
||||
opts.leftJoinFieldNames,
|
||||
tr.leftKeepFieldNameSet,
|
||||
)
|
||||
|
||||
tr.recordTransformerFunc = tr.transformDoublyStreaming
|
||||
|
|
@ -346,7 +365,8 @@ func (tr *TransformerJoin) Transform(
|
|||
outputDownstreamDoneChannel chan<- bool,
|
||||
) {
|
||||
HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
||||
tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
||||
tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts,
|
||||
inputDownstreamDoneChannel, outputDownstreamDoneChannel)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
|
@ -500,6 +520,7 @@ func (tr *TransformerJoin) ingestLeftFile() {
|
|||
// TODO: temp for batch-reader refactor
|
||||
lib.InternalCodingErrorIf(leftrecsAndContexts.Len() != 1)
|
||||
leftrecAndContext := leftrecsAndContexts.Front().Value.(*types.RecordAndContext)
|
||||
leftrecAndContext.Record = utils.KeepLeftFieldNames(leftrecAndContext.Record, tr.leftKeepFieldNameSet)
|
||||
|
||||
if leftrecAndContext.EndOfStream {
|
||||
done = true
|
||||
|
|
|
|||
|
|
@ -131,7 +131,8 @@ type JoinBucketKeeper struct {
|
|||
// TODO: merge with leof flag
|
||||
recordReaderDone bool
|
||||
|
||||
leftJoinFieldNames []string
|
||||
leftJoinFieldNames []string
|
||||
leftKeepFieldNameSet map[string]bool
|
||||
|
||||
// Given a left-file of the following form (with left-join-field name "L"):
|
||||
// +-----+
|
||||
|
|
@ -163,6 +164,7 @@ func NewJoinBucketKeeper(
|
|||
leftFileName string,
|
||||
joinReaderOptions *cli.TReaderOptions,
|
||||
leftJoinFieldNames []string,
|
||||
leftKeepFieldNameSet map[string]bool,
|
||||
) *JoinBucketKeeper {
|
||||
|
||||
// Instantiate the record-reader
|
||||
|
|
@ -194,7 +196,8 @@ func NewJoinBucketKeeper(
|
|||
errorChannel: errorChannel,
|
||||
recordReaderDone: false,
|
||||
|
||||
leftJoinFieldNames: leftJoinFieldNames,
|
||||
leftJoinFieldNames: leftJoinFieldNames,
|
||||
leftKeepFieldNameSet: leftKeepFieldNameSet,
|
||||
|
||||
JoinBucket: NewJoinBucket(nil),
|
||||
peekRecordAndContext: nil,
|
||||
|
|
@ -575,6 +578,7 @@ func (keeper *JoinBucketKeeper) readRecord() *types.RecordAndContext {
|
|||
// TODO: temp
|
||||
lib.InternalCodingErrorIf(leftrecsAndContexts.Len() != 1)
|
||||
leftrecAndContext := leftrecsAndContexts.Front().Value.(*types.RecordAndContext)
|
||||
leftrecAndContext.Record = KeepLeftFieldNames(leftrecAndContext.Record, keeper.leftKeepFieldNameSet)
|
||||
if leftrecAndContext.EndOfStream { // end-of-stream marker
|
||||
keeper.recordReaderDone = true
|
||||
return nil
|
||||
|
|
@ -624,3 +628,26 @@ func compareLexically(
|
|||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// KeepLeftFieldNames is for when the user wants only selected fields out of the left file.
|
||||
func KeepLeftFieldNames(
|
||||
inrec *mlrval.Mlrmap,
|
||||
leftKeepFieldNameSet map[string]bool,
|
||||
) *mlrval.Mlrmap {
|
||||
if inrec == nil {
|
||||
return inrec
|
||||
} else if leftKeepFieldNameSet == nil {
|
||||
// Normal case
|
||||
return inrec
|
||||
} else {
|
||||
outrec := mlrval.NewMlrmap()
|
||||
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
||||
if leftKeepFieldNameSet[pe.Key] {
|
||||
// PutReference, not PutCopy, since the inrec will be freed and this
|
||||
// is an ownership transfer.
|
||||
outrec.PutReference(pe.Key, pe.Value)
|
||||
}
|
||||
}
|
||||
return outrec
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1248,6 +1248,9 @@ VERBS
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
|
|
@ -1573,6 +1573,9 @@ Options:
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
|
|
@ -430,6 +430,9 @@ Options:
|
|||
defaults to -j values if omitted.
|
||||
-r {a,b,c} Comma-separated join-field names for right input file(s);
|
||||
defaults to -j values if omitted.
|
||||
--lk|--left-keep-field-names {a,b,c} If supplied, this means keep only the specified field
|
||||
names from the left file. Automatically includes the join-field name(s). Helpful
|
||||
for when you only want a limited subset of information from the left file.
|
||||
--lp {text} Additional prefix for non-join output field names from
|
||||
the left file
|
||||
--rp {text} Additional prefix for non-join output field names from
|
||||
|
|
|
|||
1
test/cases/verb-join/0213/cmd
Normal file
1
test/cases/verb-join/0213/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0213/experr
Normal file
0
test/cases/verb-join/0213/experr
Normal file
3
test/cases/verb-join/0213/expout
Normal file
3
test/cases/verb-join/0213/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,c,d,x,y,z
|
||||
1,2,3,4,5,6,7
|
||||
1,2,3,4,7,8,9
|
||||
1
test/cases/verb-join/0214/cmd
Normal file
1
test/cases/verb-join/0214/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join --lk b -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0214/experr
Normal file
0
test/cases/verb-join/0214/experr
Normal file
3
test/cases/verb-join/0214/expout
Normal file
3
test/cases/verb-join/0214/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,x,y,z
|
||||
1,2,5,6,7
|
||||
1,2,7,8,9
|
||||
1
test/cases/verb-join/0215/cmd
Normal file
1
test/cases/verb-join/0215/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join --lk a -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0215/experr
Normal file
0
test/cases/verb-join/0215/experr
Normal file
3
test/cases/verb-join/0215/expout
Normal file
3
test/cases/verb-join/0215/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,x,y,z
|
||||
1,5,6,7
|
||||
1,7,8,9
|
||||
1
test/cases/verb-join/0216/cmd
Normal file
1
test/cases/verb-join/0216/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join --lk a,b -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0216/experr
Normal file
0
test/cases/verb-join/0216/experr
Normal file
3
test/cases/verb-join/0216/expout
Normal file
3
test/cases/verb-join/0216/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,x,y,z
|
||||
1,2,5,6,7
|
||||
1,2,7,8,9
|
||||
1
test/cases/verb-join/0217/cmd
Normal file
1
test/cases/verb-join/0217/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join -s -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0217/experr
Normal file
0
test/cases/verb-join/0217/experr
Normal file
3
test/cases/verb-join/0217/expout
Normal file
3
test/cases/verb-join/0217/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,c,d,x,y,z
|
||||
1,2,3,4,5,6,7
|
||||
1,2,3,4,7,8,9
|
||||
1
test/cases/verb-join/0218/cmd
Normal file
1
test/cases/verb-join/0218/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join -s --lk b -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0218/experr
Normal file
0
test/cases/verb-join/0218/experr
Normal file
3
test/cases/verb-join/0218/expout
Normal file
3
test/cases/verb-join/0218/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,x,y,z
|
||||
1,2,5,6,7
|
||||
1,2,7,8,9
|
||||
1
test/cases/verb-join/0219/cmd
Normal file
1
test/cases/verb-join/0219/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join -s --lk a -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0219/experr
Normal file
0
test/cases/verb-join/0219/experr
Normal file
3
test/cases/verb-join/0219/expout
Normal file
3
test/cases/verb-join/0219/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,x,y,z
|
||||
1,5,6,7
|
||||
1,7,8,9
|
||||
1
test/cases/verb-join/0220/cmd
Normal file
1
test/cases/verb-join/0220/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv join -s --lk a,b -j a -f test/input/join-left-keep test/input/join-right-keep
|
||||
0
test/cases/verb-join/0220/experr
Normal file
0
test/cases/verb-join/0220/experr
Normal file
3
test/cases/verb-join/0220/expout
Normal file
3
test/cases/verb-join/0220/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,x,y,z
|
||||
1,2,5,6,7
|
||||
1,2,7,8,9
|
||||
3
test/input/join-left-keep
Normal file
3
test/input/join-left-keep
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,c,d
|
||||
1,2,3,4
|
||||
5,6,7,8
|
||||
5
test/input/join-right-keep
Normal file
5
test/input/join-right-keep
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
a,x,y,z
|
||||
1,5,6,7
|
||||
1,7,8,9
|
||||
2,6,7,8
|
||||
2,8,9,9
|
||||
7
todo.txt
7
todo.txt
|
|
@ -5,13 +5,8 @@ RELEASES
|
|||
o mlr join --left-fields a,b,c
|
||||
o fmt/unfmt/regex doc
|
||||
o FAQ/examples reorg
|
||||
? ??? for doc-string contents search -- ? & mlr help namegoeshere foo ...
|
||||
|
||||
o several needs-doc issues
|
||||
i https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation
|
||||
- 908
|
||||
:context
|
||||
https://github.com/johnkerl/miller/issues/908#issuecomment-1032573038 NFR
|
||||
o https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation
|
||||
|
||||
k strptime/882
|
||||
k fmtifnum, & recursive fmtnum/fmtifnum
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue