mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
git add some files missed on previous commits
This commit is contained in:
parent
632e8ef4c9
commit
7ccc2a460b
6 changed files with 345 additions and 0 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -89,3 +89,6 @@ mlr.exe
|
|||
mlr.linux.x86_64
|
||||
mlr.macosx
|
||||
msys-2.0.dll
|
||||
|
||||
data/big.*
|
||||
data/nmc?.*
|
||||
|
|
|
|||
282
go/src/miller/mappers/cut.go
Normal file
282
go/src/miller/mappers/cut.go
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
package mappers
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"miller/clitypes"
|
||||
"miller/containers"
|
||||
"miller/mapping"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
var CutSetup = mapping.MapperSetup{
|
||||
Verb: "cut",
|
||||
ParseCLIFunc: mapperCutParseCLI,
|
||||
IgnoresInput: false,
|
||||
}
|
||||
|
||||
func mapperCutParseCLI(
|
||||
pargi *int,
|
||||
argc int,
|
||||
args []string,
|
||||
errorHandling flag.ErrorHandling, // ContinueOnError or ExitOnError
|
||||
_ *clitypes.TReaderOptions,
|
||||
__ *clitypes.TWriterOptions,
|
||||
) mapping.IRecordMapper {
|
||||
|
||||
// Get the verb name from the current spot in the mlr command line
|
||||
argi := *pargi
|
||||
verb := args[argi]
|
||||
argi++
|
||||
|
||||
// Parse local flags
|
||||
flagSet := flag.NewFlagSet(verb, errorHandling)
|
||||
|
||||
pFieldNames := flagSet.String(
|
||||
"f",
|
||||
"",
|
||||
"Comma-separated field names for cut, e.g. a,b,c",
|
||||
)
|
||||
|
||||
pDoArgOrder := flagSet.Bool(
|
||||
"o",
|
||||
false,
|
||||
`Retain fields in the order specified here in the argument list.
|
||||
Default is to retain them in the order found in the input data.`,
|
||||
)
|
||||
|
||||
pDoComplement := flagSet.Bool(
|
||||
"x",
|
||||
false,
|
||||
"Exclude, rather than include, field names specified by -f.\n",
|
||||
)
|
||||
|
||||
pDoComplementLong := flagSet.Bool(
|
||||
"complement",
|
||||
false,
|
||||
"Synonymous with -x",
|
||||
)
|
||||
|
||||
// ap_define_true_flag(pstate, "-r", &do_regexes);
|
||||
// fmt.Fprintf(o, "-r Treat field names as regular expressions. \"ab\", \"a.*b\" will\n");
|
||||
// fmt.Fprintf(o, " match any field name containing the substring \"ab\" or matching\n");
|
||||
// fmt.Fprintf(o, " \"a.*b\", respectively; anchors of the form \"^ab$\", \"^a.*b$\" may\n");
|
||||
// fmt.Fprintf(o, " be used. The -o flag is ignored when -r is present.\n");
|
||||
|
||||
flagSet.Usage = func() {
|
||||
ostream := os.Stderr
|
||||
if errorHandling == flag.ContinueOnError { // help intentionally requested
|
||||
ostream = os.Stdout
|
||||
}
|
||||
mapperCutUsage(ostream, args[0], verb, flagSet)
|
||||
}
|
||||
flagSet.Parse(args[argi:])
|
||||
if errorHandling == flag.ContinueOnError { // help intentioally requested
|
||||
return nil
|
||||
}
|
||||
|
||||
if *pFieldNames == "" {
|
||||
flagSet.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Find out how many flags were consumed by this verb and advance for the
|
||||
// next verb
|
||||
argi = len(args) - len(flagSet.Args())
|
||||
|
||||
fieldNameList := strings.Split(*pFieldNames, ",")
|
||||
mapper, _ := NewMapperCut(
|
||||
fieldNameList,
|
||||
*pDoArgOrder,
|
||||
*pDoComplement,
|
||||
*pDoComplementLong,
|
||||
)
|
||||
|
||||
*pargi = argi
|
||||
return mapper
|
||||
}
|
||||
|
||||
// mapperCutUsage writes the help text for the cut verb to o: a usage line, a
// one-line description, one line per flag registered on flagSet, and two
// example invocations. argv0 is the program name and verb the verb name as
// they should appear in the text. Write errors are not reported.
func mapperCutUsage(
	o *os.File,
	argv0 string,
	verb string,
	flagSet *flag.FlagSet,
) {
	fmt.Fprintf(o, "Usage: %s %s [options]\n", argv0, verb)
	o.WriteString("Passes through input records with specified fields included/excluded.\n")

	// flagSet.PrintDefaults() doesn't let us control stdout vs stderr, so the
	// flags are listed by hand.
	describeFlag := func(f *flag.Flag) {
		fmt.Fprintf(o, " -%v (default %v) %v\n", f.Name, f.Value, f.Usage)
	}
	flagSet.VisitAll(describeFlag)

	o.WriteString("Examples:\n")
	for _, options := range []string{"-f", "-x -f"} {
		fmt.Fprintf(o, " %s %s %s hostname,status\n", argv0, verb, options)
	}
	// Examples for the not-yet-ported -r flag:
	// fmt.Fprintf(o, " %s %s -r -f '^status$,sda[0-9]'\n", argv0, verb);
	// fmt.Fprintf(o, " %s %s -r -f '^status$,\"sda[0-9]\"'\n", argv0, verb);
	// fmt.Fprintf(o, " %s %s -r -f '^status$,\"sda[0-9]\"i' (this is case-insensitive)\n", argv0, verb);
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// MapperCut holds the configuration of one cut verb: which fields were named
// with -f and which of the ordering/complement flags were given.
type MapperCut struct {
	// fieldNameList is the -f field names in the order they were given.
	fieldNameList []string
	// fieldNameSet has one entry per name in fieldNameList, for membership
	// lookups.
	fieldNameSet map[string]bool

	// doArgOrder is the -o flag: emit fields in fieldNameList order rather
	// than in input-record order.
	doArgOrder bool
	// doComplement is the -x flag: drop the named fields instead of keeping
	// them.
	doComplement bool
	// doComplementLong is the -complement flag, which the usage text
	// describes as synonymous with -x.
	doComplementLong bool
}
|
||||
|
||||
func NewMapperCut(
|
||||
fieldNameList []string,
|
||||
doArgOrder bool,
|
||||
doComplement bool,
|
||||
doComplementLong bool,
|
||||
) (*MapperCut, error) {
|
||||
|
||||
// xxx make/find-reuse util func
|
||||
fieldNameSet := make(map[string]bool)
|
||||
for _, fieldName := range fieldNameList {
|
||||
fieldNameSet[fieldName] = true
|
||||
}
|
||||
|
||||
return &MapperCut{
|
||||
fieldNameList: fieldNameList,
|
||||
fieldNameSet: fieldNameSet,
|
||||
doArgOrder: doArgOrder,
|
||||
doComplement: doComplement,
|
||||
doComplementLong: doComplementLong,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// if (!do_regexes) {
|
||||
// pstate->pfield_name_list = pfield_name_list;
|
||||
// slls_reverse(pstate->pfield_name_list);
|
||||
// pstate->pfield_name_set = hss_from_slls(pfield_name_list);
|
||||
// pstate->nregex = 0;
|
||||
// pstate->regexes = NULL;
|
||||
// pmapper->pprocess_func = mapper_cut_process_no_regexes;
|
||||
// } else {
|
||||
// pstate->pfield_name_list = NULL;
|
||||
// pstate->pfield_name_set = NULL;
|
||||
// pstate->nregex = pfield_name_list->length;
|
||||
// pstate->regexes = mlr_malloc_or_die(pstate->nregex * sizeof(regex_t));
|
||||
// int i = 0;
|
||||
// for (sllse_t* pe = pfield_name_list->phead; pe != NULL; pe = pe->pnext, i++) {
|
||||
// // Let them type in a.*b if they want, or "a.*b", or "a.*b"i.
|
||||
// // Strip off the leading " and trailing " or "i.
|
||||
// regcomp_or_die_quoted(&pstate->regexes[i], pe->value, REG_NOSUB);
|
||||
// }
|
||||
// slls_free(pfield_name_list);
|
||||
// pmapper->pprocess_func = mapper_cut_process_with_regexes;
|
||||
// }
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
func (this *MapperCut) Map(
|
||||
inrecAndContext *containers.LrecAndContext,
|
||||
outrecsAndContexts chan<- *containers.LrecAndContext,
|
||||
) {
|
||||
if !this.doComplement {
|
||||
if !this.doArgOrder {
|
||||
this.includeWithInputOrder(inrecAndContext, outrecsAndContexts)
|
||||
} else {
|
||||
this.includeWithArgOrder(inrecAndContext, outrecsAndContexts)
|
||||
}
|
||||
} else {
|
||||
this.exclude(inrecAndContext, outrecsAndContexts)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// mlr cut -f a,b,c
|
||||
func (this *MapperCut) includeWithInputOrder(
|
||||
inrecAndContext *containers.LrecAndContext,
|
||||
outrecsAndContexts chan<- *containers.LrecAndContext,
|
||||
) {
|
||||
inrec := inrecAndContext.Lrec
|
||||
if inrec != nil { // not end of record stream
|
||||
outrec := containers.NewLrec()
|
||||
for pe := inrec.Head; pe != nil; pe = pe.Next {
|
||||
fieldName := *pe.Key
|
||||
_, wanted := this.fieldNameSet[fieldName]
|
||||
if wanted {
|
||||
outrec.Put(&fieldName, pe.Value) // inrec will be GC'ed
|
||||
}
|
||||
}
|
||||
outrecAndContext := containers.NewLrecAndContext(outrec, &inrecAndContext.Context)
|
||||
outrecsAndContexts <- outrecAndContext
|
||||
} else {
|
||||
outrecsAndContexts <- inrecAndContext
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// mlr cut -o -f a,b,c
|
||||
func (this *MapperCut) includeWithArgOrder(
|
||||
inrecAndContext *containers.LrecAndContext,
|
||||
outrecsAndContexts chan<- *containers.LrecAndContext,
|
||||
) {
|
||||
inrec := inrecAndContext.Lrec
|
||||
if inrec != nil { // not end of record stream
|
||||
outrec := containers.NewLrec()
|
||||
for _, fieldName := range this.fieldNameList {
|
||||
value := inrec.Get(&fieldName)
|
||||
if value != nil {
|
||||
outrec.Put(&fieldName, value)
|
||||
}
|
||||
}
|
||||
outrecAndContext := containers.NewLrecAndContext(outrec, &inrecAndContext.Context)
|
||||
outrecsAndContexts <- outrecAndContext
|
||||
} else {
|
||||
outrecsAndContexts <- inrecAndContext
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// mlr cut -x -f a,b,c
|
||||
func (this *MapperCut) exclude(
|
||||
inrecAndContext *containers.LrecAndContext,
|
||||
outrecsAndContexts chan<- *containers.LrecAndContext,
|
||||
) {
|
||||
inrec := inrecAndContext.Lrec
|
||||
if inrec != nil { // not end of record stream
|
||||
for _, fieldName := range this.fieldNameList {
|
||||
if inrec.Has(&fieldName) {
|
||||
inrec.Remove(&fieldName)
|
||||
}
|
||||
}
|
||||
}
|
||||
outrecsAndContexts <- inrecAndContext
|
||||
}
|
||||
|
||||
//// ----------------------------------------------------------------
|
||||
//static sllv_t* mapper_cut_process_with_regexes(lrec_t* pinrec, context_t* pctx, void* pvstate) {
|
||||
// if (pinrec != NULL) {
|
||||
// mapper_cut_state_t* pstate = (mapper_cut_state_t*)pvstate;
|
||||
// // Loop over the record and free the fields to be discarded, being
|
||||
// // careful about the fact that we're modifying what we're looping over.
|
||||
// for (lrece_t* pe = pinrec->phead; pe != NULL; /* next in loop */) {
|
||||
// int matches_any = FALSE;
|
||||
// for (int i = 0; i < pstate->nregex; i++) {
|
||||
// if (regmatch_or_die(&pstate->regexes[i], pe->key, 0, NULL)) {
|
||||
// matches_any = TRUE;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (matches_any ^ pstate->do_complement) {
|
||||
// pe = pe->pnext;
|
||||
// } else {
|
||||
// lrece_t* pf = pe->pnext;
|
||||
// lrec_remove(pinrec, pe->key);
|
||||
// pe = pf;
|
||||
// }
|
||||
// }
|
||||
// return sllv_single(pinrec);
|
||||
// }
|
||||
// else {
|
||||
// return sllv_single(NULL);
|
||||
// }
|
||||
//}
|
||||
17
go/u/try-perf
Executable file
17
go/u/try-perf
Executable file
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash

# Timing comparison: runs the same three commands first with the mlr found on
# PATH and then with ../c/mlr. `repeat` and `justtime` are helper commands
# that are not defined in this script -- presumably "run N times" and "report
# elapsed time"; confirm against your environment.

n=10

# time_mlr runs the given command line through `repeat $n justtime`,
# discarding standard output.
time_mlr() {
  repeat $n justtime "$@" > /dev/null
}

time_mlr mlr cat ../data/nmc1.dkvp
echo
time_mlr mlr --csv cat ../data/nmc1.csv
echo
time_mlr mlr --csv cut -x -f ccode,version ../data/nmc1.csv
echo
echo
time_mlr ../c/mlr cat ../data/nmc1.dkvp
echo
time_mlr ../c/mlr --csv cat ../data/nmc1.csv
echo
time_mlr ../c/mlr --csv cut -x -f ccode,version ../data/nmc1.csv
|
||||
|
||||
15
go/u/try-verbs
Executable file
15
go/u/try-verbs
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash

# Runs the cut verb in its three modes against u/s.dkvp, printing a banner
# and the command line before each run.

set -euo pipefail

# run_mlr prints a blank line, a separator rule, and the command line (shown
# as plain "mlr" for info messages), then runs mlr with the given arguments.
run_mlr() {
  printf '\n'
  printf '%s\n' '----------------------------------------------------------------'
  printf '%s\n' "mlr $*"
  mlr "$@"
}

# Include in input order, include in argument order, then exclude.
run_mlr --opprint cut -f x,a u/s.dkvp
run_mlr --opprint cut -o -f x,a u/s.dkvp
run_mlr --opprint cut -x -f x,a u/s.dkvp
|
||||
24
go/u/try-verbs.out
Normal file
24
go/u/try-verbs.out
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
|
||||
----------------------------------------------------------------
|
||||
mlr --opprint cut -f x,a u/s.dkvp
|
||||
a x
|
||||
pan 0.3467901443380824
|
||||
eks 0.7586799647899636
|
||||
wye 0.20460330576630303
|
||||
eks 0.38139939387114097
|
||||
|
||||
----------------------------------------------------------------
|
||||
mlr --opprint cut -o -f x,a u/s.dkvp
|
||||
x a
|
||||
0.3467901443380824 pan
|
||||
0.7586799647899636 eks
|
||||
0.20460330576630303 wye
|
||||
0.38139939387114097 eks
|
||||
|
||||
----------------------------------------------------------------
|
||||
mlr --opprint cut -x -f x,a u/s.dkvp
|
||||
b i y
|
||||
pan 1 0.7268028627434533
|
||||
pan 2 0.5221511083334797
|
||||
wye 3 0.33831852551664776
|
||||
wye 4 0.13418874328430463
|
||||
|
|
@ -22,3 +22,7 @@ cutgo: cutgo.go
|
|||
go build cutgo.go
|
||||
cutnim: cutnim.nim
|
||||
nim compile cutnim.nim
|
||||
|
||||
|
||||
clean:
|
||||
-@rm -f catc catc0 catm catrust catgo catnim cutgo cutnim catd cutd
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue