mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
fix nidx reader for null fields
This commit is contained in:
parent
212b6554a5
commit
ddda357649
12 changed files with 83 additions and 26 deletions
|
|
@ -230,7 +230,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
|
|||
// xxx use lookup tables?
|
||||
if (streq(rdesc, "dkvp")) popts->preader = reader_dkvp_alloc(popts->irs, popts->ifs, popts->ips, popts->allow_repeat_ifs);
|
||||
else if (streq(rdesc, "csv")) popts->preader = reader_csv_alloc(popts->irs, popts->ifs, popts->allow_repeat_ifs);
|
||||
else if (streq(rdesc, "nidx")) popts->preader = reader_nidx_alloc(popts->irs, popts->ifs);
|
||||
else if (streq(rdesc, "nidx")) popts->preader = reader_nidx_alloc(popts->irs, popts->ifs, popts->allow_repeat_ifs);
|
||||
else if (streq(rdesc, "xtab")) popts->preader = reader_xtab_alloc(popts->ips, TRUE); // xxx parameterize allow_repeat_ips
|
||||
else {
|
||||
fprintf(stderr, "b01k!\n");
|
||||
|
|
|
|||
|
|
@ -84,19 +84,22 @@ lrec_t* lrec_parse_nidx(char* line, char ifs, int allow_repeat_ifs) {
|
|||
char* value = line;
|
||||
char free_flags = 0;
|
||||
|
||||
for (char* p = line; *p; p++) {
|
||||
for (char* p = line; *p; ) {
|
||||
if (*p == ifs) {
|
||||
*p = 0;
|
||||
|
||||
idx++;
|
||||
key = make_nidx_key(idx, &free_flags);
|
||||
lrec_put(prec, key, value, free_flags);
|
||||
|
||||
p++;
|
||||
// xxx hoist loop invariant at the cost of some code duplication
|
||||
if (allow_repeat_ifs) {
|
||||
while (*p == ifs)
|
||||
p++;
|
||||
}
|
||||
idx++;
|
||||
key = make_nidx_key(idx, &free_flags);
|
||||
lrec_put(prec, key, value, free_flags);
|
||||
value = p;
|
||||
} else {
|
||||
p++;
|
||||
}
|
||||
}
|
||||
idx++;
|
||||
|
|
|
|||
|
|
@ -6,18 +6,17 @@
|
|||
typedef struct _reader_nidx_state_t {
|
||||
char irs;
|
||||
char ifs;
|
||||
int allow_repeat_ifs;
|
||||
} reader_nidx_state_t;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// xxx repeated ifs ...
|
||||
|
||||
static lrec_t* reader_nidx_func(FILE* input_stream, void* pvstate, context_t* pctx) {
|
||||
reader_nidx_state_t* pstate = pvstate;
|
||||
char* line = mlr_get_line(input_stream, pstate->irs);
|
||||
if (line == NULL)
|
||||
return NULL;
|
||||
else
|
||||
return lrec_parse_nidx(line, pstate->ifs, FALSE);
|
||||
return lrec_parse_nidx(line, pstate->ifs, pstate->allow_repeat_ifs);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -28,13 +27,14 @@ static void reset_nidx_func(void* pvstate) {
|
|||
static void reader_nidx_free_func(void* pvstate) {
|
||||
}
|
||||
|
||||
reader_t* reader_nidx_alloc(char irs, char ifs) {
|
||||
reader_t* reader_nidx_alloc(char irs, char ifs, int allow_repeat_ifs) {
|
||||
reader_t* preader = mlr_malloc_or_die(sizeof(reader_t));
|
||||
|
||||
reader_nidx_state_t* pstate = mlr_malloc_or_die(sizeof(reader_nidx_state_t));
|
||||
pstate->irs = irs;
|
||||
pstate->ifs = ifs;
|
||||
preader->pvstate = (void*)pstate;
|
||||
pstate->irs = irs;
|
||||
pstate->ifs = ifs;
|
||||
pstate->allow_repeat_ifs = allow_repeat_ifs;
|
||||
preader->pvstate = (void*)pstate;
|
||||
|
||||
preader->preader_func = &reader_nidx_func;
|
||||
preader->preset_func = &reset_nidx_func;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
reader_t* reader_csv_alloc(char rs, char fs, int allow_repeat_ifs);
|
||||
reader_t* reader_dkvp_alloc(char rs, char fs, char ps, int allow_repeat_ifs);
|
||||
reader_t* reader_nidx_alloc(char rs, char fs);
|
||||
reader_t* reader_nidx_alloc(char rs, char fs, int allow_repeat_ifs);
|
||||
reader_t* reader_xtab_alloc(char ps, int allow_repeat_ips);
|
||||
|
||||
#endif // READERS_H
|
||||
|
|
|
|||
|
|
@ -1313,3 +1313,21 @@ m=8,n=9,o=10
|
|||
a=1,b=2,c=3
|
||||
a=4,b=5,c=6
|
||||
|
||||
|
||||
================================================================
|
||||
NULL-FIELD INPUT
|
||||
|
||||
./test/../mlr --icsv --odkvp cat test/input/null-fields.csv
|
||||
a=1,b=2,c=3,d=4,e=5
|
||||
a=6,b=,c=,d=,e=10
|
||||
a=,b=,c=,d=11,e=12
|
||||
a=13,b=14,c=,d=,e=
|
||||
a=,b=,c=,d=,e=
|
||||
|
||||
./test/../mlr --inidx --odkvp cat test/input/null-fields.nidx
|
||||
1=a,2=b,3=c,4=d,5=e
|
||||
1=f,2=,3=,4=,5=g
|
||||
1=,2=,3=,4=h,5=i
|
||||
1=j,2=k,3=,4=,5=
|
||||
1=,2=,3=,4=,5=
|
||||
|
||||
|
|
|
|||
6
c/test/input/null-fields.csv
Normal file
6
c/test/input/null-fields.csv
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
a,b,c,d,e
|
||||
1,2,3,4,5
|
||||
6,,,,10
|
||||
,,,11,12
|
||||
13,14,,,
|
||||
,,,,
|
||||
|
5
c/test/input/null-fields.nidx
Normal file
5
c/test/input/null-fields.nidx
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
a,b,c,d,e
|
||||
f,,,,g
|
||||
,,,h,i
|
||||
j,k,,,
|
||||
,,,,
|
||||
|
|
@ -1313,3 +1313,21 @@ m=8,n=9,o=10
|
|||
a=1,b=2,c=3
|
||||
a=4,b=5,c=6
|
||||
|
||||
|
||||
================================================================
|
||||
NULL-FIELD INPUT
|
||||
|
||||
./test/../mlr --icsv --odkvp cat test/input/null-fields.csv
|
||||
a=1,b=2,c=3,d=4,e=5
|
||||
a=6,b=,,c=,10
|
||||
a=,b=,,c=11,d=12
|
||||
a=13,b=14,c=,,d=
|
||||
a=,b=,,c=,
|
||||
|
||||
./test/../mlr --inidx --odkvp cat test/input/null-fields.nidx
|
||||
1=a,2=b,3=c,4=d,5=e
|
||||
1=f,2=,3=,4=,5=g
|
||||
1=,2=,3=,4=h,5=i
|
||||
1=j,2=k,3=,4=,5=
|
||||
1=,2=,3=,4=,5=
|
||||
|
||||
|
|
|
|||
10
c/test/run
10
c/test/run
|
|
@ -260,8 +260,14 @@ run_command $mlr --ipprint --odkvp cat test/input/f.pprint test/input/g.pprint
|
|||
run_command $mlr --ipprint --odkvp cat test/input/{a,b,c,d,e,f,g}.pprint
|
||||
|
||||
# ================================================================
|
||||
#diff $expdir/out $outdir/out
|
||||
diff -C5 $expdir/out $outdir/out
|
||||
announce NULL-FIELD INPUT
|
||||
|
||||
run_command $mlr --icsv --odkvp cat test/input/null-fields.csv
|
||||
run_command $mlr --inidx --odkvp cat test/input/null-fields.nidx
|
||||
|
||||
# ================================================================
|
||||
diff $expdir/out $outdir/out
|
||||
#diff -C5 $expdir/out $outdir/out
|
||||
|
||||
# ================================================================
|
||||
echo ALL REGRESSION TESTS PASSED
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
! BUGFIXES !
|
||||
|
||||
* --ofmt ignored in put. perhaps best to reglobalize.
|
||||
* nidx not handling empty values -- ?
|
||||
* nidx not handling empty values -- ? same bug w/ csv. make UTs.
|
||||
|
||||
================================================================
|
||||
FEATURES
|
||||
|
|
|
|||
|
|
@ -1,9 +0,0 @@
|
|||
cat flins.csv | mlr --icsv --oxtab stats1 -a min,avg,max -f eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible
|
||||
echo
|
||||
cat flins.csv | mlr --icsv --oxtab stats1 -a min,avg,max -f eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible -g county
|
||||
echo
|
||||
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f eq_site_deductible,tiv_2012 -g county
|
||||
echo
|
||||
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f tiv_2011,tiv_2012
|
||||
echo
|
||||
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f tiv_2011,tiv_2012 -g county
|
||||
10
name-ideas.txt
Normal file
10
name-ideas.txt
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
package name:
|
||||
* mohawk (more than awk ... but in some ways it's also less than awk)
|
||||
* mohoc
|
||||
* seneca (sibling tribe of the mohawk)
|
||||
* qoppa (key-value-pair)
|
||||
* qipper (key-value-pair)
|
||||
* miller (glenn or steve -- old-school-throwback style)
|
||||
* thomp (tool for ordered hash-maps)
|
||||
* thump (tool for ordered hash-maps)
|
||||
* prefer an invented name to make it more easily searchable
|
||||
Loading…
Add table
Add a link
Reference in a new issue