fix nidx reader for null fields

This commit is contained in:
John Kerl 2015-05-09 13:50:05 -07:00
parent 212b6554a5
commit ddda357649
12 changed files with 83 additions and 26 deletions

View file

@ -230,7 +230,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
// xxx use lookup tables?
if (streq(rdesc, "dkvp")) popts->preader = reader_dkvp_alloc(popts->irs, popts->ifs, popts->ips, popts->allow_repeat_ifs);
else if (streq(rdesc, "csv")) popts->preader = reader_csv_alloc(popts->irs, popts->ifs, popts->allow_repeat_ifs);
else if (streq(rdesc, "nidx")) popts->preader = reader_nidx_alloc(popts->irs, popts->ifs);
else if (streq(rdesc, "nidx")) popts->preader = reader_nidx_alloc(popts->irs, popts->ifs, popts->allow_repeat_ifs);
else if (streq(rdesc, "xtab")) popts->preader = reader_xtab_alloc(popts->ips, TRUE); // xxx parameterize allow_repeat_ips
else {
fprintf(stderr, "b01k!\n");

View file

@ -84,19 +84,22 @@ lrec_t* lrec_parse_nidx(char* line, char ifs, int allow_repeat_ifs) {
char* value = line;
char free_flags = 0;
for (char* p = line; *p; p++) {
for (char* p = line; *p; ) {
if (*p == ifs) {
*p = 0;
idx++;
key = make_nidx_key(idx, &free_flags);
lrec_put(prec, key, value, free_flags);
p++;
// xxx hoist loop invariant at the cost of some code duplication
if (allow_repeat_ifs) {
while (*p == ifs)
p++;
}
idx++;
key = make_nidx_key(idx, &free_flags);
lrec_put(prec, key, value, free_flags);
value = p;
} else {
p++;
}
}
idx++;

View file

@ -6,18 +6,17 @@
// Per-reader state for the NIDX reader. reader_nidx_alloc fills it in and
// reader_nidx_func reads it on each call.
typedef struct _reader_nidx_state_t {
// Passed to mlr_get_line when fetching the next line of input.
char irs;
// Field separator handed to lrec_parse_nidx.
char ifs;
// Repeated-separator flag; supplied to lrec_parse_nidx as its
// allow_repeat_ifs argument.
int allow_repeat_ifs;
} reader_nidx_state_t;
// ----------------------------------------------------------------
// xxx repeated ifs ...
// Reads one line from input_stream via mlr_get_line (using pstate->irs) and
// hands it to lrec_parse_nidx together with pstate->ifs.
// Returns NULL when mlr_get_line returns NULL; otherwise returns the result
// of lrec_parse_nidx. pctx is not referenced in this function.
// NOTE(review): the two consecutive return statements below look like the
// before/after lines of a change. As written, the else branch always returns
// the FALSE variant and the final return is unreachable -- confirm which one
// is intended to remain.
static lrec_t* reader_nidx_func(FILE* input_stream, void* pvstate, context_t* pctx) {
// pvstate is used as a reader_nidx_state_t.
reader_nidx_state_t* pstate = pvstate;
char* line = mlr_get_line(input_stream, pstate->irs);
if (line == NULL)
return NULL;
else
return lrec_parse_nidx(line, pstate->ifs, FALSE);
return lrec_parse_nidx(line, pstate->ifs, pstate->allow_repeat_ifs);
}
@ -28,13 +27,14 @@ static void reset_nidx_func(void* pvstate) {
// NIDX reader teardown function; the body is empty, so nothing is released.
// NOTE(review): reader_nidx_alloc obtains the reader and its state from
// mlr_malloc_or_die -- confirm whether they are meant to be freed here.
static void reader_nidx_free_func(void* pvstate) {
}
reader_t* reader_nidx_alloc(char irs, char ifs) {
reader_t* reader_nidx_alloc(char irs, char ifs, int allow_repeat_ifs) {
reader_t* preader = mlr_malloc_or_die(sizeof(reader_t));
reader_nidx_state_t* pstate = mlr_malloc_or_die(sizeof(reader_nidx_state_t));
pstate->irs = irs;
pstate->ifs = ifs;
preader->pvstate = (void*)pstate;
pstate->irs = irs;
pstate->ifs = ifs;
pstate->allow_repeat_ifs = allow_repeat_ifs;
preader->pvstate = (void*)pstate;
preader->preader_func = &reader_nidx_func;
preader->preset_func = &reset_nidx_func;

View file

@ -4,7 +4,7 @@
reader_t* reader_csv_alloc(char rs, char fs, int allow_repeat_ifs);
reader_t* reader_dkvp_alloc(char rs, char fs, char ps, int allow_repeat_ifs);
reader_t* reader_nidx_alloc(char rs, char fs);
reader_t* reader_nidx_alloc(char rs, char fs, int allow_repeat_ifs);
reader_t* reader_xtab_alloc(char ps, int allow_repeat_ips);
#endif // READERS_H

View file

@ -1313,3 +1313,21 @@ m=8,n=9,o=10
a=1,b=2,c=3
a=4,b=5,c=6
================================================================
NULL-FIELD INPUT
./test/../mlr --icsv --odkvp cat test/input/null-fields.csv
a=1,b=2,c=3,d=4,e=5
a=6,b=,c=,d=,e=10
a=,b=,c=,d=11,e=12
a=13,b=14,c=,d=,e=
a=,b=,c=,d=,e=
./test/../mlr --inidx --odkvp cat test/input/null-fields.nidx
1=a,2=b,3=c,4=d,5=e
1=f,2=,3=,4=,5=g
1=,2=,3=,4=h,5=i
1=j,2=k,3=,4=,5=
1=,2=,3=,4=,5=

View file

@ -0,0 +1,6 @@
a,b,c,d,e
1,2,3,4,5
6,,,,10
,,,11,12
13,14,,,
,,,,
1 a b c d e
2 1 2 3 4 5
3 6 10
4 11 12
5 13 14
6

View file

@ -0,0 +1,5 @@
a,b,c,d,e
f,,,,g
,,,h,i
j,k,,,
,,,,

View file

@ -1313,3 +1313,21 @@ m=8,n=9,o=10
a=1,b=2,c=3
a=4,b=5,c=6
================================================================
NULL-FIELD INPUT
./test/../mlr --icsv --odkvp cat test/input/null-fields.csv
a=1,b=2,c=3,d=4,e=5
a=6,b=,,c=,10
a=,b=,,c=11,d=12
a=13,b=14,c=,,d=
a=,b=,,c=,
./test/../mlr --inidx --odkvp cat test/input/null-fields.nidx
1=a,2=b,3=c,4=d,5=e
1=f,2=,3=,4=,5=g
1=,2=,3=,4=h,5=i
1=j,2=k,3=,4=,5=
1=,2=,3=,4=,5=

View file

@ -260,8 +260,14 @@ run_command $mlr --ipprint --odkvp cat test/input/f.pprint test/input/g.pprint
run_command $mlr --ipprint --odkvp cat test/input/{a,b,c,d,e,f,g}.pprint
# ================================================================
#diff $expdir/out $outdir/out
diff -C5 $expdir/out $outdir/out
announce NULL-FIELD INPUT
run_command $mlr --icsv --odkvp cat test/input/null-fields.csv
run_command $mlr --inidx --odkvp cat test/input/null-fields.nidx
# ================================================================
diff $expdir/out $outdir/out
#diff -C5 $expdir/out $outdir/out
# ================================================================
echo ALL REGRESSION TESTS PASSED

View file

@ -2,7 +2,7 @@
! BUGFIXES !
* --ofmt ignored in put. perhaps best to reglobalize.
* nidx not handling empty values -- ?
* nidx not handling empty values -- ? Same bug with csv. Add unit tests for both.
================================================================
FEATURES

View file

@ -1,9 +0,0 @@
cat flins.csv | mlr --icsv --oxtab stats1 -a min,avg,max -f eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible
echo
cat flins.csv | mlr --icsv --oxtab stats1 -a min,avg,max -f eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible -g county
echo
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f eq_site_deductible,tiv_2012 -g county
echo
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f tiv_2011,tiv_2012
echo
cat flins.csv | mlr --icsv --opprint stats2 -a corr,linreg,r2 -f tiv_2011,tiv_2012 -g county

10
name-ideas.txt Normal file
View file

@ -0,0 +1,10 @@
package name:
* mohawk (more than awk ... but in some ways it's also less than awk)
* mohoc
* seneca (sibling tribe of the mohawk)
* qoppa (key-value-pair)
* qipper (key-value-pair)
* miller (glenn or steve -- old-school-throwback style)
* thomp (tool for ordered hash-maps)
* thump (tool for ordered hash-maps)
* prefer an invented name to make it more easily searchable