diff --git a/c/mapping/mapper_stats1.c b/c/mapping/mapper_stats1.c index 59fb72293..4a3822f64 100644 --- a/c/mapping/mapper_stats1.c +++ b/c/mapping/mapper_stats1.c @@ -44,14 +44,14 @@ typedef struct _mapper_stats1_state_t { // ---------------------------------------------------------------- static void mapper_stats1_usage(FILE* o, char* argv0, char* verb); static mapper_t* mapper_stats1_parse_cli(int* pargi, int argc, char** argv); -static mapper_t* mapper_stats1_alloc(slls_t* paccumulator_names, slls_t* pvalue_field_names, slls_t* pgroup_by_field_names, - int do_iterative_stats); +static mapper_t* mapper_stats1_alloc(slls_t* paccumulator_names, slls_t* pvalue_field_names, + slls_t* pgroup_by_field_names, int do_iterative_stats); static void mapper_stats1_free(void* pvstate); static sllv_t* mapper_stats1_process(lrec_t* pinrec, context_t* pctx, void* pvstate); static lrec_t* mapper_stats1_ingest(lrec_t* pinrec, mapper_stats1_state_t* pstate); static sllv_t* mapper_stats1_emit_all(mapper_stats1_state_t* pstate); -static lrec_t* mapper_stats1_emit(mapper_stats1_state_t* pstate, lrec_t* poutrec, char* value_field_name, char* stats1_name, - lhmsv_t* acc_field_to_acc_state); +static lrec_t* mapper_stats1_emit(mapper_stats1_state_t* pstate, lrec_t* poutrec, + char* value_field_name, char* stats1_name, lhmsv_t* acc_field_to_acc_state); static stats1_t* stats1_count_alloc(char* value_field_name, char* stats1_name); static stats1_t* stats1_mode_alloc(char* value_field_name, char* stats1_name); @@ -133,7 +133,7 @@ static mapper_t* mapper_stats1_parse_cli(int* pargi, int argc, char** argv) { ap_define_string_list_flag(pstate, "-a", &paccumulator_names); ap_define_string_list_flag(pstate, "-f", &pvalue_field_names); ap_define_string_list_flag(pstate, "-g", &pgroup_by_field_names); - ap_define_true_flag(pstate, "-s", &do_iterative_stats); + ap_define_true_flag(pstate, "-s", &do_iterative_stats); if (!ap_parse(pstate, verb, pargi, argc, argv)) { mapper_stats1_usage(stderr, 
argv[0], verb); @@ -145,7 +145,8 @@ static mapper_t* mapper_stats1_parse_cli(int* pargi, int argc, char** argv) { return NULL; } - return mapper_stats1_alloc(paccumulator_names, pvalue_field_names, pgroup_by_field_names, do_iterative_stats); + return mapper_stats1_alloc(paccumulator_names, pvalue_field_names, pgroup_by_field_names, + do_iterative_stats); } // ---------------------------------------------------------------- @@ -404,7 +405,6 @@ static sllv_t* mapper_stats1_emit_all(mapper_stats1_state_t* pstate) { static lrec_t* mapper_stats1_emit(mapper_stats1_state_t* pstate, lrec_t* poutrec, char* value_field_name, char* stats1_name, lhmsv_t* acc_field_to_acc_state) { - // Add in fields such as x_sum=#, y_count=#, etc.: for (sllse_t* pe = pstate->paccumulator_names->phead; pe != NULL; pe = pe->pnext) { char* stats1_name = pe->value; diff --git a/c/mapping/mapper_stats2.c b/c/mapping/mapper_stats2.c index 8a9d09d2a..3381639a8 100644 --- a/c/mapping/mapper_stats2.c +++ b/c/mapping/mapper_stats2.c @@ -49,8 +49,10 @@ static mapper_t* mapper_stats2_alloc(slls_t* paccumulator_names, slls_t* pvalue_ slls_t* pgroup_by_field_names, int do_verbose, int do_iterative_stats); static void mapper_stats2_free(void* pvstate); static sllv_t* mapper_stats2_process(lrec_t* pinrec, context_t* pctx, void* pvstate); -static void mapper_stats2_ingest(lrec_t* pinrec, context_t* pctx, mapper_stats2_state_t* pstate); -static sllv_t* mapper_stats2_emit(mapper_stats2_state_t* pstate); +static lrec_t* mapper_stats2_ingest(lrec_t* pinrec, context_t* pctx, mapper_stats2_state_t* pstate); +static sllv_t* mapper_stats2_emit_all(mapper_stats2_state_t* pstate); +static void mapper_stats2_emit(mapper_stats2_state_t* pstate, lrec_t* pinrec, + char* value_field_name_1, char* value_field_name_2, lhmsv_t* acc_fields_to_acc_state); static stats2_t* make_stats2(char* value_field_name_1, char* value_field_name_2, char* stats2_name, int do_verbose); static stats2_t* stats2_linreg_ols_alloc(char* 
value_field_name_1, char* value_field_name_2, char* stats2_name, int do_verbose); @@ -95,6 +97,9 @@ static void mapper_stats2_usage(FILE* o, char* argv0, char* verb) { fprintf(o, " There must be an even number of names.\n"); fprintf(o, "-g {e,f,g} Optional group-by-field names.\n"); fprintf(o, "-v Print additional output for linreg-pca.\n"); + fprintf(o, "-s Print iterative stats. Useful in tail -f contexts (in which\n"); + fprintf(o, " case please avoid pprint-format output since end of input\n"); + fprintf(o, " stream will never be seen).\n"); fprintf(o, "Example: %s %s -a linreg-pca -f x,y\n", argv0, verb); fprintf(o, "Example: %s %s -a linreg-ols,r2 -f x,y -g size,shape\n", argv0, verb); fprintf(o, "Example: %s %s -a corr -f x,y\n", argv0, verb); @@ -114,6 +119,7 @@ static mapper_t* mapper_stats2_parse_cli(int* pargi, int argc, char** argv) { ap_define_string_list_flag(pstate, "-f", &pvalue_field_names); ap_define_string_list_flag(pstate, "-g", &pgroup_by_field_names); ap_define_true_flag(pstate, "-v", &do_verbose); + ap_define_true_flag(pstate, "-s", &do_iterative_stats); if (!ap_parse(pstate, verb, pargi, argc, argv)) { mapper_stats2_usage(stderr, argv[0], verb); @@ -165,26 +171,32 @@ static mapper_t* mapper_stats2_parse_cli(int* pargi, int argc, char** argv) { // } // ================================================================ -// ---------------------------------------------------------------- +// In the iterative case, add to the current record its current group's stats fields. +// In the non-iterative case, produce output only at end of input stream. 
static sllv_t* mapper_stats2_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_stats2_state_t* pstate = pvstate; if (pinrec != NULL) { - mapper_stats2_ingest(pinrec, pctx, pstate); - lrec_free(pinrec); + lrec_t* poutrec = mapper_stats2_ingest(pinrec, pctx, pstate); + if (poutrec == NULL) { + lrec_free(pinrec); + return NULL; + } else { + return sllv_single(poutrec); + } + } else if (!pstate->do_iterative_stats) { + return mapper_stats2_emit_all(pstate); + } else { return NULL; } - else { - return mapper_stats2_emit(pstate); - } } // ---------------------------------------------------------------- -static void mapper_stats2_ingest(lrec_t* pinrec, context_t* pctx, mapper_stats2_state_t* pstate) { +static lrec_t* mapper_stats2_ingest(lrec_t* pinrec, context_t* pctx, mapper_stats2_state_t* pstate) { // ["s", "t"] slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); if (pgroup_by_field_values->length != pstate->pgroup_by_field_names->length) { slls_free(pgroup_by_field_values); - return; + return pstate->do_iterative_stats ? pinrec : NULL; } lhms2v_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values); @@ -231,13 +243,18 @@ static void mapper_stats2_ingest(lrec_t* pinrec, context_t* pctx, mapper_stats2_ double dval2 = mlr_double_from_string_or_die(sval2); pstats2->pingest_func(pstats2->pvstate, dval1, dval2); } + if (pstate->do_iterative_stats) { + mapper_stats2_emit(pstate, pinrec, value_field_name_1, value_field_name_2, + acc_fields_to_acc_state); + } } slls_free(pgroup_by_field_values); + return pstate->do_iterative_stats ? 
pinrec : NULL; } // ---------------------------------------------------------------- -static sllv_t* mapper_stats2_emit(mapper_stats2_state_t* pstate) { +static sllv_t* mapper_stats2_emit_all(mapper_stats2_state_t* pstate) { sllv_t* poutrecs = sllv_alloc(); for (lhmslve_t* pa = pstate->groups->phead; pa != NULL; pa = pa->pnext) { @@ -260,6 +277,9 @@ static sllv_t* mapper_stats2_emit(mapper_stats2_state_t* pstate) { char* value_field_name_2 = pd->key2; lhmsv_t* acc_fields_to_acc_state = pd->pvvalue; + mapper_stats2_emit(pstate, poutrec, value_field_name_1, value_field_name_2, + acc_fields_to_acc_state); + // For "corr", "linreg" for (lhmsve_t* pe = acc_fields_to_acc_state->phead; pe != NULL; pe = pe->pnext) { stats2_t* pstats2 = pe->pvvalue; @@ -273,6 +293,16 @@ static sllv_t* mapper_stats2_emit(mapper_stats2_state_t* pstate) { return poutrecs; } +static void mapper_stats2_emit(mapper_stats2_state_t* pstate, lrec_t* poutrec, + char* value_field_name_1, char* value_field_name_2, lhmsv_t* acc_fields_to_acc_state) +{ + // For "corr", "linreg" + for (lhmsve_t* pe = acc_fields_to_acc_state->phead; pe != NULL; pe = pe->pnext) { + stats2_t* pstats2 = pe->pvvalue; + pstats2->pemit_func(pstats2->pvstate, value_field_name_1, value_field_name_2, poutrec); + } +} + // ---------------------------------------------------------------- static mapper_t* mapper_stats2_alloc(slls_t* paccumulator_names, slls_t* pvalue_field_name_pairs, slls_t* pgroup_by_field_names, int do_verbose, int do_iterative_stats) @@ -285,6 +315,7 @@ static mapper_t* mapper_stats2_alloc(slls_t* paccumulator_names, slls_t* pvalue_ pstate->pgroup_by_field_names = pgroup_by_field_names; pstate->groups = lhmslv_alloc(); pstate->do_verbose = do_verbose; + pstate->do_iterative_stats = do_iterative_stats; pmapper->pvstate = pstate; pmapper->pprocess_func = mapper_stats2_process; diff --git a/c/reg_test/input/Makefile.am b/c/reg_test/input/Makefile.am index 7c7be846c..f9b14e676 100644 --- a/c/reg_test/input/Makefile.am 
+++ b/c/reg_test/input/Makefile.am @@ -4,6 +4,7 @@ EXTRA_DIST= \ a.pprint \ abixy \ abixy-wide \ + abixy-wide-short \ b.csv \ b.pprint \ c.csv \ diff --git a/c/reg_test/input/abixy-wide-short b/c/reg_test/input/abixy-wide-short new file mode 100644 index 000000000..a8d41ca4f --- /dev/null +++ b/c/reg_test/input/abixy-wide-short @@ -0,0 +1,20 @@ +a=cat,b=pan,i=1,x=0.5117389009583777,y=0.08295224980036853,x2=0.2618767027540883,xy=0.0424498931448654,y2=0.006881075746942741 +a=pan,b=wye,i=2,x=0.5225940442098578,y=0.511678736087022,x2=0.27310453504361476,xy=0.2674002600279053,y2=0.26181512896361225 +a=wye,b=cat,i=3,x=0.8150401717873625,y=0.07989551500795256,x2=0.6642904816271734,xy=0.06511805427712146,y2=0.006383293318385972 +a=dog,b=hat,i=4,x=0.4488733555675044,y=0.5730530513123552,x2=0.20148728933843124,xy=0.25722824606077416,y2=0.32838979961840076 +a=dog,b=pan,i=5,x=0.2946557960430134,y=0.6850437256584863,x2=0.08682203814174191,xy=0.20185210430817294,y2=0.46928490606405937 +a=wye,b=cat,i=6,x=0.048709182664292916,y=0.5851879044762575,x2=0.0023725844758234536,xy=0.02850402453206882,y2=0.34244488354531344 +a=dog,b=hat,i=7,x=0.8500003149528544,y=0.2984098741712895,x2=0.7225005354199517,xy=0.25364848703063775,y2=0.08904845300292483 +a=pan,b=pan,i=8,x=0.616507208914765,y=0.25924335982487057,x2=0.38008113864387366,xy=0.15982540019531707,y2=0.06720711961328732 +a=hat,b=hat,i=9,x=0.33786884067769307,y=0.6036735617015514,x2=0.11415535350088835,xy=0.203962486439877,y2=0.3644217690974368 +a=wye,b=hat,i=10,x=0.3834648944206174,y=0.4999709279216641,x2=0.14704532525301522,xy=0.19172129908885902,y2=0.24997092876684981 +a=pan,b=hat,i=11,x=0.025474999754416028,y=0.7861954915044592,x2=0.0006489756124874967,xy=0.020028329952999087,y2=0.6181033508619382 +a=cat,b=hat,i=12,x=0.6335445699880142,y=0.15467178563525052,x2=0.4013787221612979,xy=0.0979914699195631,y2=0.02392336127159689 
+a=hat,b=wye,i=13,x=0.35922068401384877,y=0.8502678133887914,x2=0.1290394998233774,xy=0.30543378552048117,y2=0.7229553544849566 +a=dog,b=dog,i=14,x=0.5440047442770544,y=0.933608851612059,x2=0.2959411617959433,xy=0.5078876445760125,y2=0.8716254878083876 +a=wye,b=dog,i=15,x=0.4689175303764642,y=0.09048353045392021,x2=0.21988365029436224,xy=0.04242931364019586,y2=0.008187269283405506 +a=pan,b=pan,i=16,x=0.3959177828066379,y=0.6339858483805666,x2=0.15675089074252413,xy=0.25100627142161924,y2=0.4019380559468268 +a=dog,b=hat,i=17,x=0.34033844788864975,y=0.8845934733681523,x2=0.11583025911125516,xy=0.3010611697385466,y2=0.782505613125532 +a=wye,b=wye,i=18,x=0.6770613653962891,y=0.896307226056897,x2=0.4584120925122874,xy=0.6068549942886431,y2=0.8033666434818095 +a=dog,b=wye,i=19,x=0.4865373244199632,y=0.44117766146315884,x2=0.23671856805373653,xy=0.2146493990021416,y2=0.1946377289741016 +a=dog,b=dog,i=20,x=0.3223311725542929,y=0.08115611029827985,x2=0.10389738480022534,xy=0.026159144192390068,y2=0.006586314238746564 diff --git a/c/reg_test/run b/c/reg_test/run index 6f2c65296..5be99b944 100755 --- a/c/reg_test/run +++ b/c/reg_test/run @@ -232,10 +232,14 @@ run_mlr --opprint stats1 -a min,p10,p50,mode,p90,max -f i,x,y -g a,b $indi run_mlr --opprint stats1 -a mean,meaneb,stddev -f i,x,y -g a,b $indir/abixy run_mlr --opprint stats1 -s -a mean,sum,count,min,max,mode -f i,x,y -g a,b $indir/abixy -run_mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 $indir/abixy-wide -run_mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 -g a,b $indir/abixy-wide +run_mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 $indir/abixy-wide +run_mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 -g a,b $indir/abixy-wide +run_mlr --oxtab stats2 -s -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 $indir/abixy-wide-short +run_mlr --oxtab stats2 -s -a linreg-ols,linreg-pca,r2,corr,cov -f 
x,y,xy,y2,x2,x2 -g a,b $indir/abixy-wide-short + run_mlr --opprint step -a rsum,delta,counter -f x,y $indir/abixy run_mlr --opprint step -a rsum,delta,counter -f x,y -g a $indir/abixy + run_mlr --opprint histogram -f x,y --lo 0 --hi 1 --nbins 20 $indir/small # ---------------------------------------------------------------- diff --git a/data/generators/tail-rand-gen.rb b/data/generators/tail-rand-gen.rb new file mode 100644 index 000000000..df60b25a1 --- /dev/null +++ b/data/generators/tail-rand-gen.rb @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby +# For playing with stats1/2 -s +$stdout.sync = true +while true + x = rand() + y = rand() + xy = x*y + puts "x=#{x},y=#{y},xy=#{xy}" + sleep 0.1 +end